<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article article-type="research-article" dtd-version="1.4">
            <front>

                <journal-meta>
                                                                <journal-id>saucis</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Sakarya University Journal of Computer and Information Sciences</journal-title>
            </journal-title-group>
                                        <issn pub-type="epub">2636-8129</issn>
                                                                                            <publisher>
                    <publisher-name>Sakarya University</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.35377/saucis...1722643</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Computer Software</subject>
                                                            <subject>Software Engineering (Other)</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Bilgisayar Yazılımı</subject>
                                                            <subject>Yazılım Mühendisliği (Diğer)</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                                                            <article-title>Fine-tuning Large Language Models for Turkish Flutter Code Generation</article-title>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0009-0000-3077-673X</contrib-id>
                                                                <name>
                                    <surname>Uluırmak</surname>
                                    <given-names>Bugra</given-names>
                                </name>
                                                                    <aff>ABDULLAH GUL UNIVERSITY, FACULTY OF ENGINEERING</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-0277-2210</contrib-id>
                                                                <name>
                                    <surname>Kurban</surname>
                                    <given-names>Rifat</given-names>
                                </name>
                                                                    <aff>ABDULLAH GUL UNIVERSITY, FACULTY OF ENGINEERING</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="20251229">
                    <day>12</day>
                    <month>29</month>
                    <year>2025</year>
                </pub-date>
                                        <volume>8</volume>
                                        <issue>4</issue>
                                        <fpage>637</fpage>
                                        <lpage>650</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="20250618">
                        <day>06</day>
                        <month>18</month>
                        <year>2025</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="20250714">
                        <day>07</day>
                        <month>14</month>
                        <year>2025</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 2018, Sakarya University Journal of Computer and Information Sciences</copyright-statement>
                    <copyright-year>2018</copyright-year>
                    <copyright-holder>Sakarya University Journal of Computer and Information Sciences</copyright-holder>
                </permissions>
            
            <abstract><p>The rapid advancement of large language models (LLMs) for code generation has largely centered on English programming queries. This paper addresses a low-resource language scenario, specifically Turkish, in the context of Flutter mobile app development. In this study, two representative LLMs (a 4B-parameter multilingual model and a 3B-parameter code-specialized model) are fine-tuned on a new Turkish question-and-answer dataset for Flutter/Dart. Fine-tuning with parameter-efficient techniques yields dramatic improvements in code generation quality: Bilingual Evaluation Understudy (BLEU), Recall-Oriented Understudy for Gisting Evaluation (ROUGE-L), Metric for Evaluation of Translation with Explicit Ordering (METEOR), Bidirectional Encoder Representations from Transformers Score (BERTScore), and CodeBLEU scores all increase significantly. The rate of correct solutions rises from approximately 30–70% for the base models to 80–90% after fine-tuning. An analysis of the performance trade-offs between the models shows that the multilingual model slightly outperforms the code-focused model in accuracy after fine-tuning, whereas the code-focused model offers faster inference. These results demonstrate that, even with very limited non-English training data, customizing LLMs can bridge the gap in code generation and provide Turkish developers with high-quality assistance comparable to that available in English. The dataset has been released on GitHub to facilitate further research in multilingual code generation.</p></abstract>
                                                            
            
                                                                                        <kwd-group>
                                                    <kwd>Code generation</kwd>
                                                    <kwd>Large language models</kwd>
                                                    <kwd>Fine-tuning</kwd>
                                                    <kwd>Low-resource languages</kwd>
                                                    <kwd>Flutter</kwd>
                                            </kwd-group>
                            
                                                                                                                                                    </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">J. He, C. Zhou, X. Ma, T. Berg-Kirkpatrick, &amp; G. Neubig, &quot;Towards a unified view of parameter-efficient transfer learning&quot;, 2021. doi: 10.48550/arxiv.2110.04366</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">N. Houlsby, A. Giurgiu, S. Jastrzȩbski, B. Morrone, Q. Laroussilhe, A. Gesmundoet al., &quot;Parameter-efficient transfer learning for nlp&quot;, 2019. doi: 10.48550/arxiv.1902.00751</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">X. Liu, P. He, W. Chen, &amp; J. Gao, &quot;Multi-task deep neural networks for natural language understanding&quot;, 2019. doi: 10.18653/v1/p19-1441</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">M. Anschütz, D. Lozano, &amp; G. Groh, &quot;This is not correct! negation-aware evaluation of language generation systems&quot;, 2023. doi: 10.18653/v1/2023.inlg-main.12</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Lodha, G. Belapurkar, S. Chalkapurkar, Y. Tao, R. Ghosh, S. Basuet al., &quot;On surgical fine-tuning for language encoders&quot;, 2023. doi: 10.18653/v1/2023.findings-emnlp.204</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">J. Hu, Y. Shen, P. Wallis, Z. Allen-Zhu, Y. Li, S. Wanget al., &quot;Lora: low-rank adaptation of large language models&quot;, 2021. doi: 10.48550/arxiv.2106.09685</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Y. Hu, Y. Xie, T. Wang, M. Chen, &amp; Z. Pan, &quot;Structure-aware low-rank adaptation for parameter-efficient fine-tuning&quot;, Mathematics, vol. 11, no. 20, p. 4317, 2023. doi: 10.3390/math11204317</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">N. Dhinagar, S. Ozarkar, K. Buwa, S. Thomopoulos, C. Owens‐Walton, E. Laltooet al., &quot;Parameter efficient fine-tuning of transformer-based masked autoencoder enhances resource constrained neuroimage analysis&quot;, 2025. doi: 10.1101/2025.02.15.638442</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">H. Wu, &quot;Large language models capsule: a research analysis of in-context learning (icl) and parameter-efficient fine-tuning (peft) methods&quot;, Applied and Computational Engineering, vol. 43, no. 1, pp. 327-331, 2024. doi: 10.54254/2755-2721/43/20230858</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">N. Sulaiman and F. Hamzah, &quot;Optimizing llama 7b for medical question answering: a study on fine-tuning strategies and performance on the multimedqa dataset&quot;, 2024. doi: 10.31219/osf.io/g5aes</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">J. Bogaert, E. Jean, C. Bodt, &amp; F. Standaert, &quot;Fine-tuning is not (always) overfitting artifacts&quot;, 2023. doi: 10.14428/esann/2023.es2023-152</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">G. Wiedemann, S. Yimam, &amp; C. Biemann, &quot;Uhh-lt at semeval-2020 task 12: fine-tuning of pre-trained transformer networks for offensive language detection&quot;, pp. 1638-1644, 2020. doi: 10.18653/v1/2020.semeval-1.213</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Aghajanyan, S. Gupta, &amp; L. Zettlemoyer, &quot;Intrinsic dimensionality explains the effectiveness of language model fine-tuning&quot;, 2021. doi: 10.18653/v1/2021.acl-long.568</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">L. Feng, Y. Yang, M. Tan, T. Zeng, Z. Li, H. Tanget al., &quot;Adaptive multi-source domain collaborative fine-tuning for transfer learning&quot;, 2023. doi: 10.20944/preprints202311.0124.v1</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">F. Ullah, U. Azam, A. Faheem, F. Kamiran, &amp; A. Karim, &quot;Comparing prompt-based and standard fine-tuning for urdu text classification&quot;, pp. 6747-6754, 2023. doi: 10.18653/v1/2023.findings-emnlp.449</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">M. Mosbach, M. Andriushchenko, &amp; D. Klakow, &quot;On the stability of fine-tuning bert: misconceptions, explanations, and strong baselines&quot;, 2020. doi: 10.48550/arxiv.2006.04884</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">X. Li and P. Liang, &quot;Prefix-tuning: optimizing continuous prompts for generation&quot;, 2021. doi: 10.18653/v1/2021.acl-long.353</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">X. Ma, C. Santos, &amp; A. Arnold, &quot;Contrastive fine-tuning improves robustness for neural rankers&quot;, 2021. doi: 10.18653/v1/2021.findings-acl.51</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">L. Pan, C. Hang, A. Sil, &amp; S. Potdar, &quot;Improved text classification via contrastive adversarial training&quot;, 2021. doi: 10.48550/arxiv.2107.10137</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Chen M., Tworek J., Jun H., Kaplan J., Yuan Q. and Zarinelli E., “Evaluating Large Language Models Trained on Code”, arXiv preprint arXiv:2107.03374, (2021). doi: 10.48550/arXiv.2107.03374</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Xu X., Sharma P., Kinne J. F., O’Neill M., Mazaitis K. and Bhatia S., “A Systematic Evaluation of Large Language Models of Code”, Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI), 662-678, (2022). doi: 10.48550/arXiv.2202.13169</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Wang Z., Cuenca G., Zhou S., Chen T., Lin B. and Matsuo Y., “MCoNaLa: A Benchmark for Code Generation from Multiple Natural Languages”, Findings of the Association for Computational Linguistics: EACL 2023, 265-273, (2023). doi: 10.48550/arXiv.2203.08388</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Cassano F., Gouwar J., Nguyen D., Bartolo M., Serrano S. and Sabour A., “MultiPL-E: A Scalable and Extensible Approach to Benchmarking Neural Code Generation”, arXiv preprint arXiv:2208.08227, (2022). doi: 10.48550/arXiv.2208.08227</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
