<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <!-- Journal-level metadata: identifier, title, electronic ISSN and publisher. -->
                <journal-meta>
                    <journal-id>lisans</journal-id>
                    <journal-title-group>
                        <journal-title>Ana Dili Eğitimi Dergisi</journal-title>
                    </journal-title-group>
                    <issn pub-type="epub">2147-6020</issn>
                    <publisher>
                        <publisher-name>Mehmet KURUDAYIOĞLU</publisher-name>
                    </publisher>
                </journal-meta>
                <article-meta>
                                        <!-- Article identification: DOI, subject categories (English and Turkish)
                                             and the title with its Turkish translation. -->
                                        <article-id pub-id-type="doi">10.16916/aded.1636416</article-id>
                                        <article-categories>
                                            <subj-group xml:lang="en">
                                                <subject>Turkish Education</subject>
                                            </subj-group>
                                            <subj-group xml:lang="tr">
                                                <subject>Türkçe Eğitimi</subject>
                                            </subj-group>
                                        </article-categories>
                                        <title-group>
                                            <article-title>A Turkish Word Frequency Tool: LexiTR Frequency</article-title>
                                            <trans-title-group xml:lang="tr">
                                                <trans-title>Türkçe Sözcük Sıklığı Aracı: LexiTR Sıklık Aracı</trans-title>
                                            </trans-title-group>
                                        </title-group>
            
                                                    <!-- Authors. Each ORCID iD is kept on one line inside <contrib-id>:
                                                         whitespace inside the element is part of the identifier value,
                                                         so the URL must not be preceded by a line break or indentation. -->
                                                    <contrib-group content-type="authors">
                                                        <contrib contrib-type="author">
                                                            <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-7328-7650</contrib-id>
                                                            <name>
                                                                <surname>Sezer</surname>
                                                                <given-names>Taner</given-names>
                                                            </name>
                                                            <aff>MERSIN UNIVERSITY</aff>
                                                        </contrib>
                                                        <contrib contrib-type="author">
                                                            <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4596-1203</contrib-id>
                                                            <name>
                                                                <surname>Karadağ</surname>
                                                                <given-names>Özay</given-names>
                                                            </name>
                                                            <aff>HACETTEPE ÜNİVERSİTESİ</aff>
                                                        </contrib>
                                                    </contrib-group>
                        
                                        <!-- Publication date: 30 April 2025. The <day> and <month> values were
                                             swapped (month "30" is not a valid month) and contradicted the
                                             iso-8601-date attribute; they now agree with it. The attribute uses
                                             the hyphenated YYYY-MM-DD form. Volume, issue and page range follow. -->
                                        <pub-date pub-type="pub" iso-8601-date="2025-04-30">
                                            <day>30</day>
                                            <month>04</month>
                                            <year>2025</year>
                                        </pub-date>
                                        <volume>13</volume>
                                        <issue>2</issue>
                                        <fpage>266</fpage>
                                        <lpage>276</lpage>
                        
                        <!-- Editorial history: received 9 February 2025, accepted 17 March 2025.
                             The <day> and <month> values were swapped (month "17" is not a valid
                             month) and contradicted the iso-8601-date attributes; they now agree.
                             The attributes use the hyphenated YYYY-MM-DD form. -->
                        <history>
                            <date date-type="received" iso-8601-date="2025-02-09">
                                <day>09</day>
                                <month>02</month>
                                <year>2025</year>
                            </date>
                            <date date-type="accepted" iso-8601-date="2025-03-17">
                                <day>17</day>
                                <month>03</month>
                                <year>2025</year>
                            </date>
                        </history>
                                        <!-- Copyright information as supplied for the journal.
                                             NOTE(review): the copyright year is 2013 while the article was
                                             published in 2025; confirm whether this is intentional. -->
                                        <permissions>
                                            <copyright-statement>Copyright © 2013, Journal of Mother Tongue Education</copyright-statement>
                                            <copyright-year>2013</copyright-year>
                                            <copyright-holder>Journal of Mother Tongue Education</copyright-holder>
                                        </permissions>
            
                                                                                                <abstract><p>Word frequency is a fundamental concept in linguistics, computational linguistics, natural language processing (NLP) and language education. Word frequency plays a critical role in understanding the characteristics and usage patterns of a word. This study introduces the &quot;Turkish Word Frequency Tool&quot; (TWFT), developed as part of the LexiTR Project, along with its features. TWFT is based on a balanced corpus consisting of over 193 million words from four distinct text types: academic, social media, fictional, and informative texts. TWFT serves as a scalable online platform that provides researchers with the ability to examine word usage trends across different text types. It enables comprehensive analyses through real-time querying, graphical data representation, and both raw and normalized frequency values. Additionally, it provides API support, presenting word frequency information in a structured format. By filling a significant gap in the existing literature, TWFT aims to establish a consistent, transparent, and comprehensive foundation for linguistic research and natural language processing applications.</p></abstract>
                                                                                                                                    <trans-abstract xml:lang="tr">
                            <p>Sözcük sıklığı, dilbilim, bilişimsel dilbilim, doğal dil işleme (NLP) ve dil eğitimi alanlarında temel bir kavramdır. Sözcük sıklığı bir sözcüğün özelliklerini ve kullanım eğilimlerini anlamada kritik bir rol oynamaktadır. Bu çalışmada, LexiTR Projesi kapsamında geliştirilen &quot;Türkçe Sözcük Sıklığı Aracı&quot; (TSSA) ve özellikleri tanıtılmaktadır. TSSA, akademik, sosyal medya, kurgusal ve bilgilendirici metinler olmak üzere dört farklı türden oluşan 193 milyondan fazla sözcük içeren dengeli bir derleme dayanmaktadır. TSSA, araştırmacılara farklı metin türleri arasında sözcük kullanım eğilimlerini inceleme olanağı sunan, gerçek zamanlı sorgulama, grafiksel veri gösterimi, ham ve normalize edilmiş sıklık değerleri ile kapsamlı analiz imkânı sağlayan ölçeklenebilir bir çevrimiçi platformdur. Ayrıca, sağladığı API desteği ile sözcüğe ilişkin sıklık bilgilerini yapılandırılmış bir formatta sunmaktadır. Mevcut literatürdeki önemli bir boşluğu dolduran TSSA dilbilim araştırmaları ile doğal dil işleme uygulamaları için tutarlı, şeffaf ve kapsamlı bir temel oluşturmayı hedeflemektedir.</p></trans-abstract>
                                                            
            
                                                            <!-- English keywords. The language is declared explicitly to mirror the
                                                                 Turkish keyword group, and keyword text carries no leading whitespace
                                                                 (whitespace inside <kwd> is part of the keyword value). -->
                                                            <kwd-group xml:lang="en">
                                                                <kwd>Frequency</kwd>
                                                                <kwd>lexicon</kwd>
                                                                <kwd>tokenization</kwd>
                                                                <kwd>TS Tokenizer</kwd>
                                                                <kwd>LexiTR</kwd>
                                                            </kwd-group>
                                                        
                                                                            <!-- Turkish keywords. Keyword text carries no leading whitespace
                                                                                 (whitespace inside <kwd> is part of the keyword value). -->
                                                                            <kwd-group xml:lang="tr">
                                                                                <kwd>Sıklık</kwd>
                                                                                <kwd>sözcük listesi</kwd>
                                                                                <kwd>birimlendirme</kwd>
                                                                                <kwd>TS Tokenizer</kwd>
                                                                                <kwd>LexiTR</kwd>
                                                                            </kwd-group>
                                                                                                            </article-meta>
    </front>
    <back>
                            <!-- Reference list. publication-type reflects the kind of cited work
                                 (journal, book, thesis, confproc, data) instead of labelling every
                                 entry "journal"; book chapters are typed as "book". Citation text is
                                 kept as supplied except for two spelling fixes (ref20 "İstanbul",
                                 ref31 "Mendeley Data"). Each <mixed-citation> stays on a single line
                                 because whitespace inside it is part of the citation text. -->
                            <ref-list>
                                <ref id="ref1">
                                    <label>1</label>
                                    <mixed-citation publication-type="journal">Akın, A. A. ve Akın, M. D. (2007). Zemberek, an open source NLP framework for Turkic languages. Structure, 10(2007), 1-5.</mixed-citation>
                                </ref>
                                <ref id="ref2">
                                    <label>2</label>
                                    <mixed-citation publication-type="journal">Arslan, K. ve Bay, Y. (2023). İlkokul Türkçe ders kitaplarının söz varlığı bakımından incelenmesi. Turkish Journal of Primary Education, 8(1), 14-27.</mixed-citation>
                                </ref>
                                <ref id="ref3">
                                    <label>3</label>
                                    <mixed-citation publication-type="journal">Baş, B. (2011). Söz varlığı ile ilgili çalışmalarda kullanılacak ölçütler. Türklük Bilimi Araştırmaları, (29), 27-61.</mixed-citation>
                                </ref>
                                <ref id="ref4">
                                    <label>4</label>
                                    <mixed-citation publication-type="journal">Başaran, B. (2022). Measuring word frequency in language teaching textbooks using LexiTürk. International Online Journal of Education and Teaching (IOJET), 9(1), 571-583.</mixed-citation>
                                </ref>
                                <ref id="ref5">
                                    <label>5</label>
                                    <mixed-citation publication-type="journal">Çal, A. (2015). Türkiye’de farklı dönemlere ait kelime sıklığı çalışmaları üzerine bir değerlendirme. Turkish Studies: International Periodical for the Languages, Literature and History of Turkish or Turkic, 10(8), 715-730.</mixed-citation>
                                </ref>
                                <ref id="ref6">
                                    <label>6</label>
                                    <mixed-citation publication-type="journal">Çınar, İ. ve İnce, B. (2015). Türkçe ve Türk kültürü ders kitaplarındaki söz varlığına derlem temelli bir bakış. International Journal of Languages&#039; Education and Teaching, 3(1), 198-209.</mixed-citation>
                                </ref>
                                <ref id="ref7">
                                    <label>7</label>
                                    <mixed-citation publication-type="journal">Davies, M. (2009). The 385+ million word Corpus of Contemporary American English (1990–2008+): Design, architecture, and linguistic insights. International journal of corpus linguistics, 14(2), 159-190.</mixed-citation>
                                </ref>
                                <ref id="ref8">
                                    <label>8</label>
                                    <mixed-citation publication-type="book">Douglas, B. (1995). Dimensions of register variation: A cross-linguistic comparison. Cambridge: Cambridge University Press.</mixed-citation>
                                </ref>
                                <ref id="ref9">
                                    <label>9</label>
                                    <mixed-citation publication-type="journal">Evler, D. ve Aksoy, E. (2024). Şermin Yaşar&#039;ın çocuklara yönelik eserlerinde söz varlığı. SEBED, 2(1), 1-15.</mixed-citation>
                                </ref>
                                <ref id="ref10">
                                    <label>10</label>
                                    <mixed-citation publication-type="book">Göz, İ. (2003). Yazılı Türkçenin kelime sıklığı sözlüğü. Ankara: Türk Dil Kurumu Yayınları.</mixed-citation>
                                </ref>
                                <ref id="ref11">
                                    <label>11</label>
                                    <mixed-citation publication-type="journal">Gürler, H. ve Yıldız, M. (2024). Doğan Kardeş Dergisinin söz varlığı üzerine bir araştırma. Milli Eğitim Dergisi, 53(242), 969-996.</mixed-citation>
                                </ref>
                                <ref id="ref12">
                                    <label>12</label>
                                    <mixed-citation publication-type="book">Hankamer, J. (1989). Morphological parsing and the lexicon. In W. Marslen-Wilson (Ed.), Lexical representation and process (pp. 392-408). United States: MIT Press.</mixed-citation>
                                </ref>
                                <ref id="ref13">
                                    <label>13</label>
                                    <mixed-citation publication-type="journal">Inkelas, S., Küntay, A., Orgun, O. ve Sprouse, R. (2000). Turkish electronic living lexicon (TELL). Turkic Languages, 4, 253-275.</mixed-citation>
                                </ref>
                                <ref id="ref14">
                                    <label>14</label>
                                    <mixed-citation publication-type="thesis">Karadağ, Ö. (2005). İlköğretim I. kademe öğrencilerinin kelime hazinesi üzerine bir araştırma (Unpublished doctoral dissertation). Gazi University, Institute of Educational Sciences, Ankara.</mixed-citation>
                                </ref>
                                <ref id="ref15">
                                    <label>15</label>
                                    <mixed-citation publication-type="thesis">Kurudayıoğlu, M. (2005). İlköğretim II. kademe öğrencilerinin kelime hazinesi üzerine bir araştırma (Unpublished doctoral dissertation). Gazi University, Institute of Educational Sciences, Ankara.</mixed-citation>
                                </ref>
                                <ref id="ref16">
                                    <label>16</label>
                                    <mixed-citation publication-type="book">Leech, G. N. (2011). Frequency, corpora and language learning. In A taste for corpora: In honour of Sylviane Granger (pp. 7-32). Netherlands: John Benjamins Publishing Company.</mixed-citation>
                                </ref>
                                <ref id="ref17">
                                    <label>17</label>
                                    <mixed-citation publication-type="book">McEnery, T. ve Andrew H. (2011). Corpus linguistics: Method, theory and practice. Cambridge: Cambridge University Press.</mixed-citation>
                                </ref>
                                <ref id="ref18">
                                    <label>18</label>
                                    <mixed-citation publication-type="thesis">Ölker, G. (2011). Yazılı Türkçenin kelime sıklığı sözlüğü (1945-1950 arası) (Unpublished doctoral dissertation). Selçuk University, Institute of Social Sciences, Konya.</mixed-citation>
                                </ref>
                                <ref id="ref19">
                                    <label>19</label>
                                    <mixed-citation publication-type="book">Popescu, I. I., Mačutek, J. ve Altmann, G. (2009). Aspects of word frequencies. Lüdenscheid: RAM-Verlag.</mixed-citation>
                                </ref>
                                <ref id="ref20">
                                    <label>20</label>
                                    <mixed-citation publication-type="book">Pilten-Ufuk, Ş. (2021). Derlem dilbilim ve edebiyat çalışmalarının kesişim noktası: Derlem biçem bilimi. Ö. Solak ve S. Doykun (Ed.), Disiplinlerarası edebiyat çalışmaları içinde (ss. 145-171). İstanbul: Paradigma Akademi Yayın.</mixed-citation>
                                </ref>
                                <ref id="ref21">
                                    <label>21</label>
                                    <mixed-citation publication-type="confproc">Rust, P., Pfeiffer, J., Vulić, I., Ruder, S. ve Gurevych, I. (2020). How good is your tokenizer? On the monolingual performance of multilingual language models. In Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (pp. 3118-3135). Association for Computational Linguistics, Bangkok.</mixed-citation>
                                </ref>
                                <ref id="ref22">
                                    <label>22</label>
                                    <mixed-citation publication-type="confproc">Rychlý, P. ve Spalek, S. (2022, December). Utok: The fast rule-based tokenizer. In Proceedings of recent advances in Slavonic natural language processing. (pp. 149-154). South Moravia: Tribun EU.</mixed-citation>
                                </ref>
                                <ref id="ref23">
                                    <label>23</label>
                                    <mixed-citation publication-type="book">Schützler, O. (2023). Frequencies in corpus linguistics: Issues of scaling and visualisation. In Data visualization in corpus linguistics: Critical reflections and future directions. Helsinki: Varieng.</mixed-citation>
                                </ref>
                                <ref id="ref24">
                                    <label>24</label>
                                    <mixed-citation publication-type="confproc">Sezer, T., Sezer, B. ve Üniversitesi, M. (2013, May). TS corpus: Herkes için Türkçe derlem. In Proceedings of the 27th national linguistics conference (pp. 217-225).</mixed-citation>
                                </ref>
                                <ref id="ref25">
                                    <label>25</label>
                                    <mixed-citation publication-type="journal">Sezer, T. (2016). Tweets corpus: Building a corpus by social media. Journal of National Education and Social Sciences, 210, 621-633.</mixed-citation>
                                </ref>
                                <ref id="ref26">
                                    <label>26</label>
                                    <mixed-citation publication-type="journal">Sezer, T. (2017). TS corpus project: An online Turkish dictionary and TS DIY corpus. European Journal of Language and Literature Studies, 3(3), 18-24.</mixed-citation>
                                </ref>
                                <ref id="ref27">
                                    <label>27</label>
                                    <mixed-citation publication-type="data">Sezer, T. (2021). TS Corpus word list (Version 001) [Data set]. TS Corpus. Erişim adresi: https://doi.org/10.57672/B6M8-8333</mixed-citation>
                                </ref>
                                <ref id="ref28">
                                    <label>28</label>
                                    <mixed-citation publication-type="book">Sinclair, J. (1991). Corpus, concordance, collocation. Oxford: Oxford University Press.</mixed-citation>
                                </ref>
                                <ref id="ref29">
                                    <label>29</label>
                                    <mixed-citation publication-type="journal">Soliman, R. ve Familiar, L. (2024). Creating a CEFR Arabic vocabulary profile: A frequency-based multi-dialectal approach. Critical Multilingualism Studies, 11(1), 266-286.</mixed-citation>
                                </ref>
                                <ref id="ref30">
                                    <label>30</label>
                                    <mixed-citation publication-type="book">Törenli, N. ve Kıyan, Z. (2023). The importance of sustainable communication in the covid-19 period: The case of Turkey. In SDG18 Communication for All, Volume 2: Regional perspectives and special cases (pp. 225-246). Springer International.</mixed-citation>
                                </ref>
                                <ref id="ref31">
                                    <label>31</label>
                                    <mixed-citation publication-type="data">Tüfekçi, P. (2020). Turkish dataset for identification of author gender [Data set]. Mendeley Data. https://doi.org/10.17632/8f93rjhgjk.1</mixed-citation>
                                </ref>
                                <ref id="ref32">
                                    <label>32</label>
                                    <mixed-citation publication-type="confproc">Webster, J. J. ve Kit, C. (1992). Tokenization as the initial phase in NLP. In Proceedings of COLING 1992, Volume 4: The 14th International Conference on Computational Linguistics (pp. 1106-1110).</mixed-citation>
                                </ref>
                                <ref id="ref33">
                                    <label>33</label>
                                    <mixed-citation publication-type="book">Xu, J. (2022). A historical overview of using corpora in English language teaching. In The Routledge handbook of corpora and English language teaching and learning (pp. 11-25). England: Routledge.</mixed-citation>
                                </ref>
                            </ref-list>
                    </back>
    </article>
