<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <journal-meta>
                                    <journal-id></journal-id>
            <journal-title-group>
                                                                                    <journal-title>Politeknik Dergisi</journal-title>
            </journal-title-group>
                                        <issn pub-type="epub">2147-9429</issn>
                                                                                            <publisher>
                    <publisher-name>Gazi Üniversitesi</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.2339/politeknik.1085512</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Engineering</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Mühendislik</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                        <trans-title-group xml:lang="tr">
                                    <trans-title>Türkçe Otomatik Konuşma Tanıma Sistemi için Dil Modeli Optimizasyon Yöntemi</trans-title>
                                </trans-title-group>
                                                                                                                                                                                                <article-title>A Language Model Optimization Method for Turkish Automatic Speech Recognition System</article-title>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3880-3039</contrib-id>
                                                                <name>
                                    <surname>Oyucu</surname>
                                    <given-names>Saadin</given-names>
                                </name>
                                                                    <aff>ADIYAMAN ÜNİVERSİTESİ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4128-2625</contrib-id>
                                                                <name>
                                    <surname>Polat</surname>
                                    <given-names>Hüseyin</given-names>
                                </name>
                                                                    <aff>GAZI UNIVERSITY, FACULTY OF TECHNOLOGY</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="2023-10-01">
                    <day>01</day>
                    <month>10</month>
                    <year>2023</year>
                </pub-date>
                                        <volume>26</volume>
                                        <issue>3</issue>
                                        <fpage>1167</fpage>
                                        <lpage>1178</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="2022-03-10">
                        <day>10</day>
                        <month>03</month>
                        <year>2022</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="2022-07-06">
                        <day>06</day>
                        <month>07</month>
                        <year>2022</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 1998, Politeknik Dergisi</copyright-statement>
                    <copyright-year>1998</copyright-year>
                    <copyright-holder>Politeknik Dergisi</copyright-holder>
                </permissions>
            
                                                                                                <trans-abstract xml:lang="tr">
                            <p>Türkçe gibi sınırlı kaynaklara sahip dillerle karşı karşıya kaldığında mevcut Otomatik Konuşma Tanıma (ASR: Automatic Speech Recognition) modelleme stratejisi hala büyük bir performans düşüşü yaşıyor. Özellikle Dil modeli, akustik modeli yeterince desteklemediğinde Kelime Hata Oranı (WER: Word Error Rate) yükselmektedir. Bu yüzden, sağlam bir Dil modeli (LM: Language Model) mevcut corpus&#039;dan kelime bağıntıları oluşturarak ASR performansını iyileştirmeye güçlü bir katkı sağlar. Ancak Türkçenin sondan eklemeli yapısı nedeniyle sağlam bir dil modeli geliştirmek zorlu bir görevdir. Bu çalışmada, sınırlı kaynaklara sahip Türkçe ASR&#039;nin WER performansını iyileştirmek için cümle düzeyinde bir LM optimizasyon yöntemi önerilmiştir. Önerilen yöntemde Markov varsayımlarından elde edilen sabit bir kelime dizisi yerine, kelime dizisinin cümle oluşturma olasılığı hesaplanmıştır. Kelime dizisi olasılığını elde etmek için n-gram ve atlama gramı özelliklerine sahip bir yöntem sunulmuştur. Önerilen yöntem hem istatistiksel hem de Yapay Sinir Ağı (ANN: Artificial Neural Network) tabanlı LM&#039;ler üzerinde test edilmiştir. Sadece kelimeler değil, aynı zamanda alt kelime seviyesi kullanılarak yapılan deneylerde, Dilsel Veri Konsorsiyumu (LDC: Linguistic Data Consortium) aracılığıyla paylaşılan iki Türkçe korpus (ODTÜ ve Boğaziçi) ve HS olarak adlandırdığımız özel olarak oluşturduğumuz ayrı bir korpus kullanılmıştır. İstatistik tabanlı LM&#039;den elde edilen deneysel sonuçlara göre, ODTÜ korpusda %0,5 WER artışı, Boğaziçi korpusda %1,6 WER azalması ve HS korpusta %2,5 WER azalması gözlemlenmiştir. İleri Beslemeli Sinir Ağları tabanlı LM&#039;de ODTÜ korpusda %0,2, Boğaziçi korpusda %0,8 ve HS korpusda %1,6 WER düşüşleri gözlendi. Ayrıca Tekrarlayan Sinir Ağı - Uzun Kısa Süreli Bellek tabanlı LM&#039;de ODTÜ korpusda %0,6, Boğaziçi korpusda %1,1 ve HS korpusda %1,5 WER düşüşleri gözlendi. 
Sonuç olarak önerilen yöntem Türkçe ASR’de kullanılan LM&#039;lere uygulandığında WER azalmış ve ASR&#039;nin toplam performansı artmıştır.</p></trans-abstract>
                                                                                                                                    <abstract><p>The current Automatic Speech Recognition (ASR) modeling strategy still suffers from huge performance degradation when faced with languages with limited resources such as Turkish. Especially when the Language Model (LM) does not support the Acoustic Model (AM) sufficiently, the Word Error Rate (WER) increases. Therefore, a robust LM makes a strong contribution to improving ASR performance by generating word relations from the existing corpus. However, developing a robust language model is a challenging task due to the agglutinative nature of Turkish. Therefore, within the scope of the study, a sentence-level LM optimization method is proposed to improve the WER performance of Turkish ASR. In the proposed method, instead of a fixed word sequence obtained from the Markov assumptions, the probability of the word sequence forming a sentence was calculated. A method with n-gram and skip-gram properties is presented to obtain the word sequence probability. The proposed method has been tested on both statistical and Artificial Neural Network (ANN) based LMs. In the experiments carried out using, not only words but also sub-word level, two Turkish corpora (METU and Bogazici) shared via Linguistic Data Consortium (LDC) and a separate corpus that we specially created, named HS, were used. According to the experimental results obtained from statistical-based LM, 0.5% WER increases for the METU corpus, 1.6% WER decreases for the Bogazici corpus, and a 2.5% WER decrease for the HS corpus were observed. In the Feedforward Neural Networks (FNN) based LM, WER decreases were observed 0.2% for the METU corpus, 0.8% for the Bogazici corpus, and 1.6% for the HS corpus. 
Also, in the Recurrent Neural Network (RNN)-Long Short Term Memory (LSTM) based LM, WER decreases were observed 0.6% for METU corpus, 1.1% for the Bogazici corpus and 1.5% for the HS corpus. As a result, when the proposed method was applied to the LMs required for ASR, WER decreased, and the total performance of ASR increased.</p></abstract>
                                                            
            
                                                                                        <kwd-group>
                                                    <kwd>Turkish Automatic speech recognition</kwd>
                                                    <kwd>Turkish language model</kwd>
                                                    <kwd>Turkish language model score optimization</kwd>
                                                    <kwd>Turkish corpus</kwd>
                                            </kwd-group>
                            
                                                <kwd-group xml:lang="tr">
                                                    <kwd>Türkçe Otomatik konuşma tanıma</kwd>
                                                    <kwd>Türkçe korpus</kwd>
                                                    <kwd>Türkçe dil modelleme</kwd>
                                                    <kwd>Türkçe dil modeli optimizasyonu</kwd>
                                            </kwd-group>
                                                                                                                                        </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">[1]	Hamdan P., Ridi F., Rudy H., “Indonesian automatic speech recognition system using CMUSphinx toolkit and limited dataset”, International Symposium on Electronics and Smart Devices, 283-286 (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">[2]	Kelebekler E., İnal M., “Otomobil içindeki cihazların sesle kontrolüne yönelik konuşma tanıma sisteminin gerçek zamanlı laboratuar uygulaması”, Politeknik Dergisi, 2: 109-114, (2008).</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">[3]	Avuçlu E., Özçiftçi A., Elen A., “An application to control media player with voice commands”, Politeknik Dergisi, 23(4): 1311-1315, (2020).</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">[4]	Burunkaya M. ve Dijle M., “Yerleşik ve gömülü uygulamalarda kontrol işlemleri ve pc’de yazı yazmak için kullanabilen düşük maliyetli genel amaçlı bir konuşma tanılama sistemi”, Politeknik Dergisi, 
21(2): 477-488, (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">[5]	Yajie, M., “Kaldi+PDNN: building DNN-based ASR systems with kaldi and PDNN”, arXiv:1401.6984 (2014).</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">[6]	Davis, S., Mermelstein, P., “Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences”, IEEE Transactions on Acoustics, Speech, and Signal 
Processing, 28, 357–366 (1980).</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">[7]	Shreya, N., Divya, G., “International journal of computer science and mobile computing speech feature extraction techniques: a review”, International Journal Computer Science Mobil Computer, 
4, 107-114 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">[8]	Tombaloǧlu, B., Erdem, H., “Development of a MFCC-SVM based Turkish speech recognition system”, 24th Signal Processing Communication Applied Conference, 1-4 (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">[9]	Dave, N., “Feature extraction methods LPC, PLP and MFCC”, International Journal for Advance Research in Engineering and Technology, 1, 1-5 (2013).</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">[10]	Harshita, G., Divya, G., “LPC and LPCC method of feature extraction in speech recognition system”, International Conference Cloud System Big Data Engineering Confluence, 498-502 (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">[11]	Geoffrey, H., Li, D., Dong, Y., George, E. D., Abdel-rahman, M., Navdeep, J., Andrew, S., Vincent, V., Patrick, N., Tara, N. S., Brian, K., “Deep neural networks for acoustic modeling in speech recognition: 
The shared views of four research groups”, IEEE Signal Processing Magazine, 29, 82-97 (2012).</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">[12]	Ussen, A. K., Osman, B., “Turkish speech recognition based on deep neural networks”, Suleyman Demirel University Journal of Natural and Applied Sciences, 22, 319-329 (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">[13]	Longfei, L., Yong, Z., Dongmei, J., Yanning, Z., Fengna, W., Isabel, G., Enescu, V., Hichem, S., “Hybrid deep neural network - hidden markov model (DNN-HMM) based speech emotion recognition”, 
Humaine Association Conference on Affective Computing and Intelligent Interaction. 312-317 (2013).</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">[14]	Xavier, L. A., “An overview of decoding techniques for large vocabulary continuous speech recognition”, Computer Speech Language, 16, 89-114 (2002).</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">[15]	Stolcke, A., “Entropy-based pruning of backoff language models”, arXiv:cs/0006025 (2000).</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">[16]	Biljana, P., Sinisa, I., “Recognition of vowels in continuous speech by using formants”, Facta Universitatis - Series: Electronics and Energetics, 379-393 (2010).</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">[17]	Haşim, S., Murat, S., Tunga, G., “Morpholexical and discriminative language models for Turkish automatic speech recognition”, IEEE Transaction Audio, Speech-Language Processing, 20, 2341-2351 
(2012).</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">[18]	Berlin, C., Jia-Wen, L., “Discriminative language modeling for speech recognition with relevance information”, IEEE International Conference on Multimedia and Expo, 1-4 (2001).</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">[19]	Ahmet, A. A., Cemil, D., Mehmet, U. D., “Improving sub-word language modeling for Turkish speech recognition”, Signal Processing and Communications Applications Conference, 1-4 (2012).</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">[20]	Asefisaray, B., “End-to-end speech recognition model: experiments in Turkish”, Ph. D. Dissertation, University of Hacettepe, Ankara, Turkey (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">[21]	Anusuya, M., Katti, S., “Speech recognition by machine: a review”, International journal of Computer Science and Information Security, 6, 181-205 (2009).</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">[22]	Dikici, E., Saraçlar, M., “Semi-supervised and unsupervised discriminative language model training for automatic speech recognition”, Speech Communication, 83, 54-63 (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">[23]	Kazuki, I., Zoltan, T., Tamer, A., Ralf, S., Hermann. N., “LSTM, GRU, highway and a bit of attention: An empirical overview for language modeling in speech recognition”, Annual Conference of the 
International Speech Communication Association, 3519-3523 (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">[24]	Siddharth, D., Xinjian, L., Florian, M., Alan, W. B., “Domain robust feature extraction for rapid low resource ASR development”, ArXiv: 1807.10984v2 (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">[25]	Hirofumi, I., Jaejin, C., Murali, K. B., Tatsuya, K., Shinji, W., “Transfer learning of language-independent end-to-end ASR with language model fusion”, IEEE International Conference on Acoustics, 
Speech and Signal Processing, 6096-6100 (2019).</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">[26]	Peter, F. B., Vincent, J., Della, P., Peter, V., Jenifer, C., Robert, L. M., “Class-Based N-gram models of natural language”, Computer Linguistic, 14-18 (1990).</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">[27]	Martin, S., Hermann, N., Ralf, S., “From feedforward to recurrent LSTM neural networks for language modeling”, IEEE Trans Audio, Speech Lang Processing, 23, 517-529 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">[28]	Tomas, M., Martin, K., Lukás, B., Jan, Č., Sanjeev, K., “Recurrent neural network based language model”, Annual Conference of the International Speech Communication Association, 1045-1048 
(2010).</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">[29]	Han, Z., Zhengdong, L., Pascal, P., “Self-adaptive hierarchical sentence model”, arXiv:1504.05070 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">[30]	WimDe, M., Steven, B., Marie-Francine, M., “A survey on the application of recurrent neural networks to statistical language modeling”, Computer Speech Language, 30, 61-98 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">[31]	Popova, I., Stepanova, E., “Estimation of inorganic phosphate in presence of phosphocarbohydrates (Russian)”, Vopr Meditsinskoj Khimii, 2, 135-139 (1977).</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">[32]	Jen-Tzung, C., Yuan-Chu, K., “Bayesian recurrent neural network language model”, IEEE Spoken Language Technology Workshop, 206-211 (2014).</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">[33]	Ebru, A., Abhinav, S., Bhuvana, R., Stanley, C., “Bidirectional recurrent neural network language models for automatic speech recognition”, International Conference on Acoustics, Speech and 
Signal Processing, 5421-5425 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref34">
                        <label>34</label>
                        <mixed-citation publication-type="journal">[34]	Ahmet, A. A., Mehmet, D., “Zemberek, an open source NLP framework for Turkic Languages”, Structure, 1, 1-5 (2007).</mixed-citation>
                    </ref>
                                    <ref id="ref35">
                        <label>35</label>
                        <mixed-citation publication-type="journal">[35]	Xuedong, H., Li, D., “An overview of modern speech recognition.” Handbook natural language process. Microsoft Corporation. 339-367 (2010).</mixed-citation>
                    </ref>
                                    <ref id="ref36">
                        <label>36</label>
                        <mixed-citation publication-type="journal">[36]	Chao, H., Eric, C., Jianlai, Z., Kai-Fu, L., “Accent modeling based on pronunciation dictionary adaptation for large vocabulary Mandarin speech recognition”, International Conference on Spoken 
Language Processing, 818-821 (2000).</mixed-citation>
                    </ref>
                                    <ref id="ref37">
                        <label>37</label>
                        <mixed-citation publication-type="journal">[37]	Erdoǧan, H., Büyük, O., Oflazer, K., “Incorporating language constraints in sub-word based speech recognition”, IEEE Workshop on Automatic Speech Recognition and Understanding, 281-286 
(2005).</mixed-citation>
                    </ref>
                                    <ref id="ref38">
                        <label>38</label>
                        <mixed-citation publication-type="journal">[38]	Daniel, J., James, H.M., “Speech and language processing: an introduction to natural language processing, computational linguistics, and speech recognition”, Journal of Perspectives in 
Public Health, 1, 639-641 (2010).</mixed-citation>
                    </ref>
                                    <ref id="ref39">
                        <label>39</label>
                        <mixed-citation publication-type="journal">[39]	Arisoy, E., Dutaǧaci, H., Arslan, M., L., “A unified language model for large vocabulary continuous speech recognition of Turkish”, Signal Processing, 86, 2844-2862 (2006).</mixed-citation>
                    </ref>
                                    <ref id="ref40">
                        <label>40</label>
                        <mixed-citation publication-type="journal">[40]	David, G., Wei, L., Louise, G., Yorick, W., “A closer look at skip-gram modelling”, International Conference on Language Resources and Evaluation, 148-150 (2006).</mixed-citation>
                    </ref>
                                    <ref id="ref41">
                        <label>41</label>
                        <mixed-citation publication-type="journal">[41]	Shiliang, Z., Hui, J., Mingbin, X., Junfeng, H., Lirong, D., “The fixed-size ordinally-forgetting encoding method for neural network language models”, Annual Meeting of the Association for 
Computational Linguistics, 495-500 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref42">
                        <label>42</label>
                        <mixed-citation publication-type="journal">[42]	Liu, D., Fei, S., Hou, Z. G., Zhang, H., Sun, C., “Advances in neural networks”, Springer-Verlag Berlin Heidelberg. (2007).</mixed-citation>
                    </ref>
                                    <ref id="ref43">
                        <label>43</label>
                        <mixed-citation publication-type="journal">[43]	Eric, B., Jitong, C., Rewon, C,. Adam, C., Yashesh, G., Yi, L., Hairong, L., Sanjeev, S., David, S., Anuroop, S., Zhenyao, Z., “Exploring neural transducers for end-to-end speech recognition”, 
arXiv:1707.07413 (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref44">
                        <label>44</label>
                        <mixed-citation publication-type="journal">[44]	Yiwen, Z., Xuanmin, L., “A speech recognition acoustic model based on LSTM-CTC”, IEEE 18th International Conference on Communication Technology, 1052-1055 (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref45">
                        <label>45</label>
                        <mixed-citation publication-type="journal">[45]	Sepp, H., Schmidhuber, J., “Long short-term memory”, Neural Computation, 9, 1735-1780 (1997).</mixed-citation>
                    </ref>
                                    <ref id="ref46">
                        <label>46</label>
                        <mixed-citation publication-type="journal">[46]	Kyunghyun, C., Dzmitry, B., Fethi, B., Holger, S., Yoshua, B., “Learning phrase representations using RNN encoder-decoder for statistical machine translation”, Conference on Empirical Methods 
in Natural Language Processing, 1724-1734 (2014).</mixed-citation>
                    </ref>
                                    <ref id="ref47">
                        <label>47</label>
                        <mixed-citation publication-type="journal">[47]	Rafal, J., Wojciech, Z., lya, S., “An empirical exploration of recurrent network architectures”, International Conference on International Conference on Machine Learning, 2332-2340 (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref48">
                        <label>48</label>
                        <mixed-citation publication-type="journal">[48]	Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., Hannemann, M., Motlicek, P., Qian, Y., Schwarz, P., Silovsky, J., Stemmer, G., Vesely, K., “The Kaldi speech recognition toolkit”, 
Workshop on Automatic Speech Recognition and Understanding, 1-4 (2011).</mixed-citation>
                    </ref>
                                    <ref id="ref49">
                        <label>49</label>
                        <mixed-citation publication-type="journal">[49]	Stolcke, A., “Srilm - an extensible language modeling toolkit”, International Conference on Spoken Language Processing, 901–904 (2002).</mixed-citation>
                    </ref>
                                    <ref id="ref50">
                        <label>50</label>
                        <mixed-citation publication-type="journal">[50]	Frank, S., Amit, A., “CNTK: Microsoft&#039;s open-source deep-learning toolkit”, 22nd International Conference on Knowledge Discovery and Data Mining, 2135-2135 (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref51">
                        <label>51</label>
                        <mixed-citation publication-type="journal">[51]	Ebru, A., Doğan, C., Sıddıka, P., Haşim, S., Murat, S., “Turkish broadcast news transcription and retrieval”, Transaction Audio, Speech Language Process, 17, 874-883 (2009).</mixed-citation>
                    </ref>
                                    <ref id="ref52">
                        <label>52</label>
                        <mixed-citation publication-type="journal">[52]	Salor, Ö., Pellom, L. B., Ciloglu, T., Demirekler, M., “Turkish speech corpora and recognition tools developed by porting SONIC: Towards multilingual speech recognition”, Computer Speech 
Language, 21, 580-593 (2007).</mixed-citation>
                    </ref>
                                    <ref id="ref53">
                        <label>53</label>
                        <mixed-citation publication-type="journal">[53]	Polat, H., Oyucu, S., “Building a speech and text corpus of Turkish: large corpus collection with initial speech recognition results”, Symmetry, 12, 290-304 (2020).</mixed-citation>
                    </ref>
                                    <ref id="ref54">
                        <label>54</label>
                        <mixed-citation publication-type="journal">[54]	Cem, A., Suha, O., Mutluergil, Hakan, E., “The anatomy of a Turkish speech recognition system”, Signal Processing and Communications Applications Conference, 512-515 (2009).</mixed-citation>
                    </ref>
                                    <ref id="ref55">
                        <label>55</label>
                        <mixed-citation publication-type="journal">[55]	Vesa, S., Teemu, H., Sami, V., “On growing and pruning Kneser–Ney smoothed n-gram models”, Transactions On Audio, Speech, And Language Processing, 15, 1617-1624 (2007).</mixed-citation>
                    </ref>
                                    <ref id="ref56">
                        <label>56</label>
                        <mixed-citation publication-type="journal">[56]	Rongfeng, S., Lan, W., Xunying, L., “Multimodal learning using 3d audio-visual data for audio-visual speech recognition”, International Conference on Asian Language Processing, 40-43 (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref57">
                        <label>57</label>
                        <mixed-citation publication-type="journal">[57]	Ahmed, A., Preslav, N., Peter, B., Steve, R., “WERd: using social text spelling variants for evaluating dialectal speech recognition”, arXiv:1709.07484 (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref58">
                        <label>58</label>
                        <mixed-citation publication-type="journal">[58]	Reuhkala, E., Jalanko, M., Kohonen, T., “Redundant hash addressing method adapted for the post processing and error-correction of computer-recognized speech”, International Conference on Acoustics, Speech, and Signal Processing, 591-594 (1979).</mixed-citation>
                    </ref>
                                    <ref id="ref59">
                        <label>59</label>
                        <mixed-citation publication-type="journal">[59]	Büyük, O., Erdoǧan, H., Oflazer, K., “Konuşma tanımada karma dil birimleri kullanımı ve dil kısıtlarının gerçeklenmesi”, Signal Processing and Communications Applications Conference, 111-114 (2005).</mixed-citation>
                    </ref>
                                    <ref id="ref60">
                        <label>60</label>
                        <mixed-citation publication-type="journal">[60]	Steve, R., Nelson, M., Hervé, B., Michael, A. C., Horacio F., “Connectionist probability estimation in HMM speech recognition”, IEEE Transactions on Speech and Audio Processing, 2, 161-174 (1994).</mixed-citation>
                    </ref>
                                    <ref id="ref61">
                        <label>61</label>
                        <mixed-citation publication-type="journal">[61]	Yadava, G., Thimmaraja, S., Jayanna, H., “Creating language and acoustic models using Kaldi to build an automatic speech recognition system for Kannada language”, International Conference on Recent Trends in Electronics, Information and Communication Technology, 161-165 (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref62">
                        <label>62</label>
                        <mixed-citation publication-type="journal">[62]	Çiloğlu, T., Çömez, M., Sahin, S., “Language modelling for Turkish as an agglutinative language”, IEEE Signal Processing and Communications Applications Conference, 1-2 (2004).</mixed-citation>
                    </ref>
                                    <ref id="ref63">
                        <label>63</label>
                        <mixed-citation publication-type="journal">[63]	Keser, S., Edizkan, R., “Phonem-based isolated Turkish word recognition with subspace classifier”, IEEE Signal Processing and Communications Applications Conference, 93-96 (2009).</mixed-citation>
                    </ref>
                                    <ref id="ref64">
                        <label>64</label>
                        <mixed-citation publication-type="journal">[64]	Arslan, R., S., Barışçı, N., “Development of output correction methodology for long short-term memory-based speech recognition”, Sustainability, 11, 1-16 (2019).</mixed-citation>
                    </ref>
                                    <ref id="ref65">
                        <label>65</label>
                        <mixed-citation publication-type="journal">[65]	Eşref, Y., Can, B., “Using Morpheme-Level Attention Mechanism for Turkish Sequence Labelling”, Signal Processing and Communications Applications Conference, 1-4 (2019).</mixed-citation>
                    </ref>
                                    <ref id="ref66">
                        <label>66</label>
                        <mixed-citation publication-type="journal">[66]	Liu, C., Zhang, Y., Zhang, P., Wang, Y., “Evaluating Modeling Units and Sub-word Features in Language Models for Turkish ASR”, International Symposium on Chinese Spoken Language Processing, 414-418 (2019).</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
