<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article article-type="research-article" dtd-version="1.4" xml:lang="tr">
            <front>

                <journal-meta>
                                                                <journal-id>kefad</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Ahi Evran Üniversitesi Kırşehir Eğitim Fakültesi Dergisi</journal-title>
            </journal-title-group>
                            <issn pub-type="ppub">2147-1037</issn>
                                                                                                        <publisher>
                    <publisher-name>Kırşehir Ahi Evran Üniversitesi</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.29299/kefad.1732570</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Measurement Theories and Applications in Education and Psychology</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Eğitimde ve Psikolojide Ölçme Teorileri ve Uygulamaları</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                        <article-title>Yapısal Konu Modellemesi Yoluyla Eğitimde Ölçme Alanındaki Eğilimler ve İçgörüler: Dil Değerlendirmesi Üzerine Bir İnceleme</article-title>
                                                                                                                                                                                                <trans-title-group xml:lang="en">
                                    <trans-title>Trends and Insights in Educational Measurement through Structural Topic Modeling:  A Study in Language Assessment</trans-title>
                                </trans-title-group>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-3580-5568</contrib-id>
                                                                <name>
                                    <surname>Atalay Kabasakal</surname>
                                    <given-names>Kübra</given-names>
                                </name>
                                                                    <aff>Hacettepe Üniversitesi, Eğitim Fakültesi</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3211-0426</contrib-id>
                                                                <name>
                                    <surname>Koçak</surname>
                                    <given-names>Duygu</given-names>
                                </name>
                                                                    <aff>Alanya Alaaddin Keykubat Üniversitesi</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3025-774X</contrib-id>
                                                                <name>
                                    <surname>Akcan</surname>
                                    <given-names>Rabia</given-names>
                                </name>
                                                                    <aff>Milli Eğitim Bakanlığı</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="20260131">
                    <day>31</day>
                    <month>01</month>
                    <year>2026</year>
                </pub-date>
                                        <volume>27</volume>
                                        <issue>1</issue>
                                        <fpage>291</fpage>
                                        <lpage>318</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="20250702">
                        <day>02</day>
                        <month>07</month>
                        <year>2025</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="20250916">
                        <day>16</day>
                        <month>09</month>
                        <year>2025</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 2000, Ahi Evran Üniversitesi Kırşehir Eğitim Fakültesi Dergisi</copyright-statement>
                    <copyright-year>2000</copyright-year>
                    <copyright-holder>Ahi Evran Üniversitesi Kırşehir Eğitim Fakültesi Dergisi</copyright-holder>
                </permissions>
            
                                                                                                <abstract><p>Bu araştırmada, eğitimde ölçme alanındaki tematik eğilimleri ve araştırma yönelimlerini ortaya koymak amacıyla Yapısal Konu Modellemesi (STM) kullanılmıştır. Bu doğrultuda, örnek bir alt alan uygulaması olarak Language Testing ve Language Assessment Quarterly dergilerinde son 16 yılda yayımlanan toplam 778 makale analiz edilmiştir. STM analizi, en belirgin konuların “Dil Testinin Sosyal, Politik ve Etik Boyutları”, “Dil Değerlendirme Okuryazarlığının Geliştirilmesi” ve “Okuma ve Dinleme Değerlendirmelerinde Psikometrik Yaklaşımlar” olduğu on farklı tema ortaya koymuştur. Çalışmada ayrıca, değerlendirici güvenirliğine ilişkin kritik sorunlar vurgulanmakta ve bu konunun dil değerlendirme araştırmalarındaki merkezi rolüne dikkat çekilmektedir. Ayrıca, işaret dili ve iki dillilik bağlamlarında özellikle sözcük bilgisinin dil yeterliğindeki rolüne ilişkin iki birbiriyle bağlantılı tema öne çıkmaktadır. Dil testinin sosyal, politik ve etik boyutlarına artan vurgu, bu alanın yalnızca yeterlilik ölçümünü aşarak eğitim politikalarını ve uygulamalarını şekillendirme gücünü göstermektedir. Psikometrik yöntemlerin ve dil değerlendirme okuryazarlığının öne çıkması ise alandaki süregelen kuramsal ve yöntemsel gelişmelere işaret etmektedir. Bu bulgular, dil değerlendirme araştırmalarındaki önceliklerin ve yönelimlerin nasıl değiştiğine ilişkin araştırmacılar, politika yapıcılar ve uygulayıcılar için önemli içgörüler sunmaktadır.</p></abstract>
                                                                                                                                    <trans-abstract xml:lang="en">
                            <p>In this study, Structural Topic Modeling (STM) was employed to identify thematic trends and research orientations within the field of educational measurement. Accordingly, as a representative subfield application, a total of 778 articles published over the past 16 years in the journals Language Testing and Language Assessment Quarterly were analyzed. The STM analysis identified ten distinct themes, with the most prominent topics being “Social, Political, and Ethical Dimensions of Language Testing,” “Advancing Language Assessment Literacy,” and “Psychometric Approaches to Reading and Listening Assessment.” The study also highlights critical issues related to rater reliability, emphasizing its centrality in language assessment research. Furthermore, two interconnected themes emerge concerning the role of vocabulary in language proficiency, particularly in the contexts of sign language and bilingualism. The increasing emphasis on social, political, and ethical dimensions underscores the expanding impact of language testing beyond proficiency measurement, shaping policies and educational practices. Additionally, the prominence of psychometric methodologies and language assessment literacy reflects the field’s ongoing methodological and theoretical advancements. These findings offer valuable insights into emerging priorities and shifts in language assessment research for scholars, policymakers, and practitioners.</p></trans-abstract>
                                                            
            
                                                            <kwd-group xml:lang="tr">
                                                    <kwd>Metin madenciliği</kwd>
                                                    <kwd>Yapısal konu modellemesi</kwd>
                                                    <kwd>Dil testi ve değerlendirmesi</kwd>
                                            </kwd-group>
                                                        
                                                                            <kwd-group xml:lang="en">
                                                    <kwd>Text mining</kwd>
                                                    <kwd>Structural topic modelling</kwd>
                                                    <kwd>Language testing and assessment</kwd>
                                            </kwd-group>
                                                                                                            </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Aryadoust, V., Eckes, T., &amp; In’nami, Y. (2021). Editorial: Frontiers in Language Assessment and Testing. Frontiers in Psychology, 12. https://doi.org/10.3389/fpsyg.2021.691614</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Aryadoust, V., Goh, C. C. M., &amp; Kim, L. O. (2011). An investigation of differential item functioning in the MELAB listening Test. Language Assessment Quarterly, 8(4), 361–385. https://doi.org/10.1080/15434303.2011.628632</mixed-citation>
                        <mixed-citation publication-type="journal">Aryadoust, V., Zakaria, A., Lim, M. H., &amp; Chen, C. (2020). An extensive knowledge mapping review of measurement and validity in language assessment and SLA research. Frontiers in Psychology, 11. https://doi.org/10.3389/fpsyg.2020.01941</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Bachman, L. F., &amp; Clark, J. L. D. (1987). The measurement of Foreign/Second Language Proficiency. The Annals of the American Academy of Political and Social Science, 490(1), 20–33. https://doi.org/10.1177/0002716287490001003</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Bae, J., Bentler, P. M., &amp; Lee, Y. (2016). On the role of content in writing assessment. Language Assessment Quarterly, 13(4), 302–328. https://doi.org/10.1080/15434303.2016.1246552</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Baker, B. A., &amp; Riches, C. (2017). The development of EFL examinations in Haiti: Collaboration and language assessment literacy development. Language Testing, 35(4), 557–581. https://doi.org/10.1177/0265532217716732</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Banks, G. C., Woznyj, H. M., Wesslen, R. S., &amp; Ross, R. L. (2018). A review of best practice recommendations for text analysis in R (and a User-Friendly app). Journal of Business and Psychology, 33(4), 445–459. https://doi.org/10.1007/s10869-017-9528-3</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Barkaoui, K. (2010a). Explaining ESL essay holistic scores: A multilevel modeling approach. Language Testing, 27(4), 515–535. https://doi.org/10.1177/0265532210368717</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Barkaoui, K. (2010b). Think-aloud protocols in research on essay rating: An empirical study of their veridicality and reactivity. Language Testing, 28(1), 51–75. https://doi.org/10.1177/0265532210376379</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">Barkaoui, K. (2010c). Variability in ESL essay rating processes: The role of the rating scale and rater experience. Language Assessment Quarterly, 7(1), 54–74. https://doi.org/10.1080/15434300903464418</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Barkaoui, K. (2024). The Academic Achievement of Undergraduate Students with Different English Language Proficiency Profiles. Language Assessment Quarterly, 21(3), 224–244. https://doi.org/10.1080/15434303.2024.2346089</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">Barkaoui, K. (2025). The relationship between English language proficiency test scores and academic achievement: A longitudinal study of two tests. Language Testing, 0(0). https://doi.org/10.1177/02655322251319284</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Blei, D. M., Ng, A. Y., &amp; Jordan, M. I. (2003). Latent dirichlet allocation. Journal of Machine Learning Research, 3, 993–1022. https://doi.org/10.5555/944919.944937</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Bochner, J. H., Samar, V. J., Hauser, P. C., Garrison, W. M., Searls, J. M., &amp; Sanders, C. A. (2015). Validity of the American Sign Language Discrimination Test. Language Testing, 33(4), 473–495. https://doi.org/10.1177/0265532215590849</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Carlsen, C. H., &amp; Rocca, L. (2021). Language test misuse. Language Assessment Quarterly, 18(5), 477–491. https://doi.org/10.1080/15434303.2021.1947288</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Cho, Y., &amp; Bridgeman, B. (2012). Relationship of TOEFL iBT® scores to academic performance: Some evidence from American universities. Language Testing, 29(3), 421–442. https://doi.org/10.1177/0265532211430368</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Choi, H., &amp; Woo, J. (2022). Investigating emerging hydrogen technology topics and comparing national level technological focus: Patent analysis using a structural topic model. Applied Energy, 313, 118898. https://doi.org/10.1016/j.apenergy.2022.118898</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Coghlan, S., Miller, T., &amp; Paterson, J. (2021). Good proctor or “big brother”? Ethics of online exam supervision technologies. Philosophy &amp; Technology, 34(4), 1581–1606. https://doi.org/10.1007/s13347-021-00476-1</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Eckes, T. (2012). Operational Rater types in writing assessment: linking rater cognition to rater behavior. Language Assessment Quarterly, 9(3), 270–292. https://doi.org/10.1080/15434303.2011.649381</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Elder, C., &amp; McNamara, T. (2015). The hunt for “indigenous criteria” in assessing communication in the physiotherapy workplace. Language Testing, 33(2), 153–174. https://doi.org/10.1177/0265532215607398</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Fan, J., &amp; Yan, X. (2020). Assessing Speaking Proficiency: A narrative review of speaking assessment research within the Argument-Based Validation Framework. Frontiers in Psychology, 11. https://doi.org/10.3389/fpsyg.2020.00330</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="book">Fraenkel, J. R., Wallen, N. E., &amp; Hyun, H. H. (2012). How to design and evaluate research in education. McGraw-Hill.</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Gamaroff, R. (2000). Rater reliability in language assessment: The bug of all bears. System, 28(1), 31–53. https://doi.org/10.1016/S0346-251X(99)00059-7</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Gardner, R. C., &amp; MacIntyre, P. D. (1992). A student’s contributions to second language learning. Part I: Cognitive variables. Language Teaching, 25(4), 211–220. https://doi.org/10.1017/S026144480000700X</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">Gokturk, N., &amp; Chukharev, E. (2024). Exploring the potential of a spoken Dialog System-Delivered Paired Discussion task for assessing interactional competence. Language Assessment Quarterly, 21(1), 60–99. https://doi.org/10.1080/15434303.2023.2289173</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">Hamdani, S., Chan, A., Kan, R., Chiat, S., Gagarina, N., Haman, E., … Armon-Lotem, S. (2024). Identifying developmental language disorder (DLD) in multilingual children: A case study tutorial. International Journal of Speech-Language Pathology, 1–15. https://doi.org/10.1080/17549507.2024.2326095</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">Hauck, M. C., Wolf, M. K., &amp; Mislevy, R. (2016). Creating a Next-Generation system of K-12 English learner language proficiency assessments. ETS Research Report Series, 2016(1), 1–10. https://doi.org/10.1002/ets2.12092</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">Huang, F. L., &amp; Konold, T. R. (2013). A latent variable investigation of the Phonological Awareness Literacy Screening-Kindergarten assessment: Construct identification and multigroup comparisons between Spanish-speaking English-language learners (ELLs) and non-ELL students. Language Testing, 31(2), 205–221. https://doi.org/10.1177/0265532213496773</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">Isaacs, T., Hu, R., Trenkic, D., &amp; Varga, J. (2023). Examining the predictive validity of the Duolingo English Test: Evidence from a major UK university. Language Testing, 40(3), 748–770. https://doi.org/10.1177/02655322231158550</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">Isaacs, T., &amp; Thomson, R. I. (2013). Rater experience, rating scale length, and judgments of L2 pronunciation: Revisiting research conventions. Language Assessment Quarterly, 10(2), 135–159. https://doi.org/10.1080/15434303.2013.769545</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">Isbell, D. R., Kremmel, B., &amp; Kim, J. (2023). Remote proctoring in Language Testing: Implications for fairness and justice. Language Assessment Quarterly, 20(4–5), 469–487. https://doi.org/10.1080/15434303.2023.2288251</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">Jang, E. E., Cummins, J., Wagner, M., Stille, S., &amp; Dunlop, M. (2015). Investigating the homogeneity and distinguishability of STEP proficiency descriptors in assessing English language learners in Ontario schools. Language Assessment Quarterly, 12(1), 87–109. https://doi.org/10.1080/15434303.2014.936602</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">Javidanmehr, Z., &amp; Sarab, M. R. A. (2019). Retrofitting non-diagnostic reading comprehension assessment: Application of the G-DINA model to a high-stake reading comprehension test. Language Assessment Quarterly, 16(3), 294–311. https://doi.org/10.1080/15434303.2019.1654479</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">Kessler, G. (2018). Technology and the future of language teaching. Foreign Language Annals, 51(1), 205–218.</mixed-citation>
                    </ref>
                                    <ref id="ref34">
                        <label>34</label>
                        <mixed-citation publication-type="journal">Kokhan, K. (2012). Investigating the possibility of using TOEFL scores for university ESL decision-making: Placement trends and effect of time lag. Language Testing, 29(2), 291–308. https://doi.org/10.1177/0265532211429403</mixed-citation>
                    </ref>
                                    <ref id="ref35">
                        <label>35</label>
                        <mixed-citation publication-type="journal">Kotowicz, J., Woll, B., &amp; Herman, R. (2020). Adaptation of the British Sign Language Receptive Skills Test into Polish Sign Language. Language Testing, 38(1), 132–153. https://doi.org/10.1177/0265532220924598</mixed-citation>
                    </ref>
                                    <ref id="ref36">
                        <label>36</label>
                        <mixed-citation publication-type="journal">Kozaki, Y. (2010). An alternative decision-making procedure for performance assessments: Using the multifaceted Rasch model to generate cut estimates. Language Assessment Quarterly, 7(1), 75–95. https://doi.org/10.1080/15434300903464400</mixed-citation>
                    </ref>
                                    <ref id="ref37">
                        <label>37</label>
                        <mixed-citation publication-type="journal">Kremmel, B., &amp; Schmitt, N. (2016). Interpreting vocabulary test scores: What do various item formats tell us about learners’ ability to employ words? Language Assessment Quarterly, 13(4), 377–392. https://doi.org/10.1080/15434303.2016.1237516</mixed-citation>
                    </ref>
                                    <ref id="ref38">
                        <label>38</label>
                        <mixed-citation publication-type="journal">Kuhn, K. D. (2018). Using structural topic modeling to identify latent topics and trends in aviation incident reports. Transportation Research Part C Emerging Technologies, 87, 105–122. https://doi.org/10.1016/j.trc.2017.12.018</mixed-citation>
                    </ref>
                                    <ref id="ref39">
                        <label>39</label>
                        <mixed-citation publication-type="journal">Kunnan, A. J. (2009). Testing for citizenship: The U.S. naturalization test. Language Assessment Quarterly, 6(1), 89–97. https://doi.org/10.1080/15434300802606630</mixed-citation>
                    </ref>
                                    <ref id="ref40">
                        <label>40</label>
                        <mixed-citation publication-type="journal">Kyle, K., &amp; Crossley, S. (2017). Assessing syntactic sophistication in L2 writing: A usage-based approach. Language Testing, 34(4), 513–535. https://doi.org/10.1177/0265532217712554</mixed-citation>
                    </ref>
                                    <ref id="ref41">
                        <label>41</label>
                        <mixed-citation publication-type="journal">Kyle, K., Crossley, S. A., &amp; Jarvis, S. (2021). Assessing the validity of lexical diversity indices using direct judgements. Language Assessment Quarterly, 18(2), 154–170. https://doi.org/10.1080/15434303.2020.1844205</mixed-citation>
                    </ref>
                                    <ref id="ref42">
                        <label>42</label>
                        <mixed-citation publication-type="journal">Lam, R. (2014). Language assessment training in Hong Kong: Implications for language assessment literacy. Language Testing, 32(2), 169–197. https://doi.org/10.1177/0265532214554321</mixed-citation>
                    </ref>
                                    <ref id="ref43">
                        <label>43</label>
                        <mixed-citation publication-type="journal">Lam, D. M. K. (2019). Interactional Competence with and without Extended Planning Time in a Group Oral Assessment. Language Assessment Quarterly, 16(1), 1–20. https://doi.org/10.1080/15434303.2019.1602627</mixed-citation>
                    </ref>
                                    <ref id="ref44">
                        <label>44</label>
                        <mixed-citation publication-type="journal">Laufer, B., &amp; McLean, S. (2016). Loanwords and vocabulary size test scores: A case of different estimates for different L1 learners. Language Assessment Quarterly, 13(3), 202–217. https://doi.org/10.1080/15434303.2016.1210611</mixed-citation>
                    </ref>
                                    <ref id="ref45">
                        <label>45</label>
                        <mixed-citation publication-type="journal">Li, X., Dai, A., Tran, R., &amp; Wang, J. (2023). Text mining-based identification of promising miRNA biomarkers for diabetes mellitus. Frontiers in Endocrinology, 14. https://doi.org/10.3389/fendo.2023.1195145</mixed-citation>
                    </ref>
                                    <ref id="ref46">
                        <label>46</label>
                        <mixed-citation publication-type="journal">Liu, H. Y., You, X. F., Wang, W. Y., Ding, S. L., &amp; Chang, H. H. (2013). The development of computerized adaptive testing with cognitive diagnosis for an English achievement test in China. Journal of Classification, 30(2), 152–172. https://doi.org/10.1007/s00357-013-9128-5</mixed-citation>
                    </ref>
                                    <ref id="ref47">
                        <label>47</label>
                        <mixed-citation publication-type="journal">Liu, T., Aryadoust, V., &amp; Foo, S. (2021). Examining the factor structure and its replicability across multiple listening test forms: Validity evidence for the Michigan English Test. Language Testing, 39(1), 142–171. https://doi.org/10.1177/02655322211018139</mixed-citation>
                    </ref>
                                    <ref id="ref48">
                        <label>48</label>
                        <mixed-citation publication-type="journal">Manias, E., &amp; McNamara, T. (2016). Standard setting in specific-purpose language testing: What can a qualitative study add? Language Testing, 33(2), 235–249. https://doi.org/10.1177/0265532215608411</mixed-citation>
                    </ref>
                                    <ref id="ref49">
                        <label>49</label>
                        <mixed-citation publication-type="journal">May, L. (2011). Interactional competence in a paired speaking test: Features salient to raters. Language Assessment Quarterly, 8(2), 127–145. https://doi.org/10.1080/15434303.2011.565845</mixed-citation>
                    </ref>
                                    <ref id="ref50">
                        <label>50</label>
                        <mixed-citation publication-type="journal">McNamara, T. (2009). Australia: The dictation tests redux? Language Assessment Quarterly, 6(1), 106–111. https://doi.org/10.1080/15434300802606663</mixed-citation>
                    </ref>
                                    <ref id="ref51">
                        <label>51</label>
                        <mixed-citation publication-type="journal">McNamara, T., &amp; Ryan, K. (2011). Fairness versus justice in language testing: The place of English literacy in the Australian Citizenship Test. Language Assessment Quarterly, 8(2), 161–178. https://doi.org/10.1080/15434303.2011.565438</mixed-citation>
                    </ref>
                                    <ref id="ref52">
                        <label>52</label>
                        <mixed-citation publication-type="journal">Min, S., &amp; He, L. (2014). Applying unidimensional and multidimensional item response theory models in testlet-based reading assessment. Language Testing, 31(4), 453–477. https://doi.org/10.1177/0265532214527277</mixed-citation>
                    </ref>
                                    <ref id="ref53">
                        <label>53</label>
                        <mixed-citation publication-type="journal">Min, S., Cai, H., &amp; He, L. (2021). Application of bi-factor MIRT and higher-order CDM models to an in-house EFL listening test for diagnostic purposes. Language Assessment Quarterly, 19(2), 189–213. https://doi.org/10.1080/15434303.2021.1980571</mixed-citation>
                    </ref>
                                    <ref id="ref54">
                        <label>54</label>
                        <mixed-citation publication-type="journal">Myford, C. M., &amp; Wolfe, E. W. (2003). Detecting and measuring rater effects using many-facet Rasch measurement: Part I. Journal of Applied Measurement, 4(4), 386–422.</mixed-citation>
                    </ref>
                                    <ref id="ref55">
                        <label>55</label>
                        <mixed-citation publication-type="journal">O’Hagan, S., Pill, J., &amp; Zhang, Y. (2015). Extending the scope of speaking assessment criteria in a specific-purpose language test: Operationalizing a health professional perspective. Language Testing, 33(2), 195–216. https://doi.org/10.1177/0265532215607920</mixed-citation>
                    </ref>
                                    <ref id="ref56">
                        <label>56</label>
                        <mixed-citation publication-type="journal">Olson, D. J. (2023). Measuring bilingual language dominance: An examination of the reliability of the Bilingual Language Profile. Language Testing, 40(3), 521–547. https://doi.org/10.1177/02655322221139162</mixed-citation>
                    </ref>
                                    <ref id="ref57">
                        <label>57</label>
                        <mixed-citation publication-type="journal">Peña, E. D., Bedore, L. M., Lugo-Neris, M. J., &amp; Albudoor, N. (2020). Identifying developmental language disorder in school-age bilinguals: Semantics, grammar, and narratives. Language Assessment Quarterly, 17(5), 541–558. https://doi.org/10.1080/15434303.2020.1827258</mixed-citation>
                    </ref>
                                    <ref id="ref58">
                        <label>58</label>
                        <mixed-citation publication-type="journal">Plough, I. C., &amp; Bogart, P. S. H. (2008). Perceptions of examiner behavior modulate power relations in oral performance testing. Language Assessment Quarterly, 5(3), 195–217. https://doi.org/10.1080/15434300802229375</mixed-citation>
                    </ref>
                                    <ref id="ref59">
                        <label>59</label>
                        <mixed-citation publication-type="journal">Pill, J. (2015). Drawing on indigenous criteria for more authentic assessment in a specific-purpose language test: Health professionals interacting with patients. Language Testing, 33(2), 175–193. https://doi.org/10.1177/0265532215607400</mixed-citation>
                    </ref>
                                    <ref id="ref60">
                        <label>60</label>
                        <mixed-citation publication-type="journal">Roberts, M. E., Stewart, B. M., Tingley, D., Lucas, C., Leder‐Luis, J., Gadarian, S. K., Albertson, B., &amp; Rand, D. G. (2014). Structural topic models for open‐ended survey responses. American Journal of Political Science, 58(4), 1064–1082. https://doi.org/10.1111/ajps.12103</mixed-citation>
                    </ref>
                                    <ref id="ref61">
                        <label>61</label>
                        <mixed-citation publication-type="journal">Roberts, M. E., Stewart, B. M., &amp; Tingley, D. (2019). stm: An R package for structural topic models. Journal of Statistical Software, 91(2). https://doi.org/10.18637/jss.v091.i02</mixed-citation>
                    </ref>
                                    <ref id="ref62">
                        <label>62</label>
                        <mixed-citation publication-type="journal">Robles-García, P., McLean, S., Stewart, J., Shin, J. young, &amp; Sánchez-Gutiérrez, C. H. (2024). The development and initial validation of O-WSVLT, a meaning-recall online L2 Spanish vocabulary levels test. Language Assessment Quarterly, 21(2), 181–205. https://doi.org/10.1080/15434303.2024.2311724</mixed-citation>
                    </ref>
                                    <ref id="ref63">
                        <label>63</label>
                        <mixed-citation publication-type="journal">Scarino, A. (2013). Language assessment literacy as self-awareness: Understanding the role of interpretation in assessment and in teacher learning. Language Testing, 30(3), 309–327. https://doi.org/10.1177/0265532213480128</mixed-citation>
                    </ref>
                                    <ref id="ref64">
                        <label>64</label>
                        <mixed-citation publication-type="journal">Schaefer, E. (2008). Rater bias patterns in an EFL writing assessment. Language Testing, 25(4), 465–493. https://doi.org/10.1177/0265532208094273</mixed-citation>
                    </ref>
                                    <ref id="ref65">
                        <label>65</label>
                        <mixed-citation publication-type="journal">Schissel, J. L., López-Gopar, M., Leung, C., Morales, J., &amp; Davis, J. R. (2019). Classroom-based assessments in linguistically diverse communities: A case for collaborative research methodologies. Language Assessment Quarterly, 16(4–5), 393–407. https://doi.org/10.1080/15434303.2019.1678041</mixed-citation>
                    </ref>
                                    <ref id="ref66">
                        <label>66</label>
                        <mixed-citation publication-type="journal">Segbers, J., &amp; Schroeder, S. (2017). How many words do children know? A corpus-based estimation of children’s total vocabulary size. Language Testing, 34(3), 297–320. https://doi.org/10.1177/0265532216641152</mixed-citation>
                    </ref>
                                    <ref id="ref67">
                        <label>67</label>
                        <mixed-citation publication-type="journal">Shi, B., Huang, L., &amp; Lu, X. (2020). Effect of prompt type on test-takers’ writing performance and writing strategy use in the continuation task. Language Testing, 37(3), 361–388. https://doi.org/10.1177/0265532220911626</mixed-citation>
                    </ref>
                                    <ref id="ref68">
                        <label>68</label>
                        <mixed-citation publication-type="journal">Silge, J., &amp; Robinson, D. (2016). tidytext: Text mining and analysis using tidy data principles in R. Journal of Open Source Software, 1(3), 37. https://doi.org/10.21105/joss.00037</mixed-citation>
                    </ref>
                                    <ref id="ref69">
                        <label>69</label>
                        <mixed-citation publication-type="journal">Stewart, J., Vitta, J. P., Nicklin, C., McLean, S., Pinchbeck, G. G., &amp; Kramer, B. (2021). The relationship between word difficulty and frequency: A response to Hashimoto. Language Assessment Quarterly, 19(1), 90–101. https://doi.org/10.1080/15434303.2021.1992629</mixed-citation>
                    </ref>
                                    <ref id="ref70">
                        <label>70</label>
                        <mixed-citation publication-type="journal">Tonidandel, S., Summerville, K. M., Gentry, W. A., &amp; Young, S. F. (2021). Using structural topic modeling to gain insight into challenges faced by leaders. The Leadership Quarterly, 33(5), 101576. https://doi.org/10.1016/j.leaqua.2021.101576</mixed-citation>
                    </ref>
                                    <ref id="ref71">
                        <label>71</label>
                        <mixed-citation publication-type="journal">Usman, N., Hendrik, H., &amp; Madehang, M. (2024). Difficulties in understanding the TOEFL reading test of English language education study program at university. IDEAS: Journal on English Language Teaching and Learning, Linguistics and Literature, 12(1), 755–773. https://doi.org/10.24256/ideas.v12i1.5179</mixed-citation>
                    </ref>
                                    <ref id="ref72">
                        <label>72</label>
                        <mixed-citation publication-type="journal">Vogt, K., Tsagari, D., &amp; Spanoudis, G. (2020). What do teachers think they want? A comparative study of in-service language teachers’ beliefs on LAL training needs. Language Assessment Quarterly, 17(4), 386–409. https://doi.org/10.1080/15434303.2020.1781128</mixed-citation>
                    </ref>
                                    <ref id="ref73">
                        <label>73</label>
                        <mixed-citation publication-type="journal">Wang, P. A., &amp; Hsieh, S. (2023). Incorporating structural topic modeling into short text analysis. Concentric Studies in Linguistics, 49(1), 96–138. https://doi.org/10.1075/consl.22026.wan</mixed-citation>
                    </ref>
                                    <ref id="ref74">
                        <label>74</label>
                        <mixed-citation publication-type="journal">Wolfersberger, M. (2013). Refining the construct of classroom-based writing-from-readings assessment: The role of task representation. Language Assessment Quarterly, 10(1), 49–72. https://doi.org/10.1080/15434303.2012.750661</mixed-citation>
                    </ref>
                                    <ref id="ref75">
                        <label>75</label>
                        <mixed-citation publication-type="journal">Youn, S. J. (2019). Managing proposal sequences in role-play assessment: Validity evidence of interactional competence across levels. Language Testing, 37(1), 76–106. https://doi.org/10.1177/0265532219860077</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
