<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <journal-meta>
                                                                <journal-id>tred</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Trakya Eğitim Dergisi</journal-title>
            </journal-title-group>
                            <issn pub-type="ppub">2630-6301</issn>
                                                                                                        <publisher>
                    <publisher-name>Trakya Üniversitesi</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.24315/tred.1665684</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Computer Based Exam Applications</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Bilgisayar Tabanlı Sınav Uygulamaları</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                        <trans-title-group xml:lang="en">
                                    <trans-title>Comparative Study of Fixed and On-the-Fly Computerized Multistage Testing: Implications for Measurement Accuracy and Item Security</trans-title>
                                </trans-title-group>
                                                                                                                                                                                                                                    <article-title>SABİT VE ANINDA BİREYSELLEŞTİRİLMİŞ ÇOK AŞAMALI TESTLERİN KARŞILAŞTIRMALI İNCELENMESİ: ÖLÇME KESİNLİĞİ VE MADDE GÜVENLİĞİNE İLİŞKİN ÇIKARIMLAR</article-title>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-2896-0201</contrib-id>
                                                                <name>
                                    <surname>Yiğiter</surname>
                                    <given-names>Mahmut Sami</given-names>
                                </name>
                                                                    <aff>ANKARA SOSYAL BİLİMLER ÜNİVERSİTESİ, UZAKTAN EĞİTİM UYGULAMA VE ARAŞTIRMA MERKEZİ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-6274-2016</contrib-id>
                                                                <name>
                                    <surname>Doğan</surname>
                                    <given-names>Nuri</given-names>
                                </name>
                                                                    <aff>HACETTEPE ÜNİVERSİTESİ, EĞİTİM BİLİMLERİ ENSTİTÜSÜ</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="2026-04-25">
                    <day>25</day>
                    <month>04</month>
                    <year>2026</year>
                </pub-date>
                                        <volume>16</volume>
                                        <issue>2</issue>
                                        <fpage>766</fpage>
                                        <lpage>820</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="2025-03-25">
                        <day>25</day>
                        <month>03</month>
                        <year>2025</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="2025-10-27">
                        <day>27</day>
                        <month>10</month>
                        <year>2025</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 2011, Trakya Eğitim Dergisi</copyright-statement>
                    <copyright-year>2011</copyright-year>
                    <copyright-holder>Trakya Eğitim Dergisi</copyright-holder>
                </permissions>
            
                                                                                                <trans-abstract xml:lang="en">
                            <p>In recent years, adaptive testing techniques such as Computerized Adaptive Testing (CAT) and Computerized Multistage Testing (MST) have been increasingly incorporated into large-scale evaluations. This study aims to compare Fixed-MST (F-MST) and On-the-Fly MST (O-MST), a novel approach in which items are grouped into modules based on the participant’s ability level, in terms of measurement precision and item security across various simulation scenarios. The simulations were carried out using item parameter distributions derived from the 3PL model applied in TIMSS. A total of 72 different conditions were analyzed to compare O-MST with F-MST. The findings on measurement precision reveal that O-MST performs better than F-MST, especially when the test lengths are shorter, where O-MST shows substantially higher measurement precision. Moreover, when examining ability distributions, O-MST demonstrates better measurement precision compared to F-MST, particularly in cases of non-normal distributions. A significant result from this study is that the measurement precision of O-MST improves as the length of the final module increases, whereas the measurement precision of F-MST becomes more similar to O-MST as the length of the initial module increases. Regarding item security, O-MST employed a greater number of items and exhibited a lower item exposure rate compared to F-MST in all conditions. The favorable results in terms of measurement precision and item security for O-MST are discussed within the framework of large-scale assessments and relevant literature.</p></trans-abstract>
                                                                                                                                                            <abstract><p>Son yıllarda, Bireyselleştirilmiş Bilgisayarlı Testler (BBT) ve Bireyselleştirilmiş Çok Aşamalı Testler (BÇAT) gibi uyarlanabilir test teknikleri, büyük ölçekli değerlendirmelere giderek daha fazla dahil edilmektedir. Bu çalışmanın amacı, maddelerin katılımcının yetenek düzeyine göre modüller halinde gruplandırıldığı yeni bir yaklaşım olan Sabit-BÇAT (S-BÇAT) ve Anında BÇAT&#039;ı (A-BÇAT) çeşitli simülasyon senaryolarında ölçme kesinliği ve madde güvenliği açısından karşılaştırmaktır. Simülasyonlar, TIMSS&#039;te uygulanan maddelerin 3PL modelinden türetilen madde parametre dağılımları kullanılarak gerçekleştirilmiştir. A-BÇAT ile S-BÇAT&#039;ı karşılaştırmak için toplam 72 farklı koşul analiz edilmiştir. Ölçme kesinliğine ilişkin bulgular, A-BÇAT&#039;ın S-BÇAT&#039;tan daha iyi performans gösterdiğini, özellikle de test uzunlukları daha kısa olduğunda, A-BÇAT&#039;ın önemli ölçüde daha yüksek ölçme kesinliği gösterdiğini ortaya koymaktadır. Ayrıca, yetenek dağılımları incelendiğinde, A-BÇAT, özellikle normal olmayan dağılımlarda S-BÇAT&#039;a kıyasla daha iyi ölçme kesinliği göstermektedir. Bu çalışmadan elde edilen önemli bir sonuç, A-BÇAT&#039;ın ölçme kesinliğinin son modülün uzunluğu arttıkça iyileşmesi, S-BÇAT&#039;ın ölçme kesinliğinin ise başlangıç modülünün uzunluğu arttıkça A-BÇAT&#039;a daha çok benzemesidir. Madde güvenliği ile ilgili olarak, A-BÇAT daha fazla sayıda madde kullanmış ve tüm koşullarda S-BÇAT&#039;a kıyasla daha düşük bir madde maruz kalma oranı sergilemiştir. A-BÇAT için ölçme kesinliği ve madde güvenliği açısından olumlu sonuçlar tartışılmaktadır.</p></abstract>
                                                            
            
                                                                                                                    <kwd-group xml:lang="tr">
                                                    <kwd>Madde Güvenliği</kwd>
                                                    <kwd>Bireyselleştirilmiş Çok Aşamalı Testler</kwd>
                                                    <kwd>Bilgisayarlı Testler</kwd>
                                                    <kwd>Madde Teşhir Oranı</kwd>
                                            </kwd-group>
                            
                                                <kwd-group xml:lang="en">
                                                    <kwd>Computerized Multistage Testing</kwd>
                                                    <kwd>Adaptive Testing</kwd>
                                                    <kwd>Item Security</kwd>
                                                    <kwd>Item Exposure Rate</kwd>
                                            </kwd-group>
                                                                                                                                                                    </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Arvey, R. D., Strickland, W., Drauden, G., &amp; Martin, C. (1990). Motivational components of test taking. Personnel Psychology, 43(4), 695–716. https://doi.org/10.1111/j.1744-6570.1990.tb00679.x</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Bergstrom, B. A., Lunz, M. E., &amp; Gershon, R. C. (1992). Altering the level of difficulty in computer adaptive testing. Applied Measurement in Education, 5(2), 137–149. https://doi.org/10.1207/s15324818ame0502_4</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Boztunç Öztürk, N. (2019). How the length and characteristics of routing module affect ability estimation in ca-MST? Universal Journal of Educational Research, 7(1), 164–170. https://doi.org/10.13189/ujer.2019.070121</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Breithaupt, K. J., Mills, C. N., &amp; Melican, G. J. (2006). Facing the opportunities of the future. Computer-based testing and the Internet: Issues and advances, 219-251.</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Bulut, O. (2021). Beyond multiple-choice with digital assessments. ELearn, 2021(Special Issue), 1–10. https://doi.org/10.1145/3472394</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Bulut, O., &amp; Sünbül, Ö. (2017). R Programlama Dili ile Madde Tepki Kuramında Monte Carlo Simülasyon Çalışmaları. Egitimde ve Psikolojide Olcme ve Degerlendirme Dergisi, 8(3), 266–287. https://doi.org/10.21031/epod.305821</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Cai, L., Albano, A. D., &amp; Roussos, L. A. (2021). An investigation of item calibration methods in multistage testing. Measurement: Interdisciplinary Research and Perspectives, 19(3), 163–178. 
https://doi.org/10.1080/15366367.2021.1878778</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Carlson, S. (2000). ETS finds flaws in the way online GRE rates some students. Chronicle of Higher Education, 47(8), A47.</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">Cetin-Berber, D. D., Sari, H. I., &amp; Huggins-Manley, A. C. (2019). Imputation methods to deal with missing responses in computerized adaptive multistage testing. Educational and psychological measurement, 79(3), 495-511.</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Chang, H.-H. (2004). Understanding computerized adaptive testing: From Robbins-Monro to Lord and beyond. In D. Kaplan (Ed.), The Sage handbook of quantitative methodology for the social sciences (pp. 117-133). Thousand Oaks, CA: Sage.</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">Chang, H.-H. (2015). Psychometrics behind Computerized Adaptive Testing. Psychometrika, 80(1), 1–20. https://doi.org/10.1007/s11336-014-9401-5</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Chang, H.-H., &amp; Ying, Z. (2008). To weight or not to weight? Balancing influence of initial items in adaptive testing. Psychometrika, 73(3), 441–450.</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Choi, S. W., &amp; van der Linden, W. J. (2018). Ensuring content validity of patient-reported outcomes: a shadow-test approach to their adaptive measurement. Quality of Life Research, 27(7), 1683-1693.</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Choi, S. W., Lim, S., &amp; van der Linden, W. J. (2021). TestDesign: an optimal test design approach to constructing fixed and adaptive tests in R. Behaviormetrika, 1-39.</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Choi, S. W., Moellering, K. T., Li, J., &amp; van der Linden, W. J. (2016). Optimal reassembly of shadow tests in CAT. Applied psychological measurement, 40(7), 469-485. https://doi.org/10.1177/0146621616654597.</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd ed.). Hillsdale, NJ: Erlbaum.</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Drasgow, F., Luecht, R. M., &amp; Bennett, R. E. (2006). Technology and testing. Educational measurement, 4, 471-515.</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Demir, H., &amp; Gelbal, S. (2025). A systematic review on Computerized Adaptive Testing. Journal of Education Faculty, 27(1), 137–150. https://doi.org/10.17556/erziefd.1577880</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Ebenbeck, N., &amp; Gebhardt, M. (2022). Simulating computerized adaptive testing in special education based on inclusive progress monitoring data. Frontiers in Education, 7. https://doi.org/10.3389/feduc.2022.945733</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Feinberg, R. A., &amp; Rubright, J. D. (2016). Conducting simulation studies in psychometrics. Educational Measurement: Issues and Practice, 35(2), 36-49.</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Fleishman, A. I. (1978). A method for simulating non-normal distributions. Psychometrika, 43(4), 521-532.</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Fraenkel, J. R., Wallen, N. E., &amp; Hyun, H. H. (2012). How to design and evaluate research in education. McGram-Hill Publishing.</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Gür, R., &amp; Gülleroğlu, H. (2020). The effect of item exposure control methods on measurement precision and test security under different measurement conditions in computerized adaptive testing. TED EĞİTİM VE BİLİM, 45(202), 113–139. https://doi.org/10.15390/eb.2020.8256</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">Hambleton, R. K., &amp; Xing, D. (2006). Optimal and nonoptimal computer-based test designs for making pass–fail decisions. Applied Measurement in Education, 19(3), 221-239.</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">Han, K. C. T., &amp; Guo, F. (2016). Multistage testing by shaping modules on the fly. In Computerized Multistage Testing (pp. 157-172). Chapman and Hall/CRC.</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">Han, K. T. (2007). WinGen: Windows software that generates item response theory parameters and item responses. Applied Psychological Measurement, 31(5), 457–459. https://doi.org/10.1177/0146621607299271</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">Harwell, M., Stone, C. A., Hsu, T. C. &amp; Kirisci, L. (1996). Monte Carlo studies in item response theory. Applied 
Psychological Measurement, 20(2), 101-125. doi: 10.1177/014662169602000201</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">Harwell, M., Stone, C. A., Hsu, T.-C., &amp; Kirisci, L. (1996). Monte Carlo studies in item response theory. Applied Psychological Measurement, 20(2), 101–125. https://doi.org/10.1177/014662169602000201</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">Hendrickson, A. (2007). An NCME instructional module on multistage testing. Educational Measurement Issues and Practice, 26(2), 44–52. https://doi.org/10.1111/j.1745-3992.2007.00093.x</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">Khorramdel, L., Pokropek, A., Joo, S. H., Kirsch, I., &amp; Halderman, L. (2020). Examining gender DIF and gender differences in the PISA 2018 reading literacy scale: A partial invariance approach. Psychological Test and Assessment Modeling, 62(2), 179-231.</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">Kim, H., &amp; Plake, B. (1993). Monte Carlo simulation comparison of two-stage testing and computer adaptive testing. Unpublished doctoral dissertation, University of Nebraska, Lincoln.</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">Kirsch, I., &amp; Lennon, M. L. (2017). PIAAC: a new design for a new era. Large-scale Assessments in Education, 5(1), 1-22.</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">Ling, G., Attali, Y., Finn, B., &amp; Stone, E. A. (2017). Is a computerized adaptive test more motivating than a fixed-item test? Applied Psychological Measurement, 41(7), 495–511. https://doi.org/10.1177/0146621617707556</mixed-citation>
                    </ref>
                                    <ref id="ref34">
                        <label>34</label>
                        <mixed-citation publication-type="journal">Lord, F. M. (1971). A theoretical study of two-stage testing. Psychometrika, 36(3), 227-242. https://doi.org/10.1007/BF02297844</mixed-citation>
                    </ref>
                                    <ref id="ref35">
                        <label>35</label>
                        <mixed-citation publication-type="journal">Luo, X., &amp; Kim, D. (2018). A top‐down approach to designing the computerized adaptive multistage test. Journal of Educational Measurement, 55(2), 243-263.</mixed-citation>
                    </ref>
                                    <ref id="ref36">
                        <label>36</label>
                        <mixed-citation publication-type="journal">Magis, D., Yan, D., &amp; Von Davier, A. A. (2017). Computerized adaptive and multistage testing with R: Using packages catr and mstr. Springer.</mixed-citation>
                    </ref>
                                    <ref id="ref37">
                        <label>37</label>
                        <mixed-citation publication-type="other">Makhorin, A. (2017). GNU Linear Programming Kit. Version 4.61, URL http://www.gnu.org/software/glpk/glpk.html.</mixed-citation>
                        <mixed-citation publication-type="journal">Martin, A. J., &amp; Lazendic, G. (2018). Computer-adaptive testing: Implications for students’ achievement, motivation, engagement, and subjective test experience. Journal of Educational Psychology, 110(1), 27–45. https://doi.org/10.1037/edu0000205</mixed-citation>
                    </ref>
                                    <ref id="ref38">
                        <label>38</label>
                        <mixed-citation publication-type="journal">Mead, A. D. (2006). An introduction to multistage testing. Applied Measurement in Education, 19(3), 185–187. https://doi.org/10.1207/s15324818ame1903_1</mixed-citation>
                    </ref>
                                    <ref id="ref39">
                        <label>39</label>
                        <mixed-citation publication-type="journal">MEB (2021). 2021 Ortaöğretim Kurumlarına İlişkin Merkezi Sınav Raporu. Milli Eğitim Bakanlığı.</mixed-citation>
                    </ref>
                                    <ref id="ref40">
                        <label>40</label>
                        <mixed-citation publication-type="journal">Morris, T. P., White, I. R., &amp; Crowther, M. J. (2019). Using simulation studies to evaluate statistical methods. Statistics in medicine, 38(11), 2074-2102.</mixed-citation>
                    </ref>
                                    <ref id="ref41">
                        <label>41</label>
                        <mixed-citation publication-type="journal">OECD (2023). PISA 2022 Results (Volume I): The State of Learning and Equity in Education, OECD Publishing, Paris, https://doi.org/10.1787/53f23881-en.</mixed-citation>
                    </ref>
                                    <ref id="ref42">
                        <label>42</label>
                        <mixed-citation publication-type="journal">OECD (2024), PISA 2022 Technical Report, PISA, OECD Publishing, Paris, https://doi.org/10.1787/01820d6d-en.</mixed-citation>
                    </ref>
                                    <ref id="ref43">
                        <label>43</label>
                        <mixed-citation publication-type="journal">Ortner, T. M., Weißkopf, E., &amp; Koch, T. (2014). I will probably fail: Higher ability students’ motivational experiences during adaptive achievement testing. European Journal of Psychological Assessment: Official Organ of the European Association of Psychological Assessment, 30(1), 48–56. https://doi.org/10.1027/1015-5759/a000168</mixed-citation>
                    </ref>
                                    <ref id="ref44">
                        <label>44</label>
                        <mixed-citation publication-type="journal">Patsula, L. N., &amp; Hambleton, R. K. (1999). A comparative study of ability estimates obtained from computer-adaptive and multi-stage testing. In annual meeting of the National Council on Measurement in Education, Montreal, Quebec.</mixed-citation>
                    </ref>
                                    <ref id="ref45">
                        <label>45</label>
                        <mixed-citation publication-type="journal">Pine, S. M., Church, A. T., Gialluca, K. A., &amp; Weiss, D. J. (1979). Effects of Computerized Adaptive Testing on Black and White Students. Minnesota Univ Minneapolis Dept Of Psychology.</mixed-citation>
                    </ref>
                                    <ref id="ref46">
                        <label>46</label>
                        <mixed-citation publication-type="journal">Saatçi̇oğlu, F. M., &amp; Atar, H. Y. (2022). Investigation of the effect of parameter estimation and classification accuracy in mixture IRT models under different conditions. International Journal of Assessment Tools in Education, 9(4), 1013–1029. https://doi.org/10.21449/ijate.1164590</mixed-citation>
                    </ref>
                                    <ref id="ref47">
                        <label>47</label>
                        <mixed-citation publication-type="journal">Stark, S., &amp; Chernyshenko, O. S. (2006). Multistage testing: Widely or narrowly applicable?. Applied Measurement in Education, 19(3), 257-260.</mixed-citation>
                    </ref>
                                    <ref id="ref48">
                        <label>48</label>
                        <mixed-citation publication-type="journal">Şahin, A., &amp; Weiss, D. J. (2015). Effects of calibration sample size and item bank size on ability estimation in computerized adaptive testing. Educational Sciences Theory &amp; Practice, 15(6). https://doi.org/10.12738/estp.2015.6.0102</mixed-citation>
                    </ref>
                                    <ref id="ref49">
                        <label>49</label>
                        <mixed-citation publication-type="other">Tay, P. H. (2015). On-the-fly assembled multistage adaptive testing. University of Illinois at Urbana-Champaign.</mixed-citation>
                        <mixed-citation publication-type="journal">Tomashev, M. V., Avdeev, A. S., &amp; Krasnova, M. V. (2018). Adaptive testing as a tool for managing quality of education. Informatics and Education, 9, 27–33. https://doi.org/10.32517/0234-0453-2018-33-9-27-33</mixed-citation>
                    </ref>
                                    <ref id="ref50">
                        <label>50</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J. (2009). Constrained adaptive testing with shadow tests. In Elements of adaptive testing (pp. 31-55). Springer, New York, NY.</mixed-citation>
                    </ref>
                                    <ref id="ref51">
                        <label>51</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J. (2010). Elements of adaptive testing. C. A. Glas (Ed.). New York, NY: Springer.</mixed-citation>
                    </ref>
                                    <ref id="ref52">
                        <label>52</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J. (2018). Optimal test design. Handbook of item response theory: Vol. 3. Applications, 167-195.</mixed-citation>
                    </ref>
                                    <ref id="ref53">
                        <label>53</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J. (2021). Review of the shadow-test approach to adaptive testing. Behaviormetrika, 1-22.</mixed-citation>
                    </ref>
                                    <ref id="ref54">
                        <label>54</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J., &amp; Diao, Q. (2016). Using a universal shadow-test assembler with multistage testing. Computerized multistage testing: Theory and applications, 101-118.</mixed-citation>
                    </ref>
                                    <ref id="ref55">
                        <label>55</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J., &amp; Veldkamp, B. P. (2004). Constraining item exposure in computerized adaptive testing with shadow tests. Journal of Educational and Behavioral Statistics: A Quarterly Publication Sponsored by the American Educational Research Association and the American Statistical Association, 29(3), 273–291. https://doi.org/10.3102/10769986029003273</mixed-citation>
                    </ref>
                                    <ref id="ref56">
                        <label>56</label>
                        <mixed-citation publication-type="journal">van der Linden, W. J., Breithaupt, K., Chuah, S. C., &amp; Zhang, Y. (2007). Detecting differential speededness in multistage testing. Journal of Educational Measurement, 44(2), 117–130. https://doi.org/10.1111/j.1745-3984.2007.00030.x</mixed-citation>
                    </ref>
                                    <ref id="ref57">
                        <label>57</label>
                        <mixed-citation publication-type="journal">Xu, L., Jiang, Z., Han, Y., Liang, H., &amp; Ouyang, J. (2023). Developing computerized Adaptive Testing for a national health professionals exam: An attempt from psychometric simulations. Perspectives on Medical Education, 12(1), 462–471. https://doi.org/10.5334/pme.855</mixed-citation>
                    </ref>
                                    <ref id="ref58">
                        <label>58</label>
                        <mixed-citation publication-type="journal">Yamamoto, K., Shin, H. J., &amp; Khorramdel, L. (2018). Multistage adaptive testing design in international large-scale assessments. Educational Measurement Issues and Practice, 37(4), 16–27. https://doi.org/10.1111/emip.12226</mixed-citation>
                    </ref>
                                    <ref id="ref59">
                        <label>59</label>
                        <mixed-citation publication-type="book">Yan, D., Von Davier, A. A., &amp; Lewis, C. (Eds.). (2016). Computerized multistage testing: Theory and applications. CRC Press.</mixed-citation>
                    </ref>
                                    <ref id="ref60">
                        <label>60</label>
                        <mixed-citation publication-type="journal">Yasuda, J. I., Mae, N., Hull, M. M., &amp; Taniguchi, M. A. (2021). Optimizing the length of computerized adaptive testing for the force concept inventory. Physical review physics education research, 17(1), 1-15.</mixed-citation>
                    </ref>
                                    <ref id="ref61">
                        <label>61</label>
                        <mixed-citation publication-type="journal">Yasuda, J.-I., Mae, N., Hull, M. M., &amp; Taniguchi, M.-A. (2021). Optimizing the length of computerized adaptive testing for the Force Concept Inventory. Physical Review Physics Education Research, 17(1). https://doi.org/10.1103/physrevphyseducres.17.010115</mixed-citation>
                    </ref>
                                    <ref id="ref62">
                        <label>62</label>
                        <mixed-citation publication-type="journal">Yigiter, M. S., &amp; Dogan, N. (2023). Computerized multistage testing: Principles, designs and practices with R. Measurement: Interdisciplinary Research and Perspectives, 21(4), 254–277. https://doi.org/10.1080/15366367.2022.2158017</mixed-citation>
                    </ref>
                                    <ref id="ref63">
                        <label>63</label>
                        <mixed-citation publication-type="journal">Yiğiter, M. S., &amp; Boduroğlu, E. (2024). Item Response Theory assumptions: A comprehensive review of studies with document analysis. International Journal of Educational Studies and Policy, 5(2), 119-138. https://doi.org/10.5281/ZENODO.14016086</mixed-citation>
                    </ref>
                                    <ref id="ref64">
                        <label>64</label>
                        <mixed-citation publication-type="journal">Yiğiter, M. S., &amp; Doğan, N. (2023). The effect of test design on misrouting in computerized multistage testing. International Journal of Turkish Education Sciences, 2023(21), 549–587. https://doi.org/10.46778/goputeb.1267319</mixed-citation>
                        <mixed-citation publication-type="thesis">Zheng, W. (2016). Making test batteries adaptive by using multistage testing techniques (Doctoral dissertation, University of North Carolina, Greensboro, NC).</mixed-citation>
                    </ref>
                                    <ref id="ref65">
                        <label>65</label>
                        <mixed-citation publication-type="journal">Zheng, Y., &amp; Chang, H.-H. (2015). On-the-fly assembled multistage adaptive testing. Applied Psychological Measurement, 39(2), 104–118. https://doi.org/10.1177/0146621614544519</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
