<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <journal-meta>
                                                                <journal-id>gujgef</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Gazi Eğitim Fakültesi Dergisi</journal-title>
            </journal-title-group>
                            <issn pub-type="ppub">1301-9058</issn>
                                        <issn pub-type="epub">3108-5342</issn>
                                                                                            <publisher>
                    <publisher-name>Gazi University</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.17152/gefad.1202751</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Measurement Theories and Applications in Education and Psychology</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Eğitimde ve Psikolojide Ölçme Teorileri ve Uygulamaları</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                    <article-title>An Investigation of Item Response Theory Parameter Estimations and Reliability in Multiple Groups</article-title>
                                <trans-title-group xml:lang="tr">
                                    <trans-title>Çoklu Gruplarda Madde Tepki Kuramı Parametre Kestirimi ve Güvenirliğinin İncelenmesi</trans-title>
                                </trans-title-group>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4335-2949</contrib-id>
                                                                <name>
                                    <surname>Büyükkıdık</surname>
                                    <given-names>Serap</given-names>
                                </name>
                                                                    <aff>SINOP UNIVERSITY</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-2813-0873</contrib-id>
                                                                <name>
                                    <surname>İnal</surname>
                                    <given-names>Hatice</given-names>
                                </name>
                                                                    <aff>MEHMET AKIF ERSOY UNIVERSITY</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="20230902">
                    <day>02</day>
                    <month>09</month>
                    <year>2023</year>
                </pub-date>
                                        <volume>43</volume>
                                        <issue>2</issue>
                                        <fpage>825</fpage>
                                        <lpage>855</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="20221111">
                        <day>11</day>
                        <month>11</month>
                        <year>2022</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="20230529">
                        <day>29</day>
                        <month>05</month>
                        <year>2023</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 1985, Journal of Gazi Faculty of Education</copyright-statement>
                    <copyright-year>1985</copyright-year>
                    <copyright-holder>Journal of Gazi Faculty of Education</copyright-holder>
                </permissions>
            
                                                                                                <trans-abstract xml:lang="tr">
                            <p>Bu çalışmada, aynı evrendeki çoklu gruplardan elde edilen ikili verilerde madde tepki kuramı (MTK) parametre kestirimi ve güvenirliğinin incelenmesi amaçlanmıştır. Araştırma kapsamında TEOG 2017 (Nisan) matematik alt testi kullanılmıştır. Araştırma 7500 kişilik bir alt grupta ve 3750 kişilik iki alt grupta yer alan öğrencilerin verileri ile gerçekleştirilmiştir. Araştırmada öncelikle MTK varsayımları incelenmiştir. Varsayımlar sağlandıktan sonra, ikili puanlanan veriler için 1PLM, 2PLM, 3PLM ve 4PLM ile madde ve yetenek kestirimleri gerçekleştirilmiştir. Model veri uyumları incelendiğinde her koşulda en iyi uyumun 3PLM ile elde edildiği görülmüştür. Örneklem değiştikçe madde parametrelerinin önemli ölçüde farklılaşmadığı gözlemlenmiştir. a ve b parametrelerinin farklı MTK modellerine göre farklılık gösterdiği bulgusuna ulaşılmıştır. Yetenek parametreleri arasında örneklemler değiştikçe kısmi farklılık bulunurken, kullanılan modeller değiştikçe de farklılık olduğu bulunmuştur. Yetenek kestirim yöntemlerine (Beklenen A Posteriori (EAP) ve Maksimum A Posteriori (MAP)) göre elde edilen yetenek parametreleri arasında bazı küçük farklılıkların olduğu görülmüştür. Marjinal güvenilirlik katsayıları tüm koşullarda benzerlik göstermiştir. Bu çalışmadan yola çıkarak, MTK&#039;de analiz yaparken daha fazla bilgi sağlamak için araştırmacıların 3PLM veya 4PLM&#039;den en iyi model veri uyumuna sahip olan modelle parametre kestirimi yapmaları önerilir.</p></trans-abstract>
                                                                                                                                    <abstract><p>This study aimed to investigate item response theory (IRT) parameter estimation and its reliability in the context of binary data across multiple groups derived from the same population. Within the scope of the research, the 2017 (April) mathematics subtest of the Transition from Primary to Secondary Education exam (TPSEE) was used. The dataset encompassed 7500 students as a single-sample subgroup and 3750 students distributed across two subgroups. In the research, IRT assumptions were examined first. After meeting the assumptions, item and ability estimations were performed with 1PLM, 2PLM, 3PLM, and 4PLM for dichotomous data. When the model data fits were examined, it was found that the best fit was obtained with 3PLM in all conditions. It was observed that the item parameters did not differ significantly as the sample changed. The a and b parameters differ according to the different IRT models. While there is a partial difference between the ability parameters as the samples change, there are also differences as the models change. Minor differences have been observed among the ability parameters obtained through ability estimation methods (Expected A Posteriori (EAP) and Maximum A Posteriori (MAP)). The marginal reliability coefficients were similar in all conditions. It is recommended that researchers perform parameter estimation with whichever of 3PLM or 4PLM has the best model data fit to provide more information while performing analysis in IRT.</p></abstract>
                                                            
            
                                                                                        <kwd-group>
                                                    <kwd>IRT</kwd>
                                                    <kwd>Transition from Primary to Secondary Education</kwd>
                                                    <kwd>Multi-group</kwd>
                                                    <kwd>parameter estimation</kwd>
                                            </kwd-group>
                            
                                                <kwd-group xml:lang="tr">
                                                    <kwd>MTK</kwd>
                                                    <kwd>Temel eğitimden ortaöğretime geçiş</kwd>
                                                    <kwd>Çoklu-grup</kwd>
                                                    <kwd>parametre kestirimi</kwd>
                                            </kwd-group>
                                                                                                                                        </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Acar, T., &amp; Kelecioğlu, H. (2008). Genelleştirilmiş aşamalı doğrusal model ile Rasch modelinin parametre değişmezliğinin karşılaştırılması. 1st National Congress of Measurement and Evaluation in Education and Psychology, 14-16 Mayıs, Ankara, 181-193.</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Adedoyin, O. O. (2010). Investigating the invariance of person parameter estimates based on classical test and item response theories. International Journal of Education Science, 2(2), 107-113. https://doi.org/10.1080/09751122.2010.11889987</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Adedoyin, O. O., Nenty, H. J., &amp; Chilasa, B. (2008). Investigating the invariance of item difficulty parameter estimates based on CTT and IRT. Educational Research and Review, 3(2), 83-93. https://doi.org/10.5897/ERR.9000209</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Baker, F. B. (2001). The basics of item response theory. United States of America: ERIC Clearinghouse on Assessment and Evaluation.</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Barton, M. A., &amp; Lord, F. M. (1981). An upper asymptote for the three-parameter logistic item response model. Research Bulletin, 81-20.</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Birnbaum, A. (1968). Some latent trait models and their use in inferring an examinee’s ability. In F.M. Lord &amp; M.R. Novick, Statistical theories of mental test scores (pp. 392-479). Reading, MA: Addison-Wesley.</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Cohen, J. (1988). Statistical Power Analysis for the Behavioral Sciences. 2nd edition. New York: Academic Press.</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Custer, M., Sharairi, S., Yamazaki, K., Signatur, D., Swift, D., &amp; Frey, S. (2008). A paradox between IRT invariance and model-data fit when utilizing the one-parameter and three-parameter models. Annual Meeting of the American Educational Research Association, 24-28 March, New York, 70-71.</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">DeMars, C. (2010). Item response theory. New York: Oxford University Press.</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Doğan, N., &amp; Kılıç, A. F. (2017). Madde tepki kuramı yetenek ve madde parametreleri kestirimlerinin değişmezliğinin incelenmesi. ss 297-314. Demirel, Ö., Dinçer, S., ed. Küreselleşen Dünyada Eğitim, Pegem Akademi, Ankara.</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">Doğan, N., &amp; Tezbaşaran, A. A. (2003). Klasik test kuramı ve örtük özellikler kuramının örneklemler bağlamında karşılaştırılması. Hacettepe Üniversitesi Eğitim Fakültesi Dergisi, 25(25), 58-67.</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Doğruöz, E., &amp; Arıkan, Ç. A. (2020). Comparison of different ability estimation methods based on 3 and 4PL item response theory. PAU Journal of Education 50, 50-69. https://doi.org/10.9779/pauefd.585774</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Edelen, M. O., &amp; Reeve, B. B. (2007). Applying item response theory (IRT) modeling to questionnaire development, evaluation, and refinement. Quality of Life Research, 16(1), 5-18. https://doi.org/10.1007/s11136-007-9198-0</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Embretson, S. E., &amp; Reise, S. P. (2000). Item Response Theory for Psychologists. New Jersey: Lawrence Erlbaum.</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Erdemir, A., &amp; Önen, E. (2019). Bir, iki, üç ve dört parametreli lojistik madde tepki kuramı modellerinin karşılaştırılması [Comparison of 1PL, 2PL, 3PL and 4PL item response theory models]. e-Turkish Studies, 14(1), 307-332. https://doi.org/10.7827/TurkishStudies.14745</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Fan, X. (1998). Item response theory and classical test theory: an empirical comparison of their item/person statistics. Educational and Psychological Measurement, 58(3), 357–381. https://doi.org/10.1177/0013164498058003001</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Fan, X., &amp; Ping, Y. (1999). Assessing the effect of model-data misfit on the invariance. Journal of Mathematical and Statistical Psychology, 42, 139-167.</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Feuerstahler, L. M., &amp; Waller, N. G. (2014). Estimation of the 4-parameter model with marginal maximum likelihood. Multivariate Behavioral Research, 49(3), 285-285. https://doi.org/10.1080/00273171.2014.912889</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Hambleton, R. K., &amp; Swaminathan, H. (1985). Item response theory: Principles and applications. Boston: Kluwer Nijhoff.</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Hambleton, R. K., Swaminathan, H., &amp; Rogers, H. J. (1991). Fundamentals of item response theory. London: Sage.</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Han, K. T., &amp; Hambleton, R. K. (2014). User&#039;s manual for WINGEN 3: windows software that generates IRT model parameters and item responses (Center for Educational Assessment Report No. 642). Amherst, MA: University of Massachusetts.</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Kalkan, Ö. K. (2022). The comparison of estimation methods for the four-parameter logistic item response theory model. Measurement: Interdisciplinary Research and Perspectives, 20(2), 73-90. https://doi.org/10.1080/15366367.2021.1897398</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Kaplan, R. M., &amp; Saccuzzo, D. P. (1997). Psychological testing: principles, applications and issues. Pacific Grove: Brooks Cole Pub. Company.</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">Kean, J., &amp; Reilly, J. (2014). Item response theory. Handbook for clinical research: Design, statistics and implementation, 195-198.</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">Kelkar, V., Wightman, L.F., &amp; Luecht, R.M. (2000). Evaluation of the IRT parameter Invariance property for the MCAT. Annual Meeting of the National Council on Measurement in Education, 25-27 April, New Orleans.</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">Kolen, M. J., &amp; Brennan, R. L. (2014). Test equating, scaling, and linking. (third edition). USA: Springer.</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">Lembke, E. &amp; Stecker, P. (2007). Curriculum-based measurement in mathematics: an evidence-based formative assessment procedure. Portsmouth, NH: RMC Research Corporation, Center on Instruction.</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">Liao, W., Ho, R., &amp; Yen, Y. (2012). The four-parameter logistic item response theory model as a robust method of estimating ability despite aberrant responses. Social Behavior and Personality, 40(10), 1679–1694. https://doi.org/10.2224/sbp.2012.40.10.1679</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">Loken, E., &amp; Rulison, K. L. (2010). Estimation of a four-parameter item response theory model. The British Journal of Mathematical and Statistical Psychology, 63(3), 509–25. https://doi.org/10.1348/000711009X474502</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">Lord, F. M. (1952). A theory of test scores (Psychometric Monograph No. 7). Iowa City, IA: Psychometric Society, 35.</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">Magis, D. (2013). A note on the item information function of the four-parameter logistic model. Applied Psychological Measurement, 37(4), 304-315. https://doi.org/10.1177/0146621613475471</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">R Core Team. (2021). R: a language and environment for statistical computing. Vienna: R Foundation for Statistical Computing.</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">Reise, S. P., &amp; Waller, N. G. (2003). How many IRT parameters does it take to model psychopathology items? Psychological Methods, 8(2), 164–184. https://doi.org/10.1037/1082-989X.8.2.164</mixed-citation>
                    </ref>
                                    <ref id="ref34">
                        <label>34</label>
                        <mixed-citation publication-type="journal">Robitzsch, A. (2021). sirt: Supplementary item response theory models. R package version 3.11-21, https://cran.r-project.org/web/packages/sirt/sirt.pdf</mixed-citation>
                    </ref>
                                    <ref id="ref35">
                        <label>35</label>
                        <mixed-citation publication-type="journal">Rulison, K. L., &amp; Loken, E. (2009). I’ve fallen and I can’t get up: can high-ability students recover from early mistakes in CAT? Applied Psychological Measurement, 33(2), 83–101. https://doi.org/10.1177/0146621608324023</mixed-citation>
                    </ref>
                                    <ref id="ref36">
                        <label>36</label>
                        <mixed-citation publication-type="journal">Rupp, A. A. (2003). Item response modeling with BILOG-MG and MULTILOG for windows. International Journal of Testing, 3(4), 365–384. https://doi.org/10.1207/S15327574IJT0304_5</mixed-citation>
                    </ref>
                                    <ref id="ref37">
                        <label>37</label>
                        <mixed-citation publication-type="journal">Sünbül, Ö., &amp; Erkuş, A. (2013). Madde parametrelerinin değişmezliğinin çeşitli boyutluluk özelliği gösteren yapılarda madde tepki kuramına göre incelenmesi. Mersin Üniversitesi Eğitim Fakültesi Dergisi, 9(2), 378-398.</mixed-citation>
                    </ref>
                                    <ref id="ref38">
                        <label>38</label>
                        <mixed-citation publication-type="journal">U. S. Department of Education (2001). The elementary and secondary education act (The No Child Left Behind Act of 2001). Retrieved September 3, 2019, from http://www.ed.gov/legislation/ESEA02</mixed-citation>
                    </ref>
                                    <ref id="ref39">
                        <label>39</label>
                        <mixed-citation publication-type="journal">Waller, N. G., &amp; Reise, S. P. (2010). Measuring psychopathology with nonstandard item response theory models: fitting the four-parameter model to the Minnesota Multiphasic Personality Inventory. In S. E. Embretson (Ed.), Measuring psychological constructs: Advances in model-based approaches (pp. 147-173). Washington, DC, US: American Psychological Association. http://dx.doi.org/10.1037/12074-007</mixed-citation>
                    </ref>
                                    <ref id="ref40">
                        <label>40</label>
                        <mixed-citation publication-type="journal">Wu, M., Tam, H. P., &amp; Jen, T. H. (2016). Classical test theory. In Educational measurement for applied researchers (pp. 73-90). Springer, Singapore.</mixed-citation>
                    </ref>
                                    <ref id="ref41">
                        <label>41</label>
                        <mixed-citation publication-type="journal">Yalçın, S. (2018). Data fit comparison of mixture item response theory models and traditional models. International Journal of Assessment Tools in Education, 5(2), 301-313. https://doi.org/10.21449/ijate.402806</mixed-citation>
                    </ref>
                                    <ref id="ref42">
                        <label>42</label>
                        <mixed-citation publication-type="journal">Yen, Y., Ho, R., Liao, W., &amp; Chen, L. (2012). Reducing the impact of inappropriate items on reviewable computerized adaptive testing. Educational Technology &amp; Society, 15, 231–243.</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
