% Journal article entry. Keywords are kept in the original Turkish; file is UTF-8.
@article{article_1730221,
  author    = {Kasap, Yusuf and Köroğlu, Mustafa},
  title     = {The Effect of the Weight Variable on Predicting Reading Comprehension Achievement in {PISA} 2018: A Data Mining Approach},
  journal   = {OPUS Journal of Society Research},
  year      = {2025},
  volume    = {22},
  number    = {5},
  pages     = {1146--1159},
  publisher = {İdeal Kent Yayınları},
  doi       = {10.26466/opusjsr.1730221},
  keywords  = {Sınıflama, Örneklem ağırlığı, Veri madenciliği},
  abstract  = {This study investigates how student-level sample weights affect model performance in predicting achievement scores. The analyses employed Classification and Regression Tree (CART) and Random Forest (RF) methods with 34 independent variables from the 2018 PISA student survey. Since no prior data mining studies in Turkey have considered sample weights, this research provides an original contribution to the field. According to the findings, when sample weights were used, only one of the ten significant variables identified by the CART method differed, while the order of variable importance also shifted. In the models created with the RF method, only five variables remained common, and the others differed. When sample weights were included in both methods, a slight, statistically non-significant decrease was observed in the prediction performance of the models. These results indicate that sample weights are effective in variable selection but do not significantly affect overall model accuracy. Overall, the findings highlight the necessity of incorporating sample weights to ensure valid and reliable results in large-scale educational data mining.},
}