% Journal article comparing six clustering methods on mixed-type (numeric and
% categorical) hypothetical student scholarship data. Values are kept in
% canonical form: page range written with "--", bare DOI, and literal percent
% signs in the abstract written as "\%" so a style that typesets the abstract
% does not read them as TeX comment markers.
@article{article_1674501,
  author    = {Ataseven, Hüseyin and Çokluk Bökeoğlu, Ömay and Taşdemir, Fazilet},
  title     = {Comparison of Clustering Methods for Mixed Data: A Case Study on Hypothetical Student Scholarship Data},
  journal   = {Educational Academic Research},
  number    = {59},
  pages     = {1--14},
  year      = {2025},
  publisher = {Ataturk University},
  doi       = {10.33418/education.1674501},
  keywords  = {Clustering mixed data, K-Means, K-Prototypes, Latent Class Analysis, Factor Analysis with Mixed Data},
  abstract  = {Clustering is a widely used technique for uncovering patterns and grouping individuals within complex datasets, particularly in fields like education where both academic and contextual variables are essential.
               This study aims to introduce the basics and explore the performance of six clustering methods in classifying students into scholarship eligibility groups using a hypothetical student scholarship dataset generated in R software.
               The dataset consists of two numerical variables (GPA and Scholarship Exam Result) and four categorical variables (Financial Need, Number of Parents Employed, Employment Status, and Accommodation), reflecting typical criteria in educational funding decisions.
               Students were labeled as Primary, Secondary, or Rejected Candidates, and the clustering methods—K-Means, K-Modes, K-Prototypes, Partitioning Around Medoids (PAM), Latent Class Analysis (LCA), and Factor Analysis for Mixed Data (FAMD) followed by K-Means—were assessed based on how accurately they reproduced these labels.
               Results indicate that hybrid approaches, particularly K-Prototypes (95.6\%) and PAM (92.5\%), achieved the highest accuracy.
               FAMD + K-Means (93.9\%) offered a robust alternative through dimensionality reduction while LCA produced an 85.9\% accuracy.
               The findings highlight the value of categorical variables in clustering applications, and it also demonstrates the importance of selecting suitable clustering techniques for mixed-type educational data, especially in high-stakes contexts such as scholarship selection.},
}