@article{article_1626239, title={Investigating the Robustness of Text Mining Classification Algorithms: A Study of Algorithm and Expert Performance on Class-Inconsistent Data}, journal={Sakarya University Journal of Computer and Information Sciences}, volume={8}, pages={422–440}, year={2025}, DOI={10.35377/saucis...1626239}, url={https://izlik.org/JA26BH75JH}, author={Ataseven, Hüseyin and Çokluk-bökeoglu, Ömay}, keywords={Text mining, Document classification, Class-inconsistent data, Robustness of classification algorithms}, abstract={This study compares the classification accuracy of text mining algorithms for foreign language proficiency exam items. The dataset included 2,868 items from ÜDS English tests (2006–2012) across Natural and Applied Sciences (n=956), Health Sciences (n=956), and Social Sciences (n=956). Algorithms tested were k-Nearest Neighbors (kNN), Naïve Bayes (NB), Naïve Bayes-Kernel (NB-K), Random Forest (RF), and Support Vector Machines (SVM). Binary classification accuracies ranged from 83.08% (NB) to 92.48% (SVM), while multiclass accuracies ranged from 71.93% (NB) to 84.96% (kNN). Expert analysis and cross-validation identified class-inconsistent items that negatively affected accuracy. Removing these items improved binary classification by 7.39%–9.83% and multiclass classification by 10.58%–17.89%. Among algorithms, kNN was least impacted by class-inconsistent data. These findings highlight the importance of addressing inconsistencies for improving algorithmic performance, with kNN showing robust results across scenarios.}, number={3}