% Sinap (2025): journal article on data balancing and feature selection for
% diabetes detection. Keywords are in Turkish; title and abstract in English.
@article{article_1556260,
  author    = {Sinap, Vahid},
  title     = {The Impact of Balancing Techniques and Feature Selection on Machine Learning Models for Diabetes Detection},
  journal   = {Fırat Üniversitesi Mühendislik Bilimleri Dergisi},
  year      = {2025},
  volume    = {37},
  number    = {1},
  pages     = {303--320},
  publisher = {Fırat Üniversitesi},
  doi       = {10.35234/fumbd.1556260},
  keywords  = {Diyabet tespiti, veri dengeleme teknikleri, dengesiz veri setleri, tahmine dayalı modelleme, sağlık bilişimi},
  abstract  = {The detection of diabetes is crucial for effective management and prevention of the disease, which poses significant health risks globally. This study introduces a novel approach to diabetes detection by combining advanced data balancing techniques and feature selection methods, including Lasso (L1) regularization, to enhance the performance of predictive models in imbalanced datasets. Techniques such as Random Under Sampling (RUS), Adaptive Synthetic Sampling (ADASYN), and Synthetic Minority Over-sampling Technique (SMOTE) were employed alongside models including Random Forest (RF), CatBoost (CB), Extreme Gradient Boosting (XGB), K-Nearest Neighbors (KNN), Gaussian Naive Bayes (GNB), Logistic Regression (LR), and Gradient Boosting (GB) to assess their impact on model accuracy and generalization capabilities. The findings reveal that the RF model achieved the highest accuracy of 93.25\% when utilizing the SMOTE technique, underscoring the importance of appropriate data handling strategies in improving predictive outcomes. Furthermore, when all features were utilized without selection, the RF model attained an accuracy of 95.31\%, indicating the model’s capacity to capture complex patterns when feature richness is maximized. The comprehensive methodology used in the study achieved a higher accuracy in diabetes detection than research in the literature and provided important outputs for developing reliable prediction models in healthcare.},
}