% Journal article record (DOI 10.35414/akufemubid.1565447).
% Percent signs in the abstract are written as \% so the field stays safe if it is passed through LaTeX.
@article{article_1565447,
  author    = {Altinay, Emsal and Küçüksille, Ecir Uğur},
  title     = {A Comparative Analysis of Traditional and Deep Learning Approaches for Adressing Challenges in Speaker Diarization},
  journal   = {Afyon Kocatepe Üniversitesi Fen Ve Mühendislik Bilimleri Dergisi},
  volume    = {25},
  number    = {5},
  pages     = {1095--1105},
  year      = {2025},
  doi       = {10.35414/akufemubid.1565447},
  keywords  = {Konuşmacı Diyarizasyonu, Geleneksel Kümeleme Algoritması, Derin Öğrenme, Örtüşen Konuşma, Hesaplama Karmaşıklığı},
  abstract  = {Speaker diarization is the task of distinguishing and segmenting speech from multiple speakers in an audio recording, a crucial task for various applications such as meeting transcription, voice activated systems, and audio indexing. Traditional clustering-based methods have been widely used, but they struggle with challenges in real-world scenarios, including noisy environments, overlapping speech, speaker variability and variable recording conditions. This study addresses these limitations by focusing on deep learning-based approaches, which have demonstrated significant advancements in improving the accuracy of multi-speaker diarization. The aim of this study is to compare traditional clustering methods with new deep learning techniques, including Time Delay Neural Networks (TDNN), End-to-End Neural Diarization (EEND), and the Fully Supervised UIS-RNN, to solve the challenges of multi-speaker diarization. The results show that on the CallHome dataset, TDNN systems indicated slight improvements in non-overlapping speech, with a Diarization Error Rate (DER) of 12-14\%, in comparison to 13-15\% for traditional clustering methods. However, in overlapping speech, EEND outperformed traditional methods, achieving a DER of 12.6\%, which was significantly lower than the 23.7\% observed with traditonal clustering. The Fully Supervised UIS-RNN model delivered the best overall performance, achieving a DER of 7.6\%.
               Future research should focus on integrating the strengths of traditional and deep learning techniques while reducing the computational and data requirements for more accessible, real-time speaker diarization systems. The findings indicated that deep learning will make a substantial contribution to the field of speaker diarisation.},
  publisher = {Afyon Kocatepe Üniversitesi}