<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <journal-meta>
                                                                <journal-id>veri bilim derg</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Veri Bilimi</journal-title>
            </journal-title-group>
                                        <issn pub-type="epub">2667-582X</issn>
                                                                                            <publisher>
                    <publisher-name>Murat GÖK</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id/>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Engineering</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Mühendislik</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                        <article-title>Ses Özniteliklerini Kullanan Ses Duygu Durum Sınıflandırma İçin Derin Öğrenme Tabanlı Bir Yazılımsal Araç</article-title>
                                                                                                                                                                                                <trans-title-group xml:lang="en">
                                    <trans-title>A Deep Learning based Software Tool for Audio Emotional State Classification using Audio Features</trans-title>
                                </trans-title-group>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                <name>
                                    <surname>Kıvrak</surname>
                                    <given-names>Emir Ali</given-names>
                                </name>
                                                                    <aff>ÇANAKKALE ONSEKİZ MART ÜNİVERSİTESİ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-8524-874X</contrib-id>
                                                                <name>
                                    <surname>Karasulu</surname>
                                    <given-names>Bahadir</given-names>
                                </name>
                                                                    <aff>ÇANAKKALE ONSEKİZ MART ÜNİVERSİTESİ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                <name>
                                    <surname>Sözbir</surname>
                                    <given-names>Can</given-names>
                                </name>
                                                                    <aff>ÇANAKKALE ONSEKİZ MART ÜNİVERSİTESİ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                <name>
                                    <surname>Türkay</surname>
                                    <given-names>Atakan</given-names>
                                </name>
                                                                    <aff>ÇANAKKALE ONSEKİZ MART ÜNİVERSİTESİ</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="2021-12-30">
                    <day>30</day>
                    <month>12</month>
                    <year>2021</year>
                </pub-date>
                                        <volume>4</volume>
                                        <issue>3</issue>
                                        <fpage>14</fpage>
                                        <lpage>27</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="2021-07-06">
                        <day>06</day>
                        <month>07</month>
                        <year>2021</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="2021-10-17">
                        <day>17</day>
                        <month>10</month>
                        <year>2021</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 2018, Veri Bilimi</copyright-statement>
                    <copyright-year>2018</copyright-year>
                    <copyright-holder>Veri Bilimi</copyright-holder>
                </permissions>
            
                                                                                                <abstract><p>Ses duygu durum analizi için kullanıcı grafik arayüzü yardımıyla ses verilerini kullanarak ses duygu durumları herhangi bir kaynak kodu satırı yazmadan sınıflandıran derin öğrenme mimari modellerini oluşturan bir yazılımsal araç çalışmamızda tasarlanmıştır. Veri kümelerinin elde edilmesi, ses verilerine yönelik ses özniteliklerinin elde edilmesi, mimarinin oluşturulması ve derin öğrenme modelinin istenilen sinir ağı katmanları ve üstün parametreler ile modelin eğitilmesi sağlanmıştır. Model eğitilirken, eğitim değerlerinin gerçek zamanlı izlenmesi yazılımsal araç ile yapılabilmektedir. Çalışma boyunca, ilgili adımlar hem salt kaynak kodu düzenleme hem de yazılımsal araç kullanılarak gerçekleştirilmiştir. Kod düzenleme tabanlı melez model, mimarisinde uzun kısa süreli bellek ve evrişimli sinir ağları kullanılarak oluşturulmuş, %81,49 doğruluk oranına ulaşmıştır. Ayrıca, herhangi bir kodlama müdahalesi olmaksızın grafik yazılımsal araç tabanlı tekil model, mimarisinde evrişimli sinir ağı ile oluşturulmuştur. Böylece %75,76 doğruluk oranına ulaşmıştır. Yazılımsal aracın geliştirilmesindeki ana motivasyon, farklı ses duygu durumları sınıflandırmak için kullanılabilecek potansiyel bir derin öğrenme mimari modeli oluşturmaktır. Deneysel sonuçlar, yazılımsal aracın yüksek doğrulukla sınıflandırmayı oldukça başarılı bir şekilde gerçekleştirdiğini kanıtlamaktadır. Elde edilen sonuçlara dair tartışmaya da çalışmamızda yer verilmiştir.</p></abstract>
                                                                                                                                    <trans-abstract xml:lang="en">
                            <p>For audio emotional state analysis, a software tool was designed in our study that build deep learning architectural models that classify audio emotional states using audio data with the help of the user graphical interface without writing any line of source codes. Obtaining the desired data sets and audio features for audio data, creating the architecture and training the model with the desired neural network layers and hyperparameters of deep learning model were provided. While the model is being trained, real-time monitoring of training values can be performed over the software tool. Throughout the study, the relevant steps were carried out using both pure source code editing and software tool. The code editing based hybrid model built with long short-term memory and convolutional neural networks in its architecture that achieved an accuracy rate of 81.49%. In addition, the graphical software tool based standalone model without any coding intervention was built with convolutional neural network in its architecture. Thence, it achieved 75.76% accuracy rate. The main motivation in the development of software tool is to build a potential deep learning architectural model that can be used to classify different audio emotional states. Experimental results prove that the software tool performs classification with high accuracy quite successfully. The discussion on the results obtained is included in our study.</p></trans-abstract>
                                                            
            
                                                            <kwd-group>
                                                    <kwd>Ses duygu analizi</kwd>
                                                    <kwd>duygu durum</kwd>
                                                    <kwd>derin öğrenme</kwd>
                                                    <kwd>yazılımsal araç</kwd>
                                            </kwd-group>
                                                        
                                                                            <kwd-group xml:lang="en">
                                                    <kwd>Audio emotion analysis</kwd>
                                                    <kwd>emotional state</kwd>
                                                    <kwd>deep learning</kwd>
                                                    <kwd>software tool</kwd>
                                            </kwd-group>
                                                                                                            </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Liu B. Sentiment Analysis and Opinion Mining. California, USA, Morgan &amp; Claypool Publishers, 2012.</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Neri F, Aliprandi C. Capeci F, Cuadros M, By T. “Sentiment Analysis on Social Media”. IEEE/ACM 2012 International Conference on Advances in Social Networks Analysis and Mining, 919-926, 2012.</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Agarwal B, Mittal N. “Machine Learning Approach for Sentiment Analysis”. Prominent feature extraction for sentiment analysis. Springer, Cham, 21-45 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Aldeneh Z, Provost EM. “Using Regional Saliency for Speech Emotion Recognition”. IEEE Int&#039;l Conference Acoustics Speech and Signal Processing (ICASSP),  2741-2745, 2017.</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Seehapoch T, Wongthanavasu S. “Speech Emotion Recognition Using Support Vector Machines”. International 5th Conference on Knowledge and Smart Technology (KST), 86-91, 2013.</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Schuller B., Rigoll G, Lang M. &quot;Hidden Markov Model-Based Speech Emotion Recognition&quot;. IEEE 2th International Conference on Acoustics, Speech, and Signal Processing, II-1, 2013.</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Lee CC, Mower E, Busso C, Lee S, Narayanan S. “Emotion Recognition Using a Hierarchical Binary Decision Tree Approach”. Speech Communication 55(9-10), 1162-1171, 2011.</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Bertero D, Fung P. “First Look Into a Convolutional Neural Network For Speech Emotion Detection” Acoustics Speech and Signal Processing (ICASSP) 2017 IEEE Intl. Conference, 5115-5119, 2017.</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">Badshah AM, Jamil A, Rahim N, Baik SW. “Speech Emotion Recognition From Spectrograms With Deep Convolutional Neural Network”. IEEE Int&#039;l Conference On Platform Technology And Service (Platcon), 1-5, 2017.</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Yoon S, Byun S, Jung K. “Multimodal Speech Emotion Recognition Using Audio and Text”. IEEE Spoken Language Technology Workshop (SLT), 112-118, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">Livingstone SR, Russo FA. “The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS)  A dynamic, multimodal set of facial and vocal expressions in North American English”. PLoS ONE, 13(5), e0196391, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Cao H, Cooper DG, Keutmann MK, Gur RC, Nenkova A, Verma R. “CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset”. IEEE Transactions on Affective Computing, 5(4), 377-390, 2014.</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Burkhardt F, Paescheke A, Rolfes M, Sendlmeier F, Weiss B.  “A database of German emotional speech”.  9th European Conference on Speech Communication and Technology, 2005.</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Haq S, Jackson PJB. “Speaker-Dependent Audio-Visual Emotion Recognition (SAVEE)”. AVSP, 53-58, 2009.</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Google LLC. “Google Teachable Machine” https://teachablemachine.withgoogle.com/ (18.04.2021).</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Dey N, Borra S, Ashour AS, Shi F. “Medical Images Analysis Based on Multilabel Classification”. Machine Learning in Bio-Signal Analysis and Diagnostic Imaging. Academic Press, Chap. 9, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Github. “Github Keras Repository”. https://github.com/fchollet/keras</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Sundermeyer M, Schlüter R, Ney H.  “LSTM neural networks for language modeling”. 13th Annual Conference Of The International Speech Communication Association, 2012.</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R. “Dropout: A Simple Way to Prevent Neural Networks from Overfitting”. The Journal of Machine Learning Research, 15(1), 1929-1958, 2014.</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Ioffe S, Szegedy C. “Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift”. International conference on machine learning. PMLR, 448-456, 2015.</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Salimans T. Kingma DP. “Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks”. arXiv preprint arXiv1602.07868, 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Le X, Wang Y, Jo J. “Combining Deep and Handcrafted Image Features for Vehicle Classification in Drone Imagery”. Digital Image Computing: Techniques and Applications (DICTA), IEEE, 1-6, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Rossum GV. Python Reference Manual. Amsterdam, Netherlands, Centrum voor Wiskunde en Informatica, 1995.</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">McFee B, Raffel C, Liang D, Ellis DPW, McVicar M, Battenberg E, Nieto O. “Librosa: Audio and Music Signal Analysis in Python”. Proceedings Of The 14th Python in Science Conference, 18-25, 2015.</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">Grinberg M. Flask Web Development: Developing Web Applications with Python. O’Reilly Media INC., California, USA, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">The Pallets Projects. “Werkzeug The Python WSGI Utility Library”. https://werkzeug.palletsprojects.com (18.04.2021).</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">The Pallets Projects. “Click”. www.palletsprojects.com/p/click (18.04.2021).</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">The Pallets Projects. “Jinja”. www.palletsprojects.com/p/jinja (18.04.2021).</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">Allen G, Owens M. The Definitive Guide to SQLite. Apress LP, New York, USA, 2010.</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">Copeland R. Essential SQLAlchemy. O’Reilly Media INC., California, USA, 2008.</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">Abadi M, Barham P, Chen J, Chen Z, Davis A, Dean J, Devin M, Ghemawat S, Irving G, Isard M, Kudlur M, Levenberg J, Monga R, Moore S, Murray DG, Steiner B, Tucker P, Vasudevan V, Warden P, Wicke M, Zheng X, Google Brain. “TensorFlow: A System for Large-Scale Machine Learning”. 12th Symp. On Operating Systems Design And Implementation, 2016</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">Start Bootstrap, “SB Admin No 2”. https://startbootstrap.com/theme/sb-admin-2 (18.04.2021).</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">Bewick V, Liz C, Ball J. “Statistics review 13: Receiver operating characteristic curves”. Critical Care,  8(6) , 1-5, 2004.</mixed-citation>
                    </ref>
                                    <ref id="ref34">
                        <label>34</label>
                        <mixed-citation publication-type="journal">TowardsDataScience, &quot;Confusion Matrix for Your Multi-Class Machine Learning Model&quot;. https://towardsdatascience.com/confusion-matrix-for-your-multi-class-machine-learning-model-ff9aa3bf7826 (29.08.2021).</mixed-citation>
                    </ref>
                                    <ref id="ref35">
                        <label>35</label>
                        <mixed-citation publication-type="journal">Ses duygu durum analizi yazılımsal araç (DepSemo).https://github.com/CanakkaleDevelopers/audio-sentiment-analysis-deep-learning-tool (29.08.2021).</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
