% Journal article by Aksoy, Uğuz and Oral (2019) comparing string similarity
% algorithms for matching hotel records. Field values contain raw UTF-8
% characters, so process this file with a UTF-8-aware backend.
@article{article_467036,
  author    = {Aksoy, Bekir and Uğuz, Sinan and Oral, Okan},
  title     = {Comparison of the Data Matching Performances of String Similarity Algorithms in Big Data},
  journal   = {Mühendislik Bilimleri ve Tasarım Dergisi},
  year      = {2019},
  volume    = {7},
  number    = {3},
  pages     = {608--618},
  publisher = {Süleyman Demirel Üniversitesi},
  doi       = {10.21923/jesd.467036},
  keywords  = {Algoritmalar,Metin analizi,Doğal dil işleme,Veri analizi,Veri tabanları},
  abstract  = {The great mobility in the world tourism in recent years has also enabled this sector to be included among the study areas of big data. In this study, a solution proposal was put forward by using the big data and string similarity algorithms (SSA) for the problems arising from the entry of the hotel data coming from different providers into databases with different names and addresses. Therefore, 2599 hotels of a tourism agency with a wide hotel network located in London were selected as the sample, and the Map-Reduce process was performed by using the Soundex algorithm to match these hotels with approximately three million hotel data coming from seventy different providers. Matching with Map-Reduce ensured a significant reduction in process count and process time. Furthermore, the Dice coefficient, Levenshtein and Longest common subsequence (LCS) algorithms were compared in terms of the data that they correctly matched, and process time. In this stage, the words decreasing the score of the algorithms in the database were detected and removed before the algorithms were implemented. The Dice coefficient algorithm yielded better results in terms of correct matching, and the Levenshtein algorithm yielded better results in terms of process time.}