% Arslan and Orhan (2019), journal article on detecting out-of-vocabulary
% words in Turkish texts. Cleaned from an auto-exported record: HTML markup
% stripped from the abstract (wording unchanged), page range normalised to
% a double hyphen, and the acronym/proper adjective in the title
% brace-protected against style recasing.
@article{article_505501,
  author    = {Arslan, Enis and Orhan, Umut},
  title     = {Identification of {OOV} words in {Turkish} texts},
  journal   = {Gaziosmanpaşa Bilimsel Araştırma Dergisi},
  volume    = {8},
  number    = {2},
  pages     = {35--48},
  year      = {2019},
  publisher = {Tokat Gaziosmanpaşa Üniversitesi},
  keywords  = {Bilinmeyen kelimeler, Eşdizimlilik, Birliktelik, Sözlük dışı kelimeler},
  abstract  = {In this study, we present a semantic graph network model which is capable of detecting out-of-vocabulary (OOV) words in Turkish texts. In natural language processing (NLP) field, morphological analyzers can encounter unknown words (UW) during word processing. This mostly occurs when these kind of tools depend on a dictionary to find the probable lemmas in order to further process parsing. Sometimes, an analyzer is unable to find any candidates because of the non-existence of the lemma candidates in the dictionary. This results in degraded parsing output. The proposed model for OOV detection is able to define OOV words which are suitable for dictionaries. Also co-occurrence relations of the lemmas in texts are modelled as a semantic sub-graph and it is used to discover collocations to propose as new lemma candidates.},
}