<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article article-type="research-article" dtd-version="1.4">
            <front>

                <journal-meta>
                                                                <journal-id>jnse</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Journal of Naval Sciences and Engineering</journal-title>
            </journal-title-group>
                            <issn pub-type="ppub">1304-2025</issn>
                                                                                                        <publisher>
                    <publisher-name>Millî Savunma Üniversitesi</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.56850/jnse.1828189</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Computer Vision</subject>
                                                            <subject>Natural Language Processing</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Bilgisayar Görüşü</subject>
                                                            <subject>Doğal Dil İşleme</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                        <trans-title-group xml:lang="tr">
                                    <trans-title>Mobil Uygulama İle Derin Öğrenme Tabanlı Nesne Tespiti ve Büyük Dil Modeli İle İfade Üretme</trans-title>
                                </trans-title-group>
                                                                                                                                                                                                <article-title>DEEP LEARNING-BASED OBJECT DETECTION WITH MOBILE APPLICATION AND EXPRESSION GENERATION USING A LARGE LANGUAGE MODEL</article-title>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                            <contrib contrib-type="author">
                                <contrib-id contrib-id-type="orcid">https://orcid.org/0009-0009-6072-6990</contrib-id>
                                <name>
                                    <surname>Dere</surname>
                                    <given-names>Nurcihan</given-names>
                                </name>
                                <aff>Architecht Information Systems and Marketing Trade</aff>
                            </contrib>
                            <contrib contrib-type="author">
                                <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-6999-1410</contrib-id>
                                <name>
                                    <surname>Yıldız</surname>
                                    <given-names>Kazım</given-names>
                                </name>
                                <aff>Marmara University</aff>
                            </contrib>
                            <contrib contrib-type="author">
                                <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4540-663X</contrib-id>
                                <name>
                                    <surname>Demir</surname>
                                    <given-names>Önder</given-names>
                                </name>
                                <aff>Marmara University</aff>
                            </contrib>
                                                                                </contrib-group>
                        
                                                                <issue>Advanced Online Publication</issue>
                                        <fpage>69</fpage>
                                        <lpage>93</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="20251121">
                        <day>11</day>
                        <month>21</month>
                        <year>2025</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="20251216">
                        <day>12</day>
                        <month>16</month>
                        <year>2025</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 2003, Journal of Naval Sciences and Engineering</copyright-statement>
                    <copyright-year>2003</copyright-year>
                    <copyright-holder>Journal of Naval Sciences and Engineering</copyright-holder>
                </permissions>
            
                                                                                                <trans-abstract xml:lang="tr">
                            <p>Bu çalışma, kullanıcıların çevrelerindeki nesneleri algılamalarını, bu nesnelerin uzaklıklarını ölçmelerini ve nesneler arasındaki konumsal ilişkileri anlamalarını sağlayan bütünleşik bir mobil çözüm sunmaktadır. Sistem, YOLOv11 tabanlı gerçek zamanlı nesne tespiti, LiDAR destekli mesafe ölçümü ve GPT-4o’nun ifade üretimini bir araya getirerek kullanıcının istediği nesneyi bulmasını ve nesnenin çevresindeki diğer nesneleri de öğrenmesini sağlamaktadır. Bu sayede kullanıcı yalnızca nesnelerin varlığını değil, aynı zamanda konumlarını ve birbirleriyle olan konumsal düzenlerini de öğrenebilmektedir. Çalışmada, nesne tespiti sırasında görüntüler mobil uygulama ile yakalanarak nesnenin her zaman görsel çerçeve içerisinde yer alması sağlanır. Bu, görme engelli kullanıcıların oluşturduğu fotoğraflarda sıklıkla karşılaşılan bulanıklık ve yanlış çerçeveleme gibi sorunların önüne geçer. Deneysel sonuçlar, YOLOv11 modelinin 0.77 F1 puanı ve 0.806 mAP değeri ile etkili bir performans ortaya koyduğunu göstermektedir. Ayrıca ince ayar gerçekleştirilen GPT-4o modeli, görüntülerdeki nesne konumlarını doğru biçimde belirleyerek nesneyi ve etrafındaki diğer nesneleri içeren ifadeler üretmektedir. Bu çalışma, nesne tespiti, LiDAR tabanlı mesafe ölçümü ve büyük bir dil modelinin ifade üretimini birleştiren bir sistem önermektedir. Gelecekte daha gelişmiş çözümlerin uygulanması için bir referans oluşturmaktadır.</p></trans-abstract>
                                                                                                                                    <abstract><p>This work presents an integrated mobile solution that allows users to detect objects in their environment, measure their distances, and understand the spatial relationships between them. The system combines YOLOv11-based real-time object detection, LiDAR-assisted distance measurement, and GPT-4o expression generation, enabling users to locate a desired object and learn about the objects around it. In this way, the user learns not only that objects are present but also where they are located and how they are arranged relative to one another. In this study, images are captured with the mobile application during object detection, ensuring that the object always remains within the visual frame. This prevents problems such as blurring and incorrect framing, which are frequently encountered in photographs taken by visually impaired users. Experimental results show that the YOLOv11 model achieves effective performance, with an F1 score of 0.77 and a mAP value of 0.806. Furthermore, the fine-tuned GPT-4o model correctly identifies object locations in images and generates expressions that describe the target object and the other objects surrounding it. The proposed system thus integrates object detection, LiDAR-based distance measurement, and expression generation by a large language model, and provides a reference for the implementation of more advanced solutions in the future.</p></abstract>
                                                            
            
                                                                                        <kwd-group>
                                                    <kwd>Object Detection</kwd>
                                                    <kwd>YOLOv11</kwd>
                                                    <kwd>Deep Learning</kwd>
                                                    <kwd>GPT-4o</kwd>
                                                    <kwd>Mobile Application</kwd>
                                            </kwd-group>
                            
                                                <kwd-group xml:lang="tr">
                                                    <kwd>Nesne Tespiti</kwd>
                                                    <kwd>YOLOv11</kwd>
                                                    <kwd>Derin Öğrenme</kwd>
                                                    <kwd>GPT-4o</kwd>
                                                    <kwd>Mobil Uygulama</kwd>
                                            </kwd-group>
                                                                                                                                        </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Abed, A. A., Al-Ibadi, A., &amp; Abed, I. A. (2023). Real-time multiple face mask and fever detection using YOLOv3 and TensorFlow lite platforms. Bulletin of Electrical Engineering and Informatics, 12(2), 922-929.</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Achiam, J., Adler, S., Agarwal, S., Ahmad, L., Akkaya, I., Aleman, F. L., Almeida, D., Altenschmidt, J., Altman, S., &amp; Anadkat, S. (2023). Gpt-4 technical report. arXiv preprint arXiv:2303.08774.</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Alamsyah, D. P., Ramdhani, Y., Syam, A. T., &amp; Setiadi, A. (2022). Augmented Reality English Education Based iOS with MobileNetV2 Image Recognition Model. 2022 Seventh International Conference on Informatics and Computing (ICIC),</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Alemdar, K. D., Kayacı Çodur, M., Codur, M. Y., &amp; Uysal, F. (2023). Environmental Effects of Driver Distraction at Traffic Lights: Mobile Phone Use. Sustainability, 15(20), 15056.</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Boyar, T., &amp; Yıldız, K. (2022). Powdery mildew detection in hazelnut with deep learning. Hittite Journal of Science and Engineering, 9(3), 159-166.</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Chen, C., Anjum, S., &amp; Gurari, D. (2022). Grounding answers for visual questions asked by visually impaired people. Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition,
 
Chen, C., Tseng, Y.-Y., Li, Z., Venkatesh, A., &amp; Gurari, D. (2025). Acknowledging Focus Ambiguity in Visual Questions. arXiv preprint arXiv:2501.02201.</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Chen, J., &amp; Zhu, Z. (2023). Real-time 3D object detection, recognition and presentation using a mobile device for assistive navigation. SN Computer Science, 4(5), 543.
 
Furniture Computer Vision Dataset. (2022).  Retrieved 19.11.2025 from https://universe.roboflow.com/objectdetection-uzld5/furniture-ngpea-h6zxi/</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Gurari, D., Li, Q., Stangl, A. J., Guo, A., Lin, C., Grauman, K., Luo, J., &amp; Bigham, J. P. (2018). Vizwiz grand challenge: Answering visual questions from blind people. Proceedings of the IEEE conference on computer vision and pattern recognition,
 
Han, X., Zhang, Z., Ding, N., Gu, Y., Liu, X., Huo, Y., Qiu, J., Yao, Y., Zhang, A., &amp; Zhang, L. (2021). Pre-trained models: Past, present and future. AI Open, 2, 225-250.
 
He, L., Zhou, Y., Liu, L., Zhang, Y., &amp; Ma, J. (2025). Application of the YOLOv11-seg algorithm for AI-based landslide detection and recognition. Scientific Reports, 15(1), 12421.</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">HomeObjects. (2025).  Retrieved 19.11.2025 from https://app.roboflow.com/objectdetection-uzld5/homeobjects/4</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Huh, M., Xu, F., Peng, Y.-H., Chen, C., Gurari, D., Choi, E., &amp; Pavel, A. (2024). Long-form answers to visual questions from blind and low vision people. Workshop on Demographic Diversity in Computer Vision@ CVPR 2025,</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">Khoshsirat, S., &amp; Kambhamettu, C. (2023). Embedding attention blocks for the vizwiz answer grounding challenge. VizWiz Grand Challenge Workshop,</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Kotthapalli, M., Ravipati, D., &amp; Bhatia, R. (2025). YOLOv1 to YOLOv11: A comprehensive survey of real-time object detection innovations and challenges. arXiv preprint arXiv:2508.02067.</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Kumar, S., Ratan, R., &amp; Desai, J. (2022). Cotton disease detection using tensorflow machine learning technique. Advances in Multimedia, 2022.</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Liao, Y., Li, L., Xiao, H., Xu, F., Shan, B., &amp; Yin, H. (2025). YOLO-MECD: citrus detection algorithm based on YOLOv11. Agronomy, 15(3), 687.</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Dollár, P., &amp; Zitnick, C. L. (2014). Microsoft coco: Common objects in context. Computer Vision–ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13,</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Mahi, A. B. S., Eshita, F. S., &amp; Helaly, T. (2023). An automated system for wrong-way vehicle detection using yolo and deepsort. 2023 5th International Conference on Sustainable Technologies for Industry 5.0 (STI).</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Massiceti, D., Zintgraf, L., Bronskill, J., Theodorou, L., Harris, M. T., Cutrell, E., Morrison, C., Hofmann, K., &amp; Stumpf, S. (2021). Orbit: A real-world few-shot dataset for teachable object recognition. Proceedings of the IEEE/CVF International Conference on Computer Vision.</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Moreira, F. W. R., Hermes, G., &amp; de Lima, J. M. M. (2024). Development of a Cross Platform Mobile Application Using Gemini to Assist Visually Impaired Individuals. 2024 9th International Conference on Intelligent Informatics and Biomedical Sciences (ICIIBMS).
 
Morishita, M., Fukuda, H., Yamaguchi, S., Muraoka, K., Nakamura, T., Hayashi, M., Yoshioka, I., Ono, K., &amp; Awano, S. (2024). An exploratory assessment of GPT-4o and GPT-4 performance on the Japanese National Dental Examination. The Saudi Dental Journal, 36(12), 1577-1581.</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Open Neural Network Exchange.  Retrieved 10.12.2025 from https://onnx.ai Prechelt, L. (2002). Early stopping-but when? In Neural Networks: Tricks of the trade (pp. 55-69). Springer.</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Pudari, R., Bhutada, S., &amp; Mudavath, S. P. (2020). Real Time Face Recognition Using Convoluted Neural Networks. arXiv preprint arXiv:2010.04517.
 
Sujaini, H., Ramadhan, E. Y., &amp; Novriando, H. (2021). Comparing the performance of linear regression versus deep learning on detecting melanoma skin cancer using apple core ML. Bulletin of Electrical Engineering and Informatics, 10(6), 3110-3120.</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Tautkute, I., Możejko, A., Stokowiec, W., Trzciński, T., Brocki, Ł., &amp; Marasek, K. (2017). What looks good with my sofa: Multimodal search engine for interior design. 2017 Federated Conference on Computer Science and Information Systems (FedCSIS).</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Tinn, R., Cheng, H., Gu, Y., Usuyama, N., Liu, X., Naumann, T., Gao, J., &amp; Poon, H. (2023). Fine-tuning large neural language models for biomedical natural language processing. Patterns, 4(4).</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Wang, Z., Li, C., Xu, H., Zhu, X., &amp; Li, H. (2025). Mamba YOLO: A Simple Baseline for Object Detection with State Space Model. Proceedings of the AAAI Conference on Artificial Intelligence.</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">Wehr, A., &amp; Lohr, U. (1999). Airborne laser scanning—an introduction and overview. ISPRS Journal of photogrammetry and remote sensing, 54(2-3), 68-82.</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
