<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <!-- Journal-level metadata: identifier, title, electronic ISSN and publisher. -->
                <journal-meta>
                    <!-- NOTE(review): journal-id is empty and carries no journal-id-type; supply the real identifier or confirm it is intentionally blank. -->
                    <journal-id/>
                    <journal-title-group>
                        <journal-title>Politeknik Dergisi</journal-title>
                    </journal-title-group>
                    <issn pub-type="epub">2147-9429</issn>
                    <publisher>
                        <publisher-name>Gazi Üniversitesi</publisher-name>
                    </publisher>
                </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.2339/politeknik.1347054</article-id>
                                                                <!-- Subject classification, given once per language. -->
                    <article-categories>
                        <subj-group xml:lang="en">
                            <subject>Computer Software</subject>
                        </subj-group>
                        <subj-group xml:lang="tr">
                            <subject>Bilgisayar Yazılımı</subject>
                        </subj-group>
                    </article-categories>
                                                                                                                                                        <title-group>
                        <!-- The JATS content model requires article-title first, followed by any trans-title-group. -->
                        <!-- The original-language title is Turkish; xml:lang is set explicitly so it is not read as English by inheritance. -->
                        <article-title xml:lang="tr">EMACrawler: Web Arama Motoru Veritabanı Tazeliği Optimizasyonu</article-title>
                        <trans-title-group xml:lang="en">
                            <trans-title>EMACrawler: Web Search Engine Database Freshness Optimization</trans-title>
                        </trans-title-group>
                    </title-group>
            
                                                    <!-- Authors. Each contrib-id is kept on a single line: whitespace inside the element is part of the ORCID value. -->
                    <contrib-group content-type="authors">
                        <contrib contrib-type="author">
                            <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-9710-5658</contrib-id>
                            <name>
                                <surname>Alanoğlu</surname>
                                <given-names>Zülfü</given-names>
                            </name>
                            <aff>MUSTAFA KEMAL ÜNİVERSİTESİ</aff>
                        </contrib>
                        <contrib contrib-type="author">
                            <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-6615-1237</contrib-id>
                            <name>
                                <surname>Akcayol</surname>
                                <given-names>M. Ali</given-names>
                            </name>
                            <aff>GAZİ ÜNİVERSİTESİ, MÜHENDİSLİK FAKÜLTESİ, BİLGİSAYAR MÜHENDİSLİĞİ BÖLÜMÜ</aff>
                        </contrib>
                    </contrib-group>
                        
                                        <!-- Publication date: 12 December 2024 (day/month/year children agree with iso-8601-date). -->
                    <pub-date iso-8601-date="20241212" pub-type="pub">
                        <day>12</day>
                        <month>12</month>
                        <year>2024</year>
                    </pub-date>
                                        <volume>27</volume>
                                        <issue>6</issue>
                                        <fpage>2201</fpage>
                                        <lpage>2214</lpage>
                        
                        <!-- Editorial history. The day/month children were swapped (month 21, month 15); -->
                        <!-- they now agree with the iso-8601-date attributes: received 2023-08-21, accepted 2023-11-15. -->
                    <history>
                        <date date-type="received" iso-8601-date="20230821">
                            <day>21</day>
                            <month>08</month>
                            <year>2023</year>
                        </date>
                        <date date-type="accepted" iso-8601-date="20231115">
                            <day>15</day>
                            <month>11</month>
                            <year>2023</year>
                        </date>
                    </history>
                                        <!-- Copyright notice for the article. -->
                    <!-- NOTE(review): copyright year 1998 does not match the 2024 publication date; presumably a journal-level default. Confirm with the publisher. -->
                    <permissions>
                        <copyright-statement>Copyright © 1998, Politeknik Dergisi</copyright-statement>
                        <copyright-year>1998</copyright-year>
                        <copyright-holder>Politeknik Dergisi</copyright-holder>
                    </permissions>
            
                                                                                                <!-- The JATS article-meta content model requires abstract before trans-abstract. -->
                    <!-- The primary abstract is Turkish and is tagged as such; the English one is its translation. Text is unchanged. -->
                    <abstract xml:lang="tr"><p>Günümüz bilgi ve teknoloji çağında arama motorları hayatımızın önemli bir parçası haline gelmiştir. Her ne kadar bilgiye erişimde ilk başvurulan arama motorları olsa da kullanıcılara sunulan içerikte eski ve gereksiz bilgiler yer almaktadır. Güncel verileri sağlamak açısından günümüzdeki arama motorları çoğunlukla istenen başarıyı sunamamaktadır. Web tarayıcılarının sunduğu verilerin güncelliğini sağlamak için tekrar ziyaret zamanının doğru tahmin edilmesi gerekmektedir. Bu çalışmada arama motorlarının performanslarını etkileyen en önemli özellik olan tekrar ziyaret zamanlarının belirlenmesi için üstel hareketli ortalamaya dayanan EMACrawler önerilmiştir. Önerilen yöntem kesinlik, toplam kapsama alanı ve verimlilik metrikleri kullanılarak test edilmiştir. EMACrawler’ın web sayfalarındaki güncel veriyi doğru tahmin zamanında ve hızlı bir şekilde elde ettiği görülmüştür. Yapılan deneysel çalışmaların sonucunda EMACrawler’ın güncel verilerin elde edilmesi ve tarayıcı veri tabanının tazeliğinin korunmasında diğer yöntemlerden daha başarılı olduğu görülmüştür.</p></abstract>
                    <trans-abstract xml:lang="en">
                            <p>In today&#039;s information and technology age, search engines have become an important part of our lives. Although search engines are the first to be used to access information, old and unnecessary information is included in the content offered to users. In terms of providing up-to-date data, today&#039;s search engines often cannot offer the desired success. In order to keep the data presented by web browsers up-to-date, the time of return visits must be accurately estimated. In this study, EMACrawler based on exponential moving average is proposed to determine the revisit times, which is the most important feature that affects the performance of search engines. The proposed method is tested using precision, total coverage and efficiency metrics. It has been seen that EMACrawler obtains the current data on the web pages in an accurate and quick manner. As a result of the experimental studies, it has been seen that EMACrawler is more successful than other methods in obtaining up-to-date data and maintaining the freshness of the browser database.</p></trans-abstract>
                                                            
            
                                                                                        <!-- Turkish keywords. Language is declared explicitly; leading spaces left over from comma-splitting are removed so each kwd holds only the term. -->
                    <kwd-group xml:lang="tr">
                        <kwd>Web tarayıcısı</kwd>
                        <kwd>güncelleme modülü</kwd>
                        <kwd>veri toplama</kwd>
                        <kwd>veri indeksleme</kwd>
                    </kwd-group>
                            
                                                <!-- English keywords. Leading spaces left over from comma-splitting are removed so each kwd holds only the term. -->
                    <kwd-group xml:lang="en">
                        <kwd>Web crawler</kwd>
                        <kwd>update module</kwd>
                        <kwd>data collection</kwd>
                        <kwd>data indexing</kwd>
                    </kwd-group>
                                                                                                                                    <!-- Funding: one award (118C127) from a single named funder. -->
                    <funding-group specific-use="FundRef">
                        <award-group>
                            <funding-source>
                                <named-content content-type="funder_name">Türkiye Bilimsel ve Teknolojik Araştırma Kurumu (TÜBİTAK)</named-content>
                            </funding-source>
                            <award-id>118C127</award-id>
                        </award-group>
                    </funding-group>
                                </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">[1]	Google, &quot;How Google Search Works&quot;, www.google.com, [Erişim Tarihi: 10/08/2022].</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">[2]	Sadiku M., Musa S., and Nelatury S. R., &quot;Future Internet research,&quot; International Journal of Advances in Scientific Research and Engineering (IJASRE), Erie, PY 2(3):23-25, (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">[3]	Jaiganesh S., Babu P., and Satheesh K. N., &quot;Comparative study of various web search algorithms for the improvement of web crawler,&quot; Int. J. Eng. Res. Technol.(IJERT), 4(2): (2013).</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">[4]	Li K., Fei J., and Fan C., &quot;Optimization and application of web crawler architecture,&quot; SPIE, 12506: 151-155, (2022).</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">[5]	Patil T. A. and Chobe S., &quot;Web Crawler for searching Deep web sites,&quot; in 2017 International Conference on Computing, Communication, Control and Automation (ICCUBEA), Pune, India,1-5 (2017).</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">[6]	Avrachenkov K., Borkar V., and Patil K., &quot;Deep reinforcement learning for web crawling,&quot; in Seventh Indian Control Conference (ICC), Mumbai, India:201-206 (2021).</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">[7]	Mallawaarachchi V., Meegahapola L., Madhushanka R., Heshan E., Meedeniya D., and Jayarathna S., &quot;Change detection and notification of web pages: A survey,&quot; ACM Computing Surveys (CSUR), 1(53):1-35, (2020).</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">[8]	Bullot H., Gupta S. K., and Mohania M. K., &quot;A data-mining approach for optimizing performance of an incremental crawler,&quot; in Proceedings IEEE/WIC International Conference on Web Intelligence (WI 2003), 13(17):610-615, (2003)</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">[9]	Kharazmi S., Nejad A. F., and Abolhassani H., &quot;Freshness of Web search engines: Improving performance of Web search engines using data mining techniques,&quot; in 2009 International Conference for Internet Technology and Secured Transactions, (ICITST), London, UK,1-7, (2009).</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">[10]	Jianchao H., Cercone N., and Xiaohua H., &quot;A Weighted Freshness Metric for Maintaining Search Engine Local Repository,&quot; in IEEE/WIC/ACM International Conference on Web Intelligence (WI&#039;04), Beijing, China, 677-680, (2004).</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">[11]	Amudhan V. and Thirupathi D., &quot;Traffic Adaptive Optimum Updating Scheme for Search Engines,&quot; in 2006 1st International Conference on Digital Information Management, 6(6):395-403, (2007)</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">[12]	Zhu W., Li Y., Li S., Xu Y., and Cui X., &quot;Optimal bandwidth allocation for web crawler systems with time constraints,&quot; Journal of Ambient Intelligence and Humanized Computing, 5(14):5279-5292, (2023)</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">[13]	Souza C., Laber E., Valentim C., and Cardoso E., &quot;A Polite Policy for Revisiting Web Pages,&quot; in 2007 Latin American Web Conference (LA-WEB 2007), Santiago, Chile,128-135, (2007).</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">[14]	Bhatia S., Sharma M., and Bhatia K. K., &quot;A Novel Approach for Crawling the Opinions from World Wide Web,&quot; (in English), International journal of information retrieval research, 2(6): 1-23, (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">[15]	Tan Q. and Mitra P., &quot;Clustering-based incremental web crawling,&quot; ACM Trans. Inf. Syst.,4(28):1-27, (2010)</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">[16]	Radinsky K. and Bennett P. N., &quot;Predicting content change on the web,&quot; presented at the Proceedings of the sixth ACM international conference on Web search and data mining, Rome,415-424 (2013).</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">[17]	Li H., Guo M., Cai L., and Yang Y., &quot;An incremental update strategy in Deep Web,&quot; in 2010 Sixth International Conference on Natural Computation, Yantai, China, 131-134, (2010).</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">[18]	Mor J., Rai D., and Kumar N., &quot;An XML based Web Crawler with Page Revisit Policy and Updation in Local Repository of Search Engine,&quot; International Journal of Engineering &amp; Technology,7(3): 1119-1123, (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">[19]	Kausar M. A., Nasar M., and Singh S. K., &quot;Maintaining the repository of search engine freshness using mobile crawler,&quot; in 2013 Annual International Conference on Emerging Research Areas and 2013 International Conference on Microelectronics, Communications and Renewable Energy, Kanjirapally, India, 1-6,(2013).</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">[20]	Badawi M., Mohamed A., Hussein A., and Gheith M., &quot;Maintaining the search engine freshness using mobile agent,&quot; Egyptian Informatics Journal, 1(14):27-36, (2013)</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">[21]	Gupta A., Dixit A., and Sharma A., &quot;A Novel Web Page Change Detection Technique for Migrating Crawlers,&quot; In: Sensors and Image Processing: Proceedings of CSI. Springer, Singapore, 49-57 (2018).</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">[22]	Sethi S., &quot;An optimized crawling technique for maintaining fresh repositories,&quot; Multimedia Tools and Applications, 7(80):11049-11077, (2021).</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">[23]	Santos A. S. R., Carvalho C. R., Almeida J. M., Moura E. S. de, Silva A. S. da, and Ziviani N., &quot;A genetic programming framework to schedule webpage updates,&quot; Information Retrieval Journal, 1(18):73-94, (2015).</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">[24]	Fasolin K. et al., &quot;Efficient Execution of Conjunctive Complex Queries on Big Multimedia Databases,&quot; in 2013 IEEE International Symposium on Multimedia, Anaheim, CA, 536-543,(2013).</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">[25]	Gani A., Siddiqa A., Shamshirband S., and Hanum F., &quot;A survey on indexing techniques for big data: taxonomy and performance evaluation,&quot; Knowledge and Information Systems, 2(46): 241-284 (2016).</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">[26]	Shah S. and Shaikh A., &quot;Hash based optimization for faster access to inverted index,&quot; in 2016 International Conference on Inventive Computation Technologies (ICICT), Coimbatore, India,1-5,(2016).</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">[27]	Petri M. and Moffat A., &quot;Compact inverted index storage using general-purpose compression libraries,&quot; Software: Practice and Experience, 4(48):974-982,(2018).</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">[28]	&quot;World Wide Web Size&quot;, https://www.worldwidewebsize.com/ [Erişim Tarihi : 18/8/2023].</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">[29]	Burkov A. and Chaib-draa B., &quot;Effective learning in the presence of adaptive counterparts,&quot; Journal of Algorithms, 4(65):127-138, (2009).</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">[30]	Hansun S., &quot;A new approach of moving average method in time series analysis,&quot; in 2013 Conference on New Media Studies (CoNMedia), Tangerang, Indonesia, 1-4, (2013).</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">[31]	Zuo X. L., Wang W., Wang B. Y., and Zuo W. L., &quot;Research and Implementation of Improved Real-Time Crawler Modeling,&quot; in Applied Mechanics and Materials, vol. 312:791-795 (2013).</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">[32]	Zerfos P., Cho J., and Ntoulas A., &quot;Downloading textual hidden web content through keyword queries,&quot; in Proceedings of the 5th ACM/IEEE-CS Joint Conference on Digital Libraries (JCDL &#039;05), Denver, CO:100-109, (2005).</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">[33]	&quot;Most Visited Websites in Turkey&quot;, https://www.semrush.com/website/top/turkey/all/ [Erişim Tarihi: 12/03/2023].</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
