<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article article-type="research-article" dtd-version="1.4" xml:lang="tr">
            <front>

                <journal-meta>
                                                                <journal-id>gummfd</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Gazi Üniversitesi Mühendislik Mimarlık Fakültesi Dergisi</journal-title>
            </journal-title-group>
                            <issn pub-type="ppub">1300-1884</issn>
                                        <issn pub-type="epub">1304-4915</issn>
                                                                                            <publisher>
                    <publisher-name>Gazi Üniversitesi</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.17341/gazimmfd.1199811</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Engineering</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Mühendislik</subject>
                                                    </subj-group>
                                    </article-categories>
                    <title-group>
                        <article-title>CComp: Sıkıştırılmış kelime arama için paralel bir sıkıştırma algoritması</article-title>
                        <trans-title-group xml:lang="en">
                            <trans-title>CComp: A parallel compression algorithm for compressed word search</trans-title>
                        </trans-title-group>
                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-3734-5171</contrib-id>
                                                                <name>
                                    <surname>Öztürk</surname>
                                    <given-names>Emir</given-names>
                                </name>
                                                                    <aff>TRAKYA ÜNİVERSİTESİ, MÜHENDİSLİK FAKÜLTESİ, BİLGİSAYAR MÜHENDİSLİĞİ BÖLÜMÜ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-1477-3093</contrib-id>
                                                                <name>
                                    <surname>Mesut</surname>
                                    <given-names>Altan</given-names>
                                </name>
                                                                    <aff>Trakya Üniversitesi, Mühendislik ve Mimarlık Fakültesi, Bilgisayar Mühendisliği Bölümü, Edirne, Türkiye</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="20240520">
                    <day>20</day>
                    <month>05</month>
                    <year>2024</year>
                </pub-date>
                                        <volume>39</volume>
                                        <issue>3</issue>
                                        <fpage>1933</fpage>
                                        <lpage>1944</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="20221105">
                        <day>05</day>
                        <month>11</month>
                        <year>2022</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="20231127">
                        <day>27</day>
                        <month>11</month>
                        <year>2023</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 1986, Gazi Üniversitesi Mühendislik Mimarlık Fakültesi Dergisi</copyright-statement>
                    <copyright-year>1986</copyright-year>
                    <copyright-holder>Gazi Üniversitesi Mühendislik Mimarlık Fakültesi Dergisi</copyright-holder>
                </permissions>
            
                    <abstract xml:lang="tr"><p>Üretilen verilerin saklamasında alandan tasarruf etmek önemlidir. Sıkıştırma algoritmaları bu tasarrufu sağlamak amacıyla kullanılmaktadır. Saklanmak istenen veri bir kere sıkıştırılmakta fakat üzerinde arama yapmak amacıyla defalarca erişilmektedir. Bu sebeple sıkıştırılmış verinin en büyük dezavantajı bu verinin kullanılmak istendiğinde açılması gerekliliğidir. Hızlı bir açma algoritması ile veya açma işlemine ihtiyaç duymayan bir sıkıştırılmış arama yönteminin kullanılması ile bu dezavantajlı durum giderebilir. Sıkıştırılmış arama hem arama uzayının küçük olması hem de açma yapmaması sayesinde aç-ve-ara yöntemlere göre daha hızlı sonuçlar elde edebilmektedir. Bu makalede sıkıştırılmış arama desteği sunan paralel yarı statik kelime tabanlı bir sıkıştırma algoritması olan CComp sunulmuştur. CComp’un amacı diğer paralel sıkıştırma algoritmalarının hızında sıkıştırma-açma ve daha hızlı sıkıştırılmış arama yapmaktır. CComp sıkıştırma, açma ve arama işlemlerini paralel olarak gerçekleştirmektedir. CComp diğer paralel yöntemler ile karşılaştırılmıştır. Sonuçlarda gösterildiği gibi CComp’un sıkıştırma oranları diğer kelime tabanlı algoritmalarla paralel sonuçlar vermektedir. Sıkıştırılmış arama işleminde ise daha önce en iyi sonucu veren Zstd algoritmasına göre yaklaşık 7 kat daha hızlı arama sonuçları elde edilmiştir.  Bu sonuçlar ile CComp sıkıştırılmış arama desteği sunan algoritmalara daha iyi bir alternatif olarak gösterilebilmektedir.</p></abstract>
                    <trans-abstract xml:lang="en">
                            <p>It is important to save space storing the generated data. To achieve this, compression algorithms are used. Stored data is compressed once but accessed many times to search on it. For this reason, the biggest disadvantage of compressed data is that it needs to be decompressed when it will be used. This disadvantage can be eliminated by using a fast decompression algorithm or a compressed search method that does not require decompression. Compressed search can achieve faster results than open-and-search methods, thanks to its small search space and not using decompression. In this article, CComp, a parallel semi-static word-based compression algorithm that supports compressed search, is presented. The purpose of CComp is to obtain faster search results while compressing-decompressing at the speed of other parallel compression algorithms. CComp performs these operations in parallel. CComp has been compared to other parallel methods. As shown in the results, the compression ratios of CComp give results in parallel with other word-based algorithms. In the compressed search process, results were obtained approximately 7 times faster than the Zstd algorithm, which gave the best results before. With these results, CComp can be shown as a better alternative to algorithms that support compressed search.</p></trans-abstract>
                                                            
            
                                                <kwd-group xml:lang="tr">
                                                    <kwd>Sıkıştırılmış arama</kwd>
                                                    <kwd>Veri sıkıştırma</kwd>
                                                    <kwd>Paralel programlama</kwd>
                                                    <kwd>Metin sıkıştırma</kwd>
                                            </kwd-group>
                            
                                                <kwd-group xml:lang="en">
                                                    <kwd>Compressed matching</kwd>
                                                    <kwd>Data compression</kwd>
                                                    <kwd>Parallel programming</kwd>
                                                    <kwd>Text compression</kwd>
                                            </kwd-group>
                                                                                                                                        </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Özköse H, Arı ES, Gencer C., Yesterday, Today and Tomorrow of Big Data. Procedia - Social and Behavioral Sciences, 195, 1042–1050, 2015.</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Lawnik M, Pelka A, Kapczyński A., A New Way to Store Simple Text Files. Algorithms, 13, 101, 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Gupta A, Nigam S., A Review on Different Types of Lossless Data Compression Techniques. 2021.</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Suneetha D, Kishore DR, Babu PN., A Compression Algorithm for DNA Palindrome Compression Technique, ITM Web of Conferences, Mumbai-Hindistan, 1-5, 27-28 Haziran 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Rădescu R., Concordance Techniques in Lossless Data Compression of Text Files, 2021 12th International Symposium on Advanced Topics in Electrical Engineering (ATEE), Bükreş-Romanya, 1–4, 23-25 Mart 2021.</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Abliz W, Wu H, Maimaiti M, Wushouer J, Abiderexiti K, Yibulayin T, Wumaier A., A Syllable-Based Technique for Uyghur Text Compression. Information, 11, 172, 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Pandey M, Shrivastava S, Pandey S, Shridevi S., An Enhanced Data Compression Algorithm, 2020 International Conference on Emerging Trends in Information Technology and Engineering (ic-ETITE), Vellore-Hindistan, 1–4, 24-25 Şubat 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Murugesan G., Codon Based Compression Algorithm for DNA Sequences with Two Bit Encoding. European Journal of Molecular &amp; Clinical Medicine, 7, 33-41, 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">Silva M, Pratas D, Pinho AJ., Efficient DNA sequence compression with neural networks. GigaScience, 9(11), 1-15, 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Ghuge S., Map and Trie based Compression Algorithm for Data Transmission, 2nd International Conference on Innovative Mechanisms for Industry Applications (ICIMIA), Bangalore-Hindistan, 137–141, 24-25 Şubat 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">Hilal TA, Hilal HA., Turkish Text Compression via Characters Encoding. Procedia Computer Science, 175, 286–91, 2020.</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Nguyen VH, Nguyen HT, Duong HN, Snasel V., n-Gram-based text compression. Computational intelligence and neuroscience, 2016, 1-12, 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Demchenko Y, De Laat C, Membrey P., Defining architecture components of the Big Data Ecosystem, 2014 International conference on collaboration technologies and systems (CTS), Minneapolis-Minnesota-USA, 104–112, 19-23 Mayıs 2014.</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Rattanaopas K, Kaewkeeree S., Improving Hadoop MapReduce performance with data compression: A study using wordcount job, 14th International Conference on Electrical Engineering/Electronics, Computer, Telecommunications and Information Technology (ECTI-CON), Phuket-Tayland, 564–567, 27-30 Haziran 2017.</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Bartík M, Ubik S, Kubalik P., LZ4 compression algorithm on FPGA, IEEE International Conference on Electronics, Circuits, and Systems (ICECS), Kahire-Mısır, 179–182, 6 Aralık 2015.</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Guerra A, Lotero J, Isaza S., Performance comparison of sequential and parallel compression applications for DNA raw data. The Journal of Supercomputing, 72, 4696–717, 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Sun Y, Gong X, Yang Y., Data compression and parallel computation model research under big data environment, International Conference on Computer Communication and Informatics (ICCCI), Lefkoşa-Kıbrıs, 1–4, 27-29 Eylül 2017.</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Kumar VS, Nanjundiah R, Thazhuthaveetil MJ, Govindarajan R., Impact of message compression on the scalability of an atmospheric modeling application on clusters. Parallel Computing, 34, 1–16, 2008.</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Ahmad I, He Y, Liou ML., Video compression with parallel processing. Parallel Computing, 28, 1039–78, 2002.</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Adler M., pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines. Jet Propulsion Laboratory, 2015.</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Gilchrist J., Parallel data compression with bzip2, Proceedings of the 16th IASTED international conference on parallel and distributed computing and systems, Dallas-USA, 559–564, 14-16 Aralık 2004.</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Bell T, Adjeroh D, Mukherjee A., Pattern matching in compressed texts and images. 2001.</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Mishra SP, Singh CG, Prasad R., A review on compressed pattern matching. Perspectives in Science, 8, 727–9, 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">Karcıoğlu AA, Bulut H., DNA sekansları için q-gram hash karşılaştırmasına dayalı çoklu kesin dizi eşleştirme algoritması. Gazi Üniversitesi Mühendislik Mimarlık Fakültesi Dergisi, 38, 875–88, 2022.</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">Buluş HN, Carus A, Mesut A., A new word-based compression model allowing compressed pattern matching. Turkish Journal of Electrical Engineering &amp; Computer Sciences, 25, 3607–22, 2017.</mixed-citation>
                    </ref>
                                    <ref id="ref26">
                        <label>26</label>
                        <mixed-citation publication-type="journal">Öztürk E, Mesut A, Diri B., Multi-stream word-based compression algorithm for compressed text search. Arabian Journal for Science and Engineering, 43, 8209–21, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref27">
                        <label>27</label>
                        <mixed-citation publication-type="journal">Srivastav S, Singh PK, Yadav D., A Method to Improve Exact Matching Results in Compressed Text using Parallel Wavelet Tree. Scalable Computing: Practice and Experience, 22, 387–400, 2021.</mixed-citation>
                    </ref>
                                    <ref id="ref28">
                        <label>28</label>
                        <mixed-citation publication-type="journal">Russo LMS, Navarro G, Oliveira AL, Morales P., Approximate String Matching with Compressed Indexes. Algorithms, 2, 1105–36, 2009.</mixed-citation>
                    </ref>
                                    <ref id="ref29">
                        <label>29</label>
                        <mixed-citation publication-type="journal">Melnik S, Raghavan S, Yang B, Garcia-Molina H., Building a distributed full-text index for the web. ACM Transactions on Information Systems (TOIS), 19, 217–41, 2001.</mixed-citation>
                    </ref>
                                    <ref id="ref30">
                        <label>30</label>
                        <mixed-citation publication-type="journal">Bast H, Buchhold B., An index for efficient semantic full-text search, Proceedings of the 22nd ACM international conference on Information &amp; Knowledge Management, California-USA, 369–78, 27 Ekim - 1 Kasım 2013.</mixed-citation>
                    </ref>
                                    <ref id="ref31">
                        <label>31</label>
                        <mixed-citation publication-type="journal">Deutsch P, others., GZIP file format specification version 4.3. 1996.</mixed-citation>
                    </ref>
                                    <ref id="ref32">
                        <label>32</label>
                        <mixed-citation publication-type="journal">Deutsch P., Rfc1951: Deflate compressed data format specification version 1.3, RFC Editor, 1996.</mixed-citation>
                    </ref>
                                    <ref id="ref33">
                        <label>33</label>
                        <mixed-citation publication-type="journal">Oswal S, Singh A, Kumari K., Deflate compression algorithm. International Journal of Engineering Research and General Science, 4, 430–6, 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref34">
                        <label>34</label>
                        <mixed-citation publication-type="journal">Aşşık MM, Oral M., Kanonik Huffman kod sözcükleri uzunluklarının evrim stratejileri algoritması ile belirlenmesi. Gazi Üniversitesi Mühendislik Mimarlık Fakültesi Dergisi, 38, 771–80, 2022.</mixed-citation>
                    </ref>
                                    <ref id="ref35">
                        <label>35</label>
                        <mixed-citation publication-type="journal">Deutsch P, Gailly J-L., Zlib compressed data format specification version 3.3, RFC 1950, Mayıs, 1996.</mixed-citation>
                    </ref>
                                    <ref id="ref36">
                        <label>36</label>
                        <mixed-citation publication-type="journal">Burrows M, Wheeler D., A block-sorting lossless data compression algorithm, Digital SRC Research Report, 1994.</mixed-citation>
                    </ref>
                                    <ref id="ref37">
                        <label>37</label>
                        <mixed-citation publication-type="journal">Manzini G., An analysis of the Burrows—Wheeler transform. Journal of the ACM (JACM), 48, 407–30, 2001.</mixed-citation>
                    </ref>
                                    <ref id="ref38">
                        <label>38</label>
                        <mixed-citation publication-type="journal">Collet Y, Kucherawy M., Zstandard Compression and the application/zstd Media Type. RFC 8478, 2018.</mixed-citation>
                    </ref>
                                    <ref id="ref39">
                        <label>39</label>
                        <mixed-citation publication-type="journal">Duda J, Tahboub K, Gadgil NJ, Delp EJ., The use of asymmetric numeral systems as an accurate replacement for Huffman coding, 2015 Picture Coding Symposium (PCS), Cairns-Avustralya, 65–69, 31 Mayıs - 3 Haziran 2015.</mixed-citation>
                    </ref>
                                    <ref id="ref40">
                        <label>40</label>
                        <mixed-citation publication-type="journal">Belkov R, Kirilenko I., Compressing Embedded GNU/Linux and Windows 10 IoT Images Using XZ Utilities, 1st Scientific and Practical Conference “Software Engineering and Information Organization”, SEIM-2016, St Petersburg-Rusya, 41, 22 Nisan 2016.</mixed-citation>
                    </ref>
                                    <ref id="ref41">
                        <label>41</label>
                        <mixed-citation publication-type="journal">Kirby G., Zipf’s law. UK Journal of Naval Science, 10, 180–5, 1985.</mixed-citation>
                    </ref>
                                    <ref id="ref42">
                        <label>42</label>
                        <mixed-citation publication-type="journal">Ferragina P, Navarro G., Pizza&amp;Chili Corpus—Compressed indexes and their testbeds. September, 2005.</mixed-citation>
                    </ref>
                                    <ref id="ref43">
                        <label>43</label>
                        <mixed-citation publication-type="journal">Mahoney M., Large text compression benchmark, http://www.mattmahoney.net/dc/text.html, Yayın tarihi Eylül 15, 2022. Erişim tarihi Mayıs 19, 2022.</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
