<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20241031//EN"
        "https://jats.nlm.nih.gov/publishing/1.4/JATS-journalpublishing1-4.dtd">
<article  article-type="research-article"        dtd-version="1.4">
            <front>

                <journal-meta>
                                                                <journal-id>dubited</journal-id>
            <journal-title-group>
                                                                                    <journal-title>Duzce University Journal of Science and Technology</journal-title>
            </journal-title-group>
                                        <issn pub-type="epub">2148-2446</issn>
                                                                                            <publisher>
                    <publisher-name>Duzce University</publisher-name>
                </publisher>
                    </journal-meta>
                <article-meta>
                                        <article-id pub-id-type="doi">10.29130/dubited.1793166</article-id>
                                                                <article-categories>
                                            <subj-group  xml:lang="en">
                                                            <subject>Machine Learning Algorithms</subject>
                                                    </subj-group>
                                            <subj-group  xml:lang="tr">
                                                            <subject>Makine Öğrenmesi Algoritmaları</subject>
                                                    </subj-group>
                                    </article-categories>
                                                                                                                                                        <title-group>
                                                                                                                        <trans-title-group xml:lang="tr">
                                    <trans-title>Yapay Sinir Ağlarının Donanım Hataları Altındaki Hata Toleransı ve Zafiyetinin Değerlendirilmesi</trans-title>
                                </trans-title-group>
                                                                                                                                                                                                <article-title>Evaluating the Fault Tolerance and Vulnerability of Artificial Neural Networks Under Hardware Errors</article-title>
                                                                                                    </title-group>
            
                                                    <contrib-group content-type="authors">
                                                                        <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-1104-9307</contrib-id>
                                                                <name>
                                    <surname>Aktaş Aydın</surname>
                                    <given-names>Hatice</given-names>
                                </name>
                                                                    <aff>SİVAS BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-1138-0577</contrib-id>
                                                                <name>
                                    <surname>Kahira</surname>
                                    <given-names>Albert Njoroge</given-names>
                                </name>
                                                                    <aff>AstraZeneca</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3929-8126</contrib-id>
                                                                <name>
                                    <surname>Yalçın</surname>
                                    <given-names>Gülay</given-names>
                                </name>
                                                                    <aff>ABDULLAH GUL UNIVERSITY</aff>
                                                            </contrib>
                                                    <contrib contrib-type="author">
                                                                    <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-0544-9697</contrib-id>
                                                                <name>
                                    <surname>Ünsal</surname>
                                    <given-names>Osman</given-names>
                                </name>
                                                                    <aff>Barcelona Supercomputing Center</aff>
                                                            </contrib>
                                                                                </contrib-group>
                        
                                        <pub-date pub-type="pub" iso-8601-date="2026-04-19">
                    <day>19</day>
                    <month>04</month>
                    <year>2026</year>
                </pub-date>
                                        <volume>14</volume>
                                        <issue>2</issue>
                                        <fpage>537</fpage>
                                        <lpage>550</lpage>
                        
                        <history>
                                    <date date-type="received" iso-8601-date="2025-09-29">
                        <day>29</day>
                        <month>09</month>
                        <year>2025</year>
                    </date>
                                                    <date date-type="accepted" iso-8601-date="2026-02-24">
                        <day>24</day>
                        <month>02</month>
                        <year>2026</year>
                    </date>
                            </history>
                                        <permissions>
                    <copyright-statement>Copyright © 2013, Duzce University Journal of Science and Technology</copyright-statement>
                    <copyright-year>2013</copyright-year>
                    <copyright-holder>Duzce University Journal of Science and Technology</copyright-holder>
                </permissions>
            
                                                                                                <trans-abstract xml:lang="tr">
                            <p>Yapay Sinir Ağları (YSA), yapay zekaya olan ilgi ve gelişmelerin artması, Yüksek Performanslı Hesaplama (YBH) sistemlerinin sunduğu hesaplama gücünün artması nedeniyle tekrar popülerlik kazanmıştır. Sinir ağı uygulamaları büyük veri merkezlerinde ve YBH sistemlerinde kullanıldığından, bu sistemlerde yaygın olan kayıtlarda ve bellek yapılarında bit kayması gibi benzer güvenilirlik sorunlarıyla karşı karşıyadırlar. Bu nedenle, sistem maliyetini önemli ölçüde artırabilen özel sağlamlık ve koruma mekanizmaları gerektirirler. Ancak, donanım arızalarının YSA uygulamalarının farklı bileşenleri üzerindeki etkisini anlamak, hangi parçaların daha savunmasız olduğunu ve daha yüksek güvenilirlik gerektirdiğini belirlemeye yardımcı olabilir. Bu çalışmada, YBH sistemlerinde ve büyük ölçekli veri merkezlerinde çalıştırıldığında donanım arızalarının YSA uygulamaları üzerindeki etkileri değerlendirilmiş ve böylece güvenilirlik maliyetlerinin düşürülmesi hedeflenmiştir. Geleneksel tekniklerle gerçekleştirilen hata enjeksiyon deneyleri YSA uygulamaları için oldukça zaman alıcı olabilir. Bu nedenle, bu tür uygulamalarda hata enjeksiyon süresini azaltmak için bir yöntem sunulmuştur. CPU tabanlı (Intel Xeon) ve GPU tabanlı (NVIDIA V100) yüksek performanslı bilgi işlem (HPC) sistemlerinde çalışan Yapay Sinir Ağı (YSA) uygulamaları üzerinde donanım arızalarının etkilerini değerlendirdiğimizde, sonuçlarımız YSA&#039;ların bazı donanım arızalarına, özellikle belirli katmanlarda ve mimari kayıtlarda oluşan arızalara karşı savunmasız olduğunu göstermektedir.</p></trans-abstract>
                                                                                                                                    <abstract><p>Artificial Neural Networks (ANN) have gained popularity again due to the increasing interest and developments in artificial intelligence, as well as the increased computational power offered by High Performance Computing (HPC) systems. Since neural network applications are used in large data centers and HPC systems, they face similar reliability issues such as bit slippage in registers and memory structures that are common in these systems. Therefore, they require special robustness and protection mechanisms that can significantly increase the system cost. However, understanding the impact of hardware failures on different components of ANN applications can help determine which parts are more vulnerable and require higher reliability. In this study, the effects of hardware faults on ANN applications when they are run in HPC systems and large-scale data centers are evaluated, and thus, the reliability costs are aimed to be reduced. Fault injection experiments performed with traditional techniques can be quite time-consuming for ANN applications. Therefore, a method is presented to reduce the fault injection time in such applications. When we evaluate the effects of hardware faults on Artificial Neural Network (ANN) applications running on CPU-based (Intel Xeon) and GPU-based (NVIDIA V100) high-performance computing (HPC) systems, our results show that ANNs are vulnerable to some hardware faults, especially those occurring in certain layers and architectural registers.</p></abstract>
                                                            
            
                                                                                        <kwd-group>
                                                    <kwd>Fault tolerance</kwd>
                                                    <kwd>Reliability</kwd>
                                                    <kwd>Machine learning</kwd>
                                                    <kwd>Artificial neural networks</kwd>
                                                    <kwd>Artificial intelligence</kwd>
                                            </kwd-group>
                            
                                                <kwd-group xml:lang="tr">
                                                    <kwd>Hata Toleransı</kwd>
                                                    <kwd>güvenilirlik</kwd>
                                                    <kwd>makine öğrenmesi</kwd>
                                                    <kwd>yapay sinir ağları</kwd>
                                                    <kwd>yapay zeka</kwd>
                                            </kwd-group>
                                                                                                                                    <funding-group specific-use="FundRef">
                    <award-group>
                                                    <funding-source>
                                <named-content content-type="funder_name">This research received no external funding.</named-content>
                            </funding-source>
                                                                    </award-group>
                </funding-group>
                                </article-meta>
    </front>
    <back>
                            <ref-list>
                                    <ref id="ref1">
                        <label>1</label>
                        <mixed-citation publication-type="journal">Alobaid, A., Bonny, T., &amp; Alrahhal, M. (2025). Disruptive attacks on artificial neural networks: A systematic review of attack techniques, detection methods, and protection strategies. Intelligent Systems with Applications, 26(1), 200529. https://doi.org/10.1016/j.iswa.2025.200529</mixed-citation>
                    </ref>
                                    <ref id="ref2">
                        <label>2</label>
                        <mixed-citation publication-type="journal">Bautista Gomez, L. A. B., &amp; Cappello, F. (2015). Detecting and correcting data corruption in stencil applications through multivariate interpolation. In Proceedings of the IEEE International Conference on Cluster Computing (pp. 595–602). IEEE. https://doi.org/10.1109/CLUSTER.2015.108</mixed-citation>
                    </ref>
                                    <ref id="ref3">
                        <label>3</label>
                        <mixed-citation publication-type="journal">Bengio, Y., Lecun, Y., &amp; Hinton, G. (2021). Deep learning for AI. Communications of the ACM, 64(7), 58–65. https://doi.org/10.1145/3448250</mixed-citation>
                    </ref>
                                    <ref id="ref4">
                        <label>4</label>
                        <mixed-citation publication-type="journal">Borkar, S., &amp; Chien, A. A. (2011). The future of microprocessors. Communications of the ACM, 54(5), 67–77. https://doi.org/10.1145/1941487.1941507</mixed-citation>
                    </ref>
                                    <ref id="ref5">
                        <label>5</label>
                        <mixed-citation publication-type="journal">Cappello, F., Geist, A., Gropp, B., Kale, L., Kramer, B., &amp; Snir, M. (2009). Toward exascale resilience. The International Journal of High Performance Computing Applications, 23(4), 374–388. https://doi.org/10.1177/1094342009347767</mixed-citation>
                    </ref>
                                    <ref id="ref6">
                        <label>6</label>
                        <mixed-citation publication-type="journal">Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., &amp; Fei-Fei, L. (2009). ImageNet: A large-scale hierarchical image database. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (pp. 248–255). IEEE. https://doi.org/10.1109/CVPR.2009.5206848</mixed-citation>
                    </ref>
                                    <ref id="ref7">
                        <label>7</label>
                        <mixed-citation publication-type="journal">Di Martino, C., Kramer, W., Kalbarczyk, Z., &amp; Iyer, R. (2015). Measuring and understanding extreme-scale application resilience: A field study of 5,000,000 HPC application runs. In Proceedings of the 45th Annual IEEE/IFIP International Conference on Dependable Systems and Networks (pp. 25–36). IEEE. https://doi.org/10.1109/DSN.2015.50</mixed-citation>
                    </ref>
                                    <ref id="ref8">
                        <label>8</label>
                        <mixed-citation publication-type="journal">Henning, J. L. (2006). SPEC CPU2006 benchmark descriptions. ACM SIGARCH Computer Architecture News, 34(4), 1–17. https://doi.org/10.1145/1186736.1186737</mixed-citation>
                    </ref>
                                    <ref id="ref9">
                        <label>9</label>
                        <mixed-citation publication-type="journal">Kulakov, A., Zwolinski, M., &amp; Reeve, J. (2015). Fault tolerance in distributed neural computing [Preprint]. https://doi.org/10.13140/RG.2.1.1387.0800</mixed-citation>
                    </ref>
                                    <ref id="ref10">
                        <label>10</label>
                        <mixed-citation publication-type="journal">Luk, C. K., Cohn, R., Muth, R., Patil, H., Klauser, A., Lowney, G., Wallace, S., Reddi, V. J., &amp; Hazelwood, K. (2005). Pin: Building customized program analysis tools with dynamic instrumentation. ACM SIGPLAN Notices, 40(6), 190–200. https://doi.org/10.1145/1064978.1065034</mixed-citation>
                    </ref>
                                    <ref id="ref11">
                        <label>11</label>
                        <mixed-citation publication-type="journal">LeCun, Y., Cortes, C., &amp; Burges, C. J. C. (n.d.). MNIST handwritten digit database. Retrieved March 18, 2026, from https://yann.lecun.com/exdb/mnist/index.html</mixed-citation>
                    </ref>
                                    <ref id="ref12">
                        <label>12</label>
                        <mixed-citation publication-type="journal">Nazari, N., Makrani, H. M., Fang, C., Sayadi, H., Rafatirad, S., Khasawneh, K. N., &amp; Homayoun, H. (2024). Forget and rewire: Enhancing the resilience of transformer-based models against bit-flip attacks. In Proceedings of the 33rd USENIX Security Symposium (pp. 1348-1366). https://www.usenix.org/conference/usenixsecurity24/presentation/nazari</mixed-citation>
                    </ref>
                                    <ref id="ref13">
                        <label>13</label>
                        <mixed-citation publication-type="journal">Oh, N., Shirvani, P. P., &amp; McCluskey, E. J. (2002). Error detection by duplicated instructions in super-scalar processors. IEEE Transactions on Reliability, 51(1), 63–75. https://doi.org/10.1109/24.994913</mixed-citation>
                    </ref>
                                    <ref id="ref14">
                        <label>14</label>
                        <mixed-citation publication-type="journal">Piuri, V. (2001). Analysis of fault tolerance in artificial neural networks. Journal of Parallel and Distributed Computing, 61(1), 18–48. https://doi.org/10.1006/jpdc.2000.1663</mixed-citation>
                    </ref>
                                    <ref id="ref15">
                        <label>15</label>
                        <mixed-citation publication-type="journal">Rajagede, R. A., Santriaji, M. H., Fikriansyah, M. A., Nuha, H. H., Fu, Y., &amp; Solihin, Y. (2025). NAPER: Fault protection for real-time resource-constrained deep neural networks. In Proceedings of the IEEE 31st International Symposium on On-Line Testing and Robust System Design (IOLTS). IEEE. https://doi.org/10.1109/IOLTS65288.2025.11116827</mixed-citation>
                    </ref>
                                    <ref id="ref16">
                        <label>16</label>
                        <mixed-citation publication-type="journal">Ruospo, A., Gavarini, G., de Sio, C., Guerrero, J., Sterpone, L., Reorda, M. S., Sanchez, E., Mariani, R., Aribido, J., &amp; Athavale, J. (2023). Assessing convolutional neural networks reliability through statistical fault injections. In Proceedings of the Design, Automation and Test in Europe Conference &amp; Exhibition (DATE) (pp. 1-6). IEEE. https://doi.org/10.23919/DATE56975.2023.10136998</mixed-citation>
                    </ref>
                                    <ref id="ref17">
                        <label>17</label>
                        <mixed-citation publication-type="journal">Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M., Berg, A. C., &amp; Fei-Fei, L. (2015). ImageNet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211–252. https://doi.org/10.1007/s11263-015-0816-y</mixed-citation>
                    </ref>
                                    <ref id="ref18">
                        <label>18</label>
                        <mixed-citation publication-type="journal">Simonyan, K., &amp; Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. [arXiv preprint]. https://arxiv.org/pdf/1409.1556</mixed-citation>
                    </ref>
                                    <ref id="ref19">
                        <label>19</label>
                        <mixed-citation publication-type="journal">Su, F., Yuan, P., Wang, Y., &amp; Zhang, C. (2016). The superior fault tolerance of artificial neural network training with a fault/noise injection-based genetic algorithm. Protein &amp; Cell, 7(10), 735–748. https://doi.org/10.1007/s13238-016-0302-5</mixed-citation>
                    </ref>
                                    <ref id="ref20">
                        <label>20</label>
                        <mixed-citation publication-type="journal">Tchernev, E. B., Mulvaney, R. G., &amp; Phatak, D. S. (2005). Investigating the fault tolerance of neural networks. Neural Computation, 17(7), 1646–1664. https://doi.org/10.1162/0899766053723096</mixed-citation>
                    </ref>
                                    <ref id="ref21">
                        <label>21</label>
                        <mixed-citation publication-type="journal">Tiwari, D., Gupta, S., Rogers, J., Maxwell, D., Rech, P., Vazhkudai, S., Oliveira, D., Londo, D., Debardeleben, N., Navaux, P., Carro, L., &amp; Bland, A. (2015). Understanding GPU errors on large-scale HPC systems and the implications for system design and operation. In Proceedings of the IEEE 21st International Symposium on High Performance Computer Architecture (HPCA) (pp. 331–342). IEEE. https://doi.org/10.1109/HPCA.2015.7056044</mixed-citation>
                    </ref>
                                    <ref id="ref22">
                        <label>22</label>
                        <mixed-citation publication-type="journal">Tsai, T., Hari, S. K. S., Sullivan, M., Villa, O., &amp; Keckler, S. W. (2021). NVBitFI: Dynamic Fault Injection for GPUs. In Proceedings of the 51st Annual IEEE/IFIP International Conference on Dependable Systems and Networks (DSN) (pp. 284–291). IEEE. https://doi.org/10.1109/DSN48987.2021.00041</mixed-citation>
                    </ref>
                                    <ref id="ref23">
                        <label>23</label>
                        <mixed-citation publication-type="journal">Villa, O., Stephenson, M., Nellans, D., &amp; Keckler, S. W. (2019). NVBit: A dynamic binary instrumentation framework for NVIDIA GPUs. In Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture (pp. 372–383). https://doi.org/10.1145/3352460.3358307</mixed-citation>
                    </ref>
                                    <ref id="ref24">
                        <label>24</label>
                        <mixed-citation publication-type="journal">Vinck, T., Jonckers, N., Dekkers, G., Prinzie, J., &amp; Karsmakers, P. (2025). Mitigating multiple single-event upsets during deep neural network inference using fault-aware training. Journal of Instrumentation, 20(02), Article C02044. https://doi.org/10.1088/1748-0221/20/02/C02044</mixed-citation>
                    </ref>
                                    <ref id="ref25">
                        <label>25</label>
                        <mixed-citation publication-type="journal">Wang, C., Zhao, P., Wang, S., &amp; Lin, X. (2024). Detection and recovery against deep neural network fault injection attacks based on contrastive learning [arXiv preprint]. http://arxiv.org/abs/2401.16766</mixed-citation>
                    </ref>
                            </ref-list>
                    </back>
    </article>
