@article{article_1588198,
  title={A Chaos-Causality Approach to Principled Pruning of Dense Neural Networks},
  author={Sahu, Rajan and Chadha, Shivam and Mathur, Archana and Nagaraj, Nithin and Saha, Snehanshu},
  journal={Chaos Theory and Applications},
  volume={7},
  number={2},
  pages={154--165},
  year={2025},
  DOI={10.51537/chaos.1588198},
  url={https://izlik.org/JA32NN27ZU},
  keywords={Chaos, Granger causality, Neural networks, Lyapunov exponent, Weight pruning},
  abstract={Reducing the size of a neural network (pruning) by removing weights without impacting its performance is an important problem for resource-constrained devices. In the past, pruning was typically accomplished by ranking or penalizing weights based on criteria such as magnitude and removing low-ranked weights before retraining the remaining ones. Pruning strategies also involve removing neurons from the network to achieve the desired reduction in network size. We formulate pruning as an optimization problem that minimizes misclassifications by selecting specific weights. We introduce the concept of chaos in learning (Lyapunov exponents) through weight updates and use causality-based investigations to identify the causal weight connections responsible for misclassification. Two architectures are proposed in this work: the Lyapunov Exponent Granger Causality driven Fully Trained Network (LEGCNet-FT) and the Lyapunov Exponent Granger Causality driven Partially Trained Network (LEGCNet-PT). The proposed methodology gauges causality between weight-specific Lyapunov exponents (LEs) and misclassification, facilitating the identification of weights to prune from the network. The performance of both the dense and pruned neural networks is evaluated using accuracy, F1 score, FLOPs, and percentage pruned. It is observed that, using LEGCNet-PT/LEGCNet-FT, a dense over-parameterized network can be pruned without compromising accuracy, F1 score, or other performance metrics. Additionally, the sparse networks are trained with fewer epochs and fewer FLOPs than their dense counterparts across all datasets. Our methods are compared with random and magnitude pruning, and it is observed that the pruned network maintains the original performance while retaining feature explainability. Feature explainability is investigated using SHAP and WeightWatcher. The SHAP values computed for the proposed pruning architecture, as well as for the baselines (random and magnitude pruning), indicate that feature importance is maintained in LEGCNet-PT and LEGCNet-FT when compared to the dense network. WeightWatcher results reveal that the network layers are well-trained.}
}