@article{article_1693015,
  author    = {Özeren, Doğukan and Yüksel, Erkan and Yüksel, Asım Sinan},
  title     = {Performance Comparison of {Vision-Language} Models in Image Classification},
  journal   = {International Journal of 3D Printing Technologies and Digital Industry},
  volume    = {9},
  number    = {2},
  pages     = {247--262},
  year      = {2025},
  doi       = {10.46519/ij3dptdi.1693015},
  publisher = {Kerim ÇETİNKAYA},
  keywords  = {Vision-Language Models, Image Classification, Multimodal Learning, Zero-Shot Classification, Few-Shot Learning, Model Generalization},
  abstract  = {Vision-Language Models (VLMs) have introduced a new paradigm shift in image classification by integrating visual and textual modalities. While these models have demonstrated strong performance on multimodal tasks, their effectiveness in purely visual classification remains underexplored. This study presents a comprehensive, metric-driven comparative analysis of eight state-of-the-art VLMs—GPT-4o-latest, GPT-4o-mini, Gemini-flash-1.5-8b, LLaMA-3.2-90B-vision-instruct, Grok-2-vision-1212, Qwen2.5-vl-7b-instruct, Claude-3.5-sonnet, and Pixtral-large-2411—across four datasets: CIFAR-10, ImageNet, COCO, and the domain-specific New Plant Diseases dataset. Model performance was evaluated using accuracy, precision, recall, F1-score, and robustness under zero-shot and few-shot settings. Quantitative results indicate that GPT-4o-latest consistently achieves the highest performance on typical benchmarks (accuracy: 0.91, F1-score: 0.91 on CIFAR-10), substantially surpassing lightweight models such as Pixtral-large-2411 (accuracy: 0.13, F1-score: 0.13). Near-perfect results on ImageNet and COCO likely reflect pre-training overlap, whereas notable performance degradation on the New Plant Diseases dataset underscores domain adaptation challenges. Our findings emphasize the need for robust, parameter-efficient, and domain-adaptive fine-tuning strategies to advance VLMs in real-world image classification.},
  note      = {This research was supported by The Scientific and Technological Research Council of Türkiye (TÜBİTAK) under the 1005-National New Ideas and Products Research Support Program (Project No: 124E769).},
}