@article{article_1821518,
  author       = {Uzun, Yasemin},
  title        = {Reliability of Human Expert and AI Raters in Translation Assessment},
  journal      = {OPUS Journal of Society Research},
  year         = {2025},
  volume       = {22},
  number       = {6},
  pages        = {1305--1317},
  doi          = {10.26466/opusjsr.1821518},
  publisher    = {İdeal Kent Yayınları},
  organization = {Çanakkale Onsekiz Mart University},
  keywords     = {Artificial intelligence, inter-rater reliability, teaching Turkish as a foreign language, translation assessment},
  abstract     = {Although AI-based assessment systems offer new opportunities in education, their consistency with human judgment in measuring complex cognitive skills such as translation remains debatable. This study examines inter-rater reliability between a domain expert and AI raters (ChatGPT-5 and Gemini 1.5 Pro) in evaluating C2-level Turkish translations. In a convergent mixed-methods design, translations from 14 students were scored with a 5-point analytic rubric. Krippendorff's alpha revealed low overall agreement (α = .392), which was particularly weak for "Semantic Accuracy" (α = .288). Qualitative analysis identified three key divergences: task fidelity, perception of error severity, and variability in criterion interpretation. The findings show that the AI models exhibited partial consistency in formal accuracy but systematically diverged from the human expert in semantic nuance, style, and contextual appropriateness. The expert adopted a "task-oriented" approach, while the AI models were more "form-focused" (Gemini) or "surface coherence-oriented" (ChatGPT). Although AI systems serve as useful auxiliary tools in translation assessment, they cannot replace expert judgment.}
}