% Shared-task system description paper in the ArabicNLP 2023 proceedings.
% The citation key is kept exactly as it was so existing \cite commands keep resolving.
% NOTE(review): month/day below say 1 December, while the note field gives a
% conference date of 30-11-2023 -- confirm which date is intended.
@inproceedings{33fe1b72319048909db9d25f928295af,
  author    = {Scherrer, Yves and Mileti{\'c}, Aleksandra and Kuparinen, Olli},
  title     = {The {Helsinki-NLP} Submissions at {NADI} 2023 Shared Task: Walking the Baseline},
  editor    = {Sawaf, Hassan and El-Beltagy, Samhaa and Zaghouani, Wajdi and Magdy, Walid and Abdelali, Ahmed and Tomeh, Nadi and {Abu Farha}, Ibrahim and Habash, Nizar and Khalifa, Salam and Keleg, Amr and Haddad, Hatem and Zitouni, Imed and Mrini, Khalil and Almatham, Rawan},
  booktitle = {Proceedings of {ArabicNLP} 2023},
  publisher = {Association for Computational Linguistics},
  pages     = {670--677},
  year      = {2023},
  month     = dec,
  day       = {1},
  language  = {English},
  note      = {Arabic Natural Language Conference, ArabicNLP 2023 ; Conference date: 30-11-2023},
  abstract  = {The Helsinki-NLP team participated in the NADI 2023 shared tasks on Arabic dialect translation with seven submissions. We used statistical (SMT) and neural machine translation (NMT) methods and explored character- and subword-based data preprocessing. Our submissions placed second in both tracks. In the open track, our winning submission is a character-level SMT system with additional Modern Standard Arabic language models. In the closed track, our best BLEU scores were obtained with the leave-as-is baseline, a simple copy of the input, and narrowly followed by SMT systems. In both tracks, fine-tuning existing multilingual models such as AraT5 or ByT5 did not yield superior performance compared to SMT.},
}