💎 Pearl is a large-scale Arabic multimodal dataset and benchmark explicitly designed for cultural understanding. Constructed through advanced agentic workflows and extensive human-in-the-loop annotation by 37 annotators from across the Arab world, Pearl comprises over 309K multimodal examples spanning ten culturally significant domains and covering all Arab countries.
Supervisor: Prof. Muhammad Abdul-Mageed
Multimodal Examples
Cultural Domains
Images





































% Citation for the Pearl paper (Findings of the ACL: EMNLP 2025).
@inproceedings{alwajih-etal-2025-pearl,
  title     = {Pearl: A Multimodal Culturally-Aware {Arabic} Instruction Dataset},
  author    = {Alwajih, Fakhraddin and
               Magdy, Samar M. and
               El Mekki, Abdellah and
               Nacar, Omer and
               Nafea, Youssef and
               Abdelfadil, Safaa Taher and
               Yahya, Abdulfattah Mohammed and
               Luqman, Hamzah and
               Almarwani, Nada and
               Aloufi, Samah and
               Qawasmeh, Baraah and
               Atou, Houdaifa and
               Sibaee, Serry and
               Alsayadi, Hamzah A. and
               Al-Dhabyani, Walid and
               Al-shaibani, Maged S. and
               El aatar, Aya and
               Qandos, Nour and
               Alhamouri, Rahaf and
               Ahmad, Samar and
               AL-Ghrawi, Mohammed Anwar and
               Yacoub, Aminetou and
               AbuHweidi, Ruwa and
               Lemin, Vatimetou Mohamed and
               Abdel-Salam, Reem and
               Bashiti, Ahlam and
               Ammar, Adel and
               Alansari, Aisha and
               Ashraf, Ahmed and
               Alturayeif, Nora and
               Alcoba Inciarte, Alcides and
               Elmadany, AbdelRahim A. and
               Tourad, Mohamedou Cheikh and
               Berrada, Ismail and
               Jarrar, Mustafa and
               Shehata, Shady and
               Abdul-Mageed, Muhammad},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.1254/},
  pages     = {23048--23079},
  isbn      = {979-8-89176-335-7},
}