paper.bib

% Software reference for the CLTK repository. The acronym and project name are
% braced so sentence-casing styles keep their capitals.
@misc{johnson2014:2014,
  author       = {Johnson, Kyle},
  title        = {{CLTK}: The {Classical Language Toolkit}},
  howpublished = {\url{https://github.com/cltk/cltk}},
  year         = {2014}
}

% The title ends with a site name; the stray space inside it has been removed.
% NOTE(review): "LRE-REL2" looks like a workshop acronym rather than a journal
% name -- confirm the entry type and venue.
@article{mikhael2014greek:2014,
  author  = {Mikhael, Kamal Abou},
  title   = {The {Greek-Arabic} {New Testament} Interlinear Process: greekarabicnt.org},
  journal = {LRE-REL2},
  pages   = {1},
  year    = {2014}
}
% Thesis entry. The degree-granting body goes in "school", which classic BibTeX
% requires for theses and biblatex accepts as an alias of "institution".
@phdthesis{abufayad2018semantic:2018,
  author = {Abufayad, Tareq Issa},
  title  = {Semantic Word Clustering from Large {Arabic} Text},
  school = {The Islamic University of Gaza},
  year   = {2018}
}
% Journal article. The second author uses the "Last, Jr, First" form so the
% suffix is not parsed as part of the surname.
@article{abozinadah2016improved:2016,
  author  = {Abozinadah, Ehab A and Jones, Jr, James H},
  title   = {Improved microblog classification for detecting abusive
             {Arabic} {Twitter} accounts},
  journal = {International Journal of Data Mining \& Knowledge
             Management Process (IJDKP)},
  volume  = {6},
  number  = {6},
  pages   = {17--28},
  year    = {2016},
  doi     = {10.5121/ijdkp.2016.6602}
}


% Journal article. Publisher name and publisher city are kept in separate fields.
@article{oussous2020asa:2020,
  author    = {Oussous, Ahmed and Benjelloun, Fatima-Zahra and Lahcen, Ayoub Ait and Belfkih, Samir},
  title     = {{ASA}: A framework for {Arabic} sentiment analysis},
  journal   = {Journal of Information Science},
  volume    = {46},
  number    = {4},
  pages     = {544--559},
  year      = {2020},
  publisher = {SAGE Publications},
  address   = {London, England},
  doi       = {10.1177/0165551519849516}
}
% Conference paper (WITS 2019); the proper noun in the title is brace-protected.
@inproceedings{kaibi2019comparative:2019,
  author       = {Kaibi, Ibrahim and Nfaoui, El Habib and Satori, Hassan},
  title        = {A Comparative Evaluation of Word Embeddings Techniques for {Twitter} Sentiment Analysis},
  booktitle    = {2019 International Conference on Wireless Technologies, Embedded and Intelligent Systems (WITS)},
  pages        = {1--4},
  year         = {2019},
  organization = {IEEE},
  doi          = {10.1109/wits.2019.8723864}
}


% Journal article. Percent signs in the abstract are escaped because an
% unescaped one starts a LaTeX comment; the page range uses an en dash.
@article{hamed2017building:2017,
  author   = {Hamed, Injy and Elmahdy, Mohamed and Abdennadher, Slim},
  title    = {Building a First Language Model for Code-switch {Arabic-English}},
  journal  = {Procedia Computer Science},
  volume   = {117},
  pages    = {208--216},
  year     = {2017},
  note     = {Arabic Computational Linguistics},
  issn     = {1877-0509},
  doi      = {10.1016/j.procs.2017.10.111},
  url      = {https://www.sciencedirect.com/science/article/pii/S1877050917321683},
  keywords = {Automatic Speech Recognition, language model, code-mixing, code-switching, Arabic-English corpus, web corpus, web crawling},
  abstract = {The use of mixed languages in daily conversations, referred to as “code-switching”, has become a common linguistic phenomenon among bilingual/multilingual communities. Code-switching involves the alternating use of distinct languages or “codes” at sentence boundaries or within the same sentence. With the rise of globalization, code-switching has become prevalent in daily conversations, especially among urban youth. This lead to an increasing demand on automatic speech recognition systems to be able to handle such mixed speech. In this paper, we present the first steps towards building a multilingual language model (LM) for code-switched Arabic-English. One of the main challenges faced when building a multilingual LM is the need of explicit mixed text corpus. Since code-switching is a behaviour used more commonly in spoken than written form, text corpora with code-switching are usually scarce. Therefore, the first aim of this paper is to introduce a code-switch Arabic-English text corpus that is collected by automatically downloading relevant documents from the web. The text is then extracted from the documents and processed to be useable by NLP tasks. For language modeling, a baseline LM was built from existing monolingual corpora. The baseline LM gave a perplexity of 11841.9 and Out-of-Vocabulary (OOV) rate of 4.07\%. The gathered code-switch Arabic-English corpus, along with the existing monolingual corpora were then used to construct several LMs. The best LM achieved a great improvement over the baseline LM, with a perplexity of 275.41 and an OOV rate of 0.71\%.}
}


% Thesis entry. "school" is the field classic BibTeX requires for theses;
% biblatex treats it as an alias of "institution".
@phdthesis{zerrouki2020towards:2020,
  author    = {Zerrouki, Taha},
  title     = {Towards an open platform for {Arabic} language processing},
  type      = {PhD},
  school    = {Ecole Nationale Supérieure d'Informatique ESI, Algiers, Algeria},
  year      = {2020},
  pages     = {39},
  pagetotal = {125}
}

% Workshop paper; the language name in the title is brace-protected.
@inproceedings{al2016sentiment:2016,
  author       = {Al-Horaibi, Lamia and Khan, Muhammad Badruddin},
  title        = {Sentiment analysis of {Arabic} tweets using text mining techniques},
  booktitle    = {First International Workshop on Pattern Recognition},
  volume       = {10011},
  pages        = {288--292},
  year         = {2016},
  organization = {SPIE},
  doi          = {10.1117/12.2242187}
}

% Conference paper (ICICS 2021). Library and task names are braced so
% sentence-casing styles keep their capitals.
@inproceedings{ajlouni2021experience:2021,
  author       = {Ajlouni, Moataz},
  title        = {Experience {Simple Transformer} library in solving {Mojaz} Multi-Topic Labelling Task},
  booktitle    = {2021 12th International Conference on Information and Communication Systems (ICICS)},
  pages        = {466--467},
  year         = {2021},
  organization = {IEEE},
  doi          = {10.1109/icics52457.2021.9464602}
}

% Conference paper (IMAN 2019). The venue lives in booktitle only: the former
% "journal" field ("Proceedings of IMAN 2019") is not an inproceedings field and
% repeated the same venue. The month uses the standard three-letter macro.
@inproceedings{tarmom2019non:2019,
  author    = {Tarmom, T and Atwell, E and Alsalka, M},
  title     = {Non-authentic {Hadith} Corpus: Design and Methodology},
  booktitle = {International Conference on Islamic Applications in Computer Science and Technologies (IMAN 2019)},
  month     = dec,
  year      = {2019},
  note      = {Reproduced in accordance with the publisher's self-archiving policy.},
  abstract  = {The primary religious text of Islam is the Quran. The Hadith{--}the second source{--}refers to any action, saying, order or silent approval of the holy prophet Muhammad that has been delivered through a chain of narrators. Each Hadith has an Isnad{--}the chain of narrators{--}and a Matan{--}the act of the Prophet Muhammad. In contrast to the Quran, some Hadiths, which have been handed down over the centuries, have been corrupted by narrators who were not competent in transferring them. These have been classified by Hadith scholars as a non-authentic Hadith (NAH). To evaluate different classifiers regarding the automatic classification of Arabic Hadith, it was necessary to build Arabic Hadith corpora that contained samples of authentic and non-authentic Hadith, which were used for training models and testing. This paper aimed to create a new NAH corpus which consists of 452,624 words from six different Hadith books. The subsequent aim is to annotate this corpus to determine some Hadith features such as the Isnad, the Matan and the Hadith authenticity and to provide a ground truth.}
}


% Conference paper; fields ordered author, title, venue, year, identifiers.
@inproceedings{yusuf2019arabic:2019,
  author       = {Yusuf, Nuhu and Mohd Yunus, Mohd Amin and Wahid, Norfaradilla},
  title        = {Arabic text stemming using query expansion method},
  booktitle    = {International Conference of Reliable Information and Communication Technology},
  organization = {Springer},
  year         = {2019},
  pages        = {3--11},
  doi          = {10.1007/978-3-030-33582-3_1}
}

% Conference paper (ICICS 2021). The page range uses an en dash; team and task
% names are brace-protected.
@inproceedings{habash2021team:2021,
  author    = {Habash, Mohammad},
  title     = {Team {MohammadHabash} at {Mowjaz} Multi-Topic Labelling Task},
  booktitle = {2021 12th International Conference on Information and Communication Systems (ICICS)},
  pages     = {468--470},
  year      = {2021},
  doi       = {10.1109/ICICS52457.2021.9464614}
}

% Journal article. "and others" is the BibTeX token for a truncated author list.
@article{zhang2021rise:2021,
  author    = {Zhang, Xiangliang and Yang, Qiang and Albaradei, Somayah and Lyu, Xiaoting and Alamro, Hind and Salhi, Adil and Ma, Changsheng and Alshehri, Manal and Jaber, Inji Ibrahim and Tifratene, Faroug and others},
  title     = {Rise and fall of the global conversation and shifting sentiments during the {COVID-19} pandemic},
  journal   = {Humanities and Social Sciences Communications},
  volume    = {8},
  number    = {1},
  pages     = {1--10},
  year      = {2021},
  publisher = {Palgrave},
  doi       = {10.1057/s41599-021-00798-7}
}


% Journal article (IJACSA).
% NOTE(review): "pages" repeats the year, which looks like an export artifact --
% confirm the real page range.
% NOTE(review): the issue number is inferred from the DOI suffix (0130751), read
% the same way as the other IJACSA entry in this file -- confirm.
@article{Moaz:2022,
  author  = {Taha, Moaz and Barakat, Nahla},
  title   = {{Arabic} Image Captioning: The Effect of Text Pre-processing on the Attention Weights and the {BLEU-N} Scores},
  journal = {International Journal of Advanced Computer Science and Applications},
  volume  = {13},
  number  = {7},
  pages   = {2022},
  month   = aug,
  year    = {2022},
  doi     = {10.14569/IJACSA.2022.0130751}
}

% Journal article (IJACSA). The second and third author names were truncated to
% "Ait, B" and "El, I"; they are spelled as in the mihi2022dialectal entry,
% which lists the same author group.
@article{mihi2020mstd:2020,
  author  = {Mihi, Soukaina and Ali, Brahim Ait Ben and El Bazi, Ismail and Arezki, Sara and Laachfoubi, Nabil},
  title   = {{MSTD}: {Moroccan} sentiment {Twitter} dataset},
  journal = {International Journal of Advanced Computer Science and Applications},
  volume  = {11},
  number  = {10},
  pages   = {363--372},
  year    = {2020},
  doi     = {10.14569/ijacsa.2020.0111045}
}


% Chapter in a collection; fields ordered author, title, container, year, ids.
@incollection{kaibi2020sentiment:2020,
  author    = {Kaibi, Ibrahim and Nfaoui, El Habib and Satori, Hassan},
  title     = {Sentiment analysis approach based on combination of word embedding techniques},
  booktitle = {Embedded Systems and Artificial Intelligence},
  publisher = {Springer},
  year      = {2020},
  pages     = {805--813},
  doi       = {10.1007/978-981-15-0947-6_76}
}

% Dataset paper. The acronym and proper nouns are brace-protected, and the
% organiser's name is spelled out in full.
@inproceedings{alharbi2020asad:2020,
  author       = {Alharbi, Basma and Alamro, Hind and Alshehri, Manal and Khayyat, Zuhair and Kalkatawi, Manal and Jaber, Inji Ibrahim and Zhang, Xiangliang},
  title        = {{ASAD}: A {Twitter-based} benchmark {Arabic} sentiment analysis dataset},
  booktitle    = {{KAUST} {Arabic} Sentiment Analysis Challenge},
  organization = {King Abdullah University of Science and Technology, Saudi Arabia},
  year         = {2020}
}

% Conference paper (FMEC 2019); proper nouns in the title are brace-protected.
@inproceedings{alotaibi2019sentiment:2019,
  author       = {Alotaibi, Shoayee and Mehmood, Rashid and Katib, Iyad},
  title        = {Sentiment analysis of {Arabic} tweets in smart cities: A review of {Saudi} dialect},
  booktitle    = {2019 Fourth International Conference on Fog and Mobile Edge Computing (FMEC)},
  pages        = {330--335},
  year         = {2019},
  organization = {IEEE},
  doi          = {10.1109/fmec.2019.8795331}
}

% Conference paper (ICICS 2021); the language name is capitalised and braced.
@inproceedings{mgheed2021scalable:2021,
  author       = {Mgheed, Rahaf M AL},
  title        = {Scalable {Arabic} text classification using machine learning model},
  booktitle    = {2021 12th International Conference on Information and Communication Systems (ICICS)},
  pages        = {483--485},
  year         = {2021},
  organization = {IEEE},
  doi          = {10.1109/icics52457.2021.9464566}
}


% Conference paper in an edited volume. Every field has matching brace
% delimiters, so "editor" and "isbn" are real fields rather than text swallowed
% into "author" and "abstract". The key is kept as cited although the year is 2020.
@inproceedings{laachfoubi2019comparative:2019,
  author    = {Mihi, Soukaina
               and Ali, Brahim Ait Ben
               and Bazi, Ismail El
               and Arezki, Sara
               and Laachfoubi, Nabil},
  editor    = {Serrhini, Mohammed
               and Silva, Carla
               and Aljahdali, Sultan},
  title     = {A Comparative Study of Feature Selection Methods for Informal {Arabic}},
  booktitle = {Innovation in Information Systems and Technologies to Support Learning Research},
  year      = {2020},
  publisher = {Springer International Publishing},
  pages     = {203--213},
  doi       = {10.1007/978-3-030-36778-7_22},
  abstract  = {The advent of web 2.0 and new Big Data technologies has created a diversity of data and information that can be used in many fields of application. The case of opinion mining is of increasing interest to researchers because of its impact on policy, marketing, etc. Through this document, we are interested in the study of sentiments more specifically in informal Arabic. We present a new approach of processing and analysis that is improved through feature selection methods. The experiments we have carried out are based on the comparison of 3 feature selection methods combined with several machine learning algorithms applied on a twitter dataset. Our paper reports the enhanced results (Accuracy of 98{\%}) and shows the importance of feature selection for Arabic Sentiment Analysis.},
  isbn      = {978-3-030-36778-7}
}

% Conference paper; fields ordered author, title, venue, year, pages.
@inproceedings{choe2019word2word:2019,
  author    = {Choe, Yo Joong and Park, Kyubyong and Kim, Dongwoo},
  title     = {word2word: A Collection of Bilingual Lexicons for 3,564 Language Pairs},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  year      = {2020},
  pages     = {3036--3045}
}


% Conference paper (ICICS 2021). The empty volume/number fields are dropped and
% the page range uses an en dash.
@inproceedings{AlBatayha:2021,
  author    = {AlBatayha, Duha},
  title     = {Multi-Topic Labelling Classification Based on {LSTM}},
  booktitle = {2021 12th International Conference on Information and Communication Systems (ICICS)},
  pages     = {471--474},
  year      = {2021},
  doi       = {10.1109/ICICS52457.2021.9464531}
}

% arXiv preprint. The identifier from the URL is also given in the eprint
% fields that eprint-aware styles read; the repeated keyword is listed once.
@misc{Nguyen:2019,
  author        = {Nguyen, Khanh and Daumé, Hal},
  title         = {{Global Voices}: Crossing Borders in Automatic News Summarization},
  year          = {2019},
  publisher     = {arXiv},
  eprint        = {1910.00421},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.1910.00421},
  url           = {https://arxiv.org/abs/1910.00421},
  keywords      = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license}
}

% Conference paper (EACL 2021). The month uses the standard macro and the page
% range uses an en dash.
@inproceedings{jimin:2021,
  author    = {Sun, Jimin and Ahn, Hwijeen and Park, Chan and Tsvetkov, Yulia and Mortensen, David},
  title     = {Cross-Cultural Similarity Features for Cross-Lingual Transfer Learning of Pragmatically Motivated Tasks},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics},
  pages     = {2403--2414},
  month     = jan,
  year      = {2021},
  doi       = {10.18653/v1/2021.eacl-main.204}
}

% Conference paper (ICICS 2021); the language name is brace-protected.
@inproceedings{alzu2021detecting:2021,
  author       = {Alzu'bi, Dalia and Duwairi, Rehab},
  title        = {Detecting Regional {Arabic} Dialect based on Recurrent Neural Network},
  booktitle    = {2021 12th International Conference on Information and Communication Systems (ICICS)},
  pages        = {90--93},
  year         = {2021},
  organization = {IEEE},
  doi          = {10.1109/icics52457.2021.9464605}
}


% Journal article; the language name is brace-protected. No page range is recorded.
@article{AlSarem:2020,
  author  = {Al-Sarem, Mohammad and Alsaeedi, Abdullah and Saeed, Faisal},
  title   = {A Deep Learning-based Artificial Neural Network Method for Instance-based {Arabic} Language Authorship Attribution},
  journal = {International Journal of Advances in Soft Computing and its Applications},
  volume  = {12},
  number  = {2},
  year    = {2020}
}


% Journal article. The title has no trailing period: styles add their own
% punctuation after the title.
@article{elouali2020hate:2020,
  author  = {Elouali, Aya and Elberrichi, Zakaria and Elouali, Nadia},
  title   = {Hate Speech Detection on Multilingual {Twitter} Using Convolutional Neural Networks},
  journal = {Revue d'Intelligence Artificielle},
  volume  = {34},
  number  = {1},
  pages   = {81--88},
  year    = {2020},
  doi     = {10.18280/ria.340111}
}

% Journal article; "pages" holds the article number.
@article{alrumayyan2022neural:2022,
  author    = {Alrumayyan, Nafla and Al-Yahya, Maha},
  title     = {Neural Embeddings for the Elicitation of Jurisprudence Principles: The Case of {Arabic} Legal Texts},
  journal   = {Applied Sciences},
  volume    = {12},
  number    = {9},
  pages     = {4188},
  year      = {2022},
  publisher = {MDPI},
  doi       = {10.3390/app12094188}
}


% Journal article; the language name is capitalised and brace-protected.
@article{al2020exploration:2020,
  author  = {Al-Hagery, Mohammed Abdullah and Al-Assaf, Manar Abdullah and Al-Kharboush, Faiza Mohammad},
  title   = {Exploration of the best performance method of emotions classification for {Arabic} tweets},
  journal = {Indonesian Journal of Electrical Engineering and Computer Science},
  volume  = {19},
  number  = {2},
  pages   = {1010--1020},
  year    = {2020},
  doi     = {10.11591/ijeecs.v19.i2.pp1010-1020}
}


% Conference paper (joint ICIEV / icIVPR 2019); the platform name is capitalised
% and brace-protected.
@inproceedings{mouty2019effect:2019,
  author       = {Mouty, Rabeaa and Gazdar, Achraf},
  title        = {The effect of the similarity between the two names of {Twitter} users on the credibility of their publications},
  booktitle    = {2019 Joint 8th International Conference on Informatics, Electronics \& Vision (ICIEV) and 2019 3rd International Conference on Imaging, Vision \& Pattern Recognition (icIVPR)},
  pages        = {196--201},
  year         = {2019},
  organization = {IEEE},
  doi          = {10.1109/iciev.2019.8858561}
}

% Journal article (IJECE); the language name is brace-protected.
@article{mihi2022dialectal:2022,
  author  = {Mihi, Soukaina and Ali, Brahim Ait Ben and El Bazi, Ismail and Arezki, Sara and Laachfoubi, Nabil},
  title   = {Dialectal {Arabic} sentiment analysis based on tree-based pipeline optimization tool},
  journal = {International Journal of Electrical and Computer Engineering (IJECE)},
  volume  = {12},
  number  = {4},
  pages   = {4195--4205},
  year    = {2022},
  doi     = {10.11591/ijece.v12i4.pp4195-4205}
}


% Conference paper (ICICS 2022); the dataset name and country are brace-protected.
@inproceedings{al2022flusa:2022,
  author       = {Al-Jamaan, Rawabe and Ykhlef, Mourad and Alothaim, Abdulrahman},
  title        = {{FluSa-Tweet}: A Benchmark Dataset for Influenza Detection in {Saudi Arabia}},
  booktitle    = {2022 13th International Conference on Information and Communication Systems (ICICS)},
  pages        = {346--351},
  year         = {2022},
  organization = {IEEE},
  doi          = {10.1109/icics55353.2022.9811149}
}


% Journal article; proper nouns in the title are brace-protected.
% NOTE(review): the publisher field repeats the journal name -- confirm whether
% a distinct publisher should be recorded.
@article{almutairi2021cyberbullying:2021,
  author    = {Almutairi, Amjad Rasmi and Al-Hagery, Muhammad Abdullah},
  title     = {Cyberbullying Detection by Sentiment Analysis of Tweets' Contents Written in {Arabic} in {Saudi Arabia} Society},
  journal   = {International Journal of Computer Science \& Network Security},
  volume    = {21},
  number    = {3},
  pages     = {112--119},
  year      = {2021},
  publisher = {International Journal of Computer Science \& Network Security}
}


% Journal article; "pages" holds the article number.
@article{solyman2022automatic:2022,
  author    = {Solyman, Aiman and Wang, Zhenyu and Tao, Qian and Elhag, Arafat Abdulgader Mohammed and Zhang, Rui and Mahmoud, Zeinab},
  title     = {Automatic {Arabic} Grammatical Error Correction based on Expectation-Maximization routing and target-bidirectional agreement},
  journal   = {Knowledge-Based Systems},
  volume    = {241},
  pages     = {108180},
  year      = {2022},
  publisher = {Elsevier},
  doi       = {10.1016/j.knosys.2022.108180}
}


% Journal article. Both occurrences of the system name use the same ASCII
% apostrophe; system, language and algorithm names are brace-protected.
@article{marie2022samee:2022,
  author  = {Marie-Sainte, Souad Larabi},
  title   = {{Samee'a}: a new system for {Arabic} recitation using speech recognition and {Jaro Winkler} algorithm: {Samee'a} {Arabic} Recitation},
  journal = {Kuwait Journal of Science},
  volume  = {49},
  number  = {1},
  year    = {2022}
}


% Journal article; the language name is brace-protected.
@article{duwairi2021deep:2021,
  author    = {Duwairi, Rehab and Hayajneh, Amena and Quwaider, Muhannad},
  title     = {A deep learning framework for automatic detection of hate speech embedded in {Arabic} tweets},
  journal   = {Arabian Journal for Science and Engineering},
  volume    = {46},
  number    = {4},
  pages     = {4001--4014},
  year      = {2021},
  publisher = {Springer},
  doi       = {10.1007/s13369-021-05383-3}
}


% Journal article; "pages" holds the article number.
@article{khabour2022new:2022,
  author    = {Khabour, Safaa M and Al-Radaideh, Qasem A and Mustafa, Dheya},
  title     = {A New Ontology-Based Method for {Arabic} Sentiment Analysis},
  journal   = {Big Data and Cognitive Computing},
  volume    = {6},
  number    = {2},
  pages     = {48},
  year      = {2022},
  publisher = {Multidisciplinary Digital Publishing Institute},
  doi       = {10.3390/bdcc6020048}
}


% Journal article. Publisher name and publisher city are kept in separate
% fields; "pages" holds the article number.
@article{alasmari2022hybrid:2022,
  author    = {Alasmari, Amal and Alhothali, Areej and Allinjawi, Arwa},
  title     = {Hybrid machine learning approach for {Arabic} medical web page credibility assessment},
  journal   = {Health Informatics Journal},
  volume    = {28},
  number    = {1},
  pages     = {14604582211070998},
  year      = {2022},
  publisher = {SAGE Publications},
  address   = {London, England},
  doi       = {10.1177/14604582211070998}
}

% Software repository reference; trailing space removed from the title.
@misc{mishkal,
  author    = {Zerrouki, Taha},
  title     = {Mishkal {Arabic} text vocalization software},
  year      = {2022},
  publisher = {GitHub},
  journal   = {GitHub repository},
  url       = {https://github.com/linuxscout/mishkal}
}
% Software repository reference; trailing space removed from the title.
@misc{qutrub,
  author    = {Zerrouki, Taha},
  title     = {Qutrub: {Arabic} verb conjugation software},
  year      = {2022},
  publisher = {GitHub},
  journal   = {GitHub repository},
  url       = {https://github.com/linuxscout/qutrub}
}
% Software repository reference; the language name is brace-protected.
@misc{tashaphyne,
  author    = {Zerrouki, Taha},
  title     = {Tashaphyne: {Arabic} Light Stemmer},
  year      = {2022},
  publisher = {GitHub},
  journal   = {GitHub repository},
  url       = {https://github.com/linuxscout/tashaphyne}
}
% Software repository reference; language names are brace-protected.
@misc{qalsadi,
  author    = {Zerrouki, Taha},
  title     = {Qalsadi {Arabic} Morphological Analyzer and Lemmatizer for {Python}},
  year      = {2022},
  publisher = {GitHub},
  journal   = {GitHub repository},
  url       = {https://github.com/linuxscout/qalsadi}
}
% Software library reference; the library, language and acronym names are
% brace-protected.
@misc{ar-php,
  author  = {Al-Shamaa, Khaled},
  title   = {{Ar-PHP}, {PHP} library for website developers to process {Arabic} content},
  version = {6.3.1},
  year    = {2022},
  url     = {https://github.com/khaled-alshamaa/ar-php}
}
% Conference paper (LREC 2020). Values are brace-delimited; content is unchanged.
@inproceedings{obeid-etal-2020-camel,
  author    = {Obeid, Ossama and Zalmout, Nasser and Khalifa, Salam and Taji, Dima and Oudah, Mai and Alhafni, Bashar and Inoue, Go and Eryani, Fadhl and Erdmann, Alexander and Habash, Nizar},
  title     = {{CAM}e{L} Tools: An Open Source Python Toolkit for {A}rabic Natural Language Processing},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {7022--7032},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.868},
  language  = {English},
  ISBN      = {979-10-95546-34-4},
  abstract  = {We present CAMeL Tools, a collection of open-source tools for Arabic natural language processing in Python. CAMeL Tools currently provides utilities for pre-processing, morphological modeling, Dialect Identification, Named Entity Recognition and Sentiment Analysis. In this paper, we describe the design of CAMeL Tools and the functionalities it provides.}
}
% Software repository reference; the language name is brace-protected.
@misc{tkseem2020,
  author       = {Alyafeai, Zaid and Saeed, Maged},
  title        = {tkseem: A Tokenization Library for {Arabic}.},
  year         = {2020},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/ARBML/tkseem}}
}

% Software repository reference. The title names tnkeeh, matching the citation
% key and the repository URL; it previously carried the tkseem name.
@misc{tnkeeh2020,
  author       = {Alyafeai, Zaid and Saeed, Maged},
  title        = {tnkeeh: A Preprocessing Library for {Arabic}.},
  year         = {2020},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/ARBML/tnkeeh}}
}

% Conference paper (LREC'14); the tool acronym and language name are
% brace-protected and the proceedings title is capitalised.
@inproceedings{pasha2014madamira,
  author    = {Pasha, Arfath and Al-Badrashiny, Mohamed and Diab, Mona and El Kholy, Ahmed and Eskander, Ramy and Habash, Nizar and Pooleery, Manoj and Rambow, Owen and Roth, Ryan},
  title     = {{MADAMIRA}: A fast, comprehensive tool for morphological analysis and disambiguation of {Arabic}},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  pages     = {1094--1101},
  year      = {2014}
}
% Conference demo paper (NAACL 2016); the language name is capitalised and
% braced, and the proceedings title is capitalised.
@inproceedings{abdelali2016farasa,
  author    = {Abdelali, Ahmed and Darwish, Kareem and Durrani, Nadir and Mubarak, Hamdy},
  title     = {Farasa: A fast and furious segmenter for {Arabic}},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations},
  pages     = {11--16},
  year      = {2016},
  doi       = {10.18653/v1/N16-3003}
}
% arXiv preprint. The journal text is kept for styles that ignore eprint fields;
% the same identifier is also given in the eprint fields.
@article{loper2002nltk,
  author        = {Loper, Edward and Bird, Steven},
  title         = {{NLTK}: The {Natural Language Toolkit}},
  journal       = {arXiv preprint cs/0205028},
  eprint        = {cs/0205028},
  archiveprefix = {arXiv},
  year          = {2002}
}
% Book; the language and library names are brace-protected.
@book{vasiliev2020natural,
  author    = {Vasiliev, Yuli},
  title     = {Natural Language Processing with {Python} and {SpaCy}: A Practical Introduction},
  publisher = {No Starch Press},
  year      = {2020}
}