From f72c9997c694d723bb5990ab6663a7476ad8f72f Mon Sep 17 00:00:00 2001 From: Abdullah Alhazmy Date: Tue, 11 Sep 2018 20:45:26 +0300 Subject: [PATCH] Improve Arabic implementation (#176) * Change Arabic Lang implementation * Improve Arabic class, * update Arabic test cases * Support EGP and KWD currency * Add test cases * Remove .cache and .pytest_cache folder * Increase coverage from 84 to 90 #122 * Increase coverage from 90 to 92 #122 * Fix ordinal issue #166 --- num2words/lang_AR.py | 418 +++++++++++++++++++++++++++++++++---------- tests/test_ar.py | 128 +++++++++---- 2 files changed, 414 insertions(+), 132 deletions(-) diff --git a/num2words/lang_AR.py b/num2words/lang_AR.py index d9e5958..843d797 100644 --- a/num2words/lang_AR.py +++ b/num2words/lang_AR.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- # Copyright (c) 2003, Taro Ogawa. All Rights Reserved. # Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. +# Copyright (c) 2018, Abdullah Alhazmy, Alhazmy13. All Rights Reserved. + # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -15,106 +17,336 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301 USA -from __future__ import division, print_function, unicode_literals +import re +from decimal import Decimal +from math import floor -from . import lang_EU +CURRENCY_SR = [("ريال", "ريالان", "ريالات", "ريالاً"), + ("هللة", "هللتان", "هللات", "هللة")] +CURRENCY_EGP = [("جنيه", "جنيهان", "جنيهات", "جنيهاً"), + ("قرش", "قرشان", "قروش", "قرش")] +CURRENCY_KWD = [("دينار", "ديناران", "دينارات", "ديناراً"), + ("فلس", "فلسان", "فلس", "فلس")] + +ARABIC_ONES = [ + "", "واحد", "اثنان", "ثلاثة", "أربعة", "خمسة", "ستة", "سبعة", "ثمانية", + "تسعة", + "عشرة", "أحد عشر", "اثنا عشر", "ثلاثة عشر", "أربعة عشر", "خمسة عشر", + "ستة عشر", "سبعة عشر", "ثمانية عشر", + "تسعة عشر" +] -class Num2Word_AR(lang_EU.Num2Word_EU): - def set_high_numwords(self, high): - max = 3 + 3 * len(high) - for word, n in zip(high, range(max, 3, -3)): - self.cards[10 ** n] = word + "illion" +class Num2Word_AR(object): + errmsg_too_big = "Too large" + max_num = 10 ** 36 - def setup(self): - self.negword = "سالب " - self.pointword = "فاصلة" - self.errmsg_nornum = "Only numbers may be converted to words." - self.exclude_title = ["و", "فاصلة", "سالب"] + def __init__(self): + self.number = 0 + self.arabicPrefixText = "" + self.arabicSuffixText = "" + self.integer_value = 0 + self._decimalValue = "" + self.partPrecision = 2 + self.currency_unit = CURRENCY_SR[0] + self.currency_subunit = CURRENCY_SR[1] + self.isCurrencyPartNameFeminine = True + self.isCurrencyNameFeminine = False + self.separator = 'و' - self.mid_numwords = [(1000000, "مليون"), (1000, "ألف"), (100, "مئة"), - (90, "تسعين"), (80, "ثمانين"), (70, "سبعين"), - (60, "ستين"), (50, "خمسين"), (40, "أربعين"), - (30, "ثلاثين")] - self.low_numwords = ["عشرين", "تسعة عشر", "ثمانية عشر", "سبعة عشر", - "ستة عشر", "خمسة عشر", "أربعة عشر", "ثلاثة عشر", - "اثناعشر", "أحد عشر", "عشرة", "تسعة", "ثمانية", - "سبعة", "ستة", "خمسة", "أربعة", "ثلاثة", "اثنين", - "واحد", "صفر"] - self.ords = {"واحد": "أول", - "اثنين": "ثاني", - "ثلاثة": "ثالث", - "أربعة": "رابع", - "خمسة": "خامس", - "ثمانية": "ثامن", - "تسعة": "تاسع", - "اثناعشر": "ثاني عشر"} + self.arabicOnes = ARABIC_ONES + self.arabicFeminineOnes = [ + "", "إحدى", "اثنتان", "ثلاث", "أربع", "خمس", "ست", "سبع", "ثمان", + "تسع", + "عشر", "إحدى عشرة", "اثنتا عشرة", "ثلاث عشرة", "أربع عشرة", + "خمس عشرة", "ست عشرة", "سبع عشرة", "ثماني عشرة", + "تسع عشرة" + ] + self.arabicOrdinal = [ + "", "اول", "ثاني", "ثالث", "رابع", "خامس", "سادس", "سابع", "ثامن", + "تاسع", "عاشر", "حادي عشر", "ثاني عشر", "ثالث عشر", "رابع عشر", + "خامس عشر", "سادس عشر", "سابع عشر", "ثامن عشر", "تاسع عشر" + ] + self.arabicTens = [ + "عشرون", "ثلاثون", "أربعون", "خمسون", "ستون", "سبعون", "ثمانون", + "تسعون" + ] + self.arabicHundreds = [ + "", "مائة", "مئتان", "ثلاثمائة", "أربعمائة", "خمسمائة", "ستمائة", + "سبعمائة", "ثمانمائة", "تسعمائة" + ] + self.arabicAppendedTwos = [ + "مئتا", "ألفا", "مليونا", "مليارا", "تريليونا", "كوادريليونا", + "كوينتليونا", "سكستيليونا" + ] + self.arabicTwos = [ + "مئتان", "ألفان", "مليونان", "ملياران", "تريليونان", + "كوادريليونان", "كوينتليونان", "سكستيليونان" + ] + self.arabicGroup = [ + "مائة", "ألف", "مليون", "مليار", "تريليون", "كوادريليون", + "كوينتليون", "سكستيليون" + ] + self.arabicAppendedGroup = [ + "", "ألفاً", "مليوناً", "ملياراً", "تريليوناً", "كوادريليوناً", + "كوينتليوناً", "سكستيليوناً" + ] + self.arabicPluralGroups = [ + "", "آلاف", "ملايين", "مليارات", "تريليونات", "كوادريليونات", + "كوينتليونات", "سكستيليونات" + ] - def merge(self, lpair, rpair): - ltext, lnum = lpair - rtext, rnum = rpair - if lnum == 1 and rnum < 100: - return (rtext, rnum) - elif 100 > lnum > rnum: - return ("%s و%s" % (rtext, ltext), rnum + lnum) - elif lnum >= 100 > rnum: - return ("%s و %s" % (ltext, rtext), lnum + rnum) - elif rnum > lnum: - if lnum == 1 and rnum in [100, 1000, 1000000]: - return ("%s" % (rtext), rnum * lnum) - if lnum == 2 and rnum == 100: - return ("مئتين", rnum * lnum) - if lnum == 2 and rnum in [100, 1000]: - return ("%sين" % (rtext), rnum * lnum) - return ("%s %s" % (ltext, rtext), lnum * rnum) - return ("%s، %s" % (ltext, rtext), lnum + rnum) + def number_to_arabic(self, arabic_prefix_text, arabic_suffix_text): + self.arabicPrefixText = arabic_prefix_text + self.arabicSuffixText = arabic_suffix_text + self.extract_integer_and_decimal_parts() - def to_ordinal(self, value): - self.verify_ordinal(value) - outwords = self.to_cardinal(value).split(" ") - lastwords = outwords[-1].split("-") - lastword = lastwords[-1].lower() - try: - lastword = self.ords[lastword] - except KeyError: - lastword += "" - lastwords[-1] = self.title(lastword) - outwords[-1] = "،".join(lastwords) - return " ".join(outwords) + def extract_integer_and_decimal_parts(self): + re.split('\\.', str(self.number)) + splits = re.split('\\.', str(self.number)) + + self.integer_value = int(splits[0]) + if len(splits) > 1: + self._decimalValue = int(self.decimal_value(splits[1])) + else: + self._decimalValue = 0 + + def decimal_value(self, decimal_part): + + if self.partPrecision is not len(decimal_part): + decimal_part_length = len(decimal_part) + + decimal_part_builder = decimal_part + for i in range(0, self.partPrecision - decimal_part_length): + decimal_part_builder += '0' + decimal_part = decimal_part_builder + + if len(decimal_part) <= self.partPrecision: + dec = len(decimal_part) + else: + dec = self.partPrecision + result = decimal_part[0: dec] + else: + result = decimal_part + + for i in range(len(result), self.partPrecision): + result += '0' + return result + + def digit_feminine_status(self, digit, group_level): + if group_level == -1: + if self.isCurrencyPartNameFeminine: + return self.arabicFeminineOnes[int(digit)] + else: + return self.arabicOnes[int(digit)] + elif group_level == 0: + if self.isCurrencyNameFeminine: + return self.arabicFeminineOnes[int(digit)] + else: + return self.arabicOnes[int(digit)] + + else: + return self.arabicOnes[int(digit)] + + def process_arabic_group(self, group_number, group_level, + remaining_number): + tens = Decimal(group_number) % Decimal(100) + hundreds = Decimal(group_number) / Decimal(100) + ret_val = "" + + if int(hundreds) > 0: + if tens == 0 and int(hundreds) == 2: + ret_val = "{}".format(self.arabicAppendedTwos[0]) + else: + ret_val = "{}".format(self.arabicHundreds[int(hundreds)]) + + if tens > 0: + if tens < 20: + if tens == 2 and int(hundreds) == 0 and group_level > 0: + if self.integer_value in [2000, 2000000, 2000000000, + 2000000000000, 2000000000000000, + 2000000000000000000]: + ret_val = "{}".format( + self.arabicAppendedTwos[int(group_level)]) + else: + ret_val = "{}".format( + self.arabicTwos[int(group_level)]) + else: + if ret_val != "": + ret_val += " و " + + if tens == 1 and group_level > 0 and hundreds == 0: + ret_val += "" + elif (tens == 1 or tens == 2) and ( + group_level == 0 or group_level == -1) and \ + hundreds == 0 and remaining_number == 0: + ret_val += "" + else: + ret_val += self.digit_feminine_status(int(tens), + group_level) + else: + ones = tens % 10 + tens = (tens / 10) - 2 + if ones > 0: + if ret_val is not "" and tens < 4: + ret_val += " و " + + ret_val += self.digit_feminine_status(ones, group_level) + if ret_val is not "" and ones != 0: + ret_val += " و " + + ret_val += self.arabicTens[int(tens)] + + return ret_val + + def convert(self, value): + self.number = "{:.9f}".format(value) + self.number_to_arabic(self.arabicPrefixText, self.arabicSuffixText) + return self.convert_to_arabic() + + def convert_to_arabic(self): + temp_number = Decimal(self.number) + + if temp_number == Decimal(0): + return "صفر" + + decimal_string = self.process_arabic_group(self._decimalValue, + -1, + Decimal(0)) + ret_val = "" + group = 0 + + while temp_number > Decimal(0): + + number_to_process = int( + Decimal(str(temp_number)) % Decimal(str(1000))) + temp_number = int(Decimal(temp_number) / Decimal(1000)) + + group_description = \ + self.process_arabic_group(number_to_process, + group, + Decimal(floor(temp_number))) + if group_description is not '': + if group > 0: + if ret_val is not "": + ret_val = "{} و {}".format("", ret_val) + if number_to_process != 2: + if number_to_process % 100 != 1: + if 3 <= number_to_process <= 10: + ret_val = "{} {}".format( + self.arabicPluralGroups[group], ret_val) + else: + if ret_val is not "": + ret_val = "{} {}".format( + self.arabicAppendedGroup[group], + ret_val) + else: + ret_val = "{} {}".format( + self.arabicGroup[group], ret_val) + + else: + ret_val = "{} {}".format(self.arabicGroup[group], + ret_val) + ret_val = "{} {}".format(group_description, ret_val) + group += 1 + formatted_number = "" + if self.arabicPrefixText is not "": + formatted_number += "{} ".format(self.arabicPrefixText) + formatted_number += ret_val + if self.integer_value != 0: + remaining100 = int(self.integer_value % 100) + + if remaining100 == 0: + formatted_number += self.currency_unit[0] + elif remaining100 == 1: + formatted_number += self.currency_unit[0] + elif remaining100 == 2: + if self.integer_value == 2: + formatted_number += self.currency_unit[1] + else: + formatted_number += self.currency_unit[0] + elif 3 <= remaining100 <= 10: + formatted_number += self.currency_unit[2] + elif 11 <= remaining100 <= 99: + formatted_number += self.currency_unit[3] + if self._decimalValue != 0: + formatted_number += " {} ".format(self.separator) + formatted_number += decimal_string + + if self._decimalValue != 0: + formatted_number += " " + remaining100 = int(self._decimalValue % 100) + + if remaining100 == 0: + formatted_number += self.currency_subunit[0] + elif remaining100 == 1: + formatted_number += self.currency_subunit[0] + elif remaining100 == 2: + formatted_number += self.currency_subunit[1] + elif 3 <= remaining100 <= 10: + formatted_number += self.currency_subunit[2] + elif 11 <= remaining100 <= 99: + formatted_number += self.currency_subunit[3] + + if self.arabicSuffixText is not "": + formatted_number += " {}".format(self.arabicSuffixText) + + return formatted_number + + def validate_number(self, number): + if number >= self.max_num: + raise OverflowError(self.errmsg_too_big) + return number + + def set_currency_prefer(self, currency): + if currency is 'EGP': + self.currency_unit = CURRENCY_EGP[0] + self.currency_subunit = CURRENCY_EGP[1] + elif currency is 'KWD': + self.currency_unit = CURRENCY_KWD[0] + self.currency_subunit = CURRENCY_KWD[1] + else: + self.currency_unit = CURRENCY_SR[0] + self.currency_subunit = CURRENCY_SR[1] + + def to_currency(self, value, currency='SR', prefix='', suffix=''): + self.set_currency_prefer(currency) + self.isCurrencyNameFeminine = False + self.separator = "و" + self.arabicOnes = ARABIC_ONES + self.arabicPrefixText = prefix + self.arabicSuffixText = suffix + return self.convert(value=value) + + def to_ordinal(self, number, prefix=''): + if number <= 19: + return "{}".format(self.arabicOrdinal[number]) + if number < 100: + self.isCurrencyNameFeminine = True + else: + self.isCurrencyNameFeminine = False + self.currency_subunit = ('', '', '', '') + self.currency_unit = ('', '', '', '') + self.arabicPrefixText = prefix + self.arabicSuffixText = "" + return "{}".format(self.convert(abs(number)).strip()) + + def to_year(self, value): + value = self.validate_number(value) + return self.to_cardinal(value) def to_ordinal_num(self, value): - self.verify_ordinal(value) - return "%s%s" % (value, self.to_ordinal(value)[-2:]) + return self.to_ordinal(value).strip() - def to_year(self, val, longval=True): - if not (val // 100) % 10: - return self.to_cardinal(val) - return self.to_splitnum(val, hightxt="مئة", jointxt="و", - longval=longval) - - def to_currency(self, val, longval=True): - return self.to_splitnum(val, hightxt="ريال", lowtxt="هللة", - jointxt="و", longval=longval, cents=True) - - -n2w = Num2Word_AR() -to_card = n2w.to_cardinal -to_ord = n2w.to_ordinal -to_ordnum = n2w.to_ordinal_num -to_year = n2w.to_year - - -def main(): - for val in [1, 11, 12, 21, 31, 33, 71, 80, 81, 91, 99, 100, 101, 102, 155, - 180, 300, 308, 832, 1000, 1001, 1061, 1100, 1500, 1701, 3000, - 8280, 8291, 150000, 500000, 1000000, 2000000, 2000001, - -21212121211221211111, -2.121212, -1.0000100]: - n2w.test(val) - n2w.test(13253254360678768017687001076010010122121321432104732075403270573) - for val in [1, 120, 1000, 1120, 1800, 1976, 2000, 2010, 2099, 2171]: - print(val, "is", n2w.to_currency(val)) - print(val, "is", n2w.to_year(val)) - - -if __name__ == "__main__": - main() + def to_cardinal(self, number): + number = self.validate_number(number) + minus = '' + if number < 0: + minus = 'سالب ' + self.separator = ',' + self.currency_subunit = ('', '', '', '') + self.currency_unit = ('', '', '', '') + self.arabicPrefixText = "" + self.arabicSuffixText = "" + self.arabicOnes = ARABIC_ONES + return minus + self.convert(value=abs(number)).strip() diff --git a/tests/test_ar.py b/tests/test_ar.py index e499805..b7337c4 100644 --- a/tests/test_ar.py +++ b/tests/test_ar.py @@ -1,4 +1,4 @@ -# encoding: UTF-8 +# -*- encoding: utf-8 -*- # Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. # This library is free software; you can redistribute it and/or @@ -13,51 +13,101 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301 USA -from __future__ import unicode_literals +# from __future__ import unicode_literals from unittest import TestCase from num2words import num2words -TEST_CASES_CARDINAL = ( - (1, 'واحد'), - (2, 'اثنين'), - (11, 'أحد عشر'), - (12, 'اثناعشر'), - (20, 'عشرين'), - (21, 'واحد وعشرين'), - (26, 'ستة وعشرين'), - (30, 'ثلاثين'), - (67, 'سبعة وستين'), - (70, 'سبعين'), - (100, 'مئة'), - (101, 'مئة و واحد'), - (199, 'مئة و تسعة وتسعين'), - (203, 'مئتين و ثلاثة'), - (1000, 'ألف'), - (1001, 'ألف و واحد'), - (1097, 'ألف و سبعة وتسعين'), - (1000000, 'مليون'), - (1000001, 'مليون و واحد'), -) - -TEST_CASES_ORDINAL = ( - (1, 'أول'), - (8, 'ثامن'), - (12, 'ثاني عشر'), - (100, 'مئة'), -) - class Num2WordsARTest(TestCase): - def test_number(self): - for test in TEST_CASES_CARDINAL: - self.assertEqual(num2words(test[0], lang='ar'), test[1]) + def test_default_currency(self): + self.assertEqual(num2words(1, to='currency', lang='ar'), 'واحد ريال') + self.assertEqual(num2words(2, to='currency', lang='ar'), + 'اثنان ريالان') + self.assertEqual(num2words(10, to='currency', lang='ar'), + 'عشرة ريالات') + self.assertEqual(num2words(100, to='currency', lang='ar'), 'مائة ريال') + self.assertEqual(num2words(652.12, to='currency', lang='ar'), + 'ستمائة و اثنان و خمسون ريالاً و اثنتا عشرة هللة') + self.assertEqual(num2words(324, to='currency', lang='ar'), + 'ثلاثمائة و أربعة و عشرون ريالاً') + self.assertEqual(num2words(2000, to='currency', lang='ar'), + 'ألفا ريال') + self.assertEqual(num2words(541, to='currency', lang='ar'), + 'خمسمائة و واحد و أربعون ريالاً') + self.assertEqual(num2words(10000, to='currency', lang='ar'), + 'عشرة آلاف ريال') + self.assertEqual(num2words(20000.12, to='currency', lang='ar'), + 'عشرون ألف ريال و اثنتا عشرة هللة') + self.assertEqual(num2words(1000000, to='currency', lang='ar'), + 'واحد مليون ريال') + val = 'تسعمائة و ثلاثة و عشرون ألفاً و أربعمائة و أحد عشر ريالاً' + self.assertEqual(num2words(923411, to='currency', lang='ar'), val) + self.assertEqual(num2words(63411, to='currency', lang='ar'), + 'ثلاثة و ستون ألفاً و أربعمائة و أحد عشر ريالاً') + self.assertEqual(num2words(1000000.99, to='currency', lang='ar'), + 'واحد مليون ريال و تسع و تسعون هللة') + + def test_currency_parm(self): + self.assertEqual( + num2words(1, to='currency', lang='ar', currency="KWD"), + 'واحد دينار') + self.assertEqual( + num2words(10, to='currency', lang='ar', currency="EGP"), + 'عشرة جنيهات') + self.assertEqual( + num2words(20000.12, to='currency', lang='ar', currency="EGP"), + 'عشرون ألف جنيه و اثنتا عشرة قرش') + self.assertEqual( + num2words(923411, to='currency', lang='ar', currency="SR"), + 'تسعمائة و ثلاثة و عشرون ألفاً و أربعمائة و أحد عشر ريالاً') + self.assertEqual( + num2words(1000000.99, to='currency', lang='ar', currency="KWD"), + 'واحد مليون دينار و تسع و تسعون فلس') def test_ordinal(self): - for test in TEST_CASES_ORDINAL: - self.assertEqual( - num2words(test[0], lang='ar', ordinal=True), - test[1] - ) + self.assertEqual(num2words(1, to='ordinal', lang='ar'), 'اول') + self.assertEqual(num2words(2, to='ordinal', lang='ar'), 'ثاني') + self.assertEqual(num2words(3, to='ordinal', lang='ar'), 'ثالث') + self.assertEqual(num2words(4, to='ordinal', lang='ar'), 'رابع') + self.assertEqual(num2words(5, to='ordinal', lang='ar'), 'خامس') + self.assertEqual(num2words(6, to='ordinal', lang='ar'), 'سادس') + self.assertEqual(num2words(9, to='ordinal', lang='ar'), 'تاسع') + self.assertEqual(num2words(20, to='ordinal', lang='ar'), 'عشرون') + self.assertEqual(num2words(94, to='ordinal', lang='ar'), + 'أربع و تسعون') + self.assertEqual(num2words(102, to='ordinal', lang='ar'), + 'مائة و اثنان') + self.assertEqual( + num2words(923411, to='ordinal_num', lang='ar'), + 'تسعمائة و ثلاثة و عشرون ألفاً و أربعمائة و أحد عشر') + + def test_cardinal(self): + self.assertEqual(num2words(12, to='cardinal', lang='ar'), 'اثنا عشر') + self.assertEqual(num2words(-8324, to='cardinal', lang='ar'), + 'سالب ثمانية آلاف و ثلاثمائة و أربعة و عشرون') + self.assertEqual( + num2words(3431.12, to='cardinal', lang='ar'), + 'ثلاثة آلاف و أربعمائة و واحد و ثلاثون , اثنتا عشرة') + self.assertEqual(num2words(431, to='cardinal', lang='ar'), + 'أربعمائة و واحد و ثلاثون') + self.assertEqual(num2words(94231, to='cardinal', lang='ar'), + 'أربعة و تسعون ألفاً و مئتان و واحد و ثلاثون') + self.assertEqual(num2words(1431, to='cardinal', lang='ar'), + 'واحد ألف و أربعمائة و واحد و ثلاثون') + + def test_prefix_and_suffix(self): + self.assertEqual(num2words(645, to='currency', + lang='ar', prefix="فقط", suffix="لاغير"), + 'فقط ستمائة و خمسة و أربعون ريالاً لاغير') + + def test_year(self): + self.assertEqual(num2words(2000, to='year', lang='ar'), 'ألفا') + + def test_max_numbers(self): + with self.assertRaises(Exception) as context: + num2words(10 ** 36, to='year', lang='ar') + + self.assertTrue('Too large' in str(context.exception))