Improve Arabic implementation (#176)

* Change Arabic Lang implementation
* Improve Arabic class, * update Arabic test cases
* Support EGP and KWD currency
* Add test cases
* Remove .cache and .pytest_cache folder
* Increase coverage from 84 to 90 #122
* Increase coverage from 90 to 92 #122
* Fix ordinal issue #166
This commit is contained in:
Abdullah Alhazmy
2018-09-11 20:45:26 +03:00
committed by Istvan SZALAÏ
parent 7639f5a820
commit f72c9997c6
2 changed files with 414 additions and 132 deletions

View File

@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved. # Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. # Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# Copyright (c) 2018, Abdullah Alhazmy, Alhazmy13. All Rights Reserved.
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@@ -15,106 +17,336 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA # MA 02110-1301 USA
from __future__ import division, print_function, unicode_literals import re
from decimal import Decimal
from math import floor
from . import lang_EU CURRENCY_SR = [("ريال", "ريالان", "ريالات", "ريالاً"),
("هللة", "هللتان", "هللات", "هللة")]
CURRENCY_EGP = [("جنيه", "جنيهان", "جنيهات", "جنيهاً"),
("قرش", "قرشان", "قروش", "قرش")]
CURRENCY_KWD = [("دينار", "ديناران", "دينارات", "ديناراً"),
("فلس", "فلسان", "فلس", "فلس")]
ARABIC_ONES = [
"", "واحد", "اثنان", "ثلاثة", "أربعة", "خمسة", "ستة", "سبعة", "ثمانية",
"تسعة",
"عشرة", "أحد عشر", "اثنا عشر", "ثلاثة عشر", "أربعة عشر", "خمسة عشر",
"ستة عشر", "سبعة عشر", "ثمانية عشر",
"تسعة عشر"
]
class Num2Word_AR(lang_EU.Num2Word_EU): class Num2Word_AR(object):
def set_high_numwords(self, high): errmsg_too_big = "Too large"
max = 3 + 3 * len(high) max_num = 10 ** 36
for word, n in zip(high, range(max, 3, -3)):
self.cards[10 ** n] = word + "illion"
def setup(self): def __init__(self):
self.negword = "سالب " self.number = 0
self.pointword = "فاصلة" self.arabicPrefixText = ""
self.errmsg_nornum = "Only numbers may be converted to words." self.arabicSuffixText = ""
self.exclude_title = ["و", "فاصلة", "سالب"] self.integer_value = 0
self._decimalValue = ""
self.partPrecision = 2
self.currency_unit = CURRENCY_SR[0]
self.currency_subunit = CURRENCY_SR[1]
self.isCurrencyPartNameFeminine = True
self.isCurrencyNameFeminine = False
self.separator = 'و'
self.mid_numwords = [(1000000, "مليون"), (1000, "ألف"), (100, "مئة"), self.arabicOnes = ARABIC_ONES
(90, "تسعين"), (80, "ثمانين"), (70, "سبعين"), self.arabicFeminineOnes = [
(60, "ستين"), (50, "خمسين"), (40, "أربعين"), "", "إحدى", "اثنتان", "ثلاث", "أربع", "خمس", "ست", "سبع", "ثمان",
(30, "ثلاثين")] "تسع",
self.low_numwords = ["عشرين", "تسعة عشر", "ثمانية عشر", "سبعة عشر", "عشر", "إحدى عشرة", "اثنتا عشرة", "ثلاث عشرة", "أربع عشرة",
"ستة عشر", "خمسة عشر", "أربعة عشر", "ثلاثة عشر", "خمس عشرة", "ست عشرة", "سبع عشرة", "ثماني عشرة",
"اثناعشر", "أحد عشر", "عشرة", "تسعة", "ثمانية", "تسع عشرة"
"سبعة", "ستة", "خمسة", "أربعة", "ثلاثة", "اثنين", ]
"واحد", "صفر"] self.arabicOrdinal = [
self.ords = {"واحد": "أول", "", "اول", "ثاني", "ثالث", "رابع", "خامس", "سادس", "سابع", "ثامن",
"اثنين": "ثاني", "تاسع", "عاشر", "حادي عشر", "ثاني عشر", "ثالث عشر", "رابع عشر",
"ثلاثة": "ثالث", "خامس عشر", "سادس عشر", "سابع عشر", "ثامن عشر", "تاسع عشر"
"أربعة": "رابع", ]
"خمسة": "خامس", self.arabicTens = [
"ثمانية": "ثامن", "عشرون", "ثلاثون", "أربعون", "خمسون", "ستون", "سبعون", "ثمانون",
"تسعة": "تاسع", "تسعون"
"اثناعشر": "ثاني عشر"} ]
self.arabicHundreds = [
"", "مائة", "مئتان", "ثلاثمائة", "أربعمائة", "خمسمائة", "ستمائة",
"سبعمائة", "ثمانمائة", "تسعمائة"
]
self.arabicAppendedTwos = [
"مئتا", "ألفا", "مليونا", "مليارا", "تريليونا", "كوادريليونا",
"كوينتليونا", "سكستيليونا"
]
self.arabicTwos = [
"مئتان", "ألفان", "مليونان", "ملياران", "تريليونان",
"كوادريليونان", "كوينتليونان", "سكستيليونان"
]
self.arabicGroup = [
"مائة", "ألف", "مليون", "مليار", "تريليون", "كوادريليون",
"كوينتليون", "سكستيليون"
]
self.arabicAppendedGroup = [
"", "ألفاً", "مليوناً", "ملياراً", "تريليوناً", "كوادريليوناً",
"كوينتليوناً", "سكستيليوناً"
]
self.arabicPluralGroups = [
"", "آلاف", "ملايين", "مليارات", "تريليونات", "كوادريليونات",
"كوينتليونات", "سكستيليونات"
]
def merge(self, lpair, rpair): def number_to_arabic(self, arabic_prefix_text, arabic_suffix_text):
ltext, lnum = lpair self.arabicPrefixText = arabic_prefix_text
rtext, rnum = rpair self.arabicSuffixText = arabic_suffix_text
if lnum == 1 and rnum < 100: self.extract_integer_and_decimal_parts()
return (rtext, rnum)
elif 100 > lnum > rnum:
return ("%s و%s" % (rtext, ltext), rnum + lnum)
elif lnum >= 100 > rnum:
return ("%s و %s" % (ltext, rtext), lnum + rnum)
elif rnum > lnum:
if lnum == 1 and rnum in [100, 1000, 1000000]:
return ("%s" % (rtext), rnum * lnum)
if lnum == 2 and rnum == 100:
return ("مئتين", rnum * lnum)
if lnum == 2 and rnum in [100, 1000]:
return ("%sين" % (rtext), rnum * lnum)
return ("%s %s" % (ltext, rtext), lnum * rnum)
return ("%s، %s" % (ltext, rtext), lnum + rnum)
def to_ordinal(self, value): def extract_integer_and_decimal_parts(self):
self.verify_ordinal(value) re.split('\\.', str(self.number))
outwords = self.to_cardinal(value).split(" ") splits = re.split('\\.', str(self.number))
lastwords = outwords[-1].split("-")
lastword = lastwords[-1].lower() self.integer_value = int(splits[0])
try: if len(splits) > 1:
lastword = self.ords[lastword] self._decimalValue = int(self.decimal_value(splits[1]))
except KeyError: else:
lastword += "" self._decimalValue = 0
lastwords[-1] = self.title(lastword)
outwords[-1] = "،".join(lastwords) def decimal_value(self, decimal_part):
return " ".join(outwords)
if self.partPrecision is not len(decimal_part):
decimal_part_length = len(decimal_part)
decimal_part_builder = decimal_part
for i in range(0, self.partPrecision - decimal_part_length):
decimal_part_builder += '0'
decimal_part = decimal_part_builder
if len(decimal_part) <= self.partPrecision:
dec = len(decimal_part)
else:
dec = self.partPrecision
result = decimal_part[0: dec]
else:
result = decimal_part
for i in range(len(result), self.partPrecision):
result += '0'
return result
def digit_feminine_status(self, digit, group_level):
    """Return the word for ``digit`` in the gender required by its position.

    :param digit: index into the ones tables; converted with ``int()``.
    :param group_level: -1 selects the gender flag of the currency
        sub-unit, 0 the gender flag of the currency unit; any other
        level always uses the masculine table.
    :return: entry of ``arabicFeminineOnes`` or ``arabicOnes``.
    """
    if group_level == -1:
        use_feminine = self.isCurrencyPartNameFeminine
    elif group_level == 0:
        use_feminine = self.isCurrencyNameFeminine
    else:
        use_feminine = False

    table = self.arabicFeminineOnes if use_feminine else self.arabicOnes
    return table[int(digit)]
def process_arabic_group(self, group_number, group_level,
                         remaining_number):
    """Spell out one three-digit group, without its scale word.

    :param group_number: value of the group; expected to be in 0..999
        (``arabicHundreds`` only has entries for hundreds digits 0-9).
    :param group_level: position of the group: 0 for the lowest integer
        group, 1 for thousands, 2 for millions and so on; -1 is used by
        the caller for the fractional part.
    :param remaining_number: accepted for interface compatibility; it
        does not influence the result.
    :return: the words for the group, or ``""`` when the group is 0.
    """
    tens = Decimal(group_number) % Decimal(100)
    hundreds_digit = int(Decimal(group_number) / Decimal(100))

    ret_val = ""
    if hundreds_digit > 0:
        if tens == 0 and hundreds_digit == 2:
            # Exactly 200: use the construct form of "two hundred".
            ret_val = self.arabicAppendedTwos[0]
        else:
            ret_val = self.arabicHundreds[hundreds_digit]

    if tens == 0:
        return ret_val

    if tens < 20:
        if tens == 2 and hundreds_digit == 0 and group_level > 0:
            # A bare "2" in a scaled group is rendered with the dual of
            # the scale word; the construct form is used only when the
            # whole number is exactly two of that scale.
            if self.integer_value in (2000, 2000000, 2000000000,
                                      2000000000000, 2000000000000000,
                                      2000000000000000000):
                return self.arabicAppendedTwos[int(group_level)]
            return self.arabicTwos[int(group_level)]

        if ret_val != "":
            ret_val += " و "
        # The numerals one and two are always spelled out here (for
        # example "واحد ألف"); the previous special cases that tried to
        # drop them compared a non-truncated quotient with 0 and could
        # never trigger.
        return ret_val + self.digit_feminine_status(int(tens), group_level)

    # 20..99: "<ones> و <tens>", preceded by the hundreds word if any.
    ones = tens % 10
    if ret_val != "":
        # Always separate the hundreds word from what follows; it used
        # to be glued on for round tens and for 60-99.
        ret_val += " و "
    if ones > 0:
        ret_val += self.digit_feminine_status(ones, group_level)
        ret_val += " و "
    return ret_val + self.arabicTens[int(tens / 10) - 2]
def convert(self, value):
    """Convert ``value`` to Arabic words using the current settings.

    Stores ``value`` in ``self.number`` as a fixed-point string with
    nine decimals, splits it through ``number_to_arabic`` and returns
    the result of ``convert_to_arabic``.

    :param value: number to convert.
    :return: the string produced by ``convert_to_arabic``.
    """
    if isinstance(value, int):
        # The "f" presentation type converts an int to float first,
        # which corrupts integers beyond 2**53; format them exactly.
        self.number = "{:d}.000000000".format(value)
    else:
        self.number = "{:.9f}".format(value)
    self.number_to_arabic(self.arabicPrefixText, self.arabicSuffixText)
    return self.convert_to_arabic()
def convert_to_arabic(self):
    """Assemble the Arabic wording for the number held in ``self.number``.

    Reads ``self.number`` (fixed-point string), ``self.integer_value``
    and ``self._decimalValue``, spells each three-digit group with
    ``process_arabic_group``, adds the scale words, then appends the
    currency unit, the separator, the fractional part with its sub-unit,
    and the optional prefix/suffix texts.

    :return: the formatted text; ``"صفر"`` when the number is zero.
    """
    if Decimal(self.number) == Decimal(0):
        return "صفر"

    decimal_string = self.process_arabic_group(self._decimalValue,
                                               -1,
                                               Decimal(0))

    ret_val = ""
    group = 0
    # Split into groups of three digits with exact integer arithmetic;
    # Decimal "%" and "/" are bound to the context precision and fail or
    # round for values with more digits than that.
    remaining = int(Decimal(self.number))
    while remaining > 0:
        remaining, number_to_process = divmod(remaining, 1000)
        group_description = self.process_arabic_group(number_to_process,
                                                      group,
                                                      Decimal(remaining))
        if group_description != "":
            if group > 0:
                if ret_val != "":
                    ret_val = "{} و {}".format("", ret_val)
                # A group equal to 2 already carries the dual scale word.
                if number_to_process != 2:
                    if number_to_process % 100 == 1:
                        scale_word = self.arabicGroup[group]
                    elif 3 <= number_to_process <= 10:
                        scale_word = self.arabicPluralGroups[group]
                    elif ret_val != "":
                        scale_word = self.arabicAppendedGroup[group]
                    else:
                        scale_word = self.arabicGroup[group]
                    ret_val = "{} {}".format(scale_word, ret_val)
            ret_val = "{} {}".format(group_description, ret_val)
        group += 1

    formatted_number = ""
    if self.arabicPrefixText != "":
        formatted_number += "{} ".format(self.arabicPrefixText)
    formatted_number += ret_val

    if self.integer_value != 0:
        # The form of the currency unit depends on the last two digits.
        remaining100 = int(self.integer_value % 100)
        if remaining100 == 0 or remaining100 == 1:
            formatted_number += self.currency_unit[0]
        elif remaining100 == 2:
            if self.integer_value == 2:
                formatted_number += self.currency_unit[1]
            else:
                formatted_number += self.currency_unit[0]
        elif 3 <= remaining100 <= 10:
            formatted_number += self.currency_unit[2]
        elif 11 <= remaining100 <= 99:
            formatted_number += self.currency_unit[3]

    if self._decimalValue != 0:
        formatted_number += " {} ".format(self.separator)
        formatted_number += decimal_string
        formatted_number += " "
        remaining100 = int(self._decimalValue % 100)
        if remaining100 == 0 or remaining100 == 1:
            formatted_number += self.currency_subunit[0]
        elif remaining100 == 2:
            formatted_number += self.currency_subunit[1]
        elif 3 <= remaining100 <= 10:
            formatted_number += self.currency_subunit[2]
        elif 11 <= remaining100 <= 99:
            formatted_number += self.currency_subunit[3]

    if self.arabicSuffixText != "":
        formatted_number += " {}".format(self.arabicSuffixText)

    return formatted_number
def validate_number(self, number):
    """Return ``number`` unchanged, rejecting values at or above the limit.

    :param number: value to check against ``self.max_num``.
    :raises OverflowError: with ``self.errmsg_too_big`` when
        ``number >= self.max_num``.
    """
    too_big = number >= self.max_num
    if too_big:
        raise OverflowError(self.errmsg_too_big)
    return number
def set_currency_prefer(self, currency):
    """Select the unit and sub-unit word forms for ``currency``.

    :param currency: ``'EGP'`` or ``'KWD'``; any other value selects the
        SR forms.
    """
    # Compare by value: identity ("is") only matches when the caller's
    # string happens to be the very same object as the literal.
    if currency == 'EGP':
        forms = CURRENCY_EGP
    elif currency == 'KWD':
        forms = CURRENCY_KWD
    else:
        forms = CURRENCY_SR
    self.currency_unit = forms[0]
    self.currency_subunit = forms[1]
def to_currency(self, value, currency='SR', prefix='', suffix=''):
    """Spell out ``value`` as an amount of money.

    :param value: amount to convert.
    :param currency: currency code handed to ``set_currency_prefer``.
    :param prefix: text stored as ``arabicPrefixText``.
    :param suffix: text stored as ``arabicSuffixText``.
    :return: the result of ``convert`` for ``value``.
    """
    self.set_currency_prefer(currency)
    self.arabicPrefixText, self.arabicSuffixText = prefix, suffix
    self.arabicOnes = ARABIC_ONES
    self.separator = "و"
    self.isCurrencyNameFeminine = False
    return self.convert(value=value)
def to_ordinal(self, number, prefix=''):
    """Return the wording used for ``number`` as an ordinal.

    Magnitudes up to 19 are looked up in ``arabicOrdinal``; larger ones
    are produced by ``convert`` with empty currency words.

    :param number: integer to convert; its sign is ignored.
    :param prefix: text stored as ``arabicPrefixText`` for the
        ``convert`` path.
    :return: the ordinal text, stripped of surrounding whitespace on the
        ``convert`` path.
    """
    # Work on the magnitude throughout: a negative value must not index
    # the ordinal table from its end.
    magnitude = abs(number)
    if magnitude <= 19:
        return "{}".format(self.arabicOrdinal[magnitude])

    self.isCurrencyNameFeminine = magnitude < 100
    self.currency_subunit = ('', '', '', '')
    self.currency_unit = ('', '', '', '')
    self.arabicPrefixText = prefix
    self.arabicSuffixText = ""
    return "{}".format(self.convert(magnitude).strip())
def to_year(self, value):
    """Spell out a year: validate ``value``, then render it as a cardinal.

    :param value: year to convert.
    :return: the result of ``to_cardinal`` for the validated value.
    """
    return self.to_cardinal(self.validate_number(value))
def to_ordinal_num(self, value): def to_ordinal_num(self, value):
self.verify_ordinal(value) return self.to_ordinal(value).strip()
return "%s%s" % (value, self.to_ordinal(value)[-2:])
def to_year(self, val, longval=True): def to_cardinal(self, number):
if not (val // 100) % 10: number = self.validate_number(number)
return self.to_cardinal(val) minus = ''
return self.to_splitnum(val, hightxt="مئة", jointxt="و", if number < 0:
longval=longval) minus = 'سالب '
self.separator = ','
def to_currency(self, val, longval=True): self.currency_subunit = ('', '', '', '')
return self.to_splitnum(val, hightxt="ريال", lowtxt="هللة", self.currency_unit = ('', '', '', '')
jointxt="و", longval=longval, cents=True) self.arabicPrefixText = ""
self.arabicSuffixText = ""
self.arabicOnes = ARABIC_ONES
n2w = Num2Word_AR() return minus + self.convert(value=abs(number)).strip()
to_card = n2w.to_cardinal
to_ord = n2w.to_ordinal
to_ordnum = n2w.to_ordinal_num
to_year = n2w.to_year
def main():
    """Run the module's demo: test sample values, then print currency and
    year renderings for a list of years."""
    samples = [1, 11, 12, 21, 31, 33, 71, 80, 81, 91, 99, 100, 101, 102, 155,
               180, 300, 308, 832, 1000, 1001, 1061, 1100, 1500, 1701, 3000,
               8280, 8291, 150000, 500000, 1000000, 2000000, 2000001,
               -21212121211221211111, -2.121212, -1.0000100]
    for sample in samples:
        n2w.test(sample)

    n2w.test(13253254360678768017687001076010010122121321432104732075403270573)

    years = [1, 120, 1000, 1120, 1800, 1976, 2000, 2010, 2099, 2171]
    for year in years:
        print(year, "is", n2w.to_currency(year))
        print(year, "is", n2w.to_year(year))
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,4 @@
# encoding: UTF-8 # -*- encoding: utf-8 -*-
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. # Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
@@ -13,51 +13,101 @@
# License along with this library; if not, write to the Free Software # License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA # MA 02110-1301 USA
from __future__ import unicode_literals # from __future__ import unicode_literals
from unittest import TestCase from unittest import TestCase
from num2words import num2words from num2words import num2words
TEST_CASES_CARDINAL = (
(1, 'واحد'),
(2, 'اثنين'),
(11, 'أحد عشر'),
(12, 'اثناعشر'),
(20, 'عشرين'),
(21, 'واحد وعشرين'),
(26, 'ستة وعشرين'),
(30, 'ثلاثين'),
(67, 'سبعة وستين'),
(70, 'سبعين'),
(100, 'مئة'),
(101, 'مئة و واحد'),
(199, 'مئة و تسعة وتسعين'),
(203, 'مئتين و ثلاثة'),
(1000, 'ألف'),
(1001, 'ألف و واحد'),
(1097, 'ألف و سبعة وتسعين'),
(1000000, 'مليون'),
(1000001, 'مليون و واحد'),
)
TEST_CASES_ORDINAL = (
(1, 'أول'),
(8, 'ثامن'),
(12, 'ثاني عشر'),
(100, 'مئة'),
)
class Num2WordsARTest(TestCase): class Num2WordsARTest(TestCase):
def test_number(self): def test_default_currency(self):
for test in TEST_CASES_CARDINAL: self.assertEqual(num2words(1, to='currency', lang='ar'), 'واحد ريال')
self.assertEqual(num2words(test[0], lang='ar'), test[1]) self.assertEqual(num2words(2, to='currency', lang='ar'),
'اثنان ريالان')
self.assertEqual(num2words(10, to='currency', lang='ar'),
'عشرة ريالات')
self.assertEqual(num2words(100, to='currency', lang='ar'), 'مائة ريال')
self.assertEqual(num2words(652.12, to='currency', lang='ar'),
'ستمائة و اثنان و خمسون ريالاً و اثنتا عشرة هللة')
self.assertEqual(num2words(324, to='currency', lang='ar'),
'ثلاثمائة و أربعة و عشرون ريالاً')
self.assertEqual(num2words(2000, to='currency', lang='ar'),
'ألفا ريال')
self.assertEqual(num2words(541, to='currency', lang='ar'),
'خمسمائة و واحد و أربعون ريالاً')
self.assertEqual(num2words(10000, to='currency', lang='ar'),
'عشرة آلاف ريال')
self.assertEqual(num2words(20000.12, to='currency', lang='ar'),
'عشرون ألف ريال و اثنتا عشرة هللة')
self.assertEqual(num2words(1000000, to='currency', lang='ar'),
'واحد مليون ريال')
val = 'تسعمائة و ثلاثة و عشرون ألفاً و أربعمائة و أحد عشر ريالاً'
self.assertEqual(num2words(923411, to='currency', lang='ar'), val)
self.assertEqual(num2words(63411, to='currency', lang='ar'),
'ثلاثة و ستون ألفاً و أربعمائة و أحد عشر ريالاً')
self.assertEqual(num2words(1000000.99, to='currency', lang='ar'),
'واحد مليون ريال و تسع و تسعون هللة')
def test_currency_parm(self):
    """Currency wording when the ``currency`` argument is given."""
    cases = (
        (1, "KWD", 'واحد دينار'),
        (10, "EGP", 'عشرة جنيهات'),
        (20000.12, "EGP", 'عشرون ألف جنيه و اثنتا عشرة قرش'),
        (923411, "SR",
         'تسعمائة و ثلاثة و عشرون ألفاً و أربعمائة و أحد عشر ريالاً'),
        (1000000.99, "KWD", 'واحد مليون دينار و تسع و تسعون فلس'),
    )
    for amount, code, expected in cases:
        self.assertEqual(
            num2words(amount, to='currency', lang='ar', currency=code),
            expected)
def test_ordinal(self): def test_ordinal(self):
for test in TEST_CASES_ORDINAL: self.assertEqual(num2words(1, to='ordinal', lang='ar'), 'اول')
self.assertEqual( self.assertEqual(num2words(2, to='ordinal', lang='ar'), 'ثاني')
num2words(test[0], lang='ar', ordinal=True), self.assertEqual(num2words(3, to='ordinal', lang='ar'), 'ثالث')
test[1] self.assertEqual(num2words(4, to='ordinal', lang='ar'), 'رابع')
) self.assertEqual(num2words(5, to='ordinal', lang='ar'), 'خامس')
self.assertEqual(num2words(6, to='ordinal', lang='ar'), 'سادس')
self.assertEqual(num2words(9, to='ordinal', lang='ar'), 'تاسع')
self.assertEqual(num2words(20, to='ordinal', lang='ar'), 'عشرون')
self.assertEqual(num2words(94, to='ordinal', lang='ar'),
'أربع و تسعون')
self.assertEqual(num2words(102, to='ordinal', lang='ar'),
'مائة و اثنان')
self.assertEqual(
num2words(923411, to='ordinal_num', lang='ar'),
'تسعمائة و ثلاثة و عشرون ألفاً و أربعمائة و أحد عشر')
def test_cardinal(self):
    """Cardinal wording for integers, a negative and a fraction."""
    cases = (
        (12, 'اثنا عشر'),
        (-8324, 'سالب ثمانية آلاف و ثلاثمائة و أربعة و عشرون'),
        (3431.12, 'ثلاثة آلاف و أربعمائة و واحد و ثلاثون , اثنتا عشرة'),
        (431, 'أربعمائة و واحد و ثلاثون'),
        (94231, 'أربعة و تسعون ألفاً و مئتان و واحد و ثلاثون'),
        (1431, 'واحد ألف و أربعمائة و واحد و ثلاثون'),
    )
    for number, expected in cases:
        self.assertEqual(num2words(number, to='cardinal', lang='ar'),
                         expected)
def test_prefix_and_suffix(self):
    """Prefix and suffix texts wrap the currency wording."""
    wording = num2words(645, to='currency', lang='ar',
                        prefix="فقط", suffix="لاغير")
    self.assertEqual(wording, 'فقط ستمائة و خمسة و أربعون ريالاً لاغير')
def test_year(self):
    """Year conversion of 2000."""
    wording = num2words(2000, to='year', lang='ar')
    self.assertEqual(wording, 'ألفا')
def test_max_numbers(self):
    """Converting 10 ** 36 raises an error whose text mentions 'Too large'."""
    with self.assertRaises(Exception) as context:
        num2words(10 ** 36, to='year', lang='ar')

    self.assertIn('Too large', str(context.exception))