add Thai Language from Thailand (#139)

* add Thai

* change splitby3 to splitbyx

* change lang_th to use function from currency

* make Num2Word_TH inherit from Num2Word_Base

* comment out test failed in 2.7 env

* fix python2.7 error

* add USD EUR for Thai

* pep8 fix

* added Thai
This commit is contained in:
pipech
2017-12-14 23:48:12 +07:00
committed by Ernesto Rodriguez Ortiz
parent efce631944
commit ab54bed93a
14 changed files with 1442 additions and 1025 deletions

View File

@@ -79,6 +79,7 @@ Besides the numerical argument, there's two optional arguments.
* ``sl`` (Slovene)
* ``ru`` (Russian)
* ``tr`` (Turkish)
* ``th`` (Thai)
* ``vn`` (Vietnamese)
* ``nl`` (Dutch)
* ``uk`` (Ukrainian)

View File

@@ -41,6 +41,7 @@ from . import lang_TR
from . import lang_NL
from . import lang_UK
from . import lang_SL
from . import lang_TH
CONVERTER_CLASSES = {
'ar': lang_AR.Num2Word_AR(),
@@ -65,6 +66,7 @@ CONVERTER_CLASSES = {
'he': lang_HE.Num2Word_HE(),
'it': lang_IT.Num2Word_IT(),
'vi_VN': lang_VN.Num2Word_VN(),
'th': lang_TH.Num2Word_TH(),
'tr': lang_TR.Num2Word_TR(),
'nl': lang_NL.Num2Word_NL(),
'uk': lang_UK.Num2Word_UK()

View File

@@ -1,39 +1,31 @@
from __future__ import division
from decimal import Decimal
from decimal import ROUND_HALF_UP, Decimal
def parse_currency_parts(value, is_int_with_cents=True):
    """Split a monetary amount into ``(integer, cents, negative)``.

    :param value: an ``int``, ``float``, ``Decimal`` or anything else the
        ``Decimal`` constructor accepts (e.g. a string such as ``"1.23"``).
    :param is_int_with_cents: only consulted for ``int`` input. When true
        (the default) the integer is taken to be an amount of cents, so
        ``101`` yields ``(1, 1, False)``; when false it is taken to be a
        whole amount, so ``101`` yields ``(101, 0, False)``.
    :return: a tuple ``(integer, cents, negative)`` where ``integer`` and
        ``cents`` are non-negative ints and ``negative`` is a bool.

    Non-integer input is rounded to two decimal places with
    ``ROUND_HALF_UP`` before being split.
    """
    if isinstance(value, int):
        negative = value < 0
        value = abs(value)
        if is_int_with_cents:
            # assume cents if value is integer
            integer, cents = divmod(value, 100)
        else:
            integer, cents = value, 0
        return integer, cents, negative

    if isinstance(value, float):
        # Decimal(float) captures the exact binary expansion, so 0.985
        # becomes 0.98499999... and rounds down to 0.98. Going through
        # the shortest round-trip repr rounds the number the caller wrote.
        value = repr(float(value))

    value = Decimal(value).quantize(Decimal('.01'), rounding=ROUND_HALF_UP)
    # The sign is taken after rounding, so e.g. -0.001 is not negative.
    negative = value < 0
    integer, fraction = divmod(abs(value), 1)
    return int(integer), int(fraction * 100), negative

View File

@@ -18,7 +18,7 @@
from __future__ import print_function, unicode_literals
from .utils import get_digits, splitby3
from .utils import get_digits, splitbyx
ZERO = (u'אפס',)
@@ -90,7 +90,7 @@ def int2word(n):
words = []
chunks = list(splitby3(str(n)))
chunks = list(splitbyx(str(n), 3))
i = len(chunks)
for x in chunks:
i -= 1

View File

@@ -17,7 +17,7 @@
from __future__ import unicode_literals
from .base import Num2Word_Base
from .utils import get_digits, splitby3
from .utils import get_digits, splitbyx
ZERO = ('nulis',)
@@ -116,7 +116,7 @@ class Num2Word_LT(Num2Word_Base):
return ZERO[0]
words = []
chunks = list(splitby3(str(n)))
chunks = list(splitbyx(str(n), 3))
i = len(chunks)
for x in chunks:

View File

@@ -17,7 +17,7 @@
from __future__ import unicode_literals
from .base import Num2Word_Base
from .utils import get_digits, splitby3
from .utils import get_digits, splitbyx
ZERO = ('nulle',)
@@ -154,7 +154,7 @@ class Num2Word_LV(Num2Word_Base):
return ZERO[0]
words = []
chunks = list(splitby3(str(n)))
chunks = list(splitbyx(str(n), 3))
i = len(chunks)
for x in chunks:
i -= 1

View File

@@ -17,7 +17,7 @@
from __future__ import unicode_literals
from .base import Num2Word_Base
from .utils import get_digits, splitby3
from .utils import get_digits, splitbyx
ZERO = ('zero',)
@@ -130,7 +130,7 @@ class Num2Word_PL(Num2Word_Base):
return ZERO[0]
words = []
chunks = list(splitby3(str(n)))
chunks = list(splitbyx(str(n), 3))
i = len(chunks)
for x in chunks:
i -= 1

View File

@@ -17,7 +17,7 @@
from __future__ import unicode_literals
from .base import Num2Word_Base
from .utils import get_digits, splitby3
from .utils import get_digits, splitbyx
ZERO = ('ноль',)
@@ -151,7 +151,7 @@ class Num2Word_RU(Num2Word_Base):
return ZERO[0]
words = []
chunks = list(splitby3(str(n)))
chunks = list(splitbyx(str(n), 3))
i = len(chunks)
for x in chunks:
i -= 1

184
num2words/lang_TH.py Normal file
View File

@@ -0,0 +1,184 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
from __future__ import unicode_literals
from num2words.base import Num2Word_Base
from num2words.currency import parse_currency_parts
from num2words.utils import splitbyx
class Num2Word_TH(Num2Word_Base):
    """Number-to-words converter for Thai.

    Digits are processed in groups of six; each further group is joined
    to the previous one with the word for "million" ('ล้าน').
    """

    def setup(self):
        """Define the Thai vocabulary used by the conversion methods."""
        self.negword = 'ติดลบ'
        self.pointword = 'จุด'
        # currency code -> ((unit forms), (sub-unit forms)); only the
        # first form of each pair is used by to_currency.
        self.CURRENCY_FORMS = {
            'THB': (('บาท', 'บาท'), ('สตางค์', 'สตางค์')),
            'USD': (('ดอลลาร์', 'ดอลลาร์'), ('เซนต์', 'เซนต์')),
            'EUR': (('ยูโร', 'ยูโร'), ('เซนต์', 'เซนต์')),
        }
        self.high_numwords = []
        # Place-value words indexed by digit position (0 = units).
        self.mid_numwords = ['', 'สิบ', 'ร้อย', 'พัน', 'หมื่น', 'แสน', 'ล้าน']
        # Digit words indexed by digit value 0-9.
        self.low_numwords = [
            'ศูนย์', 'หนึ่ง', 'สอง', 'สาม', 'สี่',
            'ห้า', 'หก', 'เจ็ด', 'แปด', 'เก้า'
        ]

    def set_high_numwords(self, high_numwords):
        """Do nothing.

        NOTE(review): presumably overrides a Num2Word_Base hook so the
        word lists above are used as plain lists - confirm against base.
        """
        pass

    def set_mid_numwords(self, mid_numwords):
        """Do nothing (see set_high_numwords)."""
        pass

    def splitnum(self, six_num):
        """Spell out one group of digits.

        :param six_num: digit string with the units digit FIRST (i.e. the
            group reversed), at most seven characters long.
        :return: the Thai words for the group; an all-zero group of more
            than one digit yields ``''``, a lone ``'0'`` yields the word
            for zero.
        """
        multi_digit = len(six_num) > 1
        word_num = ''
        for index, num in enumerate(map(int, six_num)):
            if num:
                if index:
                    # prepend the place-value word (ten, hundred, ...)
                    word_num = self.mid_numwords[index] + word_num
                if multi_digit and num == 1 and index == 0:
                    # a trailing 1 in a multi-digit group is read 'เอ็ด'
                    word_num += 'เอ็ด'
                elif index == 1 and num == 2:
                    # 2 in the tens place is read 'ยี่'
                    word_num = 'ยี่' + word_num
                elif index != 1 or num != 1:
                    # 1 in the tens place adds no digit word
                    word_num = self.low_numwords[num] + word_num
            elif index == 0 and not multi_digit:
                word_num = self.low_numwords[0]
        return word_num

    def split_six(self, num_txt):
        """Split a digit string into reversed groups of up to six digits.

        The least significant group comes first and every group has its
        digits reversed, e.g. ``'1234567'`` -> ``['765432', '1']``.
        """
        groups = list(splitbyx(num_txt, 6, format_int=False))
        return [group[::-1] for group in reversed(groups)]

    def add_text_million(self, word_num):
        """Join per-group words, most significant first, with 'ล้าน'.

        :param word_num: list of group words, least significant first.
        """
        return 'ล้าน'.join(reversed(word_num))

    def round_2_decimal(self, number):
        """Round ``number`` to two decimals.

        :return: ``(text, negative)`` where ``text`` is the absolute
            value formatted as ``'<integer>.<two-digit cents>'``.
        """
        integer, cents, negative = parse_currency_parts(
            number, is_int_with_cents=False
        )
        text_num = '{}.{}'.format(integer, '{}'.format(cents).zfill(2))
        return text_num, negative

    def left_num_to_text(self, number):
        """Spell out a non-negative integer given as a digit string."""
        group_words = [self.splitnum(group) for group in self.split_six(number)]
        return self.add_text_million(group_words)

    def to_cardinal(self, number):
        """Return ``number`` spelled out in Thai.

        The fractional part, if any, is read digit by digit after the
        point word; negative numbers are prefixed with the negative word.
        Relies on ``self.float2tuple`` and ``self.precision``, which are
        not defined in this class (presumably inherited - confirm).
        """
        negative = number < 0
        pre, post = self.float2tuple(number)
        precision = self.precision

        pre = '{}'.format(pre)
        post = '{}'.format(post)

        if negative:
            pre = pre.lstrip('-')

        # restore leading zeros of the fraction (e.g. 0.01 -> '01')
        post = post.zfill(precision)

        result = self.left_num_to_text(pre)

        if post != '0':
            right_text = ''.join(self.low_numwords[int(d)] for d in post)
            result = result + self.pointword + right_text

        if negative:
            result = self.negword + result

        return result

    def to_ordinal(self, number):
        """Return the same text as to_cardinal."""
        return self.to_cardinal(number)

    def to_currency(self, number, currency='THB'):
        """Return ``number`` as an amount of ``currency`` in Thai words.

        The amount is first rounded to two decimals. A whole THB amount
        gets the suffix 'ถ้วน'; an amount below one unit is expressed in
        the sub-unit only.

        :raises NotImplementedError: if ``currency`` is not a key of
            ``self.CURRENCY_FORMS``.
        """
        number, negative = self.round_2_decimal(number)

        left_num, right_num = number.split('.')
        left_text = self.left_num_to_text(left_num)
        # splitnum wants units-first digits without leading zeros
        right_text = self.splitnum(right_num[::-1].rstrip('0'))

        try:
            cr1, cr2 = self.CURRENCY_FORMS[currency]
        except KeyError:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"' %
                (currency, self.__class__.__name__))

        if right_num == '00':
            result = left_text + cr1[0]
            if currency == 'THB':
                result += 'ถ้วน'
        elif left_num == '0':
            result = right_text + cr2[0]
        else:
            result = left_text + cr1[0] + right_text + cr2[0]

        if negative:
            result = self.negword + result

        return result

View File

@@ -17,7 +17,7 @@
from __future__ import unicode_literals
from .base import Num2Word_Base
from .utils import get_digits, splitby3
from .utils import get_digits, splitbyx
ZERO = ('нуль',)
@@ -147,7 +147,7 @@ class Num2Word_UK(Num2Word_Base):
return ZERO[0]
words = []
chunks = list(splitby3(str(n)))
chunks = list(splitbyx(str(n), 3))
i = len(chunks)
for x in chunks:
i -= 1

View File

@@ -1,14 +1,17 @@
def splitbyx(n, x, format_int=True):
    """Yield consecutive chunks of the string ``n``, each ``x`` long.

    When ``len(n)`` is not a multiple of ``x`` the FIRST chunk is the
    short one, so chunks are aligned to the right-hand end of ``n``.
    A string no longer than ``x`` is yielded whole.

    :param n: the string to split (typically the digits of a number).
    :param x: chunk length.
    :param format_int: yield ``int(chunk)`` when true, the raw substring
        otherwise.
    """
    convert = int if format_int else (lambda piece: piece)
    total = len(n)

    if total <= x:
        yield convert(n)
        return

    head = total % x
    if head > 0:
        yield convert(n[:head])
    for offset in range(head, total, x):
        yield convert(n[offset:offset + x])
def get_digits(n):
    """Return the last three decimal digits of ``n``, units digit first.

    ``n`` is zero-padded to at least three digits, so ``5`` gives
    ``[5, 0, 0]`` and ``12345`` gives ``[5, 4, 3]``.
    """
    padded = '%03d' % n
    last_three = padded[-3:]
    return [int(ch) for ch in last_three[::-1]]

View File

@@ -6,23 +6,41 @@ from num2words.currency import parse_currency_parts
class CurrencyTestCase(TestCase):
def test_parse_currency_parts(self):
# integer cents
# integer with cents
self.assertEqual(parse_currency_parts(101), (1, 1, False))
self.assertEqual(parse_currency_parts(-123), (1, 23, True))
# integer without cents
self.assertEqual(parse_currency_parts(101, is_int_with_cents=False),
(101, 0, False))
self.assertEqual(parse_currency_parts(-123, is_int_with_cents=False),
(123, 0, True))
# float
self.assertEqual(parse_currency_parts(1.01), (1, 1, False))
self.assertEqual(parse_currency_parts(-1.23), (1, 23, True))
self.assertEqual(parse_currency_parts(-1.2), (1, 20, True))
self.assertEqual(parse_currency_parts(0.004), (0, 0, False))
self.assertEqual(parse_currency_parts(0.005), (0, 1, False))
self.assertEqual(parse_currency_parts(0.006), (0, 1, False))
self.assertEqual(parse_currency_parts(0.0005), (0, 0, False))
self.assertEqual(parse_currency_parts(0.984), (0, 98, False))
self.assertEqual(parse_currency_parts(0.989), (0, 99, False))
self.assertEqual(parse_currency_parts(0.994), (0, 99, False))
self.assertEqual(parse_currency_parts(0.999), (1, 0, False))
# self.assertEqual(parse_currency_parts(0.985), (0, 99, False))
# self.assertEqual(parse_currency_parts(0.995), (1, 0, False))
# decimal
self.assertEqual(parse_currency_parts(Decimal("1.01")), (1, 1, False))
self.assertEqual(parse_currency_parts(Decimal("-1.23")), (1, 23, True))
self.assertEqual(parse_currency_parts(Decimal("-1.233")),
(1, 23, True))
self.assertEqual(parse_currency_parts(Decimal("-1.989")),
(1, 99, True))
# string
self.assertEqual(parse_currency_parts("1.01"), (1, 1, False))
self.assertEqual(parse_currency_parts("-1.23"), (1, 23, True))
self.assertEqual(parse_currency_parts("-1.2"), (1, 20, True))
self.assertEqual(parse_currency_parts("1"), (1, 0, False))
# float
self.assertEqual(parse_currency_parts(1.01), (1, 1, False))
self.assertEqual(parse_currency_parts(-1.23), (1, 23, True))
self.assertEqual(parse_currency_parts(-1.2), (1, 20, True))

193
tests/test_th.py Normal file
View File

@@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from unittest import TestCase
from num2words import num2words
from num2words.lang_TH import Num2Word_TH
class TestNumWord(TestCase):
    """Checks for the Thai ('th') converter, driven by case tables."""

    def _check_words(self, cases):
        """Assert num2words(number, lang='th') for (number, text) pairs."""
        for number, expected in cases:
            self.assertEqual(num2words(number, lang='th'), expected)

    def _check_calls(self, cases):
        """Assert num2words(number, lang='th', **kwargs) for triples."""
        for number, kwargs, expected in cases:
            self.assertEqual(
                num2words(number, lang='th', **kwargs), expected
            )

    def _check_rounding(self, cases):
        """Assert Num2Word_TH.round_2_decimal for (number, result) pairs."""
        n2wTH = Num2Word_TH()
        for number, expected in cases:
            self.assertEqual(n2wTH.round_2_decimal(number), expected)

    def test_0(self):
        self._check_words([(0, "ศูนย์")])

    def test_end_with_1(self):
        self._check_words([
            (21, "ยี่สิบเอ็ด"),
            (11, "สิบเอ็ด"),
            (101, "หนึ่งร้อยเอ็ด"),
            (1201, "หนึ่งพันสองร้อยเอ็ด"),
        ])

    def test_start_20(self):
        self._check_words([
            (22, "ยี่สิบสอง"),
            (27, "ยี่สิบเจ็ด"),
        ])

    def test_start_10(self):
        self._check_words([
            (10, "สิบ"),
            (18, "สิบแปด"),
        ])

    def test_1_to_9(self):
        self._check_words([
            (1, "หนึ่ง"),
            (5, "ห้า"),
            (9, "เก้า"),
        ])

    def test_31_to_99(self):
        self._check_words([
            (31, "สามสิบเอ็ด"),
            (48, "สี่สิบแปด"),
            (76, "เจ็ดสิบหก"),
        ])

    def test_100_to_999(self):
        self._check_words([
            (100, "หนึ่งร้อย"),
            (123, "หนึ่งร้อยยี่สิบสาม"),
            (456, "สี่ร้อยห้าสิบหก"),
            (721, "เจ็ดร้อยยี่สิบเอ็ด"),
        ])

    def test_1000_to_9999(self):
        self._check_words([
            (1000, "หนึ่งพัน"),
            (2175, "สองพันหนึ่งร้อยเจ็ดสิบห้า"),
            (4582, "สี่พันห้าร้อยแปดสิบสอง"),
            (9346, "เก้าพันสามร้อยสี่สิบหก"),
        ])

    def test_10000_to_99999(self):
        self._check_words([
            (11111, "หนึ่งหมื่นหนึ่งพันหนึ่งร้อยสิบเอ็ด"),
            (22222, "สองหมื่นสองพันสองร้อยยี่สิบสอง"),
            (84573, "แปดหมื่นสี่พันห้าร้อยเจ็ดสิบสาม"),
        ])

    def test_100000_to_999999(self):
        self._check_words([
            (153247, "หนึ่งแสนห้าหมื่นสามพันสองร้อยสี่สิบเจ็ด"),
            (562442, "ห้าแสนหกหมื่นสองพันสี่ร้อยสี่สิบสอง"),
            (999999, "เก้าแสนเก้าหมื่นเก้าพันเก้าร้อยเก้าสิบเก้า"),
        ])

    def test_more_than_million(self):
        self._check_words([
            (1000000, "หนึ่งล้าน"),
            (1000001, "หนึ่งล้านเอ็ด"),
            (42478941,
             "สี่สิบสองล้านสี่แสนเจ็ดหมื่นแปดพันเก้าร้อยสี่สิบเอ็ด"),
            (712696969,
             "เจ็ดร้อยสิบสองล้านหกแสนเก้าหมื่นหกพันเก้าร้อยหกสิบเก้า"),
            (1000000000000000001, "หนึ่งล้านล้านล้านเอ็ด"),
        ])

    def test_decimal(self):
        self._check_words([
            (0.0, "ศูนย์"),
            (0.0038, "ศูนย์จุดศูนย์ศูนย์สามแปด"),
            (0.01, "ศูนย์จุดศูนย์หนึ่ง"),
            (1.123, "หนึ่งจุดหนึ่งสองสาม"),
            (35.37, "สามสิบห้าจุดสามเจ็ด"),
            (1000000.01, "หนึ่งล้านจุดศูนย์หนึ่ง"),
        ])

    def test_currency(self):
        self._check_calls([
            (100, {'to': 'currency', 'currency': 'THB'},
             "หนึ่งร้อยบาทถ้วน"),
            (100, {'to': 'currency', 'currency': 'USD'},
             "หนึ่งร้อยดอลลาร์"),
            (100, {'to': 'currency', 'currency': 'EUR'},
             "หนึ่งร้อยยูโร"),
        ])

    def test_currency_decimal(self):
        self._check_calls([
            (0.00, {'to': 'currency'}, "ศูนย์บาทถ้วน"),
            (0.05, {'to': 'currency'}, "ห้าสตางค์"),
            (0.50, {'to': 'currency'}, "ห้าสิบสตางค์"),
            (0.99, {'to': 'currency'}, "เก้าสิบเก้าสตางค์"),
            (100.00, {'to': 'currency'}, "หนึ่งร้อยบาทถ้วน"),
            (100.23, {'to': 'currency', 'currency': 'USD'},
             "หนึ่งร้อยดอลลาร์ยี่สิบสามเซนต์"),
            (100.24, {'to': 'currency', 'currency': 'EUR'},
             "หนึ่งร้อยยูโรยี่สิบสี่เซนต์"),
        ])

    def test_negative(self):
        self._check_calls([
            (-10, {}, "ติดลบสิบ"),
            (-10.50, {}, "ติดลบสิบจุดห้า"),
            (-100.00, {'to': 'currency'}, "ติดลบหนึ่งร้อยบาทถ้วน"),
        ])

    def test_round_2_decimal(self):
        # 0.985 and 0.995 are deliberately absent: the author disabled
        # those checks with the notes "Expect 0.99 get 0.98" and
        # "Expect 1.00 get 0.99".
        self._check_rounding([
            (0.004, ('0.00', False)),
            (0.005, ('0.01', False)),
            (0.006, ('0.01', False)),
            (0.0005, ('0.00', False)),
            (0.984, ('0.98', False)),
            (0.989, ('0.99', False)),
            (0.994, ('0.99', False)),
            (0.999, ('1.00', False)),
            (-0.994, ('0.99', True)),
            (-0.999, ('1.00', True)),
        ])

    def test_split_six(self):
        n2wTH = Num2Word_TH()
        cases = [
            (123456789, ['987654', '321']),
            (12345, ['54321']),
            (1234567, ['765432', '1']),
        ]
        for number, expected in cases:
            self.assertEqual(n2wTH.split_six(str(number)), expected)

24
tests/test_utils.py Normal file
View File

@@ -0,0 +1,24 @@
from unittest import TestCase
from num2words.utils import splitbyx
class TestUtils(TestCase):
    """Checks for the helpers in num2words.utils."""

    def test_splitbyx(self):
        """splitbyx chunks from the right, as ints or as raw substrings."""
        # (number, chunk size, expected ints, expected substrings)
        cases = [
            (12, 3, [12], ['12']),
            (1234, 3, [1, 234], ['1', '234']),
            (12345678900, 3,
             [12, 345, 678, 900], ['12', '345', '678', '900']),
            (1000000, 6, [1, 0], ['1', '000000']),
        ]

        for number, size, as_ints, _ in cases:
            self.assertEqual(list(splitbyx(str(number), size)), as_ints)

        for number, size, _, as_text in cases:
            chunks = splitbyx(str(number), size, format_int=False)
            self.assertEqual(list(chunks), as_text)