From 28c304c2f489235b7bb56bdf67de075269fa740c Mon Sep 17 00:00:00 2001 From: Ariel Allon Date: Sun, 27 Oct 2019 23:46:02 -0500 Subject: [PATCH 1/2] Fix Hebrew support - Correct gender of 1 and 2 (and 11 and 12) to match remainder of numbers - Fix spelling of 8 (and 18 and 80) - Add cases for thousands 3-9 and support in the logic - Fix placement of "and" conjunction to match Academy of Hebrew Language position - Add tests --- num2words/lang_HE.py | 39 ++++++++++++++--------- tests/test_cli.py | 2 +- tests/test_he.py | 73 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 16 deletions(-) create mode 100644 tests/test_he.py diff --git a/num2words/lang_HE.py b/num2words/lang_HE.py index c1d71a3..0da469f 100644 --- a/num2words/lang_HE.py +++ b/num2words/lang_HE.py @@ -18,32 +18,33 @@ from __future__ import print_function, unicode_literals +from .base import Num2Word_Base from .utils import get_digits, splitbyx ZERO = (u'אפס',) ONES = { - 1: (u'אחד',), - 2: (u'שנים',), + 1: (u'אחת',), + 2: (u'שתים',), 3: (u'שלש',), 4: (u'ארבע',), 5: (u'חמש',), 6: (u'שש',), 7: (u'שבע',), - 8: (u'שמנה',), + 8: (u'שמונה',), 9: (u'תשע',), } TENS = { 0: (u'עשר',), - 1: (u'אחד עשרה',), - 2: (u'שנים עשרה',), + 1: (u'אחת עשרה',), + 2: (u'שתים עשרה',), 3: (u'שלש עשרה',), 4: (u'ארבע עשרה',), 5: (u'חמש עשרה',), 6: (u'שש עשרה',), 7: (u'שבע עשרה',), - 8: (u'שמנה עשרה',), + 8: (u'שמונה עשרה',), 9: (u'תשע עשרה',), } @@ -54,7 +55,7 @@ TWENTIES = { 5: (u'חמישים',), 6: (u'ששים',), 7: (u'שבעים',), - 8: (u'שמנים',), + 8: (u'שמונים',), 9: (u'תשעים',), } @@ -67,6 +68,13 @@ HUNDRED = { THOUSANDS = { 1: (u'אלף',), 2: (u'אלפיים',), + 3: (u'שלשת אלפים',), + 4: (u'ארבעת אלפים',), + 5: (u'חמשת אלפים',), + 6: (u'ששת אלפים',), + 7: (u'שבעת אלפים',), + 8: (u'שמונת אלפים',), + 9: (u'תשעת אלפים',), } AND = u'ו' @@ -100,12 +108,15 @@ def int2word(n): n1, n2, n3 = get_digits(x) + if i > 0: + words.append(THOUSANDS[n1][0]) + continue + if n3 > 0: if n3 <= 2: words.append(HUNDRED[n3][0]) else: - words.append(ONES[n3][0]) - words.append(HUNDRED[3][0]) + words.append(ONES[n3][0] + ' ' + HUNDRED[3][0]) if n2 > 1: words.append(TWENTIES[n2][0]) @@ -116,14 +127,12 @@ def int2word(n): words.append(ONES[n1][0]) if i > 0: - if i <= 2: - words.append(THOUSANDS[i][0]) - else: - words.append(ONES[i][0]) - words.append(THOUSANDS[1][0]) + words.append(THOUSANDS[i][0]) + # source: https://hebrew-academy.org.il/2017/01/30/%d7%95-%d7%94%d7%97%d7%99%d7%91%d7%95%d7%a8-%d7%91%d7%9e%d7%a1%d7%a4%d7%a8%d7%99%d7%9d/ if len(words) > 1: words[-1] = AND + words[-1] + return ' '.join(words) @@ -135,7 +144,7 @@ def to_currency(n, currency='EUR', cents=True, separator=','): raise NotImplementedError() -class Num2Word_HE(object): +class Num2Word_HE(Num2Word_Base): def to_cardinal(self, number): return n2w(number) diff --git a/tests/test_cli.py b/tests/test_cli.py index d8ea56c..a85b125 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -102,7 +102,7 @@ class CliTestCase(unittest.TestCase): ) def test_cli_with_lang_to(self): - """You should be able to specify a language + """You should be able to specify a language and currency """ output = self.cli.run_cmd(150.55, '--lang', 'es', '--to', 'currency') self.assertEqual(output.return_code, 0) diff --git a/tests/test_he.py b/tests/test_he.py new file mode 100644 index 0000000..206d7cc --- /dev/null +++ b/tests/test_he.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2003, Taro Ogawa. All Rights Reserved. +# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + + +class Num2WordsHETest(TestCase): + maxDiff = None + + def test_0(self): + self.assertEqual(num2words(0, lang="he"), u'אפס') + + def test_1_to_10(self): + self.assertEqual(num2words(1, lang="he"), u'אחת') + self.assertEqual(num2words(2, lang="he"), u'שתים') + self.assertEqual(num2words(7, lang="he"), u'שבע') + self.assertEqual(num2words(10, lang="he"), u'עשר') + + def test_11_to_19(self): + self.assertEqual(num2words(11, lang="he"), u'אחת עשרה') + self.assertEqual(num2words(13, lang="he"), u'שלש עשרה') + self.assertEqual(num2words(15, lang="he"), u'חמש עשרה') + self.assertEqual(num2words(16, lang="he"), u'שש עשרה') + self.assertEqual(num2words(19, lang="he"), u'תשע עשרה') + + def test_20_to_99(self): + self.assertEqual(num2words(20, lang="he"), u'עשרים') + self.assertEqual(num2words(23, lang="he"), u'עשרים ושלש') + self.assertEqual(num2words(28, lang="he"), u'עשרים ושמונה') + self.assertEqual(num2words(31, lang="he"), u'שלשים ואחת') + self.assertEqual(num2words(40, lang="he"), u'ארבעים') + self.assertEqual(num2words(66, lang="he"), u'ששים ושש') + self.assertEqual(num2words(92, lang="he"), u'תשעים ושתים') + + def test_100_to_999(self): + self.assertEqual(num2words(100, lang="he"), u'מאה') + self.assertEqual(num2words(111, lang="he"), u'מאה ואחת עשרה') + self.assertEqual(num2words(150, lang="he"), u'מאה וחמישים') + self.assertEqual(num2words(196, lang="he"), u'מאה תשעים ושש') + self.assertEqual(num2words(200, lang="he"), u'מאתיים') + self.assertEqual(num2words(210, lang="he"), u'מאתיים ועשר') + self.assertEqual(num2words(701, lang="he"), u'שבע מאות ואחת') + + def test_1000_to_9999(self): + self.assertEqual(num2words(1000, lang="he"), u'אלף') + self.assertEqual(num2words(1001, lang="he"), u'אלף ואחת') + self.assertEqual(num2words(1500, lang="he"), u'אלף וחמש מאות') + self.assertEqual( + num2words(7378, lang="he"), u'שבעת אלפים שלש מאות שבעים ושמונה' + ) + self.assertEqual(num2words(2000, lang="he"), u'אלפיים') + self.assertEqual(num2words(2100, lang="he"), u'אלפיים ומאה') + self.assertEqual( + num2words(6870, lang="he"), u'ששת אלפים שמונה מאות ושבעים' + ) From c1e3e854bbf794d5734c9b2693ba3e12f6b5e811 Mon Sep 17 00:00:00 2001 From: Ariel Allon Date: Mon, 28 Oct 2019 00:01:21 -0500 Subject: [PATCH 2/2] Use non-url-encoded source URL to avoid CI rejection for too-long line --- num2words/lang_HE.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/num2words/lang_HE.py b/num2words/lang_HE.py index 0da469f..34b2ec8 100644 --- a/num2words/lang_HE.py +++ b/num2words/lang_HE.py @@ -129,7 +129,7 @@ def int2word(n): if i > 0: words.append(THOUSANDS[i][0]) - # source: https://hebrew-academy.org.il/2017/01/30/%d7%95-%d7%94%d7%97%d7%99%d7%91%d7%95%d7%a8-%d7%91%d7%9e%d7%a1%d7%a4%d7%a8%d7%99%d7%9d/ + # source: https://hebrew-academy.org.il/2017/01/30/ו-החיבור-במספרים/ if len(words) > 1: words[-1] = AND + words[-1]