From 4d8c93847cad53180585e07ff90c4df5d8bb0f41 Mon Sep 17 00:00:00 2001 From: Sangbum Kim Date: Sun, 11 Nov 2018 08:44:02 +0900 Subject: [PATCH] Add: Korean support (#219) * Add: lang_KO support Added Korean language support and tests * Remove commented code --- README.rst | 1 + num2words/__init__.py | 2 + num2words/lang_KO.py | 148 ++++++++++++++++++++++++++++++++++++++++++ tests/test_ko.py | 92 ++++++++++++++++++++++++++ 4 files changed, 243 insertions(+) create mode 100644 num2words/lang_KO.py create mode 100644 tests/test_ko.py diff --git a/README.rst b/README.rst index 0cdaf08..d9f4cd2 100644 --- a/README.rst +++ b/README.rst @@ -97,6 +97,7 @@ Besides the numerical argument, there are two main optional arguments. * ``id`` (Indonesian) * ``it`` (Italian) * ``ja`` (Japanese) +* ``ko`` (Korean) * ``lt`` (Lithuanian) * ``lv`` (Latvian) * ``no`` (Norwegian) diff --git a/num2words/__init__.py b/num2words/__init__.py index 8188d46..2db3100 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -49,6 +49,7 @@ from . import lang_UK from . import lang_SL from . import lang_SR from . import lang_TH +from . import lang_KO CONVERTER_CLASSES = { 'ar': lang_AR.Num2Word_AR(), @@ -66,6 +67,7 @@ CONVERTER_CLASSES = { 'es_VE': lang_ES_VE.Num2Word_ES_VE(), 'id': lang_ID.Num2Word_ID(), 'ja': lang_JA.Num2Word_JA(), + 'ko': lang_KO.Num2Word_KO(), 'lt': lang_LT.Num2Word_LT(), 'lv': lang_LV.Num2Word_LV(), 'pl': lang_PL.Num2Word_PL(), diff --git a/num2words/lang_KO.py b/num2words/lang_KO.py new file mode 100644 index 0000000..e192cc6 --- /dev/null +++ b/num2words/lang_KO.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2003, Taro Ogawa. All Rights Reserved. +# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. 
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA

from __future__ import division, print_function, unicode_literals

from .base import Num2Word_Base
from .currency import parse_currency_parts


class Num2Word_KO(Num2Word_Base):
    """Korean number-to-words converter.

    Cardinals, years and currency amounts are spelled with Sino-Korean
    numerals.  Ordinals keep the thousands/hundreds part in Sino-Korean
    and spell the part below one hundred with native Korean numerals,
    followed by the counter "번째".
    """

    # currency code -> (main unit word, sub-unit word or None when the
    # currency has no sub-unit)
    CURRENCY_FORMS = {
        'KRW': ('원', None),
        'USD': ('달러', '센트'),
        'JPY': ('엔', None)
    }

    def set_high_numwords(self, high):
        """Register the large-number words in ``self.cards``.

        ``high`` is ordered from the largest word to the smallest; the
        words are spaced four decimal orders apart and the last one is
        stored under 10 ** 4.
        """
        top_exponent = 4 * len(high)
        for word, exponent in zip(high, range(top_exponent, 0, -4)):
            self.cards[10 ** exponent] = word

    def setup(self):
        """Define the Korean vocabulary used by the converter."""
        super(Num2Word_KO, self).setup()

        self.negword = "마이너스 "
        self.pointword = "점"

        # Largest first; consumed by set_high_numwords().
        self.high_numwords = [
            '무량대수',
            '불가사의',
            '나유타',
            '아승기',
            '항하사',
            '극',
            '재',
            '정',
            '간',
            '구',
            '양',
            '자',
            '해',
            '경',
            '조',
            '억',
            '만']
        self.mid_numwords = [(1000, "천"), (100, "백")]
        self.low_numwords = ["십", "구", "팔", "칠", "육", "오", "사", "삼", "이",
                             "일", "영"]
        # Sino-Korean word -> native Korean form used in front of the
        # counter "번째".  "이십" maps to the stand-alone form "스무";
        # _native_below_hundred() turns it into "스물" when a unit digit
        # follows (e.g. 25 -> "스물다섯").
        self.ords = {"일": "한",
                     "이": "두",
                     "삼": "세",
                     "사": "네",
                     "오": "다섯",
                     "육": "여섯",
                     "칠": "일곱",
                     "팔": "여덟",
                     "구": "아홉",
                     "십": "열",
                     "이십": "스무",
                     "삼십": "서른",
                     "사십": "마흔",
                     "오십": "쉰",
                     "육십": "예순",
                     "칠십": "일흔",
                     "팔십": "여든",
                     "구십": "아흔"}

    def merge(self, lpair, rpair):
        """Combine two adjacent ``(text, number)`` pairs into one.

        * A left value of 1 in front of a right value of at most 10000
          is dropped, so that 10, 100, 1000 and 10000 read "십", "백",
          "천" and "만".
        * A left value below 10000 followed by a smaller right value is
          added and written without a space ("이천" + "십팔").
        * A left value of 10000 or more followed by a smaller right
          value is added and written with a space ("이만 오천").
        * Anything else is a multiplication written without a space
          ("이" + "십").
        """
        ltext, lnum = lpair
        rtext, rnum = rpair
        if lnum == 1 and rnum <= 10000:
            return rpair
        elif 10000 > lnum > rnum:
            return ("%s%s" % (ltext, rtext), lnum + rnum)
        elif lnum >= 10000 and lnum > rnum:
            return ("%s %s" % (ltext, rtext), lnum + rnum)
        else:
            return ("%s%s" % (ltext, rtext), lnum * rnum)

    def _native_below_hundred(self, word):
        """Return the native Korean form of a Sino-Korean word below 100.

        ``word`` must be a single digit word or have the shape
        ``[digit]십[digit]``.  ``None`` is returned for anything else
        (for instance a word that ends in a large unit such as "만"),
        so the caller can leave such words untouched.
        """
        tens, has_ten, ones = word.partition("십")
        if not has_ten:
            return self.ords.get(word)
        if "십" in ones:
            return None
        tens_native = self.ords.get(tens + "십")
        if tens_native is None:
            return None
        if not ones:
            return tens_native
        ones_native = self.ords.get(ones)
        if ones_native is None:
            return None
        # "스무" is only used on its own; before a unit it is "스물".
        return tens_native.replace("스무", "스물") + ones_native

    def to_ordinal(self, value):
        """Spell ``value`` as an ordinal ("... 번째").

        ``value`` is first checked with ``self.verify_ordinal``.  Only
        the last space-separated word of the cardinal is rewritten: the
        part up to and including its last "천"/"백" stays Sino-Korean and
        the remainder, when it is a number below one hundred, is
        replaced by its native Korean form.  Words that have no native
        form (round hundreds or thousands, "영", words ending in a large
        unit) are kept as they are instead of raising ``KeyError``.
        """
        self.verify_ordinal(value)
        if value == 1:
            return "첫 번째"
        outwords = self.to_cardinal(value).split(" ")
        last = outwords[-1]
        # rfind() yields -1 when the marker is absent, so ``cut`` is 0
        # and the whole word is treated as the below-hundred part.
        cut = max(last.rfind("천"), last.rfind("백")) + 1
        head, tail = last[:cut], last[cut:]
        if tail:
            native = self._native_below_hundred(tail)
            if native is not None:
                outwords[-1] = "%s %s" % (head, native) if head else native
        return " ".join(outwords) + " 번째"

    def to_ordinal_num(self, value):
        """Return ``value`` in digits followed by the counter "번째"."""
        self.verify_ordinal(value)
        return "%s 번째" % (value)

    def to_year(self, val, suffix=None, longval=True):
        """Spell ``val`` as a year ("...년").

        A negative ``val`` is spelled by its absolute value and, unless
        ``suffix`` is given, marked with "기원전".  The marker is placed
        in front of the year.  ``longval`` is accepted but not used
        here.
        """
        if val < 0:
            val = abs(val)
            suffix = '기원전' if not suffix else suffix
        valtext = self.to_cardinal(val)
        return ("%s년" % valtext if not suffix
                else "%s %s년" % (suffix, valtext))

    def to_currency(self, val, currency="KRW", cents=False, seperator="",
                    adjective=False):
        """Spell ``val`` as an amount of money in ``currency``.

        ``cents`` is forwarded to ``parse_currency_parts`` as
        ``is_int_with_cents``.  ``seperator`` and ``adjective`` are
        accepted but not used here.

        Raises ``NotImplementedError`` for a currency code missing from
        ``CURRENCY_FORMS`` and ``ValueError`` when a fractional part (or
        ``cents=True``) is given for a currency without a sub-unit.
        """
        left, right, is_negative = parse_currency_parts(
            val, is_int_with_cents=cents)

        try:
            cr1, cr2 = self.CURRENCY_FORMS[currency]
        except KeyError:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"' %
                (currency, self.__class__.__name__))
        if (cents or right) and not cr2:
            raise ValueError('Decimals not supported for "%s"' % currency)

        minus_str = self.negword if is_negative else ""
        # Amounts are written as one word, without the spaces that
        # to_cardinal() inserts between large-unit groups.
        main_text = ''.join(self.to_cardinal(left).split())
        sub_text = ' ' + self.to_cardinal(right) + cr2 if cr2 else ''
        return '%s%s%s%s' % (minus_str, main_text, cr1, sub_text)


# --- Patch metadata and file header of the next file in this patch
# --- (tests/test_ko.py), kept verbatim as comments:
# diff --git a/tests/test_ko.py b/tests/test_ko.py
# new file mode 100644
# index 0000000..401ecb8
# --- /dev/null
# +++ b/tests/test_ko.py
# @@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA

from __future__ import division, print_function, unicode_literals

from unittest import TestCase

from num2words import num2words


def n2k(*args, **kwargs):
    """Call ``num2words`` with the language fixed to Korean."""
    return num2words(*args, lang='ko', **kwargs)


class Num2WordsKOTest(TestCase):
    """Expected Korean output for each conversion mode."""

    def _assert_all(self, cases, **options):
        """Check every ``(number, expected)`` pair, in order."""
        for number, expected in cases:
            self.assertEqual(n2k(number, **options), expected)

    def test_low(self):
        self._assert_all([
            (0, "영"), (1, "일"), (2, "이"), (3, "삼"), (4, "사"),
            (5, "오"), (6, "육"), (7, "칠"), (8, "팔"), (9, "구"),
            (10, "십"), (11, "십일"), (12, "십이"), (13, "십삼"),
            (14, "십사"), (15, "십오"), (16, "십육"), (17, "십칠"),
            (18, "십팔"), (19, "십구"), (20, "이십"), (25, "이십오"),
            (31, "삼십일"), (42, "사십이"), (54, "오십사"),
            (63, "육십삼"), (76, "칠십육"), (89, "팔십구"),
            (98, "구십팔"),
        ])

    def test_mid(self):
        self._assert_all([
            (100, "백"), (121, "백이십일"), (160, "백육십"),
            (256, "이백오십육"), (285, "이백팔십오"),
            (486, "사백팔십육"), (627, "육백이십칠"), (808, "팔백팔"),
            (999, "구백구십구"), (1004, "천사"), (2018, "이천십팔"),
            (7063, "칠천육십삼"),
        ])

    def test_high(self):
        self._assert_all([
            (10000, "만"),
            (11020, "만 천이십"),
            (25891, "이만 오천팔백구십일"),
            (64237, "육만 사천이백삼십칠"),
            (241572, "이십사만 천오백칠십이"),
            (100000000, "일억"),
            (5000500000000, "오조 오억"),
        ])

    def test_negative(self):
        self._assert_all([
            (-11, "마이너스 십일"),
            (-15, "마이너스 십오"),
            (-18, "마이너스 십팔"),
            (-241572, "마이너스 이십사만 천오백칠십이"),
        ])

    def test_year(self):
        self._assert_all([
            (2000, "이천년"), (2002, "이천이년"), (2018, "이천십팔년"),
            (1954, "천구백오십사년"), (1910, "천구백십년"),
            (-1000, "기원전 천년"),
        ], to="year")

    def test_currency(self):
        # KRW is the converter's default currency, so it is not passed.
        self._assert_all([
            (8350, "팔천삼백오십원"),
            (14980, "만사천구백팔십원"),
            (250004000, "이억오천만사천원"),
        ], to="currency")
        self._assert_all([
            (4, "사달러 영센트"),
            (19.55, "십구달러 오십오센트"),
        ], to="currency", currency="USD")
        self._assert_all([
            (15, "십오엔"),
            (50, "오십엔"),
        ], to="currency", currency="JPY")
        # A fractional amount in a currency without a sub-unit.
        with self.assertRaises(ValueError):
            n2k(190.55, to="currency")
        # A currency code the converter does not know.
        with self.assertRaises(NotImplementedError):
            n2k(4, to="currency", currency="EUR")

    def test_ordinal(self):
        self._assert_all([
            (1, "첫 번째"), (101, "백 한 번째"), (2, "두 번째"),
            (5, "다섯 번째"), (10, "열 번째"), (25, "스물다섯 번째"),
            (137, "백 서른일곱 번째"),
        ], to="ordinal")

    def test_ordinal_num(self):
        self._assert_all([
            (1, "1 번째"), (101, "101 번째"), (25, "25 번째"),
        ], to="ordinal_num")