add support for Japanese (JA) (#171)

* add support for Japanese (JA)
This commit is contained in:
siikamiika
2018-06-15 18:14:56 +03:00
committed by Istvan SZALAÏ
parent b492530cfa
commit 89554ff9e2
5 changed files with 837 additions and 1 deletions

View File

@@ -48,7 +48,7 @@ There's only one function to use::
>>> num2words(42, lang='fr') >>> num2words(42, lang='fr')
quarante-deux quarante-deux
Besides the numerical argument, there's two optional arguments. Besides the numerical argument, there are two main optional arguments.
**to:** The converter to use. Supported values are: **to:** The converter to use. Supported values are:
@@ -78,6 +78,7 @@ Besides the numerical argument, there's two optional arguments.
* ``he`` (Hebrew) * ``he`` (Hebrew)
* ``id`` (Indonesian) * ``id`` (Indonesian)
* ``it`` (Italian) * ``it`` (Italian)
* ``ja`` (Japanese)
* ``lt`` (Lithuanian) * ``lt`` (Lithuanian)
* ``lv`` (Latvian) * ``lv`` (Latvian)
* ``no`` (Norwegian) * ``no`` (Norwegian)
@@ -101,6 +102,58 @@ Therefore, if you want to call ``num2words`` with a fallback, you can do::
except NotImplementedError: except NotImplementedError:
return num2words(42, lang='en') return num2words(42, lang='en')
Additionally, some converters and languages support other optional arguments
that are needed to make the converter useful in practice.
**ja (Japanese)**
**reading:** whether or not to return the reading of the converted number.
Also has the special value ``"arabic"`` when used with ``year``::
>>> num2words(42, lang='ja', reading=True)
よんじゅうに
>>> num2words(2017, lang='ja', to='year', reading='arabic')
平成29年
**prefer:** when there are multiple readings or (kanji) words available,
prefer those in the sequence ``prefer``::
>>> num2words(0, lang='ja')
>>> num2words(0, lang='ja', prefer=[''])
>>> num2words(42, lang='ja', reading=True, prefer=['し'])
しじゅうに
>>> num2words(74, lang='ja', reading=True)
ななじゅうよん
>>> num2words(74, lang='ja', reading=True, prefer=['し', 'しち'])
しちじゅうし
>>> num2words(1375, lang='ja', to="year")
天授元年
>>> num2words(1375, lang='ja', to="year", prefer=['えいわ'])
永和元年
**era:** (``year`` only) whether or not to convert the year to the era
calendar format. Defaults to ``True``::
>>> num2words(2017, lang='ja', to='year', era=True)
平成二十九年
>>> num2words(2017, lang='ja', to='year', reading=True, era=True)
へいせいにじゅうくねん
>>> num2words(2017, lang='ja', to='year', era=False)
二千十七年
**counter:** (``ordinal`` and ``ordinal_num`` only) which counter to use with
the ordinal number. Defaults to ```` and only supports ``reading`` with
it::
>>> num2words(0, lang='ja', to='ordinal')
零番目
>>> num2words(1, lang='ja', to='ordinal', counter='人')
一人目
>>> num2words(1, lang='ja', to='ordinal', reading=True, counter='人')
NotImplementedError: Reading not implemented for 人
History History
------- -------

View File

@@ -31,6 +31,7 @@ from . import lang_LV
from . import lang_PL from . import lang_PL
from . import lang_RU from . import lang_RU
from . import lang_ID from . import lang_ID
from . import lang_JA
from . import lang_NO from . import lang_NO
from . import lang_DK from . import lang_DK
from . import lang_PT_BR from . import lang_PT_BR
@@ -59,6 +60,7 @@ CONVERTER_CLASSES = {
'es_CO': lang_ES_CO.Num2Word_ES_CO(), 'es_CO': lang_ES_CO.Num2Word_ES_CO(),
'es_VE': lang_ES_VE.Num2Word_ES_VE(), 'es_VE': lang_ES_VE.Num2Word_ES_VE(),
'id': lang_ID.Num2Word_ID(), 'id': lang_ID.Num2Word_ID(),
'ja': lang_JA.Num2Word_JA(),
'lt': lang_LT.Num2Word_LT(), 'lt': lang_LT.Num2Word_LT(),
'lv': lang_LV.Num2Word_LV(), 'lv': lang_LV.Num2Word_LV(),
'pl': lang_PL.Num2Word_PL(), 'pl': lang_PL.Num2Word_PL(),

View File

@@ -15,6 +15,12 @@
# MA 02110-1301 USA # MA 02110-1301 USA
try:
strtype = basestring
except NameError:
strtype = str
def to_s(val): def to_s(val):
try: try:
return unicode(val) return unicode(val)

591
num2words/lang_JA.py Normal file
View File

@@ -0,0 +1,591 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
from __future__ import division, print_function, unicode_literals
from .base import Num2Word_Base
from .compat import strtype, to_s
from .currency import parse_currency_parts, prefix_currency
def select_text(text, reading=False, prefer=None):
"""Select the correct text from the Japanese number, reading and
alternatives"""
# select kanji number or kana reading
if reading:
text = text[1]
else:
text = text[0]
# select the preferred one or the first one from multiple alternatives
if not isinstance(text, strtype):
common = set(text) & set(prefer or set())
if len(common) == 1:
text = common.pop()
else:
text = text[0]
return text
def rendaku_merge_pairs(lpair, rpair):
"""Merge lpair < rpair while applying semi-irregular rendaku rules"""
ltext, lnum = lpair
rtext, rnum = rpair
if lnum > rnum:
raise ValueError
if rpair == ("ひゃく", 100):
if lpair == ("さん", 3):
rtext = "びゃく"
elif lpair == ("ろく", 6):
ltext = "ろっ"
rtext = "ぴゃく"
elif lpair == ("はち", 8):
ltext = "はっ"
rtext = "ぴゃく"
elif rpair == ("せん", 1000):
if lpair == ("さん", 3):
rtext = "ぜん"
elif lpair == ("はち", 8):
ltext = "はっ"
elif rpair == ("ちょう", 10**12):
if lpair == ("いち", 1):
ltext = "いっ"
elif lpair == ("はち", 8):
ltext = "はっ"
elif lpair == ("じゅう", 10):
ltext = "じゅっ"
elif rpair == ("けい", 10**16):
if lpair == ("いち", 1):
ltext = "いっ"
elif lpair == ("ろく", 6):
ltext = "ろっ"
elif lpair == ("はち", 8):
ltext = "はっ"
elif lpair == ("じゅう", 10):
ltext = "じゅっ"
elif lpair == ("ひゃく", 100):
ltext = "ひゃっ"
return ("%s%s" % (ltext, rtext), lnum * rnum)
# Source: https://www.sljfaq.org/afaq/era-list.html
# if there are multiple eras for the same year, use the last one
ERA_START = [
(645, ("大化", "たいか")),
(650, ("白雉", "はくち")),
(686, ("朱鳥", "しゅちょう")),
(701, ("大宝", "たいほう")),
(704, ("慶雲", "けいうん")),
(708, ("和銅", "わどう")),
(715, ("霊亀", "れいき")),
(717, ("養老", "ようろう")),
(724, ("神亀", "じんき")),
(729, ("天平", "てんぴょう")),
(749, ("天平感宝", "てんぴょうかんぽう")),
(749, ("天平勝宝", "てんぴょうしょうほう")),
(757, ("天平宝字", "てんぴょうじょうじ")),
(765, ("天平神護", "てんぴょうじんご")),
(767, ("神護景雲", "じんごけいうん")),
(770, ("宝亀", "ほうき")),
(781, ("天応", "てんおう")),
(782, ("延暦", "えんりゃく")),
(806, ("大同", "だいどう")),
(810, ("弘仁", "こうにん")),
(823, ("天長", "てんちょう")),
(834, ("承和", "じょうわ")),
(848, ("嘉祥", "かしょう")),
(851, ("仁寿", "にんじゅ")),
(855, ("斉衡", "さいこう")),
(857, ("天安", "てんあん")),
(859, ("貞観", "じょうがん")),
(877, ("元慶", "がんぎょう")),
(885, ("仁和", "にんな")),
(889, ("寛平", "かんぴょう")),
(898, ("昌泰", "しょうたい")),
(901, ("延喜", "えんぎ")),
(923, ("延長", "えんちょう")),
(931, ("承平", "じょうへい")),
(938, ("天慶", "てんぎょう")),
(947, ("天暦", "てんりゃく")),
(957, ("天徳", "てんとく")),
(961, ("応和", "おうわ")),
(964, ("康保", "こうほう")),
(968, ("安和", "あんな")),
(970, ("天禄", "てんろく")),
(974, ("天延", "てんえん")),
(976, ("貞元", "じょうげん")),
(979, ("天元", "てんげん")),
(983, ("永観", "えいかん")),
(985, ("寛和", "かんな")),
(987, ("永延", "えいえん")),
(989, ("永祚", "えいそ")),
(990, ("正暦", "しょうりゃく")),
(995, ("長徳", "ちょうとく")),
(999, ("長保", "ちょうほう")),
(1004, ("寛弘", "かんこう")),
(1013, ("長和", "ちょうわ")),
(1017, ("寛仁", "かんにん")),
(1021, ("治安", "じあん")),
(1024, ("万寿", "まんじゅ")),
(1028, ("長元", "ちょうげん")),
(1037, ("長暦", "ちょうりゃく")),
(1040, ("長久", "ちょうきゅう")),
(1045, ("寛徳", "かんとく")),
(1046, ("永承", "えいしょう")),
(1053, ("天喜", "てんぎ")),
(1058, ("康平", "こうへい")),
(1065, ("治暦", "じりゃく")),
(1069, ("延久", "えんきゅう")),
(1074, ("承保", "じょうほう")),
(1078, ("承暦", "じょうりゃく")),
(1081, ("永保", "えいほう")),
(1084, ("応徳", "おうとく")),
(1087, ("寛治", "かんじ")),
(1095, ("嘉保", "かほう")),
(1097, ("永長", "えいちょう")),
(1098, ("承徳", "じょうとく")),
(1099, ("康和", "こうわ")),
(1104, ("長治", "ちょうじ")),
(1106, ("嘉承", "かじょう")),
(1108, ("天仁", "てんにん")),
(1110, ("天永", "てんねい")),
(1113, ("永久", "えいきゅう")),
(1118, ("元永", "げんえい")),
(1120, ("保安", "ほうあん")),
(1124, ("天治", "てんじ")),
(1126, ("大治", "だいじ")),
(1131, ("天承", "てんしょう")),
(1132, ("長承", "ちょうしょう")),
(1135, ("保延", "ほうえん")),
(1141, ("永治", "えいじ")),
(1142, ("康治", "こうじ")),
(1144, ("天養", "てんよう")),
(1145, ("久安", "きゅうあん")),
(1151, ("仁平", "にんぺい")),
(1154, ("久寿", "きゅうじゅ")),
(1156, ("保元", "ほうげん")),
(1159, ("平治", "へいじ")),
(1160, ("永暦", "えいりゃく")),
(1161, ("応保", "おうほう")),
(1163, ("長寛", "ちょうかん")),
(1165, ("永万", "えいまん")),
(1166, ("仁安", "にんあん")),
(1169, ("嘉応", "かおう")),
(1171, ("承安", "しょうあん")),
(1175, ("安元", "あんげん")),
(1177, ("治承", "じしょう")),
(1181, ("養和", "ようわ")),
(1182, ("寿永", "じゅえい")),
(1184, ("元暦", "げんりゃく")),
(1185, ("文治", "ぶんじ")),
(1190, ("建久", "けんきゅう")),
(1199, ("正治", "しょうじ")),
(1201, ("建仁", "けんにん")),
(1204, ("元久", "げんきゅう")),
(1206, ("建永", "けんえい")),
(1207, ("承元", "じょうげん")),
(1211, ("建暦", "けんりゃく")),
(1214, ("建保", "けんぽう")),
(1219, ("承久", "じょうきゅう")),
(1222, ("貞応", "じょうおう")),
(1225, ("元仁", "げんにん")),
(1225, ("嘉禄", "かろく")),
(1228, ("安貞", "あんてい")),
(1229, ("寛喜", "かんき")),
(1232, ("貞永", "じょうえい")),
(1233, ("天福", "てんぷく")),
(1235, ("文暦", "ぶんりゃく")),
(1235, ("嘉禎", "かてい")),
(1239, ("暦仁", "りゃくにん")),
(1239, ("延応", "えんおう")),
(1240, ("仁治", "にんじ")),
(1243, ("寛元", "かんげん")),
(1247, ("宝治", "ほうじ")),
(1249, ("建長", "けんちょう")),
(1256, ("康元", "こうげん")),
(1257, ("正嘉", "しょうか")),
(1259, ("正元", "しょうげん")),
(1260, ("文応", "ぶんおう")),
(1261, ("弘長", "こうちょう")),
(1264, ("文永", "ぶんえい")),
(1275, ("健治", "けんじ")),
(1278, ("弘安", "こうあん")),
(1288, ("正応", "しょうおう")),
(1293, ("永仁", "えいにん")),
(1299, ("正安", "しょうあん")),
(1303, ("乾元", "けんげん")),
(1303, ("嘉元", "かげん")),
(1307, ("徳治", "とくじ")),
(1308, ("延慶", "えんきょう")),
(1311, ("応長", "おうちょう")),
(1312, ("正和", "しょうわ")),
(1317, ("文保", "ぶんぽう")),
(1319, ("元応", "げんおう")),
(1321, ("元亨", "げんこう")),
(1325, ("正中", "しょうちゅ")),
(1326, ("嘉暦", "かりゃく")),
(1329, ("元徳", "げんとく")),
(1331, ("元弘", "げんこう")),
(1332, ("正慶", "しょうけい")),
(1334, ("建武", "けんむ")),
(1336, ("延元", "えいげん")),
(1338, ("暦応", "りゃくおう")),
(1340, ("興国", "こうこく")),
(1342, ("康永", "こうえい")),
(1345, ("貞和", "じょうわ")),
(1347, ("正平", "しょうへい")),
(1350, ("観応", "かんおう")),
(1352, ("文和", "ぶんな")),
(1356, ("延文", "えんぶん")),
(1361, ("康安", "こうあん")),
(1362, ("貞治", "じょうじ")),
(1368, ("応安", "おうあん")),
(1370, ("建徳", "けんとく")),
(1372, ("文中", "ぶんちゅう")),
(1375, ("永和", "えいわ")),
(1375, ("天授", "てんじゅ")),
(1379, ("康暦", "こうりゃく")),
(1381, ("永徳", "えいとく")),
(1381, ("弘和", "こうわ")),
(1384, ("至徳", "しとく")),
(1384, ("元中", "げんちゅう")),
(1387, ("嘉慶", "かけい")),
(1389, ("康応", "こうおう")),
(1390, ("明徳", "めいとく")),
(1394, ("応永", "おうえい")),
(1428, ("正長", "しょうちょう")),
(1429, ("永享", "えいきょう")),
(1441, ("嘉吉", "かきつ")),
(1444, ("文安", "ぶんあん")),
(1449, ("宝徳", "ほうとく")),
(1452, ("享徳", "きょうとく")),
(1455, ("康正", "こうしょう")),
(1457, ("長禄", "ちょうろく")),
(1461, ("寛正", "かんしょう")),
(1466, ("文正", "ぶんしょう")),
(1467, ("応仁", "おうにん")),
(1469, ("文明", "ぶんめい")),
(1487, ("長享", "ちょうきょう")),
(1489, ("延徳", "えんとく")),
(1492, ("明応", "めいおう")),
(1501, ("文亀", "ぶんき")),
(1504, ("永正", "えいしょう")),
(1521, ("大永", "だいえい")),
(1528, ("享禄", "きょうろく")),
(1532, ("天文", "てんぶん")),
(1555, ("弘治", "こうじ")),
(1558, ("永禄", "えいろく")),
(1570, ("元亀", "げんき")),
(1573, ("天正", "てんしょう")),
(1593, ("文禄", "ぶんろく")),
(1596, ("慶長", "けいちょう")),
(1615, ("元和", "げんな")),
(1624, ("寛永", "かんえい")),
(1645, ("正保", "しょうほう")),
(1648, ("慶安", "けいあん")),
(1652, ("承応", "じょうおう")),
(1655, ("明暦", "めいれき")),
(1658, ("万治", "まんじ")),
(1661, ("寛文", "かんぶん")),
(1673, ("延宝", "えんぽう")),
(1681, ("天和", "てんな")),
(1684, ("貞享", "じょうきょう")),
(1688, ("元禄", "げんろく")),
(1704, ("宝永", "ほうえい")),
(1711, ("正徳", "しょうとく")),
(1716, ("享保", "きょうほう")),
(1736, ("元文", "げんぶん")),
(1741, ("寛保", "かんぽう")),
(1744, ("延享", "えんきょう")),
(1748, ("寛延", "かんえん")),
(1751, ("宝暦", "ほうれき")),
(1764, ("明和", "めいわ")),
(1773, ("安永", "あんえい")),
(1781, ("天明", "てんめい")),
(1801, ("寛政", "かんせい")),
(1802, ("享和", "きょうわ")),
(1804, ("文化", "ぶんか")),
(1818, ("文政", "ぶんせい")),
(1831, ("天保", "てんぽう")),
(1845, ("弘化", "こうか")),
(1848, ("嘉永", "かえい")),
(1855, ("安政", "あんせい")),
(1860, ("万延", "まんえい")),
(1861, ("文久", "ぶんきゅう")),
(1864, ("元治", "げんじ")),
(1865, ("慶応", "けいおう")),
(1868, ("明治", "めいじ")),
(1912, ("大正", "たいしょう")),
(1926, ("昭和", "しょうわ")),
(1989, ("平成", "へいせい")),
]
class Num2Word_JA(Num2Word_Base):
CURRENCY_FORMS = {
'JPY': (('', 'えん'), ()),
}
def set_high_numwords(self, high):
max = 4 * len(high)
for word, n in zip(high, range(max, 0, -4)):
self.cards[10 ** n] = word
def setup(self):
self.negword = "マイナス"
self.pointword = ("", "てん")
self.exclude_title = ["", "マイナス"]
self.mid_numwords = [
(1000, ("", "せん")),
(100, ("", "ひゃく")),
]
self.low_numwords = [
("", "じゅう"), # 10 jū
("", "きゅう"), # 9 kyū
("", "はち"), # 8 hachi
("", ("なな", "しち")), # 7 nana, shichi
("", "ろく"), # 6 roku
("", ""), # 5 go
("", ("よん", "")), # 4 yon, shi
("", "さん"), # 3 san
("", ""), # 2 ni
("", "いち"), # 1 ichi
# both are alternatives, 零 doesn't map to ゼロ or to れい
(("", ""), ("ゼロ", "れい")), # 0 ZERO, rei
]
def merge(self, lpair, rpair):
ltext, lnum = lpair
rtext, rnum = rpair
fmt = "%s%s"
# ignore lpair if lnum is 1 and rnum is less than 10000
if lnum == 1 and rnum < 10000:
return rpair
# rnum is added to lnum
elif lnum > rnum:
return (fmt % (ltext, rtext), lnum + rnum)
# rnum is multiplied by lnum
elif lnum < rnum:
return rendaku_merge_pairs(lpair, rpair)
def _ordinal_suffix(self, reading, counter):
if reading:
if counter == "":
return "ばんめ"
else:
raise NotImplementedError(
"Reading not implemented for %s" % counter)
else:
return counter + ""
def to_ordinal(self, value, reading=False, prefer=None, counter=""):
self.verify_ordinal(value)
base = self.to_cardinal(value, reading=reading, prefer=prefer)
return "%s%s" % (base, self._ordinal_suffix(reading, counter))
def to_ordinal_num(self, value, reading=False, counter=""):
return "%s%s" % (value, self._ordinal_suffix(reading, counter))
def to_year(self, val, suffix=None, longval=True, reading=False,
prefer=None, era=True):
year = val
# Gregorian calendar
if not era:
prefix = ""
if year < 0:
year = abs(year)
prefix = "きげんぜん" if reading else "紀元前"
year_words = self.to_cardinal(year, reading=reading, prefer=prefer)
if reading and year % 10 == 9:
year_words = year_words[:-3] + ""
return "%s%s%s" % (prefix, year_words, "ねん" if reading else "")
# Era calendar (default)
min_year = ERA_START[0][0]
last_era_idx = len(ERA_START) - 1
if year < min_year:
raise ValueError(
"Can't convert years less than %s to era" % min_year)
first = 0
last = last_era_idx
era_idx = None
while era_idx is None:
mid = (first + last) // 2
if mid == last_era_idx or (ERA_START[mid][0] <= year and
ERA_START[mid + 1][0] > year):
era_idx = mid
# if an era lasting less than a year is preferred, choose it
if prefer:
i = mid - 1
while i >= 0 and ERA_START[i][0] == year:
# match kanji or hiragana
if set(ERA_START[i][1]) & set(prefer):
era_idx = i
break
i -= 1
# ends up at the last index where year >= ERA_START[mid][0]
if year < ERA_START[mid][0]:
last = mid - 1
else:
first = mid + 1
era = ERA_START[era_idx]
era_name = era[1][0]
era_year = year - era[0] + 1
fmt = "%s%s"
if reading == "arabic":
era_year_words = str(era_year)
elif reading:
era_name = era[1][1]
era_year_words = (self.to_cardinal(era_year, reading=True,
prefer=prefer)
if era_year != 1 else "がん")
if era_year % 10 == 9:
era_year_words = era_year_words[:-3] + ""
fmt = "%s%sねん"
else:
era_year_words = (self.to_cardinal(era_year, reading=False,
prefer=prefer)
if era_year != 1 else "")
return fmt % (era_name, era_year_words)
def to_currency(self, val, currency="JPY", cents=False, seperator="",
adjective=False, reading=False, prefer=None):
left, right, is_negative = parse_currency_parts(
val, is_int_with_cents=cents)
try:
cr1, cr2 = self.CURRENCY_FORMS[currency]
if (cents or abs(val) != left) and not cr2:
raise ValueError('Decimals not supported for "%s"' % currency)
except KeyError:
raise NotImplementedError(
'Currency code "%s" not implemented for "%s"' %
(currency, self.__class__.__name__))
if adjective and currency in self.CURRENCY_ADJECTIVES:
cr1 = prefix_currency(self.CURRENCY_ADJECTIVES[currency], cr1)
minus_str = self.negword if is_negative else ""
return '%s%s%s%s%s' % (
minus_str,
self.to_cardinal(left, reading=reading, prefer=prefer),
cr1[1] if reading else cr1[0],
self.to_cardinal(right, reading=reading, prefer=prefer)
if cr2 else '',
(cr2[1] if reading else cr2[0]) if cr2 else '',
)
def base_setup(self):
self.high_numwords = [
("", "まん"), # 10**4 man
("", "おく"), # 10**8 oku
("", "ちょう"), # 10**12 chō
("", "けい"), # 10**16 kei
("", "がい"), # 10**20 gai
("", ""), # 10**24 shi
("", "じょう"), # 10**28 jō
("", "こう"), # 10**32 kō
("", "かん"), # 10**36 kan
("", "せい"), # 10**40 sei
("", "さい"), # 10**44 sai
("", "ごく"), # 10**48 goku
]
self.high_numwords.reverse()
def splitnum(self, value, reading, prefer):
for elem in self.cards:
if elem > value:
continue
out = []
if value == 0:
div, mod = 1, 0
else:
div, mod = divmod(value, elem)
if div == 1:
out.append((select_text(self.cards[1], reading, prefer), 1))
else:
if div == value: # The system tallies, eg Roman Numerals
return [(
div * select_text(self.cards[elem], reading, prefer),
div * elem)]
out.append(self.splitnum(div, reading, prefer))
out.append((select_text(self.cards[elem], reading, prefer), elem))
if mod:
out.append(self.splitnum(mod, reading, prefer))
return out
def to_cardinal(self, value, reading=False, prefer=None):
try:
assert int(value) == value
except (ValueError, TypeError, AssertionError):
return self.to_cardinal_float(value, reading=reading,
prefer=prefer)
self.verify_num(value)
out = ""
if value < 0:
value = abs(value)
out = self.negword
if value >= self.MAXVAL:
raise OverflowError(self.errmsg_toobig % (value, self.MAXVAL))
val = self.splitnum(value, reading, prefer)
words, _ = self.clean(val)
return self.title(out + words)
def to_cardinal_float(self, value, reading=False, prefer=None):
prefer = prefer or ["れい"]
try:
float(value) == value
except (ValueError, TypeError, AssertionError):
raise TypeError(self.errmsg_nonnum % value)
pre, post = self.float2tuple(float(value))
post = str(post)
post = '0' * (self.precision - len(post)) + post
out = [self.to_cardinal(pre, reading=reading, prefer=prefer)]
if self.precision:
out.append(self.title(self.pointword[1 if reading else 0]))
for i in range(self.precision):
curr = int(post[i])
out.append(to_s(
self.to_cardinal(curr, reading=reading, prefer=prefer)))
return "".join(out)

184
tests/test_ja.py Normal file
View File

@@ -0,0 +1,184 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
from __future__ import division, print_function, unicode_literals
from unittest import TestCase
from num2words import num2words
def n2j(*args, **kwargs):
return num2words(*args, lang='ja', **kwargs)
class Num2WordsJATest(TestCase):
def test_low(self):
self.assertEqual(n2j(0), "")
self.assertEqual(n2j(0, prefer=[""]), "")
self.assertEqual(n2j(0, reading=True), "ゼロ")
self.assertEqual(n2j(0, reading=True, prefer=["れい"]), "れい")
self.assertEqual(n2j(1), "")
self.assertEqual(n2j(1, reading=True), "いち")
self.assertEqual(n2j(2), "")
self.assertEqual(n2j(2, reading=True), "")
self.assertEqual(n2j(3), "")
self.assertEqual(n2j(3, reading=True), "さん")
self.assertEqual(n2j(4), "")
self.assertEqual(n2j(4, reading=True), "よん")
self.assertEqual(n2j(4, reading=True, prefer=[""]), "")
self.assertEqual(n2j(5), "")
self.assertEqual(n2j(5, reading=True), "")
self.assertEqual(n2j(6), "")
self.assertEqual(n2j(6, reading=True), "ろく")
self.assertEqual(n2j(7), "")
self.assertEqual(n2j(7, reading=True), "なな")
self.assertEqual(n2j(7, reading=True, prefer=["しち"]), "しち")
self.assertEqual(n2j(8), "")
self.assertEqual(n2j(8, reading=True), "はち")
self.assertEqual(n2j(9), "")
self.assertEqual(n2j(9, reading=True), "きゅう")
self.assertEqual(n2j(10), "")
self.assertEqual(n2j(10, reading=True), "じゅう")
self.assertEqual(n2j(11), "十一")
self.assertEqual(n2j(11, reading=True), "じゅういち")
self.assertEqual(n2j(12), "十二")
self.assertEqual(n2j(12, reading=True), "じゅうに")
self.assertEqual(n2j(13), "十三")
self.assertEqual(n2j(13, reading=True), "じゅうさん")
self.assertEqual(n2j(14), "十四")
self.assertEqual(n2j(14, reading=True), "じゅうよん")
self.assertEqual(n2j(14, reading=True, prefer=[""]), "じゅうし")
self.assertEqual(n2j(15), "十五")
self.assertEqual(n2j(15, reading=True), "じゅうご")
self.assertEqual(n2j(16), "十六")
self.assertEqual(n2j(16, reading=True), "じゅうろく")
self.assertEqual(n2j(17), "十七")
self.assertEqual(n2j(17, reading=True), "じゅうなな")
self.assertEqual(n2j(17, reading=True, prefer=["しち"]), "じゅうしち")
self.assertEqual(n2j(18), "十八")
self.assertEqual(n2j(18, reading=True), "じゅうはち")
self.assertEqual(n2j(19), "十九")
self.assertEqual(n2j(19, reading=True), "じゅうきゅう")
self.assertEqual(n2j(20), "二十")
self.assertEqual(n2j(20, reading=True), "にじゅう")
def test_mid(self):
self.assertEqual(n2j(100), "")
self.assertEqual(n2j(100, reading=True), "ひゃく")
self.assertEqual(n2j(123), "百二十三")
self.assertEqual(n2j(123, reading=True), "ひゃくにじゅうさん")
self.assertEqual(n2j(300), "三百")
self.assertEqual(n2j(300, reading=True), "さんびゃく")
self.assertEqual(n2j(400), "四百")
self.assertEqual(n2j(400, reading=True), "よんひゃく")
# 400 --> しひゃく sounds weird, but can be generated with prefer
self.assertEqual(n2j(600), "六百")
self.assertEqual(n2j(600, reading=True), "ろっぴゃく")
self.assertEqual(n2j(700, reading=True, prefer=["しち"]), "しちひゃく")
self.assertEqual(n2j(800, reading=True), "はっぴゃく")
self.assertEqual(n2j(1000), "")
self.assertEqual(n2j(1000, reading=True), "せん")
self.assertEqual(n2j(3000, reading=True), "さんぜん")
self.assertEqual(n2j(8000, reading=True), "はっせん")
def test_high(self):
self.assertEqual(n2j(10000), "一万")
self.assertEqual(n2j(10000, reading=True), "いちまん")
self.assertEqual(n2j(12345), "一万二千三百四十五")
self.assertEqual(n2j(12345, reading=True),
"いちまん"
"にせん"
"さんびゃく"
"よんじゅうご")
self.assertEqual(n2j(10**8), "一億")
self.assertEqual(n2j(10**8, reading=True), "いちおく")
self.assertEqual(n2j(123456789), "一億二千三百四十五万六千七百八十九")
self.assertEqual(n2j(123456789, reading=True),
"いちおく"
"にせんさんびゃくよんじゅうごまん"
"ろくせんななひゃく"
"はちじゅうきゅう")
self.assertEqual(n2j(10**12), "一兆")
self.assertEqual(n2j(10**12, reading=True), "いっちょう")
self.assertEqual(n2j(1234567890123),
"一兆二千三百四十五億六千七百八十九万百二十三")
self.assertEqual(n2j(1234567890123, reading=True),
"いっちょう"
"にせんさんびゃくよんじゅうごおく"
"ろくせんななひゃくはちじゅうきゅうまん"
"ひゃくにじゅうさん")
# TODO: tests for 10**16 and above
def test_cardinal_float(self):
self.assertEqual(n2j(0.0123456789, prefer=[""]),
"〇点〇一二三四五六七八九")
self.assertEqual(n2j(0.0123456789, reading=True),
"れいてん"
"れいいち"
"にさん"
"よんご"
"ろくなな"
"はちきゅう")
self.assertEqual(n2j(10**8 + 0.01), "一億点零一")
self.assertEqual(n2j(10**8 + 0.01, reading=True),
"いちおくてんれいいち")
def test_ordinal(self):
self.assertEqual(n2j(0, to="ordinal"), "零番目")
self.assertEqual(n2j(0, to="ordinal", reading=True, prefer=["れい"]),
"れいばんめ")
self.assertEqual(n2j(2, to="ordinal", counter=""), "二人目")
self.assertEqual(n2j(3, to="ordinal", counter=""), "三つ目")
with self.assertRaises(NotImplementedError):
n2j(4, to="ordinal", reading=True, counter="")
def test_ordinal_num(self):
self.assertEqual(n2j(0, to="ordinal_num"), "0番目")
self.assertEqual(n2j(0, to="ordinal_num", reading=True), "0ばんめ")
self.assertEqual(n2j(2, to="ordinal_num", counter=""), "2人目")
self.assertEqual(n2j(3, to="ordinal_num", counter=""), "3つ目")
def test_currency(self):
self.assertEqual(n2j(123456789, to="currency"),
"一億二千三百四十五万六千七百八十九円")
self.assertEqual(n2j(123456789, to="currency", reading=True),
"いちおく"
"にせんさんびゃくよんじゅうごまん"
"ろくせんななひゃく"
"はちじゅうきゅうえん")
def test_year(self):
self.assertEqual(n2j(2017, to="year"), "平成二十九年")
self.assertEqual(n2j(2017, to="year", reading=True),
"へいせいにじゅうくねん")
self.assertEqual(n2j(2017, to="year", reading="arabic"),
"平成29年")
self.assertEqual(n2j(2009, to="year", era=False), "二千九年")
self.assertEqual(n2j(2009, to="year", reading=True, era=False),
"にせんくねん")
self.assertEqual(n2j(2000, to="year", era=False), "二千年")
self.assertEqual(n2j(2000, to="year", era=False, reading=True),
"にせんねん")
self.assertEqual(n2j(645, to="year"), "大化元年")
self.assertEqual(n2j(645, to="year", reading=True), "たいかがんねん")
self.assertEqual(n2j(645, to="year"), "大化元年")
self.assertEqual(n2j(645, to="year", reading=True), "たいかがんねん")
self.assertEqual(n2j(-99, to="year", era=False), "紀元前九十九年")
self.assertEqual(n2j(-99, to="year", era=False, reading=True),
"きげんぜんきゅうじゅうくねん")
self.assertEqual(n2j(1375, to="year"), "天授元年")
self.assertEqual(n2j(1375, to="year", prefer=["えいわ"]), "永和元年")