Improvements, corrections, and bug fixes for the lang_IT module. (#59)

* General refactoring and bug fixes for lang_IT

* Added Python3 support for lang_IT

* Bug fixes for ordinal numerals in lang_IT

* Fixed lang_IT for negative values and added tests

* Fixes and minor adjustments for floats in lang_IT

* Decimal => float in tests for lang_IT

* Moved a comment

* 'tre's accentuated, big numbers support, ordinal bug fix, comments, tests

* 'meno' and 'virgola' and now class values

* Destroyed EU inheritance
This commit is contained in:
Filippo Costa
2017-03-17 14:12:44 +01:00
committed by Virgil Dupras
parent 907ebbc812
commit 90e7c9dc7d
2 changed files with 284 additions and 260 deletions

View File

@@ -16,202 +16,187 @@
from __future__ import unicode_literals
from .lang_EU import Num2Word_EU
import re
import math
# Globals
# -------
ZERO = "zero"
CARDINAL_WORDS = [
ZERO, "uno", "due", "tre", "quattro", "cinque", "sei", "sette", "otto",
"nove", "dieci", "undici", "dodici", "tredici", "quattordici", "quindici",
"sedici", "diciassette", "diciotto", "diciannove"
]
ORDINAL_WORDS = [
ZERO, "primo", "secondo", "terzo", "quarto", "quinto", "sesto", "settimo",
"ottavo", "nono", "decimo", "undicesimo", "dodicesimo", "tredicesimo",
"quattordicesimo", "quindicesimo", "sedicesimo", "diciassettesimo",
"diciottesimo", "diciannovesimo"
]
# The script can extrapolate the missing numbers from the base forms.
STR_TENS = {2: "venti", 3: "trenta", 4: "quaranta", 6: "sessanta"}
# These prefixes are used for extremely big numbers.
EXPONENT_PREFIXES = [
ZERO, "m", "b", "tr", "quadr", "quint", "sest", "sett", "ott", "nov", "dec"
]
# Utils
# =====
def phonetic_contraction(string):
return (string
.replace("oo", "o") # ex. "centootto"
.replace("ao", "o") # ex. "settantaotto"
.replace("io", "o") # ex. "ventiotto"
.replace("au", "u") # ex. "trentauno"
)
def exponent_length_to_string(exponent_length):
# We always assume `exponent` to be a multiple of 3. If it's not true, then
# Num2Word_IT.big_number_to_cardinal did something wrong.
prefix = EXPONENT_PREFIXES[exponent_length // 6]
if exponent_length % 6 == 0:
return prefix + "ilione"
else:
return prefix + "iliardo"
def accentuate(string):
# This is inefficient: it may do several rewritings when deleting
# half-sentence accents. However, it is the easiest method and speed is
# not crucial (duh), so...
return " ".join(
# Deletes half-sentence accents and accentuates the last "tre"
[w.replace("tré", "tre")[:-3] + "tré"
# We shouldn't accentuate a single "tre": is has to be a composite
# word. ~~~~~~~~~~
if w[-3:] == "tre" and len(w) > 3
# Deletes half-sentence accents anyway
# ~~~~~~~~~~~~~~~~~~~~~~
else w.replace("tré", "tre")
for w in string.split()
])
def omitt_if_zero(number_to_string):
return "" if number_to_string == ZERO else number_to_string
# Main class
# ==========
class Num2Word_IT:
MINUS_PREFIX_WORD = "meno "
FLOAT_INFIX_WORD = " virgola "
class Num2Word_IT(object):
def __init__(self):
self._minus = "meno "
self._exponent = {
0 : ('',''),
3 : ('mille','mila'),
6 : ('milione','miloni'),
12 : ('miliardo','miliardi'),
18 : ('trillone','trilloni'),
24 : ('quadrilione','quadrilioni')}
self._digits = ['zero', 'uno', 'due', 'tre', 'quattro', 'cinque', 'sei', 'sette', 'otto', 'nove']
self._sep = ''
def _toWords(self, num, power=0):
str_num = str(num)
# The return string;
ret = ''
# add a the word for the minus sign if necessary
if num < 0:
ret = self._sep + self._minus
if len(str_num) > 6:
current_power = 6
# check for highest power
if power in self._exponent:
# convert the number above the first 6 digits
# with it's corresponding $power.
snum = str_num[0:-6]
if snum != '':
ret = ret + self._toWords(int(snum), power + 6)
num = int(str_num[-6:])
if num == 0:
return ret
elif num == 0 or str_num == '':
return ' ' + self._digits[0] + ' '
else:
current_power = len(str_num)
# See if we need "thousands"
thousands = math.floor(num / 1000)
if thousands == 1:
ret = ret + self._sep + 'mille' + self._sep
elif thousands > 1:
ret = ret + self._toWords(int(thousands), 3) + self._sep
# values for digits, tens and hundreds
h = int(math.floor((num / 100) % 10))
t = int(math.floor((num / 10) % 10))
d = int(math.floor(num % 10))
# centinaia: duecento, trecento, etc...
if h == 1:
if ((d==0) and (t == 0)):# is it's '100' use 'cien'
ret = ret + self._sep + 'cento'
else:
ret = ret + self._sep + 'cento'
elif h == 2 or h == 3 or h == 4 or h == 6 or h == 8:
ret = ret + self._sep + self._digits[h] + 'cento'
elif h == 5:
ret = ret + self._sep + 'cinquecento'
elif h == 7:
ret = ret + self._sep + 'settecento'
elif h == 9:
ret = ret + self._sep + 'novecento'
# decine: venti trenta, etc...
if t == 9:
if d == 1 or d == 8:
ret = ret + self._sep + 'novant'
else:
ret = ret + self._sep + 'novanta'
if t == 8:
if d == 1 or d == 8:
ret = ret + self._sep + 'ottant'
else:
ret = ret + self._sep + 'ottanta'
if t == 7:
if d == 1 or d == 8:
ret = ret + self._sep + 'settant'
else:
ret = ret + self._sep + 'settanta'
if t == 6:
if d == 1 or d == 8:
ret = ret + self._sep + 'sessant'
else:
ret = ret + self._sep + 'sessanta'
if t == 5:
if d == 1 or d == 8:
ret = ret + self._sep + 'cinquant'
else:
ret = ret + self._sep + 'cinquanta'
if t == 4:
if d == 1 or d == 8:
ret = ret + self._sep + 'quarant'
else:
ret = ret + self._sep + 'quaranta'
if t == 3:
if d == 1 or d == 8:
ret = ret + self._sep + 'trent'
else:
ret = ret + self._sep + 'trenta'
if t == 2:
if d == 0:
ret = ret + self._sep + 'venti'
elif (d == 1 or d == 8):
ret = ret + self._sep + 'vent' + self._digits[d]
else:
ret = ret + self._sep + 'venti' + self._digits[d]
if t == 1:
if d == 0:
ret = ret + self._sep + 'dieci'
elif d == 1:
ret = ret + self._sep + 'undici'
elif d == 2:
ret = ret + self._sep + 'dodici'
elif d == 3:
ret = ret + self._sep + 'tredici'
elif d == 4:
ret = ret + self._sep + 'quattordici'
elif d == 5:
ret = ret + self._sep + 'quindici'
elif d == 6:
ret = ret + self._sep + 'sedici'
elif d == 7:
ret = ret + self._sep + 'diciassette'
elif d == 8:
ret = ret + self._sep + 'diciotto'
elif d == 9:
ret = ret + self._sep + 'diciannove'
# add digits only if it is a multiple of 10 and not 1x or 2x
if t != 1 and t != 2 and d > 0:
# don't add 'e' for numbers below 10
if t != 0:
# use 'un' instead of 'uno' when there is a suffix ('mila', 'milloni', etc...)
if (power > 0) and ( d == 1):
ret = ret + self._sep + 'e un'
else:
ret = ret + self._sep + '' + self._digits[d]
else:
if power > 0 and d == 1:
ret = ret + self._sep + 'un '
else:
ret = ret + self._sep + self._digits[d]
if power > 0:
if power in self._exponent:
lev = self._exponent[power]
if lev is None:
return None
# if it's only one use the singular suffix
if d == 1 and t == 0 and h == 0:
suffix = lev[0]
else:
suffix = lev[1]
if num != 0:
ret = ret + self._sep + suffix
return ret
def to_cardinal(self, number):
return self._toWords(number)
def to_ordinal_num(self, number):
pass
def to_ordinal(self,value):
if 0 <= value <= 10:
return ["primo", "secondo", "terzo", "quarto", "quinto", "sesto", "settimo", "ottavo", "nono", "decimo"][value - 1]
def float_to_words(self, float_number, ordinal=False):
if ordinal:
prefix = self.to_ordinal(int(float_number))
else:
as_word = self._toWords(value)
if as_word.endswith("dici"):
return re.sub("dici$", "dicesimo", as_word)
elif as_word.endswith("to"):
return re.sub("to$", "tesimo", as_word)
elif as_word.endswith("ta"):
return re.sub("ta$", "tesimo", as_word)
prefix = self.to_cardinal(int(float_number))
postfix = " ".join(
# Drops the trailing zero and comma ~~~~
[self.to_cardinal(int(c)) for c in str(float_number % 1)[2:]]
)
return prefix + Num2Word_IT.FLOAT_INFIX_WORD + postfix
def tens_to_cardinal(self, number):
tens = number // 10
units = number % 10
if tens in STR_TENS:
prefix = STR_TENS[tens]
else:
prefix = CARDINAL_WORDS[tens][:-1] + "anta"
postfix = omitt_if_zero(CARDINAL_WORDS[units])
return phonetic_contraction(prefix + postfix)
def hundreds_to_cardinal(self, number):
hundreds = number // 100
prefix = "cento"
if hundreds != 1:
prefix = CARDINAL_WORDS[hundreds] + prefix
postfix = omitt_if_zero(self.to_cardinal(number % 100))
return phonetic_contraction(prefix + postfix)
def thousands_to_cardinal(self, number):
thousands = number // 1000
if thousands == 1:
prefix = "mille"
else:
prefix = self.to_cardinal(thousands) + "mila"
postfix = omitt_if_zero(self.to_cardinal(number % 1000))
# "mille" and "mila" don't need any phonetic contractions
return prefix + postfix
def big_number_to_cardinal(self, number):
digits = [c for c in str(number)]
length = len(digits)
if length >= 66:
raise NotImplementedError("The given number is too large.")
# This is how many digits come before the "illion" term.
# cento miliardi => 3
# dieci milioni => 2
# un miliardo => 1
predigits = length % 3 or 3
multiplier = digits[:predigits]
exponent = digits[predigits:]
# Default infix string: "milione", "biliardo", "sestilione", ecc.
infix = exponent_length_to_string(len(exponent))
if multiplier == ["1"]:
prefix = "un "
else:
prefix = self.to_cardinal(int("".join(multiplier)))
# Plural form ~~~~~~~~~~~
infix = " " + infix[:-1] + "i"
# Read as: Does the value of exponent equal 0?
if set(exponent) != set("0"):
postfix = self.to_cardinal(int("".join(exponent)))
if " e " in postfix:
infix += ", "
else:
return as_word + "simo"
infix += " e "
else:
postfix = ""
return prefix + infix + postfix
def to_cardinal(self, number):
if number < 0:
string = Num2Word_IT.MINUS_PREFIX_WORD + self.to_cardinal(-number)
elif number % 1 != 0:
string = self.float_to_words(number)
elif number < 20:
string = CARDINAL_WORDS[number]
elif number < 100:
string = self.tens_to_cardinal(number)
elif number < 1000:
string = self.hundreds_to_cardinal(number)
elif number < 1000000:
string = self.thousands_to_cardinal(number)
else:
string = self.big_number_to_cardinal(number)
return accentuate(string)
n2w = Num2Word_IT()
to_card = n2w.to_cardinal
to_ord = n2w.to_ordinal
to_ordnum = n2w.to_ordinal_num
def to_ordinal(self, number):
tens = number % 100
# Italian grammar is poorly defined here ¯\_(ツ)_/¯:
# centodecimo VS centodieciesimo VS centesimo decimo?
is_outside_teens = not 10 < tens < 20
if number < 0:
return Num2Word_IT.MINUS_PREFIX_WORD + self.to_ordinal(-number)
elif number % 1 != 0:
return self.float_to_words(number, ordinal=True)
elif number < 20:
return ORDINAL_WORDS[number]
elif is_outside_teens and tens % 10 == 3:
# Gets ride of the accent ~~~~~~~~~~
return self.to_cardinal(number)[:-1] + "eesimo"
elif is_outside_teens and tens % 10 == 6:
return self.to_cardinal(number) + "esimo"
else:
string = self.to_cardinal(number)[:-1]
if string[-3:] == "mil":
string += "l"
return string + "esimo"

View File

@@ -15,82 +15,121 @@
# MA 02110-1301 USA
from __future__ import unicode_literals
from unittest import TestCase
from num2words import num2words
class Num2WordsITTest(TestCase):
def test_number(self):
maxDiff = None
test_cases = (
(1,'uno'),
(2,'due'),
(3,'tre'),
(11,'undici'),
(12,'dodici'),
(16,'sedici'),
(19,'diciannove'),
(20,'venti'),
(21,'ventuno'),
(26,'ventisei'),
(28,'ventotto'),
(30,'trenta'),
(31,'trentuno'),
(40,'quaranta'),
(43,'quarantatre'),
(50,'cinquanta'),
(55,'cinquantacinque'),
(60,'sessanta'),
(67,'sessantasette'),
(70,'settanta'),
(79,'settantanove'),
(100,'cento'),
(101,'centouno'),
(199,'centonovantanove'),
(203,'duecentotre'),
(287,'duecentoottantasette'),
(300,'trecento'),
(356,'trecentocinquantasei'),
(410,'quattrocentodieci'),
(434,'quattrocentotrentaquattro'),
(578,'cinquecentosettantotto'),
(689,'seicentoottantanove'),
(729,'settecentoventinove'),
(894,'ottocentonovantaquattro'),
(999,'novecentonovantanove'),
(1000,'mille'),
(1001,'milleuno'),
(1097,'millenovantasette'),
(1104,'millecentoquattro'),
(1243,'milleduecentoquarantatre'),
(2385,'duemilatrecentoottantacinque'),
(3766,'tremilasettecentosessantasei'),
(4196,'quattromilacentonovantasei'),
(5846,'cinquemilaottocentoquarantasei'),
(6459,'seimilaquattrocentocinquantanove'),
(7232,'settemiladuecentotrentadue'),
(8569,'ottomilacinquecentosessantanove'),
(9539,'novemilacinquecentotrentanove'),
(1000000,'un milione'),
(1000001,'un milioneuno'),
# (1000000100,'un miliardocento'), # DOES NOT WORK TODO: FIX
)
def test_negative(self):
number = 648972145
pos_crd = num2words(+number, lang="it")
neg_crd = num2words(-number, lang="it")
pos_ord = num2words(+number, lang="it", ordinal=True)
neg_ord = num2words(-number, lang="it", ordinal=True)
self.assertEqual("meno " + pos_crd, neg_crd)
self.assertEqual("meno " + pos_ord, neg_ord)
for test in test_cases:
self.assertEqual(num2words(test[0], lang='it'), test[1])
def test_float_to_cardinal(self):
self.assertTrue("tre virgola uno quattro uno" in num2words(3.1415, lang="it"))
self.assertTrue("meno cinque virgola uno" in num2words(-5.15, lang="it"))
self.assertTrue("meno zero virgola uno" in num2words(-0.15, lang="it"))
def test_ordinal(self):
def test_float_to_ordinal(self):
self.assertTrue("terzo virgola uno quattro uno" in num2words(3.1415, lang="it", ordinal=True))
self.assertTrue("meno quinto virgola uno" in num2words(-5.15, lang="it", ordinal=True))
self.assertTrue("meno zero virgola uno" in num2words(-0.15, lang="it", ordinal=True))
test_cases = (
(1,'primo'),
(8,'ottavo'),
(12,'dodicesimo'),
(14,'quattordicesimo'),
(28,'ventottesimo'),
(100,'centesimo'),
)
def test_0(self):
self.assertEqual(num2words(0, lang="it"), "zero")
self.assertEqual(num2words(0, lang="it", ordinal=True), "zero")
for test in test_cases:
self.assertEqual(num2words(test[0], lang='it', ordinal=True), test[1])
def test_1_to_10(self):
self.assertEqual(num2words(1, lang="it"), "uno")
self.assertEqual(num2words(2, lang="it"), "due")
self.assertEqual(num2words(7, lang="it"), "sette")
self.assertEqual(num2words(10, lang="it"), "dieci")
def test_11_to_19(self):
self.assertEqual(num2words(11, lang="it"), "undici")
self.assertEqual(num2words(13, lang="it"), "tredici")
self.assertEqual(num2words(15, lang="it"), "quindici")
self.assertEqual(num2words(16, lang="it"), "sedici")
self.assertEqual(num2words(19, lang="it"), "diciannove")
def test_20_to_99(self):
self.assertEqual(num2words(20, lang="it"), "venti")
self.assertEqual(num2words(23, lang="it"), "ventitré")
self.assertEqual(num2words(28, lang="it"), "ventotto")
self.assertEqual(num2words(31, lang="it"), "trentuno")
self.assertEqual(num2words(40, lang="it"), "quaranta")
self.assertEqual(num2words(66, lang="it"), "sessantasei")
self.assertEqual(num2words(92, lang="it"), "novantadue")
def test_100_to_999(self):
self.assertEqual(num2words(100, lang="it"), "cento")
self.assertEqual(num2words(111, lang="it"), "centoundici")
self.assertEqual(num2words(150, lang="it"), "centocinquanta")
self.assertEqual(num2words(196, lang="it"), "centonovantasei")
self.assertEqual(num2words(200, lang="it"), "duecento")
self.assertEqual(num2words(210, lang="it"), "duecentodieci")
self.assertEqual(num2words(701, lang="it"), "settecentouno")
def test_1000_to_9999(self):
self.assertEqual(num2words(1000, lang="it"), "mille")
self.assertEqual(num2words(1001, lang="it"), "milleuno")
self.assertEqual(num2words(1500, lang="it"), "millecinquecento")
self.assertEqual(num2words(7378, lang="it"), "settemilatrecentosettantotto")
self.assertEqual(num2words(2000, lang="it"), "duemila")
self.assertEqual(num2words(2100, lang="it"), "duemilacento")
self.assertEqual(num2words(6870, lang="it"), "seimilaottocentosettanta")
self.assertEqual(num2words(10000, lang="it"), "diecimila")
self.assertEqual(num2words(98765, lang="it"), "novantottomilasettecentosessantacinque")
self.assertEqual(num2words(100000, lang="it"), "centomila")
self.assertEqual(num2words(523456, lang="it"), "cinquecentoventitremilaquattrocentocinquantasei")
def test_big(self):
self.assertEqual(num2words(1000000, lang="it"), "un milione")
self.assertEqual(num2words(1000007, lang="it"), "un milione e sette")
self.assertEqual(num2words(1200000, lang="it"), "un milione e duecentomila")
self.assertEqual(num2words(3000000, lang="it"), "tre milioni")
self.assertEqual(num2words(3000005, lang="it"), "tre milioni e cinque")
self.assertEqual(num2words(3800000, lang="it"), "tre milioni e ottocentomila")
self.assertEqual(num2words(1000000000, lang="it"), "un miliardo")
self.assertEqual(num2words(1000000017, lang="it"), "un miliardo e diciassette")
self.assertEqual(num2words(2000000000, lang="it"), "due miliardi")
self.assertEqual(num2words(2000001000, lang="it"), "due miliardi e mille")
self.assertEqual(num2words(1234567890, lang="it"), "un miliardo, duecentotrentaquattro milioni e cinquecentosessantasettemilaottocentonovanta")
self.assertEqual(num2words(1000000000000, lang="it"), "un bilione")
self.assertEqual(num2words(123456789012345678901234567890, lang="it"), "centoventitré quadriliardi, quattrocentocinquantasei quadrilioni, settecentottantanove triliardi, dodici trilioni, trecentoquarantacinque biliardi, seicentosettantotto bilioni, novecentouno miliardi, duecentotrentaquattro milioni e cinquecentosessantasettemilaottocentonovanta")
def test_nth_1_to_99(self):
self.assertEqual(num2words(1, lang="it", ordinal=True), "primo")
self.assertEqual(num2words(8, lang="it", ordinal=True), "ottavo")
self.assertEqual(num2words(23, lang="it", ordinal=True), "ventitreesimo")
self.assertEqual(num2words(47, lang="it", ordinal=True), "quarantasettesimo")
self.assertEqual(num2words(99, lang="it", ordinal=True), "novantanovesimo")
def test_nth_100_to_999(self):
self.assertEqual(num2words(100, lang="it", ordinal=True), "centesimo")
self.assertEqual(num2words(112, lang="it", ordinal=True), "centododicesimo")
self.assertEqual(num2words(120, lang="it", ordinal=True), "centoventesimo")
self.assertEqual(num2words(316, lang="it", ordinal=True), "trecentosedicesimo")
self.assertEqual(num2words(700, lang="it", ordinal=True), "settecentesimo")
self.assertEqual(num2words(803, lang="it", ordinal=True), "ottocentotreesimo")
self.assertEqual(num2words(923, lang="it", ordinal=True), "novecentoventitreesimo")
def test_nth_1000_to_999999(self):
self.assertEqual(num2words(1000, lang="it", ordinal=True), "millesimo")
self.assertEqual(num2words(1001, lang="it", ordinal=True), "milleunesimo")
self.assertEqual(num2words(1003, lang="it", ordinal=True), "milletreesimo")
self.assertEqual(num2words(1200, lang="it", ordinal=True), "milleduecentesimo")
self.assertEqual(num2words(8640, lang="it", ordinal=True), "ottomilaseicentoquarantesimo")
self.assertEqual(num2words(14000, lang="it", ordinal=True), "quattordicimillesimo")
self.assertEqual(num2words(123456, lang="it", ordinal=True), "centoventitremilaquattrocentocinquantaseiesimo")
self.assertEqual(num2words(987654, lang="it", ordinal=True), "novecentottantasettemilaseicentocinquantaquattresimo")
def test_nth_big(self):
self.assertEqual(num2words(1000000001, lang="it", ordinal=True), "un miliardo e unesimo")
self.assertEqual(num2words(123456789012345678901234567890, lang="it", ordinal=True), "centoventitré quadriliardi, quattrocentocinquantasei quadrilioni, settecentottantanove triliardi, dodici trilioni, trecentoquarantacinque biliardi, seicentosettantotto bilioni, novecentouno miliardi, duecentotrentaquattro milioni e cinquecentosessantasettemilaottocentonovantesimo")