Add support for Serbian (Latin) (#207)

* Add support for Serbian language

* Fix the billion strings for short scale

The long-scale name had been used instead of the short-scale one. Simplified the scale definition and the gender definition of the ones.

* Adds Serbian to README
This commit is contained in:
fatkaratekid
2018-11-11 00:36:51 +01:00
committed by Ernesto Rodriguez Ortiz
parent 5c9bce19e4
commit 1ed09f511d
4 changed files with 460 additions and 0 deletions

View File

@@ -103,6 +103,8 @@ Besides the numerical argument, there are two main optional arguments.
* ``pl`` (Polish)
* ``pt`` (Portuguese)
* ``pt_BR`` (Portuguese - Brazilian)
* ``ro`` (Romanian)
* ``ru`` (Russian)
* ``sl`` (Slovene)
* ``sr`` (Serbian)

View File

@@ -47,6 +47,7 @@ from . import lang_TR
from . import lang_NL
from . import lang_UK
from . import lang_SL
from . import lang_SR
from . import lang_TH
CONVERTER_CLASSES = {
@@ -71,6 +72,7 @@ CONVERTER_CLASSES = {
'ro': lang_RO.Num2Word_RO(),
'ru': lang_RU.Num2Word_RU(),
'sl': lang_SL.Num2Word_SL(),
'sr': lang_SR.Num2Word_SR(),
'no': lang_NO.Num2Word_NO(),
'dk': lang_DK.Num2Word_DK(),
'pt': lang_PT.Num2Word_PT(),

215
num2words/lang_SR.py Normal file
View File

@@ -0,0 +1,215 @@
# -*- encoding: utf-8 -*-
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
from __future__ import unicode_literals
from .base import Num2Word_Base
from .currency import parse_currency_parts, prefix_currency
from .utils import get_digits, splitbyx
# Word used for the number zero.
ZERO = ('nula',)

# Units 1-9, keyed by digit. Each value is (masculine, feminine); only
# 1 and 2 actually differ between the two genders.
ONES = {
    1: ('jedan', 'jedna'),
    2: ('dva', 'dve'),
    3: ('tri', 'tri'),
    4: ('četiri', 'četiri'),
    5: ('pet', 'pet'),
    6: ('šest', 'šest'),
    7: ('sedam', 'sedam'),
    8: ('osam', 'osam'),
    9: ('devet', 'devet'),
}

# 10-19, keyed by the units digit (used when the tens digit is 1).
TENS = {
    digit: (word,)
    for digit, word in enumerate((
        'deset',
        'jedanaest',
        'dvanaest',
        'trinaest',
        'četrnaest',
        'petnaest',
        'šesnaest',
        'sedamnaest',
        'osamnaest',
        'devetnaest',
    ))
}

# Round tens 20-90, keyed by the tens digit (2-9).
TWENTIES = {
    digit: (word,)
    for digit, word in enumerate((
        'dvadeset',
        'trideset',
        'četrdeset',
        'pedeset',
        'šezdeset',
        'sedamdeset',
        'osamdeset',
        'devedeset',
    ), 2)
}

# Round hundreds 100-900, keyed by the hundreds digit (1-9).
HUNDREDS = {
    digit: (word,)
    for digit, word in enumerate((
        'sto',
        'dvesta',
        'trista',
        'četristo',
        'petsto',
        'šesto',
        'sedamsto',
        'osamsto',
        'devetsto',
    ), 1)
}

# Names of the powers of 1000, keyed by the position of the three-digit
# group (0 = units group). Each value holds three grammatical forms of the
# name followed by a flag telling whether the name is feminine.
SCALE = {
    0: ('', '', '', False),
    1: ('hiljada', 'hiljade', 'hiljada', True),  # 10^3
    2: ('milion', 'miliona', 'miliona', False),  # 10^6
    3: ('bilion', 'biliona', 'biliona', False),  # 10^9
    4: ('trilion', 'triliona', 'triliona', False),  # 10^12
    5: ('kvadrilion', 'kvadriliona', 'kvadriliona', False),  # 10^15
    6: ('kvintilion', 'kvintiliona', 'kvintiliona', False),  # 10^18
    7: ('sekstilion', 'sekstiliona', 'sekstiliona', False),  # 10^21
    8: ('septilion', 'septiliona', 'septiliona', False),  # 10^24
    9: ('oktilion', 'oktiliona', 'oktiliona', False),  # 10^27
    10: ('nonilion', 'noniliona', 'noniliona', False),  # 10^30
}
class Num2Word_SR(Num2Word_Base):
    """Spell out numbers and currency amounts in Serbian (Latin script).

    Cardinals and currency amounts are supported; ordinals are not and
    ``to_ordinal`` raises ``NotImplementedError``.
    """

    # Per currency code: (major unit forms, minor unit forms). Each forms
    # tuple holds three grammatical forms chosen by ``pluralize`` and a
    # trailing flag that is True when the unit name is feminine.
    CURRENCY_FORMS = {
        'RUB': (
            ('rublja', 'rublje', 'rublji', True),
            ('kopejka', 'kopejke', 'kopejki', True)
        ),
        'EUR': (
            ('evro', 'evra', 'evra', False),
            ('cent', 'centa', 'centi', False)
        ),
        'RSD': (
            ('dinar', 'dinara', 'dinara', False),
            ('para', 'pare', 'para', True)
        ),
    }

    def setup(self):
        """Set the words used for the minus sign and the decimal mark."""
        self.negword = "minus"
        self.pointword = "zapeta"

    def to_cardinal(self, number, feminine=False):
        """Return ``number`` spelled out as a cardinal.

        Args:
            number: Value to convert; its ``str()`` form is parsed, and a
                ``,`` is accepted as the decimal mark.
            feminine (bool): Use the feminine forms of 1 and 2.

        Returns:
            str: The integer part, and for decimals the point word followed
            by the fractional digits read as a number. Leading zeros of the
            fraction are spelled out one by one so that e.g. 5.05 and 5.5
            produce different text.
        """
        text = str(number).replace(',', '.')
        if '.' not in text:
            return self._int2word(int(text), feminine)

        integer_part, fraction = text.split('.')
        # Take the sign off before int(): int('-0') is 0, which would
        # silently drop the minus of values such as -0.5.
        is_negative = integer_part.startswith('-')
        if is_negative:
            integer_part = integer_part[1:]
        whole = int(integer_part)

        # int() discards leading zeros, so spell them out explicitly. An
        # all-zero fraction is left to _int2word, which yields one zero.
        significant = fraction.lstrip('0')
        fraction_words = []
        if significant:
            fraction_words = [ZERO[0]] * (len(fraction) - len(significant))
        fraction_words.append(self._int2word(int(fraction), feminine))

        words = [self._int2word(whole, feminine), self.pointword]
        words.extend(fraction_words)
        # No minus for a negative zero such as -0.0.
        if is_negative and (whole or significant):
            words.insert(0, self.negword)
        return u' '.join(words)

    def pluralize(self, number, forms):
        """Pick the grammatical form of a noun that follows ``number``.

        Args:
            number (int): The count the noun refers to.
            forms: Sequence whose first three items are the candidate forms.

        Returns:
            ``forms[0]`` when the number ends in 1, ``forms[1]`` when it
            ends in 2-4, ``forms[2]`` otherwise; numbers whose last two
            digits are 10-20 always take ``forms[2]``.
        """
        if 10 <= number % 100 <= 20:
            return forms[2]
        last_digit = number % 10
        if last_digit == 1:
            return forms[0]
        if 1 < last_digit < 5:
            return forms[1]
        return forms[2]

    def to_ordinal(self, number):
        """Ordinals are not implemented for Serbian."""
        raise NotImplementedError()

    def _cents_verbose(self, number, currency):
        """Spell out the minor-unit amount in the gender of that unit."""
        return self._int2word(
            number,
            self.CURRENCY_FORMS[currency][1][-1]
        )

    def _int2word(self, number, feminine=False):
        """Spell out an integer.

        Args:
            number (int): Integer to convert; may be negative.
            feminine (bool): Use the feminine forms of 1 and 2 in groups
                whose scale word does not already force the feminine.

        Returns:
            str: Space-separated words.

        Raises:
            KeyError: If the number has more three-digit groups than
                ``SCALE`` has entries.
        """
        if number < 0:
            # Keep the requested gender for the magnitude as well.
            return ' '.join(
                [self.negword, self._int2word(abs(number), feminine)]
            )

        if number == 0:
            return ZERO[0]

        words = []
        chunks = list(splitbyx(str(number), 3))
        for index, chunk in enumerate(chunks):
            # Position of this three-digit group counted from the right.
            scale_idx = len(chunks) - 1 - index
            if chunk == 0:
                # An empty group contributes neither digits nor scale word.
                continue

            digit_right, digit_mid, digit_left = get_digits(chunk)

            if digit_left > 0:
                words.append(HUNDREDS[digit_left][0])

            if digit_mid > 1:
                words.append(TWENTIES[digit_mid][0])

            if digit_mid == 1:
                # 10-19 are single words looked up by the units digit.
                words.append(TENS[digit_right][0])
            elif digit_right > 0:
                # A feminine scale word (e.g. 'hiljada') forces the
                # feminine form of the digit in front of it.
                is_feminine = feminine or SCALE[scale_idx][-1]
                words.append(ONES[digit_right][int(is_feminine)])

            if scale_idx > 0:
                words.append(self.pluralize(chunk, SCALE[scale_idx]))

        return ' '.join(words)

    def to_currency(self, val, currency='EUR', cents=True, seperator=',',
                    adjective=False):
        """Return ``val`` spelled out as an amount of money.

        Args:
            val: Numeric value
            currency (str): Currency code
            cents (bool): Verbose cents
            seperator (str): Cent seperator (the parameter name keeps its
                historical spelling for compatibility with callers)
            adjective (bool): Prefix currency name with adjective

        Returns:
            str: Formatted string

        Raises:
            NotImplementedError: If ``currency`` is not in
                ``CURRENCY_FORMS``.
        """
        left, right, is_negative = parse_currency_parts(val)

        try:
            cr1, cr2 = self.CURRENCY_FORMS[currency]
        except KeyError:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"' %
                (currency, self.__class__.__name__))

        if adjective and currency in self.CURRENCY_ADJECTIVES:
            cr1 = prefix_currency(
                self.CURRENCY_ADJECTIVES[currency],
                cr1
            )

        minus_str = "%s " % self.negword if is_negative else ""
        cents_str = self._cents_verbose(right, currency) \
            if cents else self._cents_terse(right, currency)

        return u'%s%s %s%s %s %s' % (
            minus_str,
            # The major amount agrees in gender with the major unit name.
            self.to_cardinal(left, feminine=cr1[-1]),
            self.pluralize(left, cr1),
            seperator,
            cents_str,
            self.pluralize(right, cr2)
        )

241
tests/test_sr.py Normal file
View File

@@ -0,0 +1,241 @@
# -*- encoding: utf-8 -*-
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
from __future__ import unicode_literals
from unittest import TestCase
from num2words import num2words
class Num2WordsSRTest(TestCase):
    """Expected Serbian ('sr') output of ``num2words``."""

    def test_cardinal(self):
        """Integers (and one float) spelled out as cardinals."""
        cases = (
            ("sto", 100),
            ("sto jedan", 101),
            ("sto deset", 110),
            ("sto petnaest", 115),
            ("sto dvadeset tri", 123),
            ("jedna hiljada", 1000),
            ("jedna hiljada jedan", 1001),
            ("dve hiljade dvanaest", 2012),
            (
                "dvanaest hiljada petsto devetnaest zapeta osamdeset pet",
                12519.85,
            ),
            (
                "jedan bilion dvesta trideset četiri miliona petsto "
                "šezdeset sedam hiljada osamsto devedeset",
                1234567890,
            ),
            (
                "dvesta petnaest noniliona četristo šezdeset jedan "
                "oktilion četristo sedam septiliona osamsto devedeset "
                "dva sekstiliona trideset devet kvintiliona dva kvadriliona "
                "sto pedeset sedam triliona sto osamdeset devet biliona "
                "osamsto osamdeset tri miliona devetsto jedna hiljada "
                "šesto sedamdeset šest",
                215461407892039002157189883901676,
            ),
            (
                "sedamsto devetnaest noniliona devedeset četiri oktiliona "
                "dvesta trideset četiri septiliona šesto devedeset tri "
                "sekstiliona šesto šezdeset tri kvintiliona trideset "
                "četiri kvadriliona osamsto dvadeset dva triliona osamsto "
                "dvadeset četiri biliona trista osamdeset četiri miliona "
                "dvesta dvadeset hiljada dvesta devedeset jedan",
                719094234693663034822824384220291,
            ),
            ("pet", 5),
            ("petnaest", 15),
            ("sto pedeset četiri", 154),
            ("jedna hiljada sto trideset pet", 1135),
            ("četristo osamnaest hiljada petsto trideset jedan", 418531),
            ("jedan milion sto trideset devet", 1000139),
        )
        for expected, number in cases:
            self.assertEqual(expected, num2words(number, lang='sr'))

    def test_floating_point(self):
        """Decimals are read as integer part, point word, fraction."""
        cases = (
            ("pet zapeta dva", 5.2),
            ("petsto šezdeset jedan zapeta četrdeset dva", 561.42),
        )
        for expected, number in cases:
            self.assertEqual(expected, num2words(number, lang='sr'))

    def test_to_ordinal(self):
        """Ordinals are not available yet and must raise."""
        # @TODO: implement to_ordinal
        self.assertRaises(
            NotImplementedError, num2words, 1, lang='sr', to='ordinal'
        )

    def test_to_currency(self):
        """Currency amounts, including gender and plural agreement."""
        eur = {'currency': 'EUR'}
        rub = {'currency': 'RUB'}
        rsd = {'currency': 'RSD'}
        eur_and = {'currency': 'EUR', 'seperator': ' i'}
        rub_and = {'currency': 'RUB', 'seperator': ' i'}

        # (expected text, value, extra keyword arguments)
        cases = (
            ('jedan evro, nula centi', 1.0, eur),
            ('dva evra, nula centi', 2.0, eur),
            ('pet evra, nula centi', 5.0, eur),
            ('dva evra, jedan cent', 2.01, eur),
            ('dva evra, dva centa', 2.02, eur),
            ('dva evra, pet centi', 2.05, eur),
            ('dve rublje, nula kopejki', 2.0, rub),
            ('dve rublje, jedna kopejka', 2.01, rub),
            ('dve rublje, dve kopejke', 2.02, rub),
            ('dve rublje, pet kopejki', 2.05, rub),
            ('jedan dinar, nula para', 1.0, rsd),
            ('dva dinara, dve pare', 2.02, rsd),
            ('pet dinara, pet para', 5.05, rsd),
            ('jedanaest dinara, jedanaest para', 11.11, rsd),
            ('dvadeset jedan dinar, dvadeset jedna para', 21.21, rsd),
            ('dvadeset jedan evro, dvadeset jedan cent', 21.21, eur),
            ('dvadeset jedna rublja, dvadeset jedna kopejka', 21.21, rub),
            (
                'jedna hiljada dvesta trideset četiri evra, '
                'pedeset šest centi',
                1234.56,
                eur,
            ),
            (
                'jedna hiljada dvesta trideset četiri rublje, '
                'pedeset šest kopejki',
                1234.56,
                rub,
            ),
            # Integer inputs are given in minor units.
            ('sto jedan evro i jedanaest centi', 10111, eur_and),
            ('sto jedna rublja i dvadeset jedna kopejka', 10121, rub_and),
            ('sto jedna rublja i dvadeset dve kopejke', 10122, rub_and),
            ('sto jedan evro i dvadeset jedan cent', 10121, eur_and),
            # cents=False keeps the minor amount as digits.
            (
                'minus dvanaest hiljada petsto devetnaest evra, 85 centi',
                -1251985,
                {'currency': 'EUR', 'cents': False},
            ),
            (
                "trideset osam evra i 40 centi",
                '38.4',
                {'currency': 'EUR', 'seperator': ' i', 'cents': False},
            ),
        )
        for expected, value, options in cases:
            self.assertEqual(
                expected,
                num2words(value, lang='sr', to='currency', **options)
            )