Add support for Portuguese (Portugal) (PT) (#198)

* add support for Portuguese (Portugal) (PT)
* FIX ADD SUPPORT TO PORTUGUESE (PORTUGAL) [PT]
  - removed venv/ from .gitignore.
  - corrected "milião" to "milhão" in the merge function.
  - removed the hack for negword; the original value is now backed up and restored.
* add support PT (Portugal) (PT)
* add support PT (Portugal) (PT)
* add support PT (Portugal) (PT)
* add support PT (Portugal) (PT), new tests
2026-02-03 08:52:24 +00:00 · 2018-10-20 20:53:14 +01:00
parent 6d30cb9e83
commit 0f63859572
5 changed files with 710 additions and 104 deletions
--- a/num2words/init.py
+++ b/num2words/init.py
@@ -36,6 +36,7 @@ from . import lang_ID
 from . import lang_JA
 from . import lang_NO
 from . import lang_DK
+from . import lang_PT
 from . import lang_PT_BR
 from . import lang_HE
 from . import lang_IT
@@ -72,6 +73,7 @@ CONVERTER_CLASSES = {
    'sl': lang_SL.Num2Word_SL(),
    'no': lang_NO.Num2Word_NO(),
    'dk': lang_DK.Num2Word_DK(),
+    'pt': lang_PT.Num2Word_PT(),
    'pt_BR': lang_PT_BR.Num2Word_PT_BR(),
    'he': lang_HE.Num2Word_HE(),
    'it': lang_IT.Num2Word_IT(),
--- a/num2words/lang_PT.py
+++ b/num2words/lang_PT.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2003, Taro Ogawa.  All Rights Reserved.
+# Copyright (c) 2013, Savoir-faire Linux inc.  All Rights Reserved.
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301 USA
+
+from __future__ import division, unicode_literals
+
+import re
+
+from .lang_EU import Num2Word_EU
+
+# (singular, plural) currency-unit names shared by several entries of
+# Num2Word_PT.CURRENCY_FORMS below.
+DOLLAR = ('dólar', 'dólares')
+CENTS = ('cêntimo', 'cêntimos')
+
+
+class Num2Word_PT(Num2Word_EU):
+    """Number-to-words converter for Portuguese as written in Portugal."""
+
+    # Maps a currency code to a pair of (singular, plural) tuples: the
+    # first one names the main unit, the second one names the subunit.
+    CURRENCY_FORMS = {
+        'AUD': (DOLLAR, CENTS),
+        'CAD': (DOLLAR, CENTS),
+        'EUR': (('euro', 'euros'), CENTS),
+        'GBP': (('libra', 'libras'), ('péni', 'pence')),
+        'USD': (DOLLAR, CENTS),
+    }
+
+    # NOTE(review): these suffixes are presumably consumed by Num2Word_EU
+    # when it builds the names of the large powers of ten -- confirm in
+    # lang_EU.
+    GIGA_SUFFIX = None
+    MEGA_SUFFIX = "ilião"
+
+    def setup(self):
+        """Fill in the word tables used for Portuguese (Portugal).
+
+        Runs the parent ``setup`` first, then sets the high, mid and low
+        number words, the negative and decimal-point words, the
+        ``hundreds`` table read by ``merge`` and the ``ords`` and
+        ``thousand_separators`` tables read by ``to_ordinal``.
+        """
+        super(Num2Word_PT, self).setup()
+        self.high_numwords = self.gen_high_numwords(
+            [], [], ["quatr", "tr", "b", "m"]
+        )
+        self.negword = "menos "
+        self.pointword = "vírgula"
+        self.exclude_title = ["e", "vírgula", "menos"]
+
+        # 1000 and 100 first, then the tens from 90 down to 30.
+        tens_words = (
+            "noventa", "oitenta", "setenta", "sessenta",
+            "cinquenta", "quarenta", "trinta",
+        )
+        self.mid_numwords = [(1000, "mil"), (100, "cem")]
+        self.mid_numwords.extend(zip(range(90, 20, -10), tens_words))
+
+        # Descending from 20 to 0. Num2Word_PT_BR patches some entries by
+        # index, so the order of this list must not change.
+        self.low_numwords = [
+            "vinte",
+            "dezanove", "dezoito", "dezassete", "dezasseis", "quinze",
+            "catorze", "treze", "doze", "onze", "dez",
+            "nove", "oito", "sete", "seis", "cinco",
+            "quatro", "três", "dois", "um", "zero",
+        ]
+
+        # One table per digit position inside a three-digit group (units,
+        # tens, hundreds). Each table maps a digit to its ordinal word;
+        # digit 0 maps to the empty string.
+        ordinal_units = (
+            "", "primeiro", "segundo", "terceiro", "quarto", "quinto",
+            "sexto", "sétimo", "oitavo", "nono",
+        )
+        ordinal_tens = (
+            "", "décimo", "vigésimo", "trigésimo", "quadragésimo",
+            "quinquagésimo", "sexagésimo", "septuagésimo", "octogésimo",
+            "nonagésimo",
+        )
+        ordinal_hundreds = (
+            "", "centésimo", "ducentésimo", "tricentésimo",
+            "quadrigentésimo", "quingentésimo", "seiscentésimo",
+            "septigentésimo", "octigentésimo", "nongentésimo",
+        )
+        self.ords = [
+            dict(enumerate(words))
+            for words in (ordinal_units, ordinal_tens, ordinal_hundreds)
+        ]
+
+        # Keyed by the digit index (counted from the right) at which each
+        # three-digit group starts: 3, 6, 9, 12, 15.
+        self.thousand_separators = dict(zip(
+            range(3, 18, 3),
+            (
+                "milésimo",
+                "milionésimo",
+                "milésimo milionésimo",
+                "bilionésimo",
+                "milésimo bilionésimo",
+            ),
+        ))
+
+        # Words for 1xx-9xx, keyed by the hundreds digit.
+        self.hundreds = dict(enumerate((
+            "cento", "duzentos", "trezentos", "quatrocentos", "quinhentos",
+            "seiscentos", "setecentos", "oitocentos", "novecentos",
+        ), 1))
+
+    def merge(self, curr, next):
+        """Combine two adjacent ``(text, number)`` pairs into one pair.
+
+        ``curr`` is the left-hand pair and ``next`` the right-hand one.
+        When the right number is smaller than the left one the pair is
+        added and joined with " e "; otherwise the numbers are multiplied
+        and the right-hand word is put in the plural where needed.
+        """
+        left_words, left, right_words, right = curr + next
+
+        if left == 1:
+            if right < 1000000:
+                # below one million the unit is not spoken: return the
+                # right-hand pair untouched
+                return next
+            left_words = "um"
+        elif left == 100 and right % 1000 != 0:
+            left_words = "cento"
+
+        if right < left:
+            return ("%s e %s" % (left_words, right_words), left + right)
+
+        if left > 1:
+            # plural: swap the last four characters for the plural ending
+            if right % 1000000000 == 0:
+                right_words = right_words[:-4] + "liões"
+            elif right % 1000000 == 0:
+                right_words = right_words[:-4] + "lhões"
+
+        # correct "milião" to "milhão"
+        if right_words == 'milião':
+            right_words = 'milhão'
+
+        if right == 100:
+            # multiples of one hundred have a dedicated single word
+            return (self.hundreds[left], left * right)
+        return (left_words + " " + right_words, left * right)
+
+    def to_cardinal(self, value):
+        """Return ``value`` written out as a Portuguese cardinal number.
+
+        The text produced by the parent class is post-processed to drop
+        the "e" that follows a thousands/millions word when a hundreds
+        word comes next and another "e" appears later in the text.
+        """
+        result = super(Num2Word_PT, self).to_cardinal(value)
+
+        # Transforms "mil e cento e catorze" into "mil cento e catorze".
+        # Transforms "cem milhões e duzentos mil e duzentos e dez" into
+        # "cem milhões duzentos mil duzentos e dez", but "cem milhões e
+        # duzentos mil e duzentos" into "cem milhões duzentos mil e
+        # duzentos" and not into "cem milhões duzentos mil duzentos".
+        for ext in (
+                'mil', 'milhão', 'milhões', 'mil milhões',
+                'bilião', 'biliões', 'mil biliões'):
+            # Raw string: "\w" is not a valid escape sequence in a normal
+            # string literal and triggers a warning on recent Pythons.
+            if re.match(r'.*{} e \w*entos? (?=.*e)'.format(ext), result):
+                result = result.replace(ext + ' e', ext)
+
+        return result
+
+    # The ordinal conversion below is shared with pt_BR, although other
+    # rules are probably more correct in Portugal. From the 2000th
+    # onwards, "dois milésimos" is preferable to "segundo milésimo"
+    # (i.e. the leading number should be in its cardinal form instead of
+    # its ordinal form).
+    # This was not implemented.
+    # source:
+    # https://ciberduvidas.iscte-iul.pt/consultorio/perguntas/a-forma-por-extenso-de-2000-e-de-outros-ordinais/16428
+    def to_ordinal(self, value):
+        """Return ``value`` written out as a Portuguese ordinal number.
+
+        ``verify_ordinal`` is called first and may reject the value. The
+        digits are then translated one by one through ``self.ords`` and
+        ``self.thousand_separators``. The separator table built in
+        ``setup`` stops at index 15, so values of 10**18 or more raise
+        ``KeyError``.
+        """
+        # Before changing this function remember this is used by pt-BR
+        # so act accordingly
+        self.verify_ordinal(value)
+
+        result = []
+        value = str(value)
+        thousand_separator = ''
+
+        # Walk the digits from the least significant to the most
+        # significant one; ``idx`` is the position counted from the right.
+        for idx, char in enumerate(value[::-1]):
+            if idx and idx % 3 == 0:
+                thousand_separator = self.thousand_separators[idx]
+
+            if char != '0' and thousand_separator:
+                # The pending separator is emitted only next to a non-zero
+                # digit, so an all-zero group does not leave its own
+                # separator behind (6000000 gives "sexto milionésimo",
+                # without a trailing "milésimo").
+                result.append(thousand_separator)
+                thousand_separator = ''
+
+            result.append(self.ords[idx % 3][int(char)])
+
+        result = ' '.join(result[::-1])
+        result = result.strip()
+        # Zero digits contribute empty strings; collapse the extra blanks.
+        # Raw string: "\s" is not a valid escape in a normal literal.
+        result = re.sub(r'\s+', ' ', result)
+
+        if result.startswith('primeiro') and value != '1':
+            # avoiding "primeiro milésimo", "primeiro milionésimo" and so on
+            result = result[len('primeiro '):]
+
+        return result
+
+    def to_ordinal_num(self, value):
+        """Return ``value`` followed by the ordinal indicator "º".
+
+        ``verify_ordinal`` is called first and may reject the value.
+        """
+        # NOTE: pt-BR uses this method as well; keep that in mind before
+        # changing it.
+        self.verify_ordinal(value)
+        with_indicator = "%sº" % value
+        return with_indicator
+
+    def to_year(self, val, longval=True):
+        """Return the year ``val`` in words.
+
+        Negative years are written as the cardinal of their absolute
+        value followed by " antes de Cristo". ``longval`` is accepted but
+        not used here.
+        """
+        # NOTE: pt-BR uses this method as well; keep that in mind before
+        # changing it.
+        era = ''
+        if val < 0:
+            val = abs(val)
+            era = ' antes de Cristo'
+        return self.to_cardinal(val) + era
+
+    def to_currency(self, val, currency='EUR', cents=True, seperator=' e',
+                    adjective=False):
+        """Return ``val`` written out as an amount of ``currency``.
+
+        The text comes from the parent ``to_currency`` and is then
+        adjusted: "de" is inserted between a millions/billions/trillions
+        word and the plural currency name, and a trailing
+        " e zero cêntimos" is removed.
+
+        Raises ``NotImplementedError`` when ``currency`` is not a key of
+        ``CURRENCY_FORMS``.
+        """
+        # change negword because base.to_currency() does not need space after
+        backup_negword = self.negword
+        self.negword = self.negword[:-1]
+        try:
+            result = super(Num2Word_PT, self).to_currency(
+                val, currency=currency, cents=cents, seperator=seperator,
+                adjective=adjective)
+        finally:
+            # Always undo the change on negword, also when the base call
+            # raises; otherwise this instance would keep the truncated
+            # word for every later conversion.
+            self.negword = backup_negword
+
+        # transforms "milhões euros" into "milhões de euros"
+        try:
+            cr1, _ = self.CURRENCY_FORMS[currency]
+        except KeyError:
+            raise NotImplementedError(
+                'Currency code "%s" not implemented for "%s"' %
+                (currency, self.__class__.__name__))
+
+        for ext in (
+                'milhão', 'milhões', 'bilião',
+                'biliões', 'trilião', 'triliões'):
+            # only when the word is directly followed by the plural
+            # currency name (cr1[1])
+            if re.match('.*{} (?={})'.format(ext, cr1[1]), result):
+                result = result.replace(
+                    '{}'.format(ext), '{} de'.format(ext), 1
+                )
+        # do not print "e zero cêntimos"
+        result = result.replace(' e zero cêntimos', '')
+        return result
--- a/num2words/lang_PT_BR.py
+++ b/num2words/lang_PT_BR.py
@@ -20,10 +20,10 @@ from __future__ import division, unicode_literals

 import re

-from . import lang_EU
+from . import lang_PT


-class Num2Word_PT_BR(lang_EU.Num2Word_EU):
+class Num2Word_PT_BR(lang_PT.Num2Word_PT):
    def set_high_numwords(self, high):
        max = 3 + 3*len(high)
        for word, n in zip(high, range(max, 3, -3)):
@@ -32,59 +32,10 @@ class Num2Word_PT_BR(lang_EU.Num2Word_EU):
    def setup(self):
        super(Num2Word_PT_BR, self).setup()

-        self.negword = "menos "
-        self.pointword = "vírgula"
-        self.exclude_title = ["e", "vírgula", "menos"]
+        self.low_numwords[1] = 'dezenove'
+        self.low_numwords[3] = 'dezessete'
+        self.low_numwords[4] = 'dezesseis'

-        self.mid_numwords = [
-            (1000, "mil"), (100, "cem"), (90, "noventa"),
-            (80, "oitenta"), (70, "setenta"), (60, "sessenta"),
-            (50, "cinquenta"), (40, "quarenta"), (30, "trinta")
-        ]
-        self.low_numwords = [
-            "vinte", "dezenove", "dezoito", "dezessete", "dezesseis",
-            "quinze", "catorze", "treze", "doze", "onze", "dez",
-            "nove", "oito", "sete", "seis", "cinco", "quatro", "três", "dois",
-            "um", "zero"
-        ]
-        self.ords = [
-            {
-                0: "",
-                1: "primeiro",
-                2: "segundo",
-                3: "terceiro",
-                4: "quarto",
-                5: "quinto",
-                6: "sexto",
-                7: "sétimo",
-                8: "oitavo",
-                9: "nono",
-            },
-            {
-                0: "",
-                1: "décimo",
-                2: "vigésimo",
-                3: "trigésimo",
-                4: "quadragésimo",
-                5: "quinquagésimo",
-                6: "sexagésimo",
-                7: "septuagésimo",
-                8: "octogésimo",
-                9: "nonagésimo",
-            },
-            {
-                0: "",
-                1: "centésimo",
-                2: "ducentésimo",
-                3: "tricentésimo",
-                4: "quadrigentésimo",
-                5: "quingentésimo",
-                6: "seiscentésimo",
-                7: "septigentésimo",
-                8: "octigentésimo",
-                9: "nongentésimo",
-            },
-        ]
        self.thousand_separators = {
            3: "milésimo",
            6: "milionésimo",
@@ -92,17 +43,6 @@ class Num2Word_PT_BR(lang_EU.Num2Word_EU):
            12: "trilionésimo",
            15: "quadrilionésimo"
        }
-        self.hundreds = {
-            1: "cento",
-            2: "duzentos",
-            3: "trezentos",
-            4: "quatrocentos",
-            5: "quinhentos",
-            6: "seiscentos",
-            7: "setecentos",
-            8: "oitocentos",
-            9: "novecentos",
-        }

    def merge(self, curr, next):
        ctext, cnum, ntext, nnum = curr + next
@@ -132,7 +72,7 @@ class Num2Word_PT_BR(lang_EU.Num2Word_EU):
        return (ctext + ntext, cnum * nnum)

    def to_cardinal(self, value):
-        result = super(Num2Word_PT_BR, self).to_cardinal(value)
+        result = lang_PT.Num2Word_EU.to_cardinal(self, value)

        # Transforms "mil E cento e catorze reais" into "mil, cento e catorze
        # reais"
@@ -146,44 +86,6 @@ class Num2Word_PT_BR(lang_EU.Num2Word_EU):

        return result

-    def to_ordinal(self, value):
-        self.verify_ordinal(value)
-
-        result = []
-        value = str(value)
-        thousand_separator = ''
-
-        for idx, char in enumerate(value[::-1]):
-            if idx and idx % 3 == 0:
-                thousand_separator = self.thousand_separators[idx]
-
-            if char != '0' and thousand_separator:
-                # avoiding "segundo milionésimo milésimo" for 6000000,
-                # for instance
-                result.append(thousand_separator)
-                thousand_separator = ''
-
-            result.append(self.ords[idx % 3][int(char)])
-
-        result = ' '.join(result[::-1])
-        result = result.strip()
-        result = re.sub('\s+', ' ', result)
-
-        if result.startswith('primeiro') and value != '1':
-            # avoiding "primeiro milésimo", "primeiro milionésimo" and so on
-            result = result[9:]
-
-        return result
-
-    def to_ordinal_num(self, value):
-        self.verify_ordinal(value)
-        return "%sº" % (value)
-
-    def to_year(self, val, longval=True):
-        if val < 0:
-            return self.to_cardinal(abs(val)) + ' antes de Cristo'
-        return self.to_cardinal(val)
-
    def to_currency(self, val, longval=True):
        integer_part, decimal_part = ('%.2f' % val).split('.')