maposmatic-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Maposmatic-dev] [PATCH] Updated ocitysmap/i18n.py.i18n_nl_generic


From: Jeroen van Rijn
Subject: [Maposmatic-dev] [PATCH] Updated ocitysmap/i18n.py.i18n_nl_generic
Date: Mon, 11 Jan 2010 05:18:51 +0100

Added comments explaining what the Dutch class (i18n_nl_generic) actually does.
Added more titles to be captured as prefixes.
Added handling of counting words (ordinals), e.g. "1e Walstraat" => "Walstraat (1e)"

Signed-off-by: Jeroen van Rijn <address@hidden>
---
 ocitysmap/i18n.py |   33 ++++++++++++++++++++++++++++++---
 1 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/ocitysmap/i18n.py b/ocitysmap/i18n.py
index bae9ae9..02dfb22 100644
--- a/ocitysmap/i18n.py
+++ b/ocitysmap/i18n.py
@@ -384,15 +384,31 @@ class i18n_ru_generic(i18n):
         return self._upper_unaccent_string(a) == self._upper_unaccent_string(b)
 
 class i18n_nl_generic(i18n):
+    #
+    # Dutch streets are often named after people and include a title.
+    # The title will be captured as part of the <prefix>
+    #
     APPELLATIONS = [ u"St.", u"Sint", u"Ptr.", u"Pater",
                      u"Prof.", u"Professor", u"Past.", u"Pastoor",
                      u"Pr.", u"Prins", u"Prinses", u"Gen.", u"Generaal",
                      u"Mgr.", u"Monseigneur", u"Mr.", u"Meester",
                      u"Burg.", u"Burgermeester", u"Dr.", u"Dokter",
-                     u"Ir.", "Ingenieur",
-                     u""]
+                     u"Ir.", u"Ingenieur", u"Ds.", u"Dominee", u"Deken",
+                     u"Drs.",
+                     # counting words before street name,
+                     # e.g. "1e Walstraat" => "Walstraat (1e)"
+                     u"\d+e",
+                     u"" ]
+    #
+    # Surnames in Dutch streets named after people tend to have the middle name
+    # listed after the rest of the surname,
+    # e.g. "Prins van Oranjestraat" => "Oranjestraat (Prins van)"
+    # Likewise, articles are captured as part of the prefix,
+    # e.g. "Den Urling" => "Urling (Den)"
+    #
     DETERMINANTS = [ u"\s?van der", u"\s?van den", u"\s?van de", u"\s?van",
-                     u"\s?Den", u"\s?D'n", u"\s?D'", u"\s?De", u"\s?'T", u"\s?Het" ]
+                     u"\s?Den", u"\s?D'n", u"\s?D'", u"\s?De", u"\s?'T", u"\s?Het",
+                     u"" ]
     
     SPACE_REDUCE = re.compile(r"\s+")
     PREFIX_REGEXP = re.compile(r"^(?P<prefix>(%s)(%s)?)\s?\b(?P<name>.+)" %
@@ -423,9 +439,20 @@ class i18n_nl_generic(i18n):
         return self.language
 
     def user_readable_street(self, name):
+        #
+        # Make sure name actually contains something,
+        # the PREFIX_REGEXP.match fails on zero-length strings
+        #
+        if len(name) == 0:
+            return name
+
         name = name.strip()
         name = self.SPACE_REDUCE.sub(" ", name)
         matches = self.PREFIX_REGEXP.match(name)
+        #
+        # If no prefix was captured, that's okay. Don't substitute
+        # the name however, "<name> ()" looks silly
+        #
         if matches.group('prefix'):
             name = self.PREFIX_REGEXP.sub(r"\g<name> (\g<prefix>)", name)
         return name
-- 
1.6.3.3





reply via email to

[Prev in Thread] Current Thread [Next in Thread]