pingus-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Pingus-CVS] r3638 - in trunk/pingus: . src


From: grumbel at BerliOS
Subject: [Pingus-CVS] r3638 - in trunk/pingus: . src
Date: Thu, 3 Jul 2008 05:45:58 +0200

Author: grumbel
Date: 2008-07-03 05:45:57 +0200 (Thu, 03 Jul 2008)
New Revision: 3638

Added:
   trunk/pingus/src/utf8_iterator.cpp
   trunk/pingus/src/utf8_iterator.hpp
Modified:
   trunk/pingus/SConstruct
Log:
Added UTF8Iterator class

Modified: trunk/pingus/SConstruct
===================================================================
--- trunk/pingus/SConstruct     2008-07-03 03:03:45 UTC (rev 3637)
+++ trunk/pingus/SConstruct     2008-07-03 03:45:57 UTC (rev 3638)
@@ -239,6 +239,7 @@
 'src/tinygettext/dictionary_manager.cpp',
 'src/tinygettext/dictionary.cpp',
 'src/tinygettext/language_def.cpp',
+'src/utf8_iterator.cpp',
 'src/math/vector2f.cpp',
 'src/math/vector2i.cpp',
 'src/math/vector3f.cpp',

Added: trunk/pingus/src/utf8_iterator.cpp
===================================================================
--- trunk/pingus/src/utf8_iterator.cpp  2008-07-03 03:03:45 UTC (rev 3637)
+++ trunk/pingus/src/utf8_iterator.cpp  2008-07-03 03:45:57 UTC (rev 3638)
@@ -0,0 +1,119 @@
+//  Pingus - A free Lemmings clone
+//  Copyright (C) 2008 Matthias Braun <address@hidden>, 
+//                     Ingo Ruhnke <address@hidden>
+//
+//  This program is free software: you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation, either version 3 of the License, or
+//  (at your option) any later version.
+//  
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//  
+//  You should have received a copy of the GNU General Public License
+//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#include <iostream>
+#include <stdexcept>
+#include "utf8_iterator.hpp"
+
+// FIXME: Get rid of exceptions in this code
+/**
+ * Starts iterating over @a text_ and decodes its first character.
+ * Only a reference to @a text_ is kept, so the string has to stay
+ * alive for as long as the iterator is used.
+ *
+ * A malformed leading sequence is reported on stdout, yields a 0
+ * character and is skipped by one byte, the same way operator++()
+ * treats malformed input.
+ */
+UTF8Iterator::UTF8Iterator(const std::string& text_)
+  : text(text_),
+    pos(0)
+{
+  try {
+    chr = decode_utf8(text, pos);
+  } catch (const std::exception&) {
+    // Print only the offending byte: reading a whole uint32_t at this
+    // position could run past the end of the string's buffer.
+    std::cout << "Malformed utf-8 sequence beginning with "
+              << static_cast<unsigned int>(static_cast<unsigned char>(text[pos]))
+              << " found " << std::endl;
+    chr = 0;
+    // decode_utf8() leaves pos untouched when it throws; step over the
+    // bad byte so it is not decoded (and reported) a second time.
+    ++pos;
+  }
+}
+
+/**
+ * Reports whether iteration is finished, i.e. whether the byte
+ * position has moved beyond the size of the text.
+ */
+bool
+UTF8Iterator::done() const
+{
+  const std::string::size_type length = text.size();
+  return length < pos;
+}
+
+/**
+ * Decodes the character at the current byte position and moves the
+ * position past it.  On a malformed sequence a message is printed to
+ * stdout, the current character becomes 0 and one byte is skipped.
+ *
+ * @return this iterator
+ */
+UTF8Iterator&
+UTF8Iterator::operator++() {
+  try {
+    chr = decode_utf8(text, pos);
+  } catch (const std::exception&) {
+    // Print only the offending byte: reading a whole uint32_t at this
+    // position could run past the end of the string's buffer.
+    std::cout << "Malformed utf-8 sequence beginning with "
+              << static_cast<unsigned int>(static_cast<unsigned char>(text[pos]))
+              << " found " << std::endl;
+    chr = 0;
+    // decode_utf8() does not advance pos when it throws, so skip the
+    // bad byte here to guarantee forward progress.
+    ++pos;
+  }
+
+  return *this;
+}
+
+/**
+ * Returns the most recently decoded character (0 after a decoding
+ * error).
+ */
+uint32_t
+UTF8Iterator::operator*() const {
+  const uint32_t current = chr;
+  return current;
+}
+
+/**
+ * Tells whether @a c is a UTF-8 continuation byte, i.e. whether it
+ * has the bit pattern 10xx.xxxx found in the 2nd, 3rd or 4th byte of
+ * a multibyte utf8 sequence.
+ */
+bool
+UTF8Iterator::has_multibyte_mark(unsigned char c) {
+  const unsigned int top_two_bits = c & 0xC0;
+  return top_two_bits == 0x80;
+}
+
+/**
+ * gets unicode character at byte position @a p of UTF-8 encoded @a
+ * text, then advances @a p to the next character.
+ *
+ * @a p is only advanced on success; it is left unchanged when an
+ * exception is thrown.
+ *
+ * @throws std::runtime_error if decoding fails (std::range_error, a
+ * subclass, when the sequence is cut off by the end of @a text).
+ * See unicode standard section 3.10 table 3-5 and 3-6 for details.
+ */
+uint32_t
+UTF8Iterator::decode_utf8(const std::string& text, size_t& p)
+{
+  uint32_t c1 = (unsigned char) text[p+0];
+
+  // A continuation byte (10xx.xxxx) can never start a sequence.
+  if (has_multibyte_mark(c1)) throw std::runtime_error("Malformed utf-8 sequence");
+
+  if ((c1 & 0200) == 0000) {
+    // 0xxx.xxxx: 1 byte sequence
+    p+=1;
+    return c1;
+  }
+  else if ((c1 & 0340) == 0300) {
+    // 110x.xxxx: 2 byte sequence
+    if(p+1 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=2;
+    return (c1 & 0037) << 6 | (c2 & 0077);
+  }
+  else if ((c1 & 0360) == 0340) {
+    // 1110.xxxx: 3 byte sequence
+    if(p+2 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    uint32_t c3 = (unsigned char) text[p+2];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=3;
+    return (c1 & 0017) << 12 | (c2 & 0077) << 6 | (c3 & 0077);
+  }
+  else if ((c1 & 0370) == 0360) {
+    // 1111.0xxx: 4 byte sequence
+    if(p+3 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    uint32_t c3 = (unsigned char) text[p+2];
+    // The fourth byte of the sequence sits at offset 3, not 4.
+    uint32_t c4 = (unsigned char) text[p+3];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=4;
+    return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 & 0077);
+  }
+  // 1111.1xxx lead bytes are not valid UTF-8.
+  throw std::runtime_error("Malformed utf-8 sequence");
+}
+
+/* EOF */


Property changes on: trunk/pingus/src/utf8_iterator.cpp
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: trunk/pingus/src/utf8_iterator.hpp
===================================================================
--- trunk/pingus/src/utf8_iterator.hpp  2008-07-03 03:03:45 UTC (rev 3637)
+++ trunk/pingus/src/utf8_iterator.hpp  2008-07-03 03:45:57 UTC (rev 3638)
@@ -0,0 +1,53 @@
+//  Pingus - A free Lemmings clone
+//  Copyright (C) 2008 Matthias Braun <address@hidden>, 
+//                     Ingo Ruhnke <address@hidden>
+//
+//  This program is free software: you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation, either version 3 of the License, or
+//  (at your option) any later version.
+//  
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//  
+//  You should have received a copy of the GNU General Public License
+//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef HEADER_PINGUS_UTF8_ITERATOR_HPP
+#define HEADER_PINGUS_UTF8_ITERATOR_HPP
+
+#include <stdint.h>
+#include <string>
+
+/**
+ * Forward iterator over the unicode characters of a UTF-8 encoded
+ * std::string.  The string is held by reference and therefore has to
+ * outlive the iterator.
+ */
+class UTF8Iterator
+{
+public:
+  /** Starts iterating at the beginning of @a text_. */
+  UTF8Iterator(const std::string& text_);
+
+  /** Returns true once the byte position has moved beyond the text. */
+  bool done() const;
+
+  /** Decodes the next character and advances the byte position. */
+  UTF8Iterator& operator++();
+
+  /** Returns the most recently decoded character. */
+  uint32_t operator*() const;
+
+private:
+  /**
+   * returns true if this byte matches a bitmask of 10xx.xxxx, i.e. it is the
+   * 2nd, 3rd or 4th byte of a multibyte utf8 string
+   */
+  bool has_multibyte_mark(unsigned char c);
+
+  /**
+   * gets unicode character at byte position @a p of UTF-8 encoded @a
+   * text, then advances @a p to the next character.
+   *
+   * @throws std::runtime_error if decoding fails.
+   * See unicode standard section 3.10 table 3-5 and 3-6 for details.
+   */
+  uint32_t decode_utf8(const std::string& text, size_t& p);
+
+  /** The string being iterated (not owned). */
+  const std::string&     text;
+  /** Byte offset at which the next decode starts. */
+  std::string::size_type pos;
+  /** The most recently decoded character. */
+  uint32_t chr;
+};
+
+#endif
+
+/* EOF */


Property changes on: trunk/pingus/src/utf8_iterator.hpp
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native





reply via email to

[Prev in Thread] Current Thread [Next in Thread]