KiCad PCB EDA Suite
utf8.h
Go to the documentation of this file.
1 /*
2  * This program source code file is part of KiCad, a free EDA CAD application.
3  *
4  * Copyright (C) 2013 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
5  * Copyright (C) 2013 KiCad Developers, see CHANGELOG.TXT for contributors.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, you may find one here:
19  * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
20  * or you may search the http://www.gnu.org website for the version 2 license,
21  * or you may write to the Free Software Foundation, Inc.,
22  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #ifndef UTF8_H_
26 #define UTF8_H_
27 
28 #include <string>
29 #include <wx/string.h>
30 
53 class UTF8 : public std::string
54 {
55 public:
56 
57  UTF8( const wxString& o );
58 
61  UTF8( const char* txt ) :
62  std::string( txt )
63  {
64  }
65 
68  UTF8( const wchar_t* txt );
69 
70  UTF8( const std::string& o ) :
71  std::string( o )
72  {
73  }
74 
75  UTF8() :
76  std::string()
77  {
78  }
79 
80  ~UTF8() // Needed mainly to build python wrapper
81  {
82  }
83 
84  UTF8& operator=( const wxString& o );
85 
86  UTF8& operator=( const std::string& o )
87  {
88  std::string::operator=( o );
89  return *this;
90  }
91 
92  UTF8& operator=( const char* s )
93  {
94  std::string::operator=( s );
95  return *this;
96  }
97 
98  UTF8& operator=( char c )
99  {
100  std::string::operator=( c );
101  return *this;
102  }
103 
104  UTF8 substr( size_t pos = 0, size_t len = npos ) const
105  {
106  return std::string::substr( pos, len );
107  }
108 
109  operator wxString () const;
110 
113  operator char* () const
114  {
115  return (char*) c_str();
116  }
117 
127  static int uni_forward( const unsigned char* aSequence, unsigned* aResult = NULL );
128 
129 #ifndef SWIG
130 
137  class uni_iter
138  {
139  friend class UTF8;
140 
141  const unsigned char* it;
142 
143  // private constructor.
144  uni_iter( const char* start ) :
145  it( (const unsigned char*) start )
146  {
147  // for the human: assert( sizeof(unsigned) >= 4 );
148  }
149 
150 
151  public:
152 
153  uni_iter() // Needed only to build python wrapper, not used outside the wrapper
154  {
155  it = NULL;
156  }
157 
158  uni_iter( const uni_iter& o )
159  {
160  it = o.it;
161  }
162 
165  {
166  it += uni_forward( it );
167  return *this;
168  }
169 
172  {
173  uni_iter ret = *this;
174 
175  it += uni_forward( it );
176  return ret;
177  }
178 
179  /*
181  unsigned operator->() const
182  {
183  unsigned result;
184 
185  // grab the result, do not advance
186  uni_forward( it, &result );
187  return result;
188  }
189  */
190 
192  unsigned operator*() const
193  {
194  unsigned result;
195 
196  // grab the result, do not advance
197  uni_forward( it, &result );
198  return result;
199  }
200 
201  bool operator==( const uni_iter& other ) const { return it == other.it; }
202  bool operator!=( const uni_iter& other ) const { return it != other.it; }
203 
206  bool operator< ( const uni_iter& other ) const { return it < other.it; }
207  bool operator<=( const uni_iter& other ) const { return it <= other.it; }
208  bool operator> ( const uni_iter& other ) const { return it > other.it; }
209  bool operator>=( const uni_iter& other ) const { return it >= other.it; }
210  };
211 
216  uni_iter ubegin() const
217  {
218  return uni_iter( data() );
219  }
220 
225  uni_iter uend() const
226  {
227  return uni_iter( data() + size() );
228  }
229 #endif // SWIG
230 };
231 
232 #endif // UTF8_H_
bool operator!=(const uni_iter &other) const
Definition: utf8.h:202
Class UTF8 is an 8 bit std::string that is assuredly encoded in UTF8, and supplies special conversion...
Definition: utf8.h:53
UTF8 & operator=(const wxString &o)
Definition: utf8.cpp:52
bool operator<(const uni_iter &other) const
Since the ++ operators advance more than one byte, this is your best loop termination test...
Definition: utf8.h:206
uni_iter(const uni_iter &o)
Definition: utf8.h:158
UTF8()
Definition: utf8.h:75
uni_iter ubegin() const
Function ubegin returns a uni_iter initialized to the start of "this" UTF8 byte sequence.
Definition: utf8.h:216
bool operator<=(const uni_iter &other) const
Definition: utf8.h:207
unsigned operator*() const
return unicode at current position
Definition: utf8.h:192
UTF8 substr(size_t pos=0, size_t len=npos) const
Definition: utf8.h:104
bool operator==(const uni_iter &other) const
Definition: utf8.h:201
UTF8 & operator=(const std::string &o)
Definition: utf8.h:86
UTF8(const char *txt)
This is a constructor for which you could end up with non-UTF8 encoding, but that would be your fault...
Definition: utf8.h:61
UTF8 & operator=(const char *s)
Definition: utf8.h:92
const uni_iter & operator++()
pre-increment and return uni_iter at new position
Definition: utf8.h:164
bool operator>(const uni_iter &other) const
Definition: utf8.h:208
class uni_iter is a non-mutating iterator that walks through unicode code points in the UTF8 encoded st...
Definition: utf8.h:137
uni_iter uend() const
Function uend returns a uni_iter initialized to the end of "this" UTF8 byte sequence.
Definition: utf8.h:225
UTF8 & operator=(char c)
Definition: utf8.h:98
uni_iter(const char *start)
Definition: utf8.h:144
uni_iter operator++(int)
post-increment and return uni_iter at initial position
Definition: utf8.h:171
UTF8(const std::string &o)
Definition: utf8.h:70
static int uni_forward(const unsigned char *aSequence, unsigned *aResult=NULL)
Function uni_forward advances over a single UTF8 encoded multibyte character, capturing the unicode c...
Definition: utf8.cpp:66
bool operator>=(const uni_iter &other) const
Definition: utf8.h:209
const unsigned char * it
Definition: utf8.h:141
~UTF8()
Definition: utf8.h:80