Qore Programming Language  0.8.7
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
QoreEncoding.h
Go to the documentation of this file.
1 /* -*- mode: c++; indent-tabs-mode: nil -*- */
2 /*
3  QoreEncoding.h
4 
5  Qore Programming Language
6 
7  Copyright 2003 - 2013 David Nichols
8 
9  This library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU Lesser General Public
11  License as published by the Free Software Foundation; either
12  version 2.1 of the License, or (at your option) any later version.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public
20  License along with this library; if not, write to the Free Software
21  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 
24 #ifndef _QORE_CHARSET_H
25 
26 #define _QORE_CHARSET_H
27 
33 #include <qore/common.h>
34 #include <qore/QoreThreadLock.h>
35 
36 #include <strings.h>
37 #include <string.h>
38 
39 #include <map>
40 
41 #include <string>
42 
// Callback type stored in QoreEncoding::flength and invoked by QoreEncoding::getLength(p, end, invalid).
// NOTE(review): presumably returns the number of characters in [str, end) and reports bad data through `invalid` - confirm against the implementations.
44 typedef qore_size_t (*mbcs_length_t)(const char *str, const char *end, bool &invalid);
45 
// Callback type stored in QoreEncoding::fend and invoked by QoreEncoding::getByteLen(p, end, c, invalid).
// NOTE(review): presumably returns the byte length occupied by the first num_chars characters of str - confirm against the implementations.
47 typedef qore_size_t (*mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid);
48 
// Callback type stored in QoreEncoding::fpos and invoked by QoreEncoding::getCharPos(p, end, invalid).
// NOTE(review): presumably returns the character offset of byte pointer ptr within str - confirm against the implementations.
50 typedef qore_size_t (*mbcs_pos_t)(const char *str, const char *ptr, bool &invalid);
51 
53 
// Callback type stored in QoreEncoding::fcharlen and invoked by QoreEncoding::getCharLen(p, valid_len).
// Unlike the other callback types it takes no `invalid` out-parameter.
// NOTE(review): how an invalid or truncated character is signalled is not visible here - confirm against the implementations.
57 typedef qore_size_t (*mbcs_charlen_t)(const char *str, qore_size_t valid_len);
58 
// Forward declaration: this header only uses ExceptionSink through pointers (the `ExceptionSink *xsink` parameters below).
59 class ExceptionSink;
60 
62 
// Describes a single character encoding: its code (name), an optional description,
// a maximum character width and four optional callbacks.
// Each accessor that has a callback uses it when set and otherwise falls back to a
// simple byte-based result (documented on each method).
72 class QoreEncoding {
73 private:
    // encoding code given to the constructor; exposed by getCode()
74  std::string code;
    // description; left empty when the constructor receives a null n_desc
75  std::string desc;
    // optional callbacks; any of them may be null (the constructor defaults each to 0)
76  mbcs_length_t flength;
77  mbcs_end_t fend;
78  mbcs_pos_t fpos;
79  mbcs_charlen_t fcharlen;
    // value reported by getMaxCharWidth(); the constructor defaults it to 1
80  unsigned char maxwidth;
81 
82 public:
    // Stores the code, description, width and callbacks.
    // n_code is passed straight to std::string's const char* constructor, so it must not be null;
    // n_desc may be null, in which case the description is stored as an empty string.
83  DLLLOCAL QoreEncoding(const char *n_code, const char *n_desc = 0, unsigned char n_maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t c = 0) : code(n_code), desc(n_desc ? n_desc : ""), flength(l), fend(e), fpos(p), fcharlen(c), maxwidth(n_maxwidth) {
84  }
85 
86  DLLLOCAL ~QoreEncoding() {
87  }
88 
90 
    // Returns flength(p, end, invalid) when a length callback is set.
    // Without a callback it returns strlen(p): `end` is ignored, p must be null-terminated,
    // and `invalid` is left untouched (callers should initialize it themselves).
95  DLLLOCAL qore_size_t getLength(const char *p, const char *end, bool &invalid) const {
96  return flength ? flength(p, end, invalid) : strlen(p);
97  }
98 
100 
    // Overload taking an ExceptionSink instead of an `invalid` flag; only declared here, defined elsewhere.
105  DLLLOCAL qore_size_t getLength(const char *p, const char *end, ExceptionSink *xsink) const;
106 
108 
    // Returns fend(p, end, c, invalid) when an end callback is set; otherwise returns c unchanged
    // (p, end and `invalid` are not used in that case).
114  DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const {
115  return fend ? fend(p, end, c, invalid) : c;
116  }
117 
119 
    // Overload taking an ExceptionSink instead of an `invalid` flag; only declared here, defined elsewhere.
125  DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, ExceptionSink *xsink) const;
126 
128 
    // Returns fpos(p, end, invalid) when a position callback is set; otherwise returns the
    // pointer difference end - p converted to qore_size_t (`invalid` is left untouched).
133  DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const {
134  return fpos ? fpos(p, end, invalid) : end - p;
135  }
136 
138 
    // Overload taking an ExceptionSink instead of an `invalid` flag; only declared here, defined elsewhere.
143  DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, ExceptionSink *xsink) const;
144 
146 
    // Returns fcharlen(p, valid_len) when a character-length callback is set; otherwise always 1.
151  DLLLOCAL qore_size_t getCharLen(const char *p, qore_size_t valid_len) const {
152  return fcharlen ? fcharlen(p, valid_len) : 1;
153  }
154 
    // True exactly when a length callback (flength) was supplied; the other callbacks are not consulted.
156  DLLLOCAL bool isMultiByte() const {
157  return (bool)flength;
158  }
159 
    // Returns the stored code as a C string; the pointer refers to this object's internal std::string.
161  DLLLOCAL const char *getCode() const {
162  return code.c_str();
163  }
164 
    // Returns the stored description, or the literal "<no description available>" when it is empty.
166  DLLLOCAL const char *getDesc() const {
167  return desc.empty() ? "<no description available>" : desc.c_str();
168  }
169 
    // Returns maxwidth (stored as unsigned char) widened to int.
171  DLLLOCAL int getMaxCharWidth() const {
172  return maxwidth;
173  }
174 };
175 
176 // case-insensitive maps for encodings
// Map from a C-string key to a mutable QoreEncoding pointer. Keys are ordered by the
// comparator ltcstrcase, which is only named here (elaborated `class ltcstrcase`) and defined elsewhere.
// The keys are raw `const char *`, so the map does not own the key storage.
177 typedef std::map<const char *, QoreEncoding *, class ltcstrcase> encoding_map_t;
// Same shape as encoding_map_t, but the mapped pointers are to const QoreEncoding.
178 typedef std::map<const char *, const QoreEncoding *, class ltcstrcase> const_encoding_map_t;
179 
// Forward declaration: this header only uses QoreString through a pointer (findCreate(const QoreString *str)).
180 class QoreString;
181 
183 
// Registry of encodings and their aliases. Every data member and every operation other
// than the constructor and destructor is static.
// The class-head was missing from this listing (the line read just "186 {"); the name is
// restored from the constructor/destructor below and the `QEM` declaration that follows.
186 class QoreEncodingManager {
187  private:
    // NOTE(review): presumably the registered encodings, keyed by encoding code - confirm in the implementation
188  DLLLOCAL static encoding_map_t emap;
    // NOTE(review): presumably alias name -> encoding, filled by addAlias() - confirm in the implementation
189  DLLLOCAL static const_encoding_map_t amap;
    // NOTE(review): presumably guards emap/amap; the "Unlocked" helpers below suggest callers hold it - confirm
190  DLLLOCAL static class QoreThreadLock mutex;
191 
    // Private helpers with the same parameter list as add() / a name lookup; only declared here.
    // Unlike add(), addUnlocked() has no default for `desc`. The final callback parameter is unnamed.
192  DLLLOCAL static const QoreEncoding *addUnlocked(const char *code, const char *desc, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0);
193  DLLLOCAL static const QoreEncoding *findUnlocked(const char *name);
194 
195  public:
    // Registers `alias` as an additional name for the encoding qcs (exported API; defined elsewhere).
197  DLLEXPORT static void addAlias(const QoreEncoding *qcs, const char *alias);
198 
    // Looks up an encoding by name given as a C string.
    // NOTE(review): the name suggests an unknown encoding is created on demand - confirm in the implementation.
200  DLLEXPORT static const QoreEncoding *findCreate(const char *name);
201 
    // Same lookup, with the name given as a QoreString.
203  DLLEXPORT static const QoreEncoding *findCreate(const QoreString *str);
204 
    // NOTE(review): presumably prints the registered encodings; output destination not visible here.
206  DLLEXPORT static void showEncodings();
207 
    // NOTE(review): presumably prints the registered aliases; output destination not visible here.
209  DLLEXPORT static void showAliases();
210 
    // Registers an encoding; the parameters mirror the QoreEncoding constructor
    // (code, optional description, maximum width defaulting to 1, optional callbacks defaulting to 0).
    // Returns a pointer to a const QoreEncoding.
212  DLLEXPORT static const QoreEncoding *add(const char *code, const char *desc = 0, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0);
213 
    // Library-internal (DLLLOCAL) entry points: init() takes a C string `def`
    // (NOTE(review): presumably the default encoding name - confirm), plus the constructor and destructor.
214  DLLLOCAL static void init(const char *def);
215  DLLLOCAL QoreEncodingManager();
216  DLLLOCAL ~QoreEncodingManager();
217 };
218 
// Exported global QoreEncodingManager object; only declared here (extern), defined elsewhere.
220 DLLEXPORT extern QoreEncodingManager QEM;
221 
222 // builtin character encodings
// Exported pointers to const QoreEncoding objects, one per built-in encoding plus QCS_DEFAULT.
// They are only declared here (extern); where and when they are assigned is not visible in this header.
// The ISO-8859 series listed here goes from 11 directly to 13 (there is no QCS_ISO_8859_12).
223 DLLEXPORT extern const QoreEncoding *QCS_DEFAULT,
224  *QCS_USASCII,
225  *QCS_UTF8,
226  *QCS_ISO_8859_1,
227  *QCS_ISO_8859_2,
228  *QCS_ISO_8859_3,
229  *QCS_ISO_8859_4,
230  *QCS_ISO_8859_5,
231  *QCS_ISO_8859_6,
232  *QCS_ISO_8859_7,
233  *QCS_ISO_8859_8,
234  *QCS_ISO_8859_9,
235  *QCS_ISO_8859_10,
236  *QCS_ISO_8859_11,
237  *QCS_ISO_8859_13,
238  *QCS_ISO_8859_14,
239  *QCS_ISO_8859_15,
240  *QCS_ISO_8859_16,
241  *QCS_KOI8_R,
242  *QCS_KOI8_U,
243  *QCS_KOI7;
244 
245 #endif // _QORE_CHARSET_H