00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef _QORE_CHARSET_H
00025
00026 #define _QORE_CHARSET_H
00027
00033 #include <qore/common.h>
00034 #include <qore/QoreThreadLock.h>
00035
00036 #include <strings.h>
00037 #include <string.h>
00038
00039 #include <map>
00040
00041 #include <string>
00042
00044 typedef qore_size_t (*mbcs_length_t)(const char *str, const char *end, bool &invalid);
00045
00047 typedef qore_size_t (*mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid);
00048
00050 typedef qore_size_t (*mbcs_pos_t)(const char *str, const char *ptr, bool &invalid);
00051
00053
00057 typedef qore_size_t (*mbcs_charlen_t)(const char *str, qore_size_t valid_len);
00058
00059 class ExceptionSink;
00060
00062
00072 class QoreEncoding {
00073 private:
00074 std::string code;
00075 std::string desc;
00076 mbcs_length_t flength;
00077 mbcs_end_t fend;
00078 mbcs_pos_t fpos;
00079 mbcs_charlen_t fcharlen;
00080 unsigned char maxwidth;
00081
00082 public:
00083 DLLLOCAL QoreEncoding(const char *n_code, const char *n_desc = 0, unsigned char n_maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t c = 0) : code(n_code), desc(n_desc ? n_desc : ""), flength(l), fend(e), fpos(p), fcharlen(c), maxwidth(n_maxwidth) {
00084 }
00085
00086 DLLLOCAL ~QoreEncoding() {
00087 }
00088
00090
00095 DLLLOCAL qore_size_t getLength(const char *p, const char *end, bool &invalid) const {
00096 return flength ? flength(p, end, invalid) : strlen(p);
00097 }
00098
00100
00105 DLLLOCAL qore_size_t getLength(const char *p, const char *end, ExceptionSink *xsink) const;
00106
00108
00114 DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const {
00115 return fend ? fend(p, end, c, invalid) : c;
00116 }
00117
00119
00125 DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, ExceptionSink *xsink) const;
00126
00128
00133 DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const {
00134 return fpos ? fpos(p, end, invalid) : end - p;
00135 }
00136
00138
00143 DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, ExceptionSink *xsink) const;
00144
00146
00151 DLLLOCAL qore_size_t getCharLen(const char *p, qore_size_t valid_len) const {
00152 return fcharlen ? fcharlen(p, valid_len) : 1;
00153 }
00154
00156 DLLEXPORT bool isMultiByte() const {
00157 return (bool)flength;
00158 }
00159
00161 DLLEXPORT const char *getCode() const {
00162 return code.c_str();
00163 }
00164
00166 DLLEXPORT const char *getDesc() const {
00167 return desc.empty() ? "<no description available>" : desc.c_str();
00168 }
00169
00171 DLLEXPORT int getMaxCharWidth() const {
00172 return maxwidth;
00173 }
00174 };
00175
00176
00177 typedef std::map<const char *, QoreEncoding *, class ltcstrcase> encoding_map_t;
00178 typedef std::map<const char *, const QoreEncoding *, class ltcstrcase> const_encoding_map_t;
00179
00180 class QoreString;
00181
00183
00185 class QoreEncodingManager
00186 {
00187 private:
00188 DLLLOCAL static encoding_map_t emap;
00189 DLLLOCAL static const_encoding_map_t amap;
00190 DLLLOCAL static class QoreThreadLock mutex;
00191
00192 DLLLOCAL static const QoreEncoding *addUnlocked(const char *code, const char *desc, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0);
00193 DLLLOCAL static const QoreEncoding *findUnlocked(const char *name);
00194
00195 public:
00197 DLLEXPORT static void addAlias(const QoreEncoding *qcs, const char *alias);
00198
00200 DLLEXPORT static const QoreEncoding *findCreate(const char *name);
00201
00203 DLLEXPORT static const QoreEncoding *findCreate(const QoreString *str);
00204
00206 DLLEXPORT static void showEncodings();
00207
00209 DLLEXPORT static void showAliases();
00210
00212 DLLEXPORT static const QoreEncoding *add(const char *code, const char *desc = 0, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0);
00213
00214 DLLLOCAL static void init(const char *def);
00215 DLLLOCAL QoreEncodingManager();
00216 DLLLOCAL ~QoreEncodingManager();
00217 };
00218
00220 DLLEXPORT extern QoreEncodingManager QEM;
00221
00222
00223 DLLEXPORT extern const QoreEncoding *QCS_DEFAULT,
00224 *QCS_USASCII,
00225 *QCS_UTF8,
00226 *QCS_ISO_8859_1,
00227 *QCS_ISO_8859_2,
00228 *QCS_ISO_8859_3,
00229 *QCS_ISO_8859_4,
00230 *QCS_ISO_8859_5,
00231 *QCS_ISO_8859_6,
00232 *QCS_ISO_8859_7,
00233 *QCS_ISO_8859_8,
00234 *QCS_ISO_8859_9,
00235 *QCS_ISO_8859_10,
00236 *QCS_ISO_8859_11,
00237 *QCS_ISO_8859_13,
00238 *QCS_ISO_8859_14,
00239 *QCS_ISO_8859_15,
00240 *QCS_ISO_8859_16,
00241 *QCS_KOI8_R,
00242 *QCS_KOI8_U,
00243 *QCS_KOI7;
00244
00245 #endif // _QORE_CHARSET_H