c++-gtk-utils
|
00001 /* Copyright (C) 2005 to 2010 Chris Vine 00002 00003 The library comprised in this file or of which this file is part is 00004 distributed by Chris Vine under the GNU Lesser General Public 00005 License as follows: 00006 00007 This library is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU Lesser General Public License 00009 as published by the Free Software Foundation; either version 2.1 of 00010 the License, or (at your option) any later version. 00011 00012 This library is distributed in the hope that it will be useful, but 00013 WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 Lesser General Public License, version 2.1, for more details. 00016 00017 You should have received a copy of the GNU Lesser General Public 00018 License, version 2.1, along with this library (see the file LGPL.TXT 00019 which came with this source code package in the c++-gtk-utils 00020 sub-directory); if not, write to the Free Software Foundation, Inc., 00021 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00022 00023 */ 00024 00025 #ifndef CGU_REASSEMBLER_H 00026 #define CGU_REASSEMBLER_H 00027 00028 #include <c++-gtk-utils/shared_handle.h> 00029 #include <c++-gtk-utils/cgu_config.h> 00030 00031 namespace Cgu { 00032 00033 namespace Utf8 { 00034 00035 00036 /** 00037 * @class Reassembler reassembler.h c++-gtk-utils/reassembler.h 00038 * @brief A class for reassembling UTF-8 strings sent over pipes and 00039 * sockets so they form complete valid UTF-8 characters. 00040 * 00041 * Utf8::Reassembler is a functor class which takes in a partially 00042 * formed UTF-8 string and returns a null terminated string comprising 00043 * such of the input string (after inserting, at the beginning, any 00044 * partially formed UTF-8 character which was at the end of the input 00045 * string passed in previous calls to the functor) as forms complete 00046 * UTF-8 characters (storing any partial character at the end for the 00047 * next call to the functor). If the input string contains invalid 00048 * UTF-8 after adding any stored previous part character (apart from 00049 * any partially formed character at the end of the input string) then 00050 * operator() will return a null Cgu::SharedHandle<char*> object (that 00051 * is, Cgu::SharedHandle<char*>::get() will return 0). Such input 00052 * will not be treated as invalid if it consists only of a single 00053 * partly formed UTF-8 character which could be valid if further bytes 00054 * were received and added to it. In that case the returned 00055 * SharedHandle<char*> object will contain an allocated string of zero 00056 * length (apart from the terminating 0 character), rather than a NULL 00057 * pointer. 00058 * 00059 * This enables UTF-8 strings to be sent over pipes, sockets, etc and 00060 * displayed in a GTK+ object at the receiving end 00061 * 00062 * Note that for efficiency reasons the memory held in the returned 00063 * Cgu::SharedHandle<char*> object may be greater than the length of 00064 * the null-terminated string that is contained in that memory: just 00065 * let the Cgu::SharedHandle<char*> object manage the memory, and use 00066 * the contents like any other null-terminated string. 00067 * 00068 * This class is not needed if std::getline(), with its default '\\n' 00069 * delimiter, is used to read UTF-8 characters using, say, 00070 * Cgu::fdistream, because a whole '\\n' delimited line of UTF-8 00071 * characters will always be complete. 00072 * 00073 * This is an example of its use, reading from a pipe until it is 00074 * closed by the writer and putting the received text in a 00075 * GtkTextBuffer object: 00076 * @code 00077 * using namespace Cgu; 00078 * 00079 * GtkTextIter end; 00080 * GtkTextBuffer* text_buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_view)); 00081 * gtk_text_buffer_get_end_iter(text_buffer, &end); 00082 * 00083 * Utf8::Reassembler reassembler; 00084 * const int BSIZE = 1024; 00085 * char read_buffer[BSIZE]; 00086 * ssize_t res; 00087 * do { 00088 * res = ::read(fd, read_buffer, BSIZE); 00089 * if (res > 0) { 00090 * SharedHandle<char*> utf8(reassembler(read_buffer, res)); 00091 * if (utf8.get()) { 00092 * gtk_text_buffer_insert(text_buffer, &end, 00093 * utf8.get(), std::strlen(utf8)); 00094 * } 00095 * else std::cerr << "Invalid utf8 text sent over pipe\n"; 00096 * } 00097 * } while (res && (res != -1 || errno == EINTR)); 00098 * @endcode 00099 */ 00100 00101 class Reassembler { 00102 size_t stored; 00103 const static size_t buff_size = 6; 00104 char buffer[buff_size]; 00105 char* join_buffer(const char*, size_t); 00106 public: 00107 /** 00108 * Takes a byte array of wholly or partly formed UTF-8 characters to 00109 * be converted (after taking account of previous calls to the method) 00110 * to a valid string of wholly formed characters. 00111 * @param input The input array. 00112 * @param size The number of bytes in the input (not the number of 00113 * UTF-8 characters). 00114 * @return A Cgu::SharedHandle<char*> object holding a null terminated 00115 * string comprising such of the input (after inserting, at the 00116 * beginning, any partially formed UTF-8 character which was at the 00117 * end of the input passed in previous calls to the functor) as forms 00118 * complete UTF-8 characters (storing any partial character at the end 00119 * for the next call to the functor). If the input is invalid after 00120 * such recombination, then a null Cgu::SharedHandle<char*> object is 00121 * returned (that is, Cgu::SharedHandle<char*>::get() will return 0). 00122 * Such input will not be treated as invalid if it consists only of a 00123 * single partly formed UTF-8 character which could be valid if 00124 * further bytes were received and added to it. In that case the 00125 * returned Cgu::SharedHandle<char*> object will contain an allocated 00126 * string of zero length (apart from the terminating 0 character), 00127 * rather than a NULL pointer. 00128 * @exception std::bad_alloc The method might throw std::bad_alloc if 00129 * memory is exhausted and the system throws in that case. It will 00130 * not throw any other exception. 00131 */ 00132 Cgu::SharedHandle<char*> operator()(const char* input, size_t size); 00133 00134 /** 00135 * Gets the number of bytes of a partially formed UTF-8 character 00136 * stored for the next call to operator()(). It will not throw. 00137 * @return The number of bytes. 00138 */ 00139 size_t get_stored() const {return stored;} 00140 00141 /** 00142 * Resets the Reassembler, by discarding any partially formed UTF-8 00143 * character from previous calls to operator()(). It will not throw. 00144 */ 00145 void reset() {stored = 0;} 00146 00147 /** 00148 * The constructor will not throw. 00149 */ 00150 Reassembler(): stored(0) {} 00151 00152 /* Only has effect if --with-glib-memory-slices-compat or 00153 * --with-glib-memory-slices-no-compat option picked */ 00154 CGU_GLIB_MEMORY_SLICES_FUNCS 00155 }; 00156 00157 } // namespace Utf8 00158 00159 } // namespace Cgu 00160 00161 #endif