Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

scim_utility.h

Go to the documentation of this file.
00001 /** @file scim_utility.h
00002  *  @brief various utility functions.
00003  */
00004 
00005 /*
00006  * Smart Common Input Method
00007  * 
00008  * Copyright (c) 2004 James Su <suzhe@turbolinux.com.cn>
00009  * Copyright (c) 2003 James Su <suzhe@turbolinux.com.cn>
00010  * Copyright (c) 2002 James Su <suzhe@turbolinux.com.cn>
00011  *
00012  *
00013  * This library is free software; you can redistribute it and/or
00014  * modify it under the terms of the GNU Lesser General Public
00015  * License as published by the Free Software Foundation; either
00016  * version 2 of the License, or (at your option) any later version.
00017  *
00018  * This library is distributed in the hope that it will be useful,
00019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00021  * GNU Lesser General Public License for more details.
00022  *
00023  * You should have received a copy of the GNU Lesser General Public
00024  * License along with this program; if not, write to the
00025  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
00026  * Boston, MA  02111-1307  USA
00027  *
00028  * $Id: scim_utility.h,v 1.18 2004/04/22 10:19:38 suzhe Exp $
00029  */
00030 
00031 #ifndef __SCIM_UTILITY_H
00032 #define __SCIM_UTILITY_H
00033 
00034 namespace scim {
00035 /**
00036  * @addtogroup Helper
00037  * @{
00038  */
00039 
00040 #define SCIM_PATH_DELIM_STRING "/"
00041 #define SCIM_PATH_DELIM        '/'
00042 
00043 // Return codes and replacement characters for the UTF-8 <-> ucs4_t converters.
00044 
00045 /* Return code if the input sequence or wide character is invalid. (xxx_mbtowc, xxx_wctomb) */
00046 #define RET_ILSEQ      0
00047 /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
00048 #define RET_TOOFEW(n)  (-1-(n))
00049 /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) Parenthesized like RET_TOOFEW. */
00050 #define RET_TOOSMALL   (-1)
00051 /* Replacement character for invalid multibyte sequence or wide character. */
00052 #define BAD_WCHAR ((ucs4_t) 0xfffd)
00053 #define BAD_CHAR '?'
00054 
00055 /**
00056  * @brief Convert an utf8 char sequence to ucs4.
00057  * 
00058  * @param pwc destination buffer to store the ucs4 code.
00059  * @param src source buffer containing the utf8 char sequence.
00060  * @param src_len the size of source buffer.
00061  *
00062  * @return number of chars in src actually converted.
00063  */ 
00064 int utf8_mbtowc (ucs4_t *pwc, const unsigned char *src, int src_len);
00065 
00066 /**
00067  * @brief Convert an ucs4 code to utf8 char sequence.
00068  *
00069  * @param dest destination buffer to store utf8 char sequence.
00070  * @param wc the ucs4 code to be converted.
00071  * @param dest_size the size of destination buffer.
00072  *
00073  * @return the number of bytes actually written into dest.
00074  */
00075 int utf8_wctomb (unsigned char *dest, ucs4_t wc, int dest_size);
00076 
00077 /**
00078  * @brief Convert an utf8 string to an ucs4 string.
00079  *
00080  * @param str source utf8 string.
00081  * @return the destination widestring.
00082  */
00083 WideString utf8_mbstowcs (const String & str);
00084 
00085 /**
00086  * @brief Convert an utf8 string to an ucs4 string.
00087  *
00088  * @param str source utf8 string.
00089  * @param len length of the source string.
00090  * @return the destination widestring.
00091  */
00092 WideString utf8_mbstowcs (const char *str, int len = -1);
00093 
00094 /**
00095  * @brief Convert an ucs4 string to an utf8 string.
00096  *
00097  * @param wstr source ucs4 string.
00098  *
00099  * @return the destination utf8 string.
00100  */
00101 String utf8_wcstombs (const WideString & wstr);
00102 
00103 /**
00104  * @brief Convert an ucs4 string to an utf8 string.
00105  *
00106  * @param wstr source ucs4 string.
00107  * @param len length of the source string.
00108  *
00109  * @return the destination utf8 string.
00110  */
00111 String utf8_wcstombs (const ucs4_t *wstr, int len = -1);
00112 
00113 /**
00114  * @brief Read a wide char from istream.
00115  *
00116  * The content of the istream is actually in utf-8 encoding.
00117  * 
00118  * @param is the stream to be read.
00119  *
00120  * @return the wide char read; 0 means the end of the stream was reached or an error occurred.
00121  */
00122 ucs4_t utf8_read_wchar (std::istream &is);
00123 
00124 /**
00125  * @brief Write a wide char to ostream.
00126  *
00127  * The content written into the ostream will be converted into utf-8 encoding.
00128  *
00129  * @param os the stream to be written.
00130  * @param wc the wide char to be written to the stream.
00131  * @return the same stream object reference.
00132  */
00133 std::ostream & utf8_write_wchar (std::ostream &os, ucs4_t wc);
00134 
00135 /**
00136  * @brief Read a wide string from istream.
00137  *
00138  * The content of the istream is actually in utf-8 encoding.
00139  *
00140  * @param is the stream to be read.
00141  * @param delim the delimiter of the string.
00142  * @param rm_delim if the delim should be removed from the destination string.
00143  * @return the wide string read from the given stream.
00144  */
00145 WideString utf8_read_wstring (std::istream &is, ucs4_t delim = (ucs4_t) '\n', bool rm_delim = true);
00146 
00147 /**
00148  * @brief Write a wide string to ostream.
00149  *
00150  * The content written into the ostream will be converted into utf-8 encoding.
00151  *
00152  * @param os the stream to be written.
00153  * @param wstr the wide string to be written into the stream.
00154  * @return the same stream object reference.
00155  */
00156 std::ostream & utf8_write_wstring (std::ostream &os, const WideString & wstr);
00157 
00158 /**
00159  * @brief Serialize an uint32 value into four bytes, least significant byte first.
00160  *
00161  * @param bytes the output buffer; elements 0 to 3 are written.
00162  * @param n the value to be serialized.
00163  */
00164 inline
00165 void scim_uint32tobytes (unsigned char *bytes, uint32 n)
00166 {
00167     // Element idx receives bits 8*idx .. 8*idx+7 of n.
00168     for (int idx = 0; idx < 4; ++idx) {
00169         bytes [idx] = static_cast<unsigned char> ((n >> (8 * idx)) & 0xFF);
00170     }
00171 }
00172 
00173 /**
00174  * @brief Assemble an uint32 value from four bytes stored least significant byte first.
00175  *
00176  * @param bytes the input buffer; elements 0 to 3 are read.
00177  * @return the assembled uint32 value.
00178  */
00179 inline
00180 uint32 scim_bytestouint32 (const unsigned char *bytes)
00181 {
00182     uint32 value = 0;
00183     for (int idx = 3; idx >= 0; --idx)
00184         value = (value << 8) | static_cast<uint32> (bytes [idx]);
00185     return value;
00186 }
00187 
00188 /**
00189  * @brief Serialize an uint16 value into two bytes, least significant byte first.
00190  *
00191  * @param bytes the output buffer; elements 0 and 1 are written.
00192  * @param n the value to be serialized.
00193  */
00194 inline
00195 void scim_uint16tobytes (unsigned char *bytes, uint16 n)
00196 {
00197     bytes [0] = static_cast<unsigned char> (n & 0xFF);          // low byte
00198     bytes [1] = static_cast<unsigned char> ((n >> 8) & 0xFF);   // high byte
00199 }
00200 
00201 /**
00202  * @brief Assemble an uint16 value from two bytes stored least significant byte first.
00203  *
00204  * @param bytes the input buffer; elements 0 and 1 are read.
00205  * @return the assembled uint16 value.
00206  */
00207 inline
00208 uint16 scim_bytestouint16 (const unsigned char *bytes)
00209 {
00210     return static_cast<uint16> (bytes [0] | (bytes [1] << 8));
00211 }
00212 
00213 /**
00214  * @brief Test if the locale is valid, and return the good locale name.
00215  *
00216  * @param locale the locale to be tested.
00217  * @return If the locale is valid, it's the good locale name, otherwise empty.
00218  */
00219 String scim_validate_locale (const String& locale);
00220 
00221 /**
00222  * @brief Get the encoding for a locale.
00223  *
00224  * @param locale the name of the locale.
00225  * @return The encoding used by the given locale.
00226  */
00227 String scim_get_locale_encoding (const String& locale);
00228 
00229 /**
00230  * @brief Get current system locale.
00231  * @return The current system locale.
00232  */
00233 String scim_get_current_locale ();
00234 
00235 /**
00236  * @brief Get the max length of the multibyte char of a locale.
00237  *
00238  * @param locale the name of the locale.
00239  * @return the maxlen of this locale.
00240  */
00241 int scim_get_locale_maxlen (const String& locale);
00242 
00243 /**
00244  * @brief Split string list into a string vector according to the delim char.
00245  *
00246  * @param vec the string vector to store the result.
00247  * @param str the string to be split.
00248  * @param delim the delimiter to split the strings.
00249  * @return the number of the strings in the result list.
00250  */
00251 int scim_split_string_list (std::vector<String>& vec, const String& str, char delim = ',');
00252 
00253 /**
00254  * @brief Combine a string vector into one string list, separated by char delim.
00255  *
00256  * @param vec the string vector which contains the strings to be combined.
00257  * @param delim the delimiter which should be put between two strings.
00258  * @return the result string.
00259  */
00260 String scim_combine_string_list (const std::vector<String>& vec, char delim = ',');
00261 
00262 /**
00263  * @brief Get machine endian type
00264  * @return true if the machine is little endian, false if it is big endian.
00265  */
00266 bool scim_is_little_endian ();
00267 
00268 /**
00269  * @brief Test if wchar_t is using UCS4 encoding.
00270  */
00271 bool scim_if_wchar_ucs4_equal ();
00272 
00273 /**
00274  * @brief Convert a half width unicode char to its full width counterpart. 
00275  */
00276 ucs4_t scim_wchar_to_full_width (ucs4_t code);
00277 
00278 /**
00279  * @brief Convert a full width unicode char to its half width counterpart.
00280  */
00281 ucs4_t scim_wchar_to_half_width (ucs4_t code);
00282 
00283 /**
00284  * @brief Get the home dir of current user.
00285  */
00286 String scim_get_home_dir ();
00287 
00288 /**
00289  * @brief Get the name of current user.
00290  */
00291 String scim_get_user_name ();
00292 
00293 /**
00294  * @brief Load a file into memory.
00295  *
00296  * @param filename the name of the file to be loaded.
00297  * @param bufptr the place to store the newly allocated buffer pointer.
00298  *        If bufptr == NULL then the file is not actually loaded.
00299  *        The pointer *bufptr must be deleted afterwards.
00300  * @return the size of the data actually loaded (mostly, it's the file size),
00301  *         zero means load failed.
00302  */
00303 size_t scim_load_file (const String &filename, char **bufptr);
00304 
00305 /**
00306  * @brief Make a directory.
00307  *
00308  * @param dir the dir path to be created.
00309  *
00310  * @return true on success.
00311  */
00312 bool scim_make_dir (const String &dir);
00313 
00314 /** @} */
00315 
00316 } // namespace scim
00317 
00318 #endif //__SCIM_UTILITY_H
00319 /*
00320 vi:ts=4:nowrap:ai:expandtab
00321 */

Generated on Fri May 7 17:27:25 2004 for scim by doxygen 1.3.6