CrystalSpace

Public API Reference

csutil/csuctransform.h

Go to the documentation of this file.
00001 /*
00002     Copyright (C) 2003 by Frank Richter
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public
00015     License along with this library; if not, write to the Free
00016     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00017 */
00018 
00019 #ifndef __CS_CSUCTRANSFORM_H__
00020 #define __CS_CSUCTRANSFORM_H__
00021 
00022 #include "csunicode.h"
00023 
// Maximum number of UTF-8 code units produced for one code point; this matches
// the longest (4-unit) sequence that EncodeUTF8 in this file emits.
00035 #define CS_UC_MAX_UTF8_ENCODED          4  /* 6 to encode 32 bit */
00036 
// Maximum number of UTF-16 code units for one code point (a surrogate pair).
00040 #define CS_UC_MAX_UTF16_ENCODED         2
00041 
// Maximum number of UTF-32 code units for one code point.
00045 #define CS_UC_MAX_UTF32_ENCODED         1
00046 
// NOTE(review): presumably the largest number of code points one
// MapToUpper/MapToLower/MapToFold call can produce -- confirm with the
// implementation, which is not visible in this header.
00050 #define CS_UC_MAX_MAPPED                3
00051 
// Flag bits. NOTE(review): presumably these are the values accepted by the
// `flags` parameter of csUnicodeTransform::MapToUpper/MapToLower/MapToFold --
// confirm against the implementation.
00055 enum
00056 {
// Bit 0. The effect of this flag is not visible in this header; the name
// suggests a "simple" mapping is requested -- TODO confirm.
00062   csUcMapSimple = (1 << 0)
00063 };
00064 
00068 class csUnicodeTransform
00069 {
00070 public:
// FAIL(ret): failure exit used by the *Decode functions below. Sets *isValid
// to false (when isValid is non-null), stores CS_UC_CHAR_REPLACER into `ch`
// and returns `ret` from the ENCLOSING function. Requires `isValid` and `ch`
// to be in scope where it is expanded.
00071 #define FAIL(ret)                               \
00072   {                                             \
00073     if (isValid) *isValid = false;              \
00074     ch = CS_UC_CHAR_REPLACER;                   \
00075     return ret;                                 \
00076   }
00077 
// SUCCEED: success exit. Sets *isValid to true (when isValid is non-null) and
// returns `chUsed` from the enclosing function. It expands to two statements
// without braces, so it must not be used as the body of an unbraced `if`.
00078 #define SUCCEED                                 \
00079     if (isValid) *isValid = true;               \
00080     return chUsed;
00081   
// GET_NEXT(next): fetch the next code unit from `str` into `next`, advancing
// `str` and incrementing `chUsed`. Exits through FAIL(chUsed) when `chUsed`
// has reached `strlen` or when the fetched unit is 0. In the latter case
// `str` has already been advanced but `chUsed` has not been incremented.
00082 #define GET_NEXT(next)  \
00083   if ((size_t)chUsed == strlen)                 \
00084   {                                             \
00085     FAIL(chUsed);                               \
00086   }                                             \
00087   next = *str++;                                \
00088   if (next == 0)                                \
00089   {                                             \
00090     FAIL(chUsed);                               \
00091   }                                             \
00092   chUsed++;                                     
00093   
// Decode one code point from the UTF-8 code units at `str` into `ch`.
//   str           - input code units; a null pointer fails with return value 0.
//   strlen        - maximum number of code units that may be read from `str`
//                   (the name shadows the C library function inside this body).
//   ch            - receives the decoded code point, or CS_UC_CHAR_REPLACER on
//                   failure.
//   isValid       - optional; set to true on success and false on failure.
//   returnNonChar - when false, noncharacters and surrogates are rejected.
// Returns the number of code units consumed. Every failure path leaves through
// the FAIL macro and every success path through SUCCEED, so control returns
// from inside those macro expansions.
00112   inline static int UTF8Decode (const utf8_char* str, size_t strlen, 
00113     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00114   {
00115     if (str == 0)
00116     {
00117       FAIL(0);
00118     }
00119     int chUsed = 0;
00120     
00121     utf8_char curCh;
00122     GET_NEXT(curCh);
00123     if ((curCh & 0x80) == 0)
00124     {
00125       // easy case
00126       ch = curCh;
00127       SUCCEED;
00128     }
00129     else
00130     {
00131       // Count with how many bytes this char is encoded.
00132       int n = 0;
00133       while ((n < 7) && ((curCh & (1 << (7 - n))) != 0)) { n++; } // n = number of leading 1 bits
00134 
00135       if ((n < 2) || (n > 6))
00136       {
00137         // Invalid code: first char of a "sequence" must have
00138         // at least two and at most six MSBs set
00139         FAIL(1);
00140       }
00141 
00142       ch = (curCh & ((1 << (8 - n)) - 1)); // keep the low (8 - n) bits of the lead byte
00143       
00144       for (int i = 1; i < n; i++)
00145       {
00146         GET_NEXT(curCh);
00147         if ((curCh & 0xc0) != 0x80) // trailing bytes must have the form 10xxxxxx
00148         {
00149           FAIL(chUsed);
00150         }
00151         else
00152         {
00153           ch <<= 6;
00154           ch |= (curCh & 0x3f);
00155         }
00156       }
00157       
00158       // Check if in Unicode range.
00159       if (ch > CS_UC_LAST_CHAR)
00160       {
00161         FAIL(chUsed);
00162       }
00163 
00164       // Check for "overlong" codes.
00165       if ((ch < 0x80) && (n > 0)) // n >= 2 on this path, so (n > 0) always holds
00166       {
00167         FAIL(chUsed);
00168       }
00169       else if ((ch < 0x800) && (n > 2))
00170       {
00171         FAIL(chUsed);
00172       }
00173       else if ((ch < 0x10000) && (n > 3))
00174       {
00175         FAIL(chUsed);
00176       }
00177       else if ((ch < 0x200000) && (n > 4))
00178       {
00179         FAIL(chUsed);
00180       }
00181       /* 
00182       else if ((ch < 0x4000000) && (n > 5))
00183       {
00184         FAIL(chUsed);
00185       }
00186       else if ((ch < 0x80000000) && (n > 6))
00187       {
00188         FAIL(chUsed);
00189       }
00190       */
00191       
00192       if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00193         || CS_UC_IS_SURROGATE(ch)))
00194         FAIL(chUsed);
00195       SUCCEED;
00196     }
00197   }
00198   
// Decode one code point from the UTF-16 code units at `str` into `ch`.
// Parameters and return value follow UTF8Decode: `strlen` bounds the number of
// units read, `isValid` (optional) receives the success flag, and
// `returnNonChar` disables the rejection of noncharacters and surrogates.
// A leading surrogate that is not a high surrogate fails; a high surrogate
// that is not followed by a low surrogate fails with return value 1.
00203   inline static int UTF16Decode (const utf16_char* str, size_t strlen, 
00204     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00205   {
00206     if (str == 0)
00207     {
00208       FAIL(0);
00209     }
00210     int chUsed = 0;
00211     
00212     utf16_char curCh;
00213     GET_NEXT(curCh);
00214     // Decode surrogate
00215     if (CS_UC_IS_SURROGATE (curCh))
00216     {
00217       // Invalid code
00218       if (!CS_UC_IS_HIGH_SURROGATE (curCh))
00219       {
00220         FAIL(chUsed);
00221       }
00222       ch = 0x10000 + ((curCh & 0x03ff) << 10); // upper 10 payload bits; low 10 bits stay zero
00223       GET_NEXT(curCh);
00224       // Invalid code
00225       if (!CS_UC_IS_LOW_SURROGATE (curCh))
00226       {
00227         // Fail with 1 so the char is handled upon the next Decode.
00228         FAIL(1);
00229       }
00230       ch |= (curCh & 0x3ff); // lower 10 payload bits
00231     }
00232     else
00233     {
00234       ch = curCh;
00235     }
00236     if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00237       || CS_UC_IS_SURROGATE(ch)))
00238       FAIL(chUsed);
00239     SUCCEED;
00240   }
00241   
// "Decode" one code point from the UTF-32 code units at `str` into `ch`.
// The unit is read directly into `ch` by GET_NEXT and then validated: values
// above CS_UC_LAST_CHAR always fail, and noncharacters/surrogates fail unless
// `returnNonChar` is true. Returns the number of units consumed; a null `str`
// fails with return value 0. `isValid` (optional) receives the success flag.
00246   inline static int UTF32Decode (const utf32_char* str, size_t strlen, 
00247     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00248   {
00249     if (str == 0)
00250     {
00251       FAIL(0);
00252     }
00253     int chUsed = 0;
00254     
00255     GET_NEXT(ch);
00256     if ((!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00257       || CS_UC_IS_SURROGATE(ch))) || (ch > CS_UC_LAST_CHAR))
00258       FAIL(chUsed);
00259     SUCCEED;
00260   }
00261 
// Overload set that selects the decoder from the code unit type of `str`.
// Each overload forwards all arguments unchanged and returns the result.
// utf8_char input: forwards to UTF8Decode.
00266   inline static int Decode (const utf8_char* str, size_t strlen, 
00267     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00268   {
00269     return UTF8Decode (str, strlen, ch, isValid, returnNonChar);
00270   }
// utf16_char input: forwards to UTF16Decode.
00275   inline static int Decode (const utf16_char* str, size_t strlen, 
00276     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00277   {
00278     return UTF16Decode (str, strlen, ch, isValid, returnNonChar);
00279   }
// utf32_char input: forwards to UTF32Decode.
00284   inline static int Decode (const utf32_char* str, size_t strlen, 
00285     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00286   {
00287     return UTF32Decode (str, strlen, ch, isValid, returnNonChar);
00288   }
00289 
00291 #undef FAIL
00292 #undef SUCCEED
00293 #undef GET_NEXT
00294 
// _OUTPUT_CHAR(buf, chr): emit one code unit. While `bufRemaining` is non-zero
// the unit is stored through `buf` (only if `buf` is non-null) and
// `bufRemaining` is decremented. `encodedLen` is incremented in every case, so
// it counts the units required even when they did not fit.
// Requires `bufRemaining` and `encodedLen` in scope at the expansion site.
// NOTE(review): an identifier starting with underscore + uppercase letter is
// reserved for the implementation.
00297 #define _OUTPUT_CHAR(buf, chr)                          \
00298   if (bufRemaining > 0)                                 \
00299   {                                                     \
00300     if(buf) *buf++ = chr;                               \
00301     bufRemaining--;                                     \
00302   }                                                     \
00303   encodedLen++;
00304 
// Shorthand used by the Encode* functions, whose output pointer is named `buf`.
00305 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(buf, chr)
00306   
// Encode code point `ch` as UTF-8 into `buf`.
//   buf           - output buffer; may be null, in which case nothing is stored.
//   bufsize       - capacity of `buf` in code units; units that do not fit are
//                   dropped but still counted.
//   allowNonchars - when false, noncharacters and surrogates are refused.
// Returns the number of code units the encoding needs (1..4), or 0 when `ch`
// is refused or lies above CS_UC_LAST_CHAR.
00320   inline static int EncodeUTF8 (const utf32_char ch, utf8_char* buf, 
00321     size_t bufsize, bool allowNonchars = false)
00322   {
00323     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00324       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00325       return 0;
00326     size_t bufRemaining = bufsize;
00327     int encodedLen = 0;
00328     
00329     if (ch < 0x80) // 1 unit: 0xxxxxxx
00330     {
00331       OUTPUT_CHAR ((utf8_char)ch);
00332     }
00333     else if (ch < 0x800) // 2 units: 110xxxxx 10xxxxxx
00334     {
00335       OUTPUT_CHAR ((utf8_char)(0xc0 | (ch >> 6)));
00336       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00337     }
00338     else if (ch < 0x10000) // 3 units: 1110xxxx + two trailing bytes
00339     {
00340       OUTPUT_CHAR ((utf8_char)(0xe0 | (ch >> 12)));
00341       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00342       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00343     }
00344     else if (ch < 0x200000) // 4 units: 11110xxx + three trailing bytes
00345     {
00346       OUTPUT_CHAR ((utf8_char)(0xf0 | (ch >> 18)));
00347       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00348       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00349       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00350     }
00351     /*
00352     else if (ch < 0x4000000)
00353     {
00354       OUTPUT_CHAR ((utf8_char)(0xf8 | (ch >> 24)));
00355       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f)));
00356       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00357       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00358       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00359     }
00360     else if (ch < 0x80000000)
00361     {
00362       OUTPUT_CHAR ((utf8_char)(0xfc | (ch >> 30)));
00363       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 24) & 0x3f)));
00364       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f)));
00365       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00366       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00367       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00368     }
00369     */
00370     return encodedLen;
00371   }
00372     
// Encode code point `ch` as UTF-16 into `buf`.
//   buf           - output buffer; may be null, in which case nothing is stored.
//   bufsize       - capacity of `buf` in code units; units that do not fit are
//                   dropped but still counted.
//   allowNonchars - when false, noncharacters and surrogates are refused.
// Returns the number of code units the encoding needs (1, or 2 for a surrogate
// pair), or 0 when `ch` is refused or cannot be represented in UTF-16.
00377   inline static int EncodeUTF16 (const utf32_char ch, utf16_char* buf, 
00378     size_t bufsize, bool allowNonchars = false)
00379   {
00380     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00381       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00382       return 0;
00383     size_t bufRemaining = bufsize;
00384     int encodedLen = 0;
00385     
00386     if (ch < 0x10000)
00387     {
00388       OUTPUT_CHAR((utf16_char)ch);
00389     }
// Surrogate pairs cover U+10000..U+10FFFF: after subtracting 0x10000 the value
// fits in 20 bits. The bound used to be 0x100000, which wrongly rejected all of
// plane 16 (U+100000..U+10FFFF).
00390     else if (ch < 0x110000)
00391     {
00392       utf32_char ch_shifted = ch - 0x10000;
00393       OUTPUT_CHAR((utf16_char)((ch_shifted >> 10) 
00394         | CS_UC_CHAR_HIGH_SURROGATE_FIRST));
00395       OUTPUT_CHAR((utf16_char)((ch_shifted & 0x3ff) 
00396         | CS_UC_CHAR_LOW_SURROGATE_FIRST));
00397     }
00398     else
00399       return 0;
00400     
00401     return encodedLen;
00402   }
00403 
// Encode code point `ch` as UTF-32 into `buf`: a single code unit equal to
// `ch`. Returns 1, or 0 when `ch` lies above CS_UC_LAST_CHAR or is a
// noncharacter/surrogate while `allowNonchars` is false. The unit is stored
// only if `buf` is non-null and `bufsize` is non-zero; the return value is 1
// either way.
00408   inline static int EncodeUTF32 (const utf32_char ch, utf32_char* buf, 
00409     size_t bufsize, bool allowNonchars = false)
00410   {
00411     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00412       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00413       return 0;
00414     size_t bufRemaining = bufsize;
00415     int encodedLen = 0;
00416     
00417     OUTPUT_CHAR(ch);
00418     
00419     return encodedLen;
00420   }
00421 
// Overload set that selects the encoder from the code unit type of `buf`.
// Each overload forwards all arguments unchanged and returns the result.
// utf8_char output: forwards to EncodeUTF8.
00426   inline static int Encode (const utf32_char ch, utf8_char* buf, 
00427     size_t bufsize, bool allowNonchars = false)
00428   {
00429     return EncodeUTF8 (ch, buf, bufsize, allowNonchars);
00430   }
// utf16_char output: forwards to EncodeUTF16.
00435   inline static int Encode (const utf32_char ch, utf16_char* buf, 
00436     size_t bufsize, bool allowNonchars = false)
00437   {
00438     return EncodeUTF16 (ch, buf, bufsize, allowNonchars);
00439   }
// utf32_char output: forwards to EncodeUTF32.
00444   inline static int Encode (const utf32_char ch, utf32_char* buf, 
00445     size_t bufsize, bool allowNonchars = false)
00446   {
00447     return EncodeUTF32 (ch, buf, bufsize, allowNonchars);
00448   }
00450 #undef OUTPUT_CHAR
00451   
00454 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(dest, chr)
00455   
// UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder): generates a
// static string conversion function
//   size_t funcName (toType* dest, size_t destSize,
//                    const fromType* source, size_t srcSize = (size_t)-1)
// which decodes `source` one code point at a time with `decoder` and
// re-encodes it with `encoder`.
//   - srcSize == (size_t)-1 means `source` is zero-terminated; its length is
//     measured first.
//   - One unit of `dest` is reserved for the terminator, so at most
//     destSize - 1 units of payload are stored.
//   - A code point the encoder refuses (returns 0) is replaced by
//     CS_UC_CHAR_REPLACER.
//   - The result is the number of units the complete conversion needs,
//     terminator included, even when `dest` was too small or null.
//   - Returns 0 without touching `dest` when `source` is null or srcSize is 0.
// The terminator is written only when destSize > 0; previously it was also
// written through `dest` for destSize == 0, i.e. past the end of a zero-sized
// buffer.
00456 #define UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder)    \
00457   inline static size_t funcName (toType* dest, size_t destSize,         \
00458     const fromType* source, size_t srcSize = (size_t)-1)                \
00459   {                                                                     \
00460     if ((srcSize == 0) || (source == 0))                                \
00461       return 0;                                                         \
00462                                                                         \
00463     size_t bufRemaining = (destSize > 0) ? destSize - 1 : 0;            \
00464     size_t encodedLen = 0;                                              \
00465                                                                         \
00466     size_t srcChars = srcSize;                                          \
00467                                                                         \
00468     if (srcSize == (size_t)-1)                                          \
00469     {                                                                   \
00470       srcChars = 0;                                                     \
00471       const fromType* sptr = source;                                    \
00472       while (*sptr++ != 0) srcChars++;                                  \
00473     }                                                                   \
00474                                                                         \
00475     while (srcChars > 0)                                                \
00476     {                                                                   \
00477       utf32_char ch;                                                    \
00478       int scnt = decoder (source, srcChars, ch, 0);                     \
00479       if (scnt == 0) break;                                             \
00480       int dcnt = encoder (ch, dest, bufRemaining);                      \
00481       if (dcnt == 0)                                                    \
00482       {                                                                 \
00483         dcnt = encoder (CS_UC_CHAR_REPLACER, dest, bufRemaining);       \
00484       }                                                                 \
00485                                                                         \
00486       if ((size_t)dcnt >= bufRemaining)                                 \
00487       {                                                                 \
00488         if (dest && (destSize > 0)) dest += bufRemaining;               \
00489         bufRemaining = 0;                                               \
00490       }                                                                 \
00491       else                                                              \
00492       {                                                                 \
00493         bufRemaining -= dcnt;                                           \
00494         if (dest && (destSize > 0)) dest += dcnt;                       \
00495       }                                                                 \
00496       encodedLen += dcnt;                                               \
00497       if ((size_t)scnt >= srcChars) break;                              \
00498       srcChars -= scnt;                                                 \
00499       source += scnt;                                                   \
00500     }                                                                   \
00501                                                                         \
00502     if (dest && (destSize > 0)) *dest = 0;                              \
00503                                                                         \
00504     return encodedLen + 1;                                              \
00505   }
00506 
// Instantiate the six string converters from UCTF_CONVERTER. Each one pairs
// the decoder for its source encoding with the encoder for its destination
// encoding.
// From UTF-8:
00522   UCTF_CONVERTER (UTF8to16, utf8_char, UTF8Decode, utf16_char, EncodeUTF16);
00527   UCTF_CONVERTER (UTF8to32, utf8_char, UTF8Decode, utf32_char, EncodeUTF32);
00528 
// From UTF-16:
00533   UCTF_CONVERTER (UTF16to8, utf16_char, UTF16Decode, utf8_char, EncodeUTF8);
00538   UCTF_CONVERTER (UTF16to32, utf16_char, UTF16Decode, utf32_char, EncodeUTF32);
00539   
// From UTF-32:
00544   UCTF_CONVERTER (UTF32to8, utf32_char, UTF32Decode, utf8_char, EncodeUTF8);
00549   UCTF_CONVERTER (UTF32to16, utf32_char, UTF32Decode, utf16_char, EncodeUTF16);
00552 #undef UCTF_CONVERTER
00553 #undef OUTPUT_CHAR
00554 #undef _OUTPUT_CHAR
00555 
00556 #if (CS_WCHAR_T_SIZE == 1)
// UTF-8 to wchar_t for builds where CS_WCHAR_T_SIZE == 1: the code units are
// copied verbatim.
//   dest/destSize - output buffer and its capacity in units; nothing is written
//                   when dest is null or destSize is 0.
//   source        - input code units.
//   srcSize       - number of input units, or (size_t)-1 for a zero-terminated
//                   string whose length is measured here.
// Copies at most destSize - 1 units and appends a terminator. Returns the
// number of units needed for the whole input, terminator included.
// The copy length is `len`; the original referred to an undeclared `size`.
00557   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00558     const utf8_char* source, size_t srcSize)
00559   {
00560     size_t srcChars = srcSize;                                          
00561     if (srcSize == (size_t)-1)                                          
00562     {                                                                   
00563       srcChars = 0;                                                     
00564       const utf8_char* sptr = source;                                   
00565       while (*sptr++ != 0) srcChars++;                                  
00566     }                           
00567     if ((dest != 0) && (destSize != 0))
00568     {
00569       size_t len = MIN (destSize - 1, srcChars);
00570       memcpy (dest, source, len * sizeof (wchar_t));
00571       *(dest + len) = 0;
00572     }
00573     return srcChars + 1;
00574   };
00575 
// UTF-16 to wchar_t for builds where CS_WCHAR_T_SIZE == 1: forwards to
// UTF16to8 with `dest` reinterpreted as a utf8_char buffer.
00576   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00577     const utf16_char* source, size_t srcSize)
00578   {
00579     return UTF16to8 ((utf8_char*)dest, destSize, source, srcSize);
00580   };
00581 
// UTF-32 to wchar_t for builds where CS_WCHAR_T_SIZE == 1: forwards to
// UTF32to8 with `dest` reinterpreted as a utf8_char buffer.
00582   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00583     const utf32_char* source, size_t srcSize)
00584   {
00585     return UTF32to8 ((utf8_char*)dest, destSize, source, srcSize);
00586   };
00587   
// wchar_t to UTF-8 for builds where CS_WCHAR_T_SIZE == 1: the code units are
// copied verbatim. srcSize == (size_t)-1 means `source` is zero-terminated and
// its length is measured here. At most destSize - 1 units are copied and a
// terminator is appended; nothing is written when dest is null or destSize is
// 0. Returns the number of units needed for the whole input, terminator
// included.
// NOTE(review): MIN and memcpy are not declared in this header; they are
// expected to come from headers included elsewhere.
00588   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00589     const wchar_t* source, size_t srcSize)
00590   {
00591     size_t srcChars = srcSize;                                          
00592     if (srcSize == (size_t)-1)                                          
00593     {                                                                   
00594       srcChars = 0;                                                     
00595       const wchar_t* sptr = source;                                     
00596       while (*sptr++ != 0) srcChars++;                                  
00597     }                           
00598     if ((dest != 0) && (destSize != 0))
00599     {
00600       size_t len = MIN (destSize - 1, srcChars);
00601       memcpy (dest, source, len * sizeof (wchar_t));
00602       *(dest + len) = 0;
00603     }
00604     return srcChars + 1;
00605   };
00606 
// wchar_t to UTF-16 for builds where CS_WCHAR_T_SIZE == 1: forwards to
// UTF8to16 with `source` reinterpreted as UTF-8 code units. The explicit cast
// is required because const wchar_t* does not convert implicitly to
// const utf8_char*; the sibling wrappers in the other branches cast likewise.
00607   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00608     const wchar_t* source, size_t srcSize)
00609   {
00610     return UTF8to16 (dest, destSize, (const utf8_char*)source, srcSize);
00611   };
00612 
// wchar_t to UTF-32 for builds where CS_WCHAR_T_SIZE == 1: forwards to
// UTF8to32 with `source` reinterpreted as UTF-8 code units (explicit cast for
// the same reason as in WCtoUTF16).
00613   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00614     const wchar_t* source, size_t srcSize)
00615   {
00616     return UTF8to32 (dest, destSize, (const utf8_char*)source, srcSize);
00617   };
00618 
// Decode overload for wchar_t input when CS_WCHAR_T_SIZE == 1: forwards to
// UTF8Decode with `str` reinterpreted as utf8_char (the C-style cast also
// drops the const qualifier before the call).
00619   inline static int Decode (const wchar_t* str, size_t strlen, 
00620     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00621   {
00622     return UTF8Decode ((utf8_char*)str, strlen, ch, isValid, returnNonChar);
00623   }
// Encode overload for wchar_t output when CS_WCHAR_T_SIZE == 1: forwards to
// EncodeUTF8 with `buf` reinterpreted as a utf8_char buffer.
00624   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00625     size_t bufsize, bool allowNonchars = false)
00626   {
00627     return EncodeUTF8 (ch, (utf8_char*)buf, bufsize, allowNonchars);
00628   }
00629 #elif (CS_WCHAR_T_SIZE == 2)
00630   // The doxygen-documented versions of the methods below live in this
00631   // branch because a wchar_t size of 2 is the default.
00632   
// wchar_t conversions for builds where CS_WCHAR_T_SIZE == 2. wchar_t buffers
// are treated as UTF-16: conversions to/from UTF-8 and UTF-32 forward to the
// generated converters with a pointer cast, and conversions to/from UTF-16 are
// plain copies. Unlike the generated converters, `srcSize` has no default
// argument here.
// UTF-8 -> wchar_t: forwards to UTF8to16.
00639   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00640     const utf8_char* source, size_t srcSize)
00641   {
00642     return UTF8to16 ((utf16_char*)dest, destSize, source, srcSize);
00643   };
00644 
// UTF-16 -> wchar_t: verbatim copy. srcSize == (size_t)-1 means `source` is
// zero-terminated. Copies at most destSize - 1 units plus a terminator; writes
// nothing when dest is null or destSize is 0. Returns the number of units
// needed for the whole input, terminator included.
00649   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00650     const utf16_char* source, size_t srcSize)
00651   {
00652     size_t srcChars = srcSize;                                          
00653     if (srcSize == (size_t)-1)                                          
00654     {                                                                   
00655       srcChars = 0;                                                     
00656       const utf16_char* sptr = source;                                  
00657       while (*sptr++ != 0) srcChars++;                                  
00658     }                           
00659     if ((dest != 0) && (destSize != 0))
00660     {
00661       size_t len = MIN (destSize - 1, srcChars);
00662       memcpy (dest, source, len * sizeof (wchar_t));
00663       *(dest + len) = 0;
00664     }
00665     return srcChars + 1;
00666   };
00667 
// UTF-32 -> wchar_t: forwards to UTF32to16.
00672   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00673     const utf32_char* source, size_t srcSize)
00674   {
00675     return UTF32to16 ((utf16_char*)dest, destSize, source, srcSize);
00676   };
00677   
// wchar_t -> UTF-8: forwards to UTF16to8 (the C-style cast also drops the
// const qualifier of `source` before the call).
00682   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00683     const wchar_t* source, size_t srcSize)
00684   {
00685     return UTF16to8 (dest, destSize, (utf16_char*)source, srcSize);
00686   };
00687 
// wchar_t -> UTF-16: verbatim copy with the same size and return conventions
// as UTF16toWC above.
00692   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00693     const wchar_t* source, size_t srcSize)
00694   {
00695     size_t srcChars = srcSize;                                          
00696     if (srcSize == (size_t)-1)                                          
00697     {                                                                   
00698       srcChars = 0;                                                     
00699       const wchar_t* sptr = source;                                     
00700       while (*sptr++ != 0) srcChars++;                                  
00701     }                           
00702     if ((dest != 0) && (destSize != 0))
00703     {
00704       size_t len = MIN (destSize - 1, srcChars);
00705       memcpy (dest, source, len * sizeof (wchar_t));
00706       *(dest + len) = 0;
00707     }
00708     return srcChars + 1;
00709   };
00710 
// wchar_t -> UTF-32: forwards to UTF16to32.
00715   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00716     const wchar_t* source, size_t srcSize)
00717   {
00718     return UTF16to32 (dest, destSize, (utf16_char*)source, srcSize);
00719   };
00720 
00721   /* Decode()/Encode() overloads for wchar_t.
00722    * - On VC7+, wchar_t may be an unsigned short or the special type __wchar_t.
00723    * - On VC6 wchar_t is always an unsigned short. __wchar_t does not exist.
00724    * Now there may be conflicts with the utf16_char overloads if wchar_t is
00725    * an unsigned short. On the other hand, we would like to support VC7+'s
00726    * built-in wchar_t as well.
00727    * So: on VC7+, provide overloads for __wchar_t, on VC6, don't compile this
00728    * code at all, on other compilers, provide overloads for wchar_t instead
00729    * (by re-#defining __wchar_t). 
00730    */
00731 #if !defined(CS_COMPILER_MSVC) || (_MSC_VER > 1300)
00732 #if !defined(CS_COMPILER_MSVC)
00733   #define __wchar_t wchar_t
00734 #endif  
00735 
// Decode overload for __wchar_t input when CS_WCHAR_T_SIZE == 2: forwards to
// UTF16Decode with `str` reinterpreted as utf16_char. On non-MSVC compilers
// __wchar_t is #defined to wchar_t just above this function.
00739   inline static int Decode (const __wchar_t* str, size_t strlen, 
00740     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00741   {
00742     return UTF16Decode ((utf16_char*)str, strlen, ch, isValid, returnNonChar);
00743   }
// Encode overload for __wchar_t output when CS_WCHAR_T_SIZE == 2: forwards to
// EncodeUTF16 with `buf` reinterpreted as a utf16_char buffer.
00748   inline static int Encode (const utf32_char ch, __wchar_t* buf, 
00749     size_t bufsize, bool allowNonchars = false)
00750   {
00751     return EncodeUTF16 (ch, (utf16_char*)buf, bufsize, allowNonchars);
00752   }
00753 #ifdef __wchar_t
00754   #undef __wchar_t
00755 #endif
00756 #endif
00757 
00758 #elif (CS_WCHAR_T_SIZE == 4)
// wchar_t conversions for builds where CS_WCHAR_T_SIZE == 4. wchar_t buffers
// are treated as UTF-32: conversions to/from UTF-8 and UTF-16 forward to the
// generated converters with a pointer cast, and conversions to/from UTF-32 are
// plain copies.
// UTF-8 -> wchar_t: forwards to UTF8to32.
00759   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00760     const utf8_char* source, size_t srcSize)
00761   {
00762     return UTF8to32 ((utf32_char*)dest, destSize, source, srcSize);
00763   };
00764 
// UTF-16 -> wchar_t: forwards to UTF16to32.
00765   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00766     const utf16_char* source, size_t srcSize)
00767   {
00768     return UTF16to32 ((utf32_char*)dest, destSize, source, srcSize);
00769   };
00770 
// UTF-32 -> wchar_t: verbatim copy. srcSize == (size_t)-1 means `source` is
// zero-terminated. Copies at most destSize - 1 units plus a terminator; writes
// nothing when dest is null or destSize is 0. Returns the number of units
// needed for the whole input, terminator included.
00771   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00772     const utf32_char* source,  size_t srcSize)
00773   {
00774     size_t srcChars = srcSize;                                          
00775     if (srcSize == (size_t)-1)                                          
00776     {                                                                   
00777       srcChars = 0;                                                     
00778       const utf32_char* sptr = source;                                  
00779       while (*sptr++ != 0) srcChars++;                                  
00780     }                           
00781     if ((dest != 0) && (destSize != 0))
00782     {
00783       size_t len = MIN (destSize - 1, srcChars);
00784       memcpy (dest, source, len * sizeof (wchar_t));
00785       *(dest + len) = 0;
00786     }
00787     return srcChars + 1;
00788   };
00789   
// wchar_t -> UTF-8: forwards to UTF32to8 (the C-style cast also drops the
// const qualifier of `source` before the call).
00790   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00791     const wchar_t* source, size_t srcSize)
00792   {
00793     return UTF32to8 (dest, destSize, (utf32_char*)source, srcSize);
00794   };
00795 
// wchar_t -> UTF-16: forwards to UTF32to16.
00796   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00797     const wchar_t* source, size_t srcSize)
00798   {
00799     return UTF32to16 (dest, destSize, (utf32_char*)source, srcSize);
00800   };
00801 
// wchar_t -> UTF-32: verbatim copy with the same size and return conventions
// as UTF32toWC above.
00802   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00803     const wchar_t* source, size_t srcSize)
00804   {
00805     size_t srcChars = srcSize;                                          
00806     if (srcSize == (size_t)-1)                                          
00807     {                                                                   
00808       srcChars = 0;                                                     
00809       const wchar_t* sptr = source;                                     
00810       while (*sptr++ != 0) srcChars++;                                  
00811     }                           
00812     if ((dest != 0) && (destSize != 0))
00813     {
00814       size_t len = MIN (destSize - 1, srcChars);
00815       memcpy (dest, source, len * sizeof (wchar_t));
00816       *(dest + len) = 0;
00817     }
00818     return srcChars + 1;
00819   };
00820 
// Decode overload for wchar_t input: forwards to UTF32Decode.
00821   inline static int Decode (const wchar_t* str, size_t strlen, 
00822     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00823   {
00824     return UTF32Decode ((utf32_char*)str, strlen, ch, isValid, returnNonChar);
00825   }
// Encode overload for wchar_t output: forwards to EncodeUTF32.
00826   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00827     size_t bufsize, bool allowNonchars = false)
00828   {
00829     return EncodeUTF32 (ch, (utf32_char*)buf, bufsize, allowNonchars);
00830   }
00831 #else
00832   #error Odd-sized, unsupported wchar_t!
00833 #endif
00834 
// Determine how many UTF-8 code units to skip to get from `str` past the
// encoded character that starts there.
//   str     - position of the first code unit of the character.
//   maxSkip - number of code units available at `str`; no unit at or beyond
//             str[maxSkip] is read and the result never exceeds maxSkip.
// Returns 0 when maxSkip is 0, 1 for a single-unit character or an invalid
// lead byte, otherwise the lead byte plus the trailing bytes (10xxxxxx) that
// actually follow it, limited by the sequence length announced in the lead
// byte and by maxSkip.
00847   inline static int UTF8Skip (const utf8_char* str, size_t maxSkip)
00848   {
00849     if (maxSkip < 1) return 0;
00850   
00851     if ((*str & 0x80) == 0)
00852     {
00853       return 1;
00854     }
00855     else
00856     {
00857       int n = 0;
00858       while ((n < 7) && ((*str & (1 << (7 - n))) != 0)) { n++; }
00859 
00860       if ((n < 2) || (n > 6))
00861       {
00862         return 1;
00863       }
00864 
00865       int skip = 1;
00866       
00867       for (; skip < n; skip++)
00868       {
// Test the bound BEFORE touching str[skip], and with >=: str[maxSkip] is
// already outside the range the caller allowed. The original read the unit
// first and compared with >, which could read one unit too far and return
// a count larger than maxSkip.
00869         if (((size_t)skip >= maxSkip) || ((str[skip] & 0xc0) != 0x80))
00870         {
00871           break;
00872         }
00873       }
00874       return skip;
00875     }
00876   }
00877   
// Determine how many UTF-8 code units to step back from `str` to reach the
// start of the encoded character that ends just before it.
//   str    - position one past the last code unit of the character.
//   maxRew - number of code units available before `str`; the result never
//            exceeds it.
// Returns 0 when maxRew is 0 and 1 when the preceding unit is a single-unit
// character; otherwise one step plus one more for every trailing byte
// (10xxxxxx) passed on the way back.
00888   inline static int UTF8Rewind (const utf8_char* str, size_t maxRew)
00889   {
00890     if (maxRew == 0) return 0; // maxRew is unsigned, so this equals (maxRew < 1)
00891     
00892     const utf8_char* cursor = str - 1;
00893     if (!(*cursor & 0x80)) return 1;
00894     
00899     // Walk back over trailing bytes until a lead byte or the limit is hit.
00900     int stepped = 1;
00901     for (; (size_t)stepped < maxRew; ++stepped, --cursor)
00902     {
00903       if ((*cursor & 0xc0) != 0x80) break;
00904     }
00905     
00907     return stepped;
00908   }
00909   
// Determine how many UTF-16 code units to skip to get past the character at
// `str`: 2 when `*str` is a high surrogate, otherwise 1, in both cases limited
// to `maxSkip`. `*str` is read even when maxSkip is 0.
00915   inline static int UTF16Skip (const utf16_char* str, size_t maxSkip)
00916   {
00917     const size_t unitCount = CS_UC_IS_HIGH_SURROGATE (*str) ? 2 : 1;
00918     return (int)(MIN(maxSkip, unitCount));
00921   }
00922   
// Determine how many UTF-16 code units to step back from `str` to reach the
// start of the encoded character that ends just before it.
//   str    - position one past the last code unit of the character.
//   maxRew - number of code units available before `str`.
// Returns 0 when maxRew is 0, 2 when the two preceding units form a surrogate
// pair (high surrogate followed by low surrogate) and maxRew permits it, and 1
// otherwise.
00928   inline static int UTF16Rewind (const utf16_char* str, size_t maxRew)
00929   {
00930     if (maxRew < 1) return 0;
00931     
00932     const utf16_char* pos = str - 1;
// Only a LOW surrogate can be the second half of a pair. The original tested
// for any surrogate, so a lone high surrogate preceded by another high
// surrogate was wrongly reported as a two-unit character.
00933     if (!CS_UC_IS_LOW_SURROGATE(*pos)) 
00934       return 1;
00935     else
00936     {
00937       if ((maxRew > 1) && (CS_UC_IS_HIGH_SURROGATE(*(pos - 1))))
00938         return 2;
00939       else
00940         return 1;
00941     }
00942   }
00943   
// Determine how many UTF-32 code units to skip to get past the character at
// `str`: always one unit, limited to `maxSkip`. `str` itself is not inspected.
00949   inline static int UTF32Skip (const utf32_char* str, size_t maxSkip)
00950   {
00951     (void)str; // unused; keeps gcc quiet
00952     const size_t step = MIN(maxSkip, 1);
00953     return (int)step; }
00954 
// Determine how many UTF-32 code units to step back from `str` to reach the
// previous character: 1, or 0 when `maxRew` is 0. `str` itself is not
// inspected.
00960   inline static int UTF32Rewind (const utf32_char* str, size_t maxRew)
00961   {
00962     (void)str; // unused; keeps gcc quiet
00963     return (maxRew < 1) ? 0 : 1;
00965   }
// Character mapping functions. Only the declarations are visible here; the
// definitions live elsewhere. All three take a source code point `ch`, an
// output buffer `dest` with capacity `destSize`, and optional `flags`
// (default 0).
// NOTE(review): presumably the result is the number of code points the
// mapping produces and `flags` accepts csUcMapSimple -- confirm against the
// implementation.
// Upper-case mapping (per the function name).
00980   static size_t MapToUpper (const utf32_char ch, utf32_char* dest, 
00981     size_t destSize, uint flags = 0);
// Lower-case mapping (per the function name).
00986   static size_t MapToLower (const utf32_char ch, utf32_char* dest, 
00987     size_t destSize, uint flags = 0);
// Case folding (per the function name).
00993   static size_t MapToFold (const utf32_char ch, utf32_char* dest, 
00994     size_t destSize, uint flags = 0);
00996 };
00997 
01000 #endif
01001 

Generated for Crystal Space by doxygen 1.4.6