Gnash  0.8.11dev
utf8.h
Go to the documentation of this file.
1 // utf8.h: utilities for converting to and from UTF-8
2 //
3 // Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 //
19 // Based on the public domain work of Thatcher Ulrich <tu@tulrich.com> 2004
20 
21 #ifndef UTF8_H
22 #define UTF8_H
23 
24 #include <string>
25 #include <boost/cstdint.hpp> // for C99 int types
26 #include <vector>
27 
28 #include "dsodefs.h" // For DSOEXPORT
29 
30 // Android doesn't have any support for wide characters at all.
31 #ifdef __ANDROID__
// Android-only fallback: declares std::wstring by hand as basic_string over
// wchar_t with the default char_traits and allocator.
// NOTE(review): adding declarations to namespace std is formally undefined
// behaviour in standard C++; confirm this workaround is still required by the
// Android toolchains that are supported.
32 namespace std {
33 typedef basic_string
34  <wchar_t
35  ,std::char_traits<wchar_t>
36  ,std::allocator<wchar_t> >
37 wstring;
38 }
39 #endif
40 
41 namespace gnash {
42 
44 //
66 //
70 namespace utf8 {
71 
73  //
77  //
/// Converts a std::string with multibyte characters into a std::wstring.
// NOTE(review): the meaning of `version` is not visible in this header
// (presumably the SWF version); confirm against utf8.cpp.
80  DSOEXPORT std::wstring decodeCanonicalString(const std::string& str, int version);
81 
83  //
/// Converts a std::wstring into canonical std::string.
// NOTE(review): the meaning of `version` is not visible in this header
// (presumably the SWF version); confirm against utf8.cpp.
93  DSOEXPORT std::string encodeCanonicalString(const std::wstring& wstr, int version);
94 
96  //
/// Return the next Unicode character in the UTF-8 encoded string.
// `it` is taken by non-const reference, so it is presumably advanced past the
// decoded character, and `e` presumably marks the end of the input; confirm
// both against utf8.cpp.
101  DSOEXPORT boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it,
102  const std::string::const_iterator& e);
103 
/// Encodes the given wide character into a canonical string, theoretically up
/// to 6 chars in length.
106  DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t ucs_character);
107 
109  //
/// Encodes the given wide character into an at least 8-bit character,
/// returned as a std::string.
112  DSOEXPORT std::string encodeLatin1Character(boost::uint32_t ucsCharacter);
113 
125  };
126 
128  //
/// Interpret (and skip) Byte Order Mark in input stream.
// `size` and `encoding` are non-const references, so they are presumably
// updated to the remaining byte count and the detected encoding, and the
// returned pointer presumably addresses the data after the BOM; confirm
// against utf8.cpp.
151  DSOEXPORT char* stripBOM(char* in, size_t& size, TextEncoding& encoding);
152 
/// Return name of a text encoding.
154  DSOEXPORT const char* textEncodingName(TextEncoding enc);
155 
160  };
161 
163  // Guesses the encoding of random text, between Shift-JIS, UTF-8, and other.
164  // Puts the character count in length, and the offsets to the characters in
165  // offsets (passed by reference, so it cannot be NULL). offsets should be at least s.length().
166  // offsets are not accurate if the return value is GUESSENC_OTHER
167  //
/// Guesses the encoding of the text in `s`, choosing between Shift-JIS, UTF-8
/// and other, and returns the guess as an EncodingGuess.
// `length` and `offsets` are non-const references, so they are presumably
// outputs filled in by the call; confirm their exact contents against utf8.cpp.
170  DSOEXPORT EncodingGuess guessEncoding(const std::string& s, int& length,
171  std::vector<int>& offsets);
172 
173 
174 } // namespace utf8
175 } // namespace gnash
176 
177 #endif // UTF8_H
178 
179 
180 // Local Variables:
181 // mode: C++
182 // c-basic-offset: 8
183 // tab-width: 8
184 // indent-tabs-mode: t
185 // End:
EncodingGuess guessEncoding(const std::string &str, int &length, std::vector< int > &offsets)
Common code for guessing at the encoding of random text, between Shift-Jis, UTF8, and other.
Definition: utf8.cpp:281
Definition: utf8.h:157
SWFStream & s
Definition: DefineBitsTag.cpp:73
Definition: utf8.h:121
Definition: utf8.h:118
Definition: utf8.h:123
Anonymous namespace for callbacks, local functions, event handlers etc.
Definition: dbus_ext.cpp:40
Definition: utf8.h:117
Definition: utf8.h:115
Definition: utf8.h:119
std::string encodeUnicodeCharacter(boost::uint32_t ucs_character)
Encodes the given wide character into a canonical string, theoretically up to 6 chars in length...
Definition: utf8.cpp:165
Definition: utf8.h:158
std::string encodeLatin1Character(boost::uint32_t ucsCharacter)
Encodes the given wide character into an at least 8-bit character.
Definition: utf8.cpp:84
boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator &it, const std::string::const_iterator &e)
Return the next Unicode character in the UTF-8 encoded string.
Definition: utf8.cpp:93
Definition: utf8.h:122
Definition: utf8.h:159
EncodingGuess
Definition: utf8.h:156
Definition: klash_part.cpp:329
char * stripBOM(char *in, size_t &size, TextEncoding &encoding)
Interpret (and skip) Byte Order Mark in input stream.
Definition: utf8.cpp:208
#define DSOEXPORT
Definition: dsodefs.h:55
Definition: utf8.h:116
Definition: GnashKey.h:151
Definition: utf8.h:124
std::string encodeCanonicalString(const std::wstring &wstr, int version)
Converts a std::wstring into canonical std::string.
Definition: utf8.cpp:67
std::wstring decodeCanonicalString(const std::string &str, int version)
Converts a std::string with multibyte characters into a std::wstring.
Definition: utf8.cpp:39
TextEncoding
Definition: utf8.h:114
const char * textEncodingName(TextEncoding enc)
Return name of a text encoding.
Definition: utf8.cpp:262
Definition: utf8.h:120