textcodec.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004-2009 Marc Boris Duerner
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 #ifndef cxxtools_TextCodec_h
29 #define cxxtools_TextCodec_h
30 
31 #include <cxxtools/char.h>
33 #include <string>
34 
35 #ifdef CXXTOOLS_WITH_STD_LOCALE
36 
37 namespace std {
38 
39 template<>
40 class codecvt<cxxtools::Char, char, cxxtools::MBState> : public codecvt_base, public locale::facet
41 {
42  public:
43  static locale::id id;
44  virtual locale::id& __get_id (void) const { return id; }
45 
46  public:
47  explicit codecvt(size_t ref = 0);
48 
49  virtual ~codecvt();
50 
51  codecvt_base::result out(cxxtools::MBState& state,
52  const cxxtools::Char* from,
53  const cxxtools::Char* from_end,
54  const cxxtools::Char*& from_next,
55  char* to,
56  char* to_end,
57  char*& to_next) const
58  { return this->do_out(state, from, from_end, from_next, to, to_end, to_next); }
59 
60  codecvt_base::result unshift(cxxtools::MBState& state,
61  char* to,
62  char* to_end,
63  char*& to_next) const
64  { return this->do_unshift(state, to, to_end, to_next); }
65 
66  codecvt_base::result in(cxxtools::MBState& state,
67  const char* from,
68  const char* from_end,
69  const char*& from_next,
70  cxxtools::Char* to,
71  cxxtools::Char* to_end,
72  cxxtools::Char*& to_next) const
73  { return this->do_in(state, from, from_end, from_next, to, to_end, to_next); }
74 
75  int encoding() const
76  { return this->do_encoding(); }
77 
78  bool always_noconv() const
79  { return this->do_always_noconv(); }
80 
81  int length(cxxtools::MBState& state, const char* from,
82  const char* end, size_t max) const
83  { return this->do_length(state, from, end, max); }
84 
85  int max_length() const
86  { return this->do_max_length(); }
87 
88  protected:
89  virtual codecvt_base::result do_out(cxxtools::MBState& state,
90  const cxxtools::Char* from,
91  const cxxtools::Char* from_end,
92  const cxxtools::Char*& from_next,
93  char* to,
94  char* to_end,
95  char*& to_next) const = 0;
96 
97  virtual codecvt_base::result do_unshift(cxxtools::MBState& state,
98  char* to,
99  char* to_end,
100  char*& to_next) const = 0;
101 
102  virtual codecvt_base::result do_in(cxxtools::MBState& state,
103  const char* from,
104  const char* from_end,
105  const char*& from_next,
106  cxxtools::Char* to,
107  cxxtools::Char* to_end,
108  cxxtools::Char*& to_next) const = 0;
109 
110  virtual int do_encoding() const throw() = 0;
111 
112  virtual bool do_always_noconv() const throw() = 0;
113 
114  virtual int do_length(cxxtools::MBState&,
115  const char* from,
116  const char* end,
117  size_t max) const = 0;
118 
119  virtual int do_max_length() const throw() = 0;
120 };
121 
122 
123 template<>
124 class codecvt<char, char, cxxtools::MBState> : public codecvt_base, public locale::facet
125 {
126  public:
127  static locale::id id;
128  virtual locale::id& __get_id (void) const { return id; }
129 
130  public:
131  explicit codecvt(size_t ref = 0);
132 
133  virtual ~codecvt();
134 
135  codecvt_base::result out(cxxtools::MBState& state,
136  const char* from,
137  const char* from_end,
138  const char*& from_next,
139  char* to,
140  char* to_end,
141  char*& to_next) const
142  { return this->do_out(state, from, from_end, from_next, to, to_end, to_next); }
143 
144  codecvt_base::result unshift(cxxtools::MBState& state,
145  char* to,
146  char* to_end,
147  char*& to_next) const
148  { return this->do_unshift(state, to, to_end, to_next); }
149 
150  codecvt_base::result in(cxxtools::MBState& state,
151  const char* from,
152  const char* from_end,
153  const char*& from_next,
154  char* to, char* to_end,
155  char*& to_next) const
156  { return this->do_in(state, from, from_end, from_next, to, to_end, to_next); }
157 
158  int encoding() const
159  { return this->do_encoding(); }
160 
161  bool always_noconv() const
162  { return this->do_always_noconv(); }
163 
164  int length(cxxtools::MBState& state, const char* from,
165  const char* end, size_t max) const
166  { return this->do_length(state, from, end, max); }
167 
168  int max_length() const
169  { return this->do_max_length(); }
170 
171  protected:
172  virtual codecvt_base::result do_out(cxxtools::MBState& state,
173  const char* from,
174  const char* from_end,
175  const char*& from_next,
176  char* to,
177  char* to_end,
178  char*& to_next) const = 0;
179 
180  virtual codecvt_base::result do_unshift(cxxtools::MBState& state,
181  char* to,
182  char* to_end,
183  char*& to_next) const = 0;
184 
185  virtual codecvt_base::result do_in(cxxtools::MBState& state,
186  const char* from,
187  const char* from_end,
188  const char*& from_next,
189  char* to,
190  char* to_end,
191  char*& to_next) const = 0;
192 
193  virtual int do_encoding() const throw() = 0;
194 
195  virtual bool do_always_noconv() const throw() = 0;
196 
197  virtual int do_length(cxxtools::MBState&,
198  const char* from,
199  const char* end,
200  size_t max) const = 0;
201 
202  virtual int do_max_length() const throw() = 0;
203 };
204 
205 }
206 
207 #else // no CXXTOOLS_WITH_STD_LOCALE
208 
209 namespace std {
210 
/**
 * Stand-in for the standard codecvt_base, compiled only when
 * CXXTOOLS_WITH_STD_LOCALE is not defined.
 *
 * Provides the four conversion status values and the result type used by
 * the codecvt template below. Here result is a plain int and the status
 * values are enumerators of an unnamed enum.
 */
class codecvt_base
{
    public:
        enum { ok, partial, error, noconv };
        typedef int result;

        virtual ~codecvt_base()
        { }
};

/**
 * Stand-in for the standard codecvt class template, compiled only when
 * CXXTOOLS_WITH_STD_LOCALE is not defined.
 *
 * @tparam I  internal character type (exposed as InternT)
 * @tparam E  external character type (exposed as ExternT)
 * @tparam S  conversion state type (exposed as StateT)
 *
 * The public members forward to the protected do_* virtuals, all of which
 * are pure virtual, so a concrete codec has to derive from this class and
 * implement them.
 */
template <typename I, typename E, typename S>
class codecvt : public codecvt_base
{
    public:
        typedef I InternT;
        typedef E ExternT;
        typedef S StateT;

    public:
        /**
         * @param ref  accepted so the constructor has the same shape as the
         *             specializations used with the standard locale; it is
         *             not used by this implementation.
         */
        explicit codecvt(size_t ref = 0)
        { (void) ref; }

        virtual ~codecvt()
        { }

        /** Forwards to do_out(): internal (InternT) to external (ExternT). */
        codecvt_base::result out(StateT& state,
                                 const InternT* from,
                                 const InternT* from_end,
                                 const InternT*& from_next,
                                 ExternT* to,
                                 ExternT* to_end,
                                 ExternT*& to_next) const
        { return this->do_out(state, from, from_end, from_next, to, to_end, to_next); }

        /** Forwards to do_unshift(). */
        codecvt_base::result unshift(StateT& state,
                                     ExternT* to,
                                     ExternT* to_end,
                                     ExternT*& to_next) const
        { return this->do_unshift(state, to, to_end, to_next); }

        /** Forwards to do_in(): external (ExternT) to internal (InternT). */
        codecvt_base::result in(StateT& state,
                                const ExternT* from,
                                const ExternT* from_end,
                                const ExternT*& from_next,
                                InternT* to,
                                InternT* to_end,
                                InternT*& to_next) const
        { return this->do_in(state, from, from_end, from_next, to, to_end, to_next); }

        /** Forwards to do_encoding(). */
        int encoding() const
        { return this->do_encoding(); }

        /** Forwards to do_always_noconv(). */
        bool always_noconv() const
        { return this->do_always_noconv(); }

        /** Forwards to do_length(). */
        int length(StateT& state, const ExternT* from,
                   const ExternT* end, size_t max) const
        { return this->do_length(state, from, end, max); }

        /** Forwards to do_max_length(). */
        int max_length() const
        { return this->do_max_length(); }

    protected:
        // Customisation points; each backs the public member of the same
        // name without the do_ prefix.

        virtual result do_in(StateT& s, const ExternT* fromBegin,
                             const ExternT* fromEnd, const ExternT*& fromNext,
                             InternT* toBegin, InternT* toEnd, InternT*& toNext) const = 0;

        virtual result do_out(StateT& s, const InternT* fromBegin,
                              const InternT* fromEnd, const InternT*& fromNext,
                              ExternT* toBegin, ExternT* toEnd, ExternT*& toNext) const = 0;

        virtual bool do_always_noconv() const = 0;

        virtual int do_length(StateT& s, const ExternT* fromBegin,
                              const ExternT* fromEnd, size_t max) const = 0;

        virtual int do_max_length() const = 0;

        virtual codecvt_base::result do_unshift(StateT&,
                                                ExternT*,
                                                ExternT*,
                                                ExternT*&) const = 0;

        virtual int do_encoding() const = 0;
};
296 
297 }
298 
299 #endif // CXXTOOLS_WITH_STD_LOCALE
300 
301 namespace cxxtools {
302 
325 template <typename I, typename E>
326 class TextCodec : public std::codecvt<I, E, cxxtools::MBState>
327 {
328  public:
329  typedef I InternT;
330  typedef E ExternT;
331 
332  public:
341  explicit TextCodec(size_t ref = 0)
342  : std::codecvt<InternT, ExternT, MBState>(ref)
343  , _refs(ref)
344  {}
345 
346  public:
348  virtual ~TextCodec()
349  {}
350 
351  size_t refs() const
352  { return _refs; }
353 
354  private:
355  size_t _refs;
356 };
357 
363 template <typename CodecType>
364 std::basic_string<typename CodecType::InternT> decode(const typename CodecType::ExternT* data, unsigned size)
365 {
366  CodecType codec;
367 
368  typename CodecType::InternT to[64];
369  MBState state;
370  std::basic_string<typename CodecType::InternT> ret;
371  const typename CodecType::ExternT* from = data;
372 
373  typename CodecType::result r;
374  do
375  {
376  typename CodecType::InternT* to_next = to;
377 
378  const typename CodecType::ExternT* from_next = from;
379  r = codec.in(state, from, from + size, from_next, to, to + sizeof(to)/sizeof(typename CodecType::InternT), to_next);
380 
381  if (r == CodecType::error)
382  throw ConversionError("character conversion failed");
383 
384  if (r == CodecType::partial && from_next == from)
385  throw ConversionError("character conversion failed - unexpected end of input sequence");
386 
387  ret.append(to, to_next);
388 
389  size -= (from_next - from);
390  from = from_next;
391 
392  } while (r == CodecType::partial);
393 
394  return ret;
395 }
396 
/**
 * Decodes a string of external characters into a string of internal
 * characters by delegating to the pointer/size overload of decode().
 *
 * @param data  the external character sequence
 * @return      the decoded string
 */
template <typename CodecType>
std::basic_string<typename CodecType::InternT> decode(const std::basic_string<typename CodecType::ExternT>& data)
{
    const typename CodecType::ExternT* const first = data.data();

    // data.size() is converted to the unsigned size parameter of the callee.
    return decode<CodecType>(first, data.size());
}
415 
416 
417 template <typename CodecType>
418 std::basic_string<typename CodecType::ExternT> encode(const typename CodecType::InternT* data, unsigned size)
419 {
420  CodecType codec;
421  char to[64];
422  MBState state;
423 
424  typename CodecType::result r;
425  const typename CodecType::InternT* from = data;
426  std::basic_string<typename CodecType::ExternT> ret;
427 
428  do{
429  const typename CodecType::InternT* from_next;
430 
431  typename CodecType::ExternT* to_next = to;
432  r = codec.out(state, from, from + size, from_next, to, to + sizeof(to), to_next);
433 
434  if (r == CodecType::error)
435  throw ConversionError("character conversion failed");
436 
437  ret.append(to, to_next);
438 
439  size -= (from_next - from);
440  from = from_next;
441 
442  } while (r == CodecType::partial);
443 
444  typename CodecType::ExternT* to_next = to;
445  r = codec.unshift(state, to, to + sizeof(to), to_next);
446  if (r == CodecType::error)
447  throw ConversionError("character conversion failed");
448 
449  ret.append(to, to_next);
450 
451  return ret;
452 }
453 
/**
 * Encodes a string of internal characters into a string of external
 * characters by delegating to the pointer/size overload of encode().
 *
 * @param data  the internal character sequence
 * @return      the encoded string
 */
template <typename CodecType>
std::basic_string<typename CodecType::ExternT> encode(const std::basic_string<typename CodecType::InternT>& data)
{
    const typename CodecType::InternT* const first = data.data();

    // data.size() is converted to the unsigned size parameter of the callee.
    return encode<CodecType>(first, data.size());
}
471 
472 }
473 
474 #endif