textcodec.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004-2009 Marc Boris Duerner
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 #ifndef cxxtools_TextCodec_h
29 #define cxxtools_TextCodec_h
30 
31 #include <cxxtools/api.h>
32 #include <cxxtools/char.h>
34 #include <string>
35 
36 #ifdef CXXTOOLS_WITH_STD_LOCALE
37 
38 namespace std {
39 
40 template<>
41 class CXXTOOLS_API codecvt<cxxtools::Char, char, cxxtools::MBState> : public codecvt_base, public locale::facet
42 {
43  public:
44  static locale::id id;
45  virtual locale::id& __get_id (void) const { return id; }
46 
47  public:
48  explicit codecvt(size_t ref = 0);
49 
50  virtual ~codecvt();
51 
53  const cxxtools::Char* from,
54  const cxxtools::Char* from_end,
55  const cxxtools::Char*& from_next,
56  char* to,
57  char* to_end,
58  char*& to_next) const
59  { return this->do_out(state, from, from_end, from_next, to, to_end, to_next); }
60 
62  char* to,
63  char* to_end,
64  char*& to_next) const
65  { return this->do_unshift(state, to, to_end, to_next); }
66 
68  const char* from,
69  const char* from_end,
70  const char*& from_next,
71  cxxtools::Char* to,
72  cxxtools::Char* to_end,
73  cxxtools::Char*& to_next) const
74  { return this->do_in(state, from, from_end, from_next, to, to_end, to_next); }
75 
76  int encoding() const
77  { return this->do_encoding(); }
78 
79  bool always_noconv() const
80  { return this->do_always_noconv(); }
81 
82  int length(cxxtools::MBState& state, const char* from,
83  const char* end, size_t max) const
84  { return this->do_length(state, from, end, max); }
85 
86  int max_length() const
87  { return this->do_max_length(); }
88 
89  protected:
90  virtual codecvt_base::result do_out(cxxtools::MBState& state,
91  const cxxtools::Char* from,
92  const cxxtools::Char* from_end,
93  const cxxtools::Char*& from_next,
94  char* to,
95  char* to_end,
96  char*& to_next) const = 0;
97 
98  virtual codecvt_base::result do_unshift(cxxtools::MBState& state,
99  char* to,
100  char* to_end,
101  char*& to_next) const = 0;
102 
103  virtual codecvt_base::result do_in(cxxtools::MBState& state,
104  const char* from,
105  const char* from_end,
106  const char*& from_next,
107  cxxtools::Char* to,
108  cxxtools::Char* to_end,
109  cxxtools::Char*& to_next) const = 0;
110 
111  virtual int do_encoding() const throw() = 0;
112 
113  virtual bool do_always_noconv() const throw() = 0;
114 
115  virtual int do_length(cxxtools::MBState&,
116  const char* from,
117  const char* end,
118  size_t max) const = 0;
119 
120  virtual int do_max_length() const throw() = 0;
121 };
122 
123 
124 template<>
125 class CXXTOOLS_API codecvt<char, char, cxxtools::MBState> : public codecvt_base, public locale::facet
126 {
127  public:
128  static locale::id id;
129  virtual locale::id& __get_id (void) const { return id; }
130 
131  public:
132  explicit codecvt(size_t ref = 0);
133 
134  virtual ~codecvt();
135 
137  const char* from,
138  const char* from_end,
139  const char*& from_next,
140  char* to,
141  char* to_end,
142  char*& to_next) const
143  { return this->do_out(state, from, from_end, from_next, to, to_end, to_next); }
144 
146  char* to,
147  char* to_end,
148  char*& to_next) const
149  { return this->do_unshift(state, to, to_end, to_next); }
150 
152  const char* from,
153  const char* from_end,
154  const char*& from_next,
155  char* to, char* to_end,
156  char*& to_next) const
157  { return this->do_in(state, from, from_end, from_next, to, to_end, to_next); }
158 
159  int encoding() const
160  { return this->do_encoding(); }
161 
162  bool always_noconv() const
163  { return this->do_always_noconv(); }
164 
165  int length(cxxtools::MBState& state, const char* from,
166  const char* end, size_t max) const
167  { return this->do_length(state, from, end, max); }
168 
169  int max_length() const
170  { return this->do_max_length(); }
171 
172  protected:
173  virtual codecvt_base::result do_out(cxxtools::MBState& state,
174  const char* from,
175  const char* from_end,
176  const char*& from_next,
177  char* to,
178  char* to_end,
179  char*& to_next) const = 0;
180 
181  virtual codecvt_base::result do_unshift(cxxtools::MBState& state,
182  char* to,
183  char* to_end,
184  char*& to_next) const = 0;
185 
186  virtual codecvt_base::result do_in(cxxtools::MBState& state,
187  const char* from,
188  const char* from_end,
189  const char*& from_next,
190  char* to,
191  char* to_end,
192  char*& to_next) const = 0;
193 
194  virtual int do_encoding() const throw() = 0;
195 
196  virtual bool do_always_noconv() const throw() = 0;
197 
198  virtual int do_length(cxxtools::MBState&,
199  const char* from,
200  const char* end,
201  size_t max) const = 0;
202 
203  virtual int do_max_length() const throw() = 0;
204 };
205 
206 }
207 
208 #else // no CXXTOOLS_WITH_STD_LOCALE
209 
210 namespace std {
211 
/**
 * @brief Minimal stand-in for std::codecvt_base, used when the build does
 *        not define CXXTOOLS_WITH_STD_LOCALE.
 *
 * Provides the conversion status values and the @c result type they are
 * reported with.
 */
class codecvt_base
{
    public:
        /** @brief Conversion status values: ok = 0, partial = 1, error = 2, noconv = 3. */
        enum { ok, partial, error, noconv };

        /** @brief Type used to report one of the status values above. */
        typedef int result;

        // Virtual so that derived codecs are destroyed correctly through a
        // pointer to this base.
        virtual ~codecvt_base()
        { }
};
221 
222 template <typename I, typename E, typename S>
224 {
225  public:
226  typedef I InternT;
227  typedef E ExternT;
228  typedef S StateT;
229 
230  public:
231  explicit codecvt(size_t ref = 0)
232  {}
233 
234  virtual ~codecvt()
235  { }
236 
238  const InternT* from,
239  const InternT* from_end,
240  const InternT*& from_next,
241  ExternT* to,
242  ExternT* to_end,
243  ExternT*& to_next) const
244  { return this->do_out(state, from, from_end, from_next, to, to_end, to_next); }
245 
246  codecvt_base::result unshift(StateT& state,
247  ExternT* to,
248  ExternT* to_end,
249  ExternT*& to_next) const
250  { return this->do_unshift(state, to, to_end, to_next); }
251 
253  const ExternT* from,
254  const ExternT* from_end,
255  const ExternT*& from_next,
256  InternT* to,
257  InternT* to_end,
258  InternT*& to_next) const
259  { return this->do_in(state, from, from_end, from_next, to, to_end, to_next); }
260 
261  int encoding() const
262  { return this->do_encoding(); }
263 
264  bool always_noconv() const
265  { return this->do_always_noconv(); }
266 
267  int length(StateT& state, const ExternT* from,
268  const ExternT* end, size_t max) const
269  { return this->do_length(state, from, end, max); }
270 
271  int max_length() const
272  { return this->do_max_length(); }
273 
274  protected:
275  virtual result do_in(StateT& s, const ExternT* fromBegin,
276  const ExternT* fromEnd, const ExternT*& fromNext,
277  InternT* toBegin, InternT* toEnd, InternT*& toNext) const = 0;
278 
279  virtual result do_out(StateT& s, const InternT* fromBegin,
280  const InternT* fromEnd, const InternT*& fromNext,
281  ExternT* toBegin, ExternT* toEnd, ExternT*& toNext) const = 0;
282 
283  virtual bool do_always_noconv() const = 0;
284 
285  virtual int do_length(StateT& s, const ExternT* fromBegin,
286  const ExternT* fromEnd, size_t max) const = 0;
287 
288  virtual int do_max_length() const = 0;
289 
290  virtual std::codecvt_base::result do_unshift(StateT&,
291  ExternT*,
292  ExternT*,
293  ExternT*&) const = 0;
294 
295  virtual int do_encoding() const = 0;
296 };
297 
298 }
299 
300 #endif // CXXTOOLS_WITH_STD_LOCALE
301 
302 namespace cxxtools {
303 
326 template <typename I, typename E>
327 class TextCodec : public std::codecvt<I, E, cxxtools::MBState>
328 {
329  public:
330  typedef I InternT;
331  typedef E ExternT;
332 
333  public:
342  explicit TextCodec(size_t ref = 0)
343  : std::codecvt<InternT, ExternT, MBState>(ref)
344  , _refs(ref)
345  {}
346 
347  public:
349  virtual ~TextCodec()
350  {}
351 
352  size_t refs() const
353  { return _refs; }
354 
355  private:
356  size_t _refs;
357 };
358 
364 template <typename CodecType>
365 std::basic_string<typename CodecType::InternT> decode(const typename CodecType::ExternT* data, unsigned size)
366 {
367  CodecType codec;
368 
369  typename CodecType::InternT to[64];
370  MBState state;
371  std::basic_string<typename CodecType::InternT> ret;
372  const typename CodecType::ExternT* from = data;
373 
374  typename CodecType::result r;
375  do
376  {
377  typename CodecType::InternT* to_next = to;
378 
379  const typename CodecType::ExternT* from_next = from;
380  r = codec.in(state, from, from + size, from_next, to, to + sizeof(to)/sizeof(typename CodecType::InternT), to_next);
381 
382  if (r == CodecType::error)
383  throw ConversionError("character conversion failed");
384 
385  if (r == CodecType::partial && from_next == from)
386  throw ConversionError("character conversion failed - unexpected end of input sequence");
387 
388  ret.append(to, to_next);
389 
390  size -= (from_next - from);
391  from = from_next;
392 
393  } while (r == CodecType::partial);
394 
395  return ret;
396 }
397 
/**
 * @brief Decodes a string of external characters into a string of internal
 *        characters.
 *
 * Convenience overload that passes the string's buffer and length to the
 * pointer/size overload of decode().
 *
 * @tparam CodecType codec class used for the conversion
 * @param data the external characters to decode
 * @return the converted characters
 */
template <typename CodecType>
std::basic_string<typename CodecType::InternT> decode(const std::basic_string<typename CodecType::ExternT>& data)
{
    return decode<CodecType>(data.data(), data.size());
}
416 
417 
418 template <typename CodecType>
419 std::basic_string<typename CodecType::ExternT> encode(const typename CodecType::InternT* data, unsigned size)
420 {
421  CodecType codec;
422  char to[64];
423  MBState state;
424 
425  typename CodecType::result r;
426  const typename CodecType::InternT* from = data;
427  std::basic_string<typename CodecType::ExternT> ret;
428 
429  do{
430  const typename CodecType::InternT* from_next;
431 
432  typename CodecType::ExternT* to_next = to;
433  r = codec.out(state, from, from + size, from_next, to, to + sizeof(to), to_next);
434 
435  if (r == CodecType::error)
436  throw ConversionError("character conversion failed");
437 
438  ret.append(to, to_next);
439 
440  size -= (from_next - from);
441  from = from_next;
442 
443  } while (r == CodecType::partial);
444 
445  typename CodecType::ExternT* to_next = to;
446  r = codec.unshift(state, to, to + sizeof(to), to_next);
447  if (r == CodecType::error)
448  throw ConversionError("character conversion failed");
449 
450  ret.append(to, to_next);
451 
452  return ret;
453 }
454 
/**
 * @brief Encodes a string of internal characters into a string of external
 *        characters.
 *
 * Convenience overload that passes the string's buffer and length to the
 * pointer/size overload of encode().
 *
 * @tparam CodecType codec class used for the conversion
 * @param data the internal characters to encode
 * @return the converted characters
 */
template <typename CodecType>
std::basic_string<typename CodecType::ExternT> encode(const std::basic_string<typename CodecType::InternT>& data)
{
    return encode<CodecType>(data.data(), data.size());
}
472 
473 }
474 
475 #endif