textbuffer.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004-2009 Marc Boris Duerner
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 #ifndef cxxtools_TextBuffer_h
29 #define cxxtools_TextBuffer_h
30 
31 #include <cxxtools/char.h>
32 #include <cxxtools/textcodec.h>
34 #include <iostream>
35 #include <utility>
36 
37 namespace cxxtools
38 {
39 
40 
59 template <typename CharT, typename ByteT>
60 class BasicTextBuffer : public std::basic_streambuf<CharT>
61 {
62  public:
63  typedef ByteT extern_type;
64  typedef CharT intern_type;
65  typedef CharT char_type;
66  typedef typename std::char_traits<CharT> traits_type;
67  typedef typename traits_type::int_type int_type;
68  typedef typename traits_type::pos_type pos_type;
69  typedef typename traits_type::off_type off_type;
71  typedef MBState state_type;
72 
73  private:
74  static const int _pbmax = 4;
75 
76  static const int _ebufmax = 256;
77  extern_type _ebuf[_ebufmax];
78  int _ebufsize;
79 
80  static const int _ibufmax = 256;
81  intern_type _ibuf[_ibufmax];
82 
84  state_type _state;
85 
87  CodecType* _codec;
88 
89  std::basic_ios<extern_type>* _target;
90 
91  public:
102  BasicTextBuffer(std::basic_ios<extern_type>* target, CodecType* codec)
103  : _ebufsize(0)
104  , _codec(codec)
105  , _target(target)
106  {
107  this->setg(0, 0, 0);
108  this->setp(0, 0);
109  }
110 
112  {
113  try
114  {
115  this->terminate();
116  }
117  catch(...) {}
118 
119  if(_codec && _codec->refs() == 0)
120  delete _codec;
121  }
122 
123  void attach(std::basic_ios<extern_type>& target)
124  {
125  this->terminate();
126  _target = &target;
127  }
128 
129  void detach()
130  {
131  this->terminate();
132  _target = 0;
133  }
134 
135  int terminate()
136  {
137  if( this->pptr() )
138  {
139  if( -1 == this->sync() )
140  return -1;
141 
142  if( _codec && ! _codec->always_noconv() )
143  {
144  typename CodecType::result res = CodecType::error;
145  do
146  {
147  extern_type* next = 0;
148  res = _codec->unshift(_state, _ebuf, _ebuf + _ebufmax, next);
149  _ebufsize = next - _ebuf;
150 
151  if(res == CodecType::error)
152  {
153  throw ConversionError("character conversion failed");
154  }
155  else if(res == CodecType::ok || res == CodecType::partial)
156  {
157  if(_ebufsize > 0)
158  {
159  _ebufsize -= _target->rdbuf()->sputn(_ebuf, _ebufsize);
160  if(_ebufsize)
161  return -1;
162  }
163  }
164  }
165  while(res == CodecType::partial);
166  }
167  }
168 
169  this->setp(0, 0);
170  this->setg(0, 0, 0);
171  _ebufsize = 0;
172  _state = state_type();
173  return 0;
174  }
175 
176  std::streamsize import()
177  {
178  if( _target )
179  {
180  std::streamsize n = _target->rdbuf()->in_avail();
181  return do_underflow(n).second;
182  }
183 
184  return this->in_avail();
185  }
186 
187  protected:
188  // inheritdoc
189  virtual int sync()
190  {
191  if( this->pptr() )
192  {
193  // Try to write out the whole buffer to the underlying stream.
194  // Fail if we can not make progress, because more characters
195  // are needed to continue a multi-byte sequence.
196  while( this->pptr() > this->pbase() )
197  {
198  const char_type* p = this->pptr();
199 
200  if( this->overflow( traits_type::eof() ) == traits_type::eof() )
201  return -1;
202 
203  if( p == this->pptr() )
204  throw ConversionError("character conversion failed");
205  }
206  }
207 
208  return 0;
209  }
210 
211  virtual std::streamsize showmanyc()
212  {
213  return 0;
214  }
215 
216  // inheritdoc
217  virtual int_type overflow( int_type ch = traits_type::eof() )
218  {
219  if( ! _target || this->gptr() )
220  return traits_type::eof();
221 
222  if( ! this->pptr() )
223  {
224  this->setp( _ibuf, _ibuf + _ibufmax );
225  }
226 
227  while( this->pptr() > this->pbase() )
228  {
229  const char_type* fromBegin = _ibuf;
230  const char_type* fromEnd = this->pptr();
231  const char_type* fromNext = fromBegin;
232  extern_type* toBegin = _ebuf + _ebufsize;
233  extern_type* toEnd = _ebuf + _ebufmax;
234  extern_type* toNext = toBegin;
235 
236  typename CodecType::result res = CodecType::noconv;
237  if(_codec)
238  res = _codec->out(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
239 
240  if(res == CodecType::noconv)
241  {
242  size_t fromSize = fromEnd - fromBegin;
243  size_t toSize = toEnd - toBegin;
244  size_t size = toSize < fromSize ? toSize : fromSize;
245 
246  this->copyChars( toBegin, fromBegin, size );
247 
248  fromNext += size;
249  toNext += size;
250  }
251 
252  _ebufsize += toNext - toBegin;
253  size_t leftover = fromEnd - fromNext;
254  if(leftover && fromNext > fromBegin)
255  {
256  std::char_traits<char_type>::move(_ibuf, fromNext, leftover);
257  }
258 
259  this->setp( _ibuf, _ibuf + _ibufmax );
260  this->pbump( leftover );
261 
262  if(res == CodecType::error)
263  throw ConversionError("character conversion failed");
264 
265  if(res == CodecType::partial && _ebufsize == 0)
266  break;
267 
268  _ebufsize -= _target->rdbuf()->sputn(_ebuf, _ebufsize);
269  if(_ebufsize)
270  return traits_type::eof();
271  }
272 
273  if( ! traits_type::eq_int_type(ch, traits_type::eof()) )
274  {
275  *( this->pptr() ) = traits_type::to_char_type(ch);
276  this->pbump(1);
277  }
278 
279  return traits_type::not_eof(ch);
280  }
281 
282 
283  // inheritdoc
284  virtual int_type underflow()
285  {
286  if( ! _target )
287  return traits_type::eof();
288 
289  if( this->gptr() < this->egptr() )
290  return traits_type::to_int_type( *this->gptr() );
291 
292  return do_underflow(_ebufmax).first;
293  }
294 
295 
296  std::pair<int_type, std::streamsize> do_underflow(std::streamsize size)
297  {
298  typedef std::pair<int_type, std::streamsize> ret_type;
299 
300  std::streamsize n = 0;
301 
302  if( this->pptr() )
303  {
304  if( -1 == this->terminate() )
305  return ret_type(traits_type::eof(), 0);
306  }
307 
308  if( ! this->gptr() )
309  {
310  this->setg(_ibuf, _ibuf, _ibuf);
311  }
312 
313  if( this->gptr() - this->eback() > _pbmax)
314  {
315  std::streamsize movelen = this->egptr() - this->gptr() + _pbmax;
316  std::char_traits<char_type>::move( _ibuf,
317  this->gptr() - _pbmax,
318  movelen );
319  this->setg(_ibuf, _ibuf + _pbmax, _ibuf + movelen);
320  }
321 
322  bool atEof = false;
323  const std::streamsize bufavail = _ebufmax - _ebufsize;
324  const std::streamsize in_avail = _target->rdbuf()->in_avail();
325  if (bufavail < size)
326  size = bufavail;
327  if (in_avail > 0 && in_avail < size)
328  size = in_avail;
329  if (size > 0)
330  {
331  n = _target->rdbuf()->sgetn( _ebuf + _ebufsize, size );
332  _ebufsize += n;
333  if(n == 0)
334  atEof = true;
335  }
336 
337  const extern_type* fromBegin = _ebuf;
338  const extern_type* fromEnd = _ebuf + _ebufsize;
339  const extern_type* fromNext = fromBegin;
340  char_type* toBegin = this->egptr();
341  char_type* toEnd = _ibuf + _ibufmax;
342  char_type* toNext = toBegin;
343 
345  if (_codec)
346  {
347  r = _codec->in(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
348  std::streamsize consumed = fromNext - fromBegin;
349  if(consumed)
350  {
351  std::char_traits<extern_type>::move( _ebuf, _ebuf + consumed, _ebufsize );
352  _ebufsize -= consumed;
353  }
354  }
355  else
356  {
357  // copy characters and advance toNext
358  int n = _ebufsize > _ibufmax ? _ibufmax : _ebufsize;
359  this->copyBytes(toBegin, fromBegin, n);
360  _ebufsize -= n;
361  toNext += n;
362  }
363 
364  std::streamsize generated = toNext - toBegin;
365  if(generated)
366  {
367  this->setg(this->eback(), // start of read buffer
368  this->gptr(), // gptr position
369  this->egptr() + generated ); // end of read buffer
370  }
371 
372  if(r == CodecType::error)
373  throw ConversionError("character conversion failed");
374 
375  if( this->gptr() < this->egptr() )
376  return ret_type(traits_type::to_int_type( *this->gptr() ), n);
377 
378  // fail if partial charactes are at the end of the stream
379  if(r == CodecType::partial && atEof)
380  throw ConversionError("character conversion failed");
381 
382  return ret_type(traits_type::eof(), 0);
383  }
384 
385  static void copyChars(extern_type* s1, const char_type* s2, size_t n)
386  {
387  while(n-- > 0)
388  {
389  *s1 = std::char_traits<char_type>::to_int_type(*s2);
390  ++s1;
391  ++s2;
392  }
393  }
394 
395  static void copyBytes(char_type* s1, const extern_type* s2, size_t n)
396  {
397  while(n-- > 0)
398  {
399  *s1 = std::char_traits<char_type>::to_char_type(*s2);
400  ++s1;
401  ++s2;
402  }
403  }
404 };
405 
406 
412 class TextBuffer : public BasicTextBuffer<cxxtools::Char, char>
413 {
414  public:
416 
417  public:
424  TextBuffer(std::ios* buffer, Codec* codec);
425 };
426 
427 } // namespace cxxtools
428 
429 #endif
430