textbuffer.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004-2009 Marc Boris Duerner
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 #ifndef cxxtools_TextBuffer_h
29 #define cxxtools_TextBuffer_h
30 
31 #include <cxxtools/api.h>
32 #include <cxxtools/char.h>
33 #include <cxxtools/textcodec.h>
35 #include <iostream>
36 #include <utility>
37 
38 namespace cxxtools
39 {
40 
41 
60 template <typename CharT, typename ByteT>
61 class BasicTextBuffer : public std::basic_streambuf<CharT>
62 {
63  public:
64  typedef ByteT extern_type;
65  typedef CharT intern_type;
66  typedef CharT char_type;
67  typedef typename std::char_traits<CharT> traits_type;
68  typedef typename traits_type::int_type int_type;
69  typedef typename traits_type::pos_type pos_type;
70  typedef typename traits_type::off_type off_type;
73 
74  private:
75  static const int _pbmax = 4;
76 
77  static const int _ebufmax = 256;
78  extern_type _ebuf[_ebufmax];
79  int _ebufsize;
80 
81  static const int _ibufmax = 256;
82  intern_type _ibuf[_ibufmax];
83 
85  state_type _state;
86 
88  CodecType* _codec;
89 
90  std::basic_ios<extern_type>* _target;
91 
92  public:
103  BasicTextBuffer(std::basic_ios<extern_type>* target, CodecType* codec)
104  : _ebufsize(0)
105  , _codec(codec)
106  , _target(target)
107  {
108  this->setg(0, 0, 0);
109  this->setp(0, 0);
110  }
111 
113  {
114  try
115  {
116  this->terminate();
117  }
118  catch(...) {}
119 
120  if(_codec && _codec->refs() == 0)
121  delete _codec;
122  }
123 
124  void attach(std::basic_ios<extern_type>& target)
125  {
126  this->terminate();
127  _target = &target;
128  }
129 
130  void detach()
131  {
132  this->terminate();
133  _target = 0;
134  }
135 
136  int terminate()
137  {
138  if( this->pptr() )
139  {
140  if( -1 == this->sync() )
141  return -1;
142 
143  if( _codec && ! _codec->always_noconv() )
144  {
145  typename CodecType::result res = CodecType::error;
146  do
147  {
148  extern_type* next = 0;
149  res = _codec->unshift(_state, _ebuf, _ebuf + _ebufmax, next);
150  _ebufsize = next - _ebuf;
151 
152  if(res == CodecType::error)
153  {
154  throw ConversionError("character conversion failed");
155  }
156  else if(res == CodecType::ok || res == CodecType::partial)
157  {
158  if(_ebufsize > 0)
159  {
160  _ebufsize -= _target->rdbuf()->sputn(_ebuf, _ebufsize);
161  if(_ebufsize)
162  return -1;
163  }
164  }
165  }
166  while(res == CodecType::partial);
167  }
168  }
169 
170  this->setp(0, 0);
171  this->setg(0, 0, 0);
172  _ebufsize = 0;
173  _state = state_type();
174  return 0;
175  }
176 
177  std::streamsize import()
178  {
179  if( _target )
180  {
181  std::streamsize n = _target->rdbuf()->in_avail();
182  return do_underflow(n).second;
183  }
184 
185  return this->in_avail();
186  }
187 
188  protected:
189  // inheritdoc
190  virtual int sync()
191  {
192  if( this->pptr() )
193  {
194  // Try to write out the whole buffer to the underlying stream.
195  // Fail if we can not make progress, because more characters
196  // are needed to continue a multi-byte sequence.
197  while( this->pptr() > this->pbase() )
198  {
199  const char_type* p = this->pptr();
200 
201  if( this->overflow( traits_type::eof() ) == traits_type::eof() )
202  return -1;
203 
204  if( p == this->pptr() )
205  throw ConversionError("character conversion failed");
206  }
207  }
208 
209  return 0;
210  }
211 
212  virtual std::streamsize showmanyc()
213  {
214  return 0;
215  }
216 
217  // inheritdoc
218  virtual int_type overflow( int_type ch = traits_type::eof() )
219  {
220  if( ! _target || this->gptr() )
221  return traits_type::eof();
222 
223  if( ! this->pptr() )
224  {
225  this->setp( _ibuf, _ibuf + _ibufmax );
226  }
227 
228  while( this->pptr() > this->pbase() )
229  {
230  const char_type* fromBegin = _ibuf;
231  const char_type* fromEnd = this->pptr();
232  const char_type* fromNext = fromBegin;
233  extern_type* toBegin = _ebuf + _ebufsize;
234  extern_type* toEnd = _ebuf + _ebufmax;
235  extern_type* toNext = toBegin;
236 
237  typename CodecType::result res = CodecType::noconv;
238  if(_codec)
239  res = _codec->out(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
240 
241  if(res == CodecType::noconv)
242  {
243  size_t fromSize = fromEnd - fromBegin;
244  size_t toSize = toEnd - toBegin;
245  size_t size = toSize < fromSize ? toSize : fromSize;
246 
247  this->copyChars( toBegin, fromBegin, size );
248 
249  fromNext += size;
250  toNext += size;
251  }
252 
253  _ebufsize += toNext - toBegin;
254  size_t leftover = fromEnd - fromNext;
255  if(leftover && fromNext > fromBegin)
256  {
257  std::char_traits<char_type>::move(_ibuf, fromNext, leftover);
258  }
259 
260  this->setp( _ibuf, _ibuf + _ibufmax );
261  this->pbump( leftover );
262 
263  if(res == CodecType::error)
264  throw ConversionError("character conversion failed");
265 
266  if(res == CodecType::partial && _ebufsize == 0)
267  break;
268 
269  _ebufsize -= _target->rdbuf()->sputn(_ebuf, _ebufsize);
270  if(_ebufsize)
271  return traits_type::eof();
272  }
273 
274  if( ! traits_type::eq_int_type(ch, traits_type::eof()) )
275  {
276  *( this->pptr() ) = traits_type::to_char_type(ch);
277  this->pbump(1);
278  }
279 
280  return traits_type::not_eof(ch);
281  }
282 
283 
284  // inheritdoc
285  virtual int_type underflow()
286  {
287  if( ! _target )
288  return traits_type::eof();
289 
290  if( this->gptr() < this->egptr() )
291  return traits_type::to_int_type( *this->gptr() );
292 
293  return do_underflow(_ebufmax).first;
294  }
295 
296 
297  std::pair<int_type, std::streamsize> do_underflow(std::streamsize size)
298  {
299  typedef std::pair<int_type, std::streamsize> ret_type;
300 
301  std::streamsize n = 0;
302 
303  if( this->pptr() )
304  {
305  if( -1 == this->terminate() )
306  return ret_type(traits_type::eof(), 0);
307  }
308 
309  if( ! this->gptr() )
310  {
311  this->setg(_ibuf, _ibuf, _ibuf);
312  }
313 
314  if( this->gptr() - this->eback() > _pbmax)
315  {
316  std::streamsize movelen = this->egptr() - this->gptr() + _pbmax;
317  std::char_traits<char_type>::move( _ibuf,
318  this->gptr() - _pbmax,
319  movelen );
320  this->setg(_ibuf, _ibuf + _pbmax, _ibuf + movelen);
321  }
322 
323  bool atEof = false;
324  const std::streamsize bufavail = _ebufmax - _ebufsize;
325  size = bufavail < size ? bufavail : size;
326  if(size)
327  {
328  n = _target->rdbuf()->sgetn( _ebuf + _ebufsize, size );
329  _ebufsize += n;
330  if(n == 0)
331  atEof = true;
332  }
333 
334  const extern_type* fromBegin = _ebuf;
335  const extern_type* fromEnd = _ebuf + _ebufsize;
336  const extern_type* fromNext = fromBegin;
337  char_type* toBegin = this->egptr();
338  char_type* toEnd = _ibuf + _ibufmax;
339  char_type* toNext = toBegin;
340 
342  if(_codec)
343  r = _codec->in(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
344 
345  if(r == CodecType::noconv)
346  {
347  // copy characters and advance fromNext and toNext
348  int n =_ebufsize > _ibufmax ? _ibufmax : _ebufsize ;
349  this->copyBytes(toBegin, fromBegin, n);
350  _ebufsize -= n;
351  fromNext += n;
352  toNext += n;
353  }
354 
355  std::streamsize consumed = fromNext - fromBegin;
356  if(consumed)
357  {
358  std::char_traits<extern_type>::move( _ebuf, _ebuf + consumed, _ebufsize );
359  _ebufsize -= consumed;
360  }
361 
362  std::streamsize generated = toNext - toBegin;
363  if(generated)
364  {
365  this->setg(this->eback(), // start of read buffer
366  this->gptr(), // gptr position
367  this->egptr() + generated ); // end of read buffer
368  }
369 
370  if(r == CodecType::error)
371  throw ConversionError("character conversion failed");
372 
373  if( this->gptr() < this->egptr() )
374  return ret_type(traits_type::to_int_type( *this->gptr() ), n);
375 
376  // fail if partial charactes are at the end of the stream
377  if(r == CodecType::partial && atEof)
378  throw ConversionError("character conversion failed");
379 
380  return ret_type(traits_type::eof(), 0);
381  }
382 
383  static void copyChars(extern_type* s1, const char_type* s2, size_t n)
384  {
385  while(n-- > 0)
386  {
387  *s1 = std::char_traits<char_type>::to_int_type(*s2);
388  ++s1;
389  ++s2;
390  }
391  }
392 
393  static void copyBytes(char_type* s1, const extern_type* s2, size_t n)
394  {
395  while(n-- > 0)
396  {
397  *s1 = std::char_traits<char_type>::to_char_type(*s2);
398  ++s1;
399  ++s2;
400  }
401  }
402 };
403 
404 
410 class CXXTOOLS_API TextBuffer : public BasicTextBuffer<cxxtools::Char, char>
411 {
412  public:
414 
415  public:
422  TextBuffer(std::ios* buffer, Codec* codec);
423 };
424 
425 } // namespace cxxtools
426 
427 #endif
428