Webmaster  |  Imprint 
C++ Server Pages
Main  |  License  |  Documentation  |  Download 

/home/tommi/cxxtools/include/cxxtools/textbuffer.h

00001 /*
00002  * Copyright (C) 2004-2009 Marc Boris Duerner
00003  * 
00004  * This library is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU Lesser General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 2.1 of the License, or (at your option) any later version.
00008  * 
00009  * As a special exception, you may use this file as part of a free
00010  * software library without restriction. Specifically, if other files
00011  * instantiate templates or use macros or inline functions from this
00012  * file, or you compile this file and link it with other files to
00013  * produce an executable, this file does not by itself cause the
00014  * resulting executable to be covered by the GNU General Public
00015  * License. This exception does not however invalidate any other
00016  * reasons why the executable file might be covered by the GNU Library
00017  * General Public License.
00018  * 
00019  * This library is distributed in the hope that it will be useful,
00020  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00021  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00022  * Lesser General Public License for more details.
00023  * 
00024  * You should have received a copy of the GNU Lesser General Public
00025  * License along with this library; if not, write to the Free Software
00026  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00027  */
00028 #ifndef cxxtools_TextBuffer_h
00029 #define cxxtools_TextBuffer_h
00030 
00031 #include <cxxtools/api.h>
00032 #include <cxxtools/char.h>
00033 #include <cxxtools/textcodec.h>
00034 #include <cxxtools/conversionerror.h>
00035 #include <iostream>
00036 
00037 namespace cxxtools {
00038 
00039 
00058 template <typename CharT, typename ByteT>
00059 class BasicTextBuffer : public std::basic_streambuf<CharT>
00060 {
00061     public:
00062         typedef ByteT extern_type;
00063         typedef CharT intern_type;
00064         typedef CharT char_type;
00065         typedef typename std::char_traits<CharT> traits_type;
00066         typedef typename traits_type::int_type int_type;
00067         typedef typename traits_type::pos_type pos_type;
00068         typedef typename traits_type::off_type off_type;
00069         typedef TextCodec<char_type, extern_type> CodecType;
00070         typedef MBState state_type;
00071 
00072     private:
00073         static const int _pbmax = 4;
00074 
00075         static const int _ebufmax = 256;
00076         extern_type _ebuf[_ebufmax];
00077         int _ebufsize;
00078 
00079         static const int _ibufmax = 256;
00080         intern_type _ibuf[_ibufmax];
00081 
00083         state_type _state;
00084 
00086         CodecType* _codec;
00087 
00088         std::basic_ios<extern_type>* _target;
00089 
00090     public:
00101         BasicTextBuffer(std::basic_ios<extern_type>* target, CodecType* codec)
00102         : _ebufsize(0)
00103         , _codec(codec) 
00104         , _target(target)
00105         {
00106             this->setg(0, 0, 0);
00107             this->setp(0, 0);
00108         }
00109 
00110         ~BasicTextBuffer() throw()
00111         {
00112             try
00113             {
00114                 this->terminate();
00115             }
00116             catch(...) {}
00117 
00118             if(_codec && _codec->refs() == 0)
00119                 delete _codec;
00120         }
00121 
00122         void attach(std::basic_ios<extern_type>& target)
00123         {
00124             this->terminate();
00125             _target = &target;
00126         }
00127 
00128         void detach()
00129         {
00130             this->terminate();
00131             _target = 0;
00132         }
00133 
00134         int terminate()
00135         {
00136             if( this->pptr() )
00137             {
00138                 if( -1 == this->sync() )
00139                     return -1;
00140 
00141                 if( _codec && ! _codec->always_noconv() )
00142                 {
00143                     typename CodecType::result res = CodecType::error;
00144                     do
00145                     {
00146                         extern_type* next = 0;
00147                         res = _codec->unshift(_state, _ebuf, _ebuf + _ebufmax, next);
00148                         _ebufsize = next - _ebuf;
00149 
00150                         if(res == CodecType::error)
00151                         {
00152                             throw ConversionError("character conversion failed");
00153                         }
00154                         else if(res == CodecType::ok || res == CodecType::partial)
00155                         {
00156                             if(_ebufsize > 0)
00157                             {
00158                                 _ebufsize -= _target->rdbuf()->sputn(_ebuf, _ebufsize);
00159                                 if(_ebufsize)
00160                                     return -1;
00161                             }
00162                         }
00163                     }
00164                     while(res == CodecType::partial);
00165                 }
00166             }
00167 
00168             this->setp(0, 0);
00169             this->setg(0, 0, 0);
00170             _ebufsize = 0;
00171             _state = state_type();
00172             return 0;
00173         }
00174 
00175         std::streamsize import()
00176         {
00177             if( _target )
00178             {
00179                 std::streamsize n = _target->rdbuf()->in_avail();
00180                 return do_underflow(n).second;
00181             }
00182 
00183             return this->in_avail();
00184         }
00185 
00186     protected:
00187         // inheritdoc
00188         virtual int sync()
00189         {
00190             if( this->pptr() )
00191             {
00192                 // Try to write out the whole buffer to the underlying stream.
00193                 // Fail if we can not make progress, because more characters
00194                 // are needed to continue a multi-byte sequence.
00195                 while( this->pptr() > this->pbase() )
00196                 {
00197                     const char_type* p = this->pptr();
00198 
00199                     if( this->overflow( traits_type::eof() ) == traits_type::eof() )
00200                         return -1;
00201 
00202                     if( p == this->pptr() )
00203                         throw ConversionError("character conversion failed");
00204                 }
00205             }
00206 
00207             return 0;
00208         }
00209 
00210         virtual std::streamsize showmanyc()
00211         {
00212             return 0;
00213         }
00214 
00215         // inheritdoc
00216         virtual int_type overflow( int_type ch = traits_type::eof() )
00217         {
00218             if( ! _target || this->gptr() )
00219                 return traits_type::eof();
00220 
00221             if( ! this->pptr() )
00222             {
00223                 this->setp( _ibuf, _ibuf + _ibufmax );
00224             }
00225 
00226             while( this->pptr() > this->pbase() )
00227             {
00228                 const char_type* fromBegin = _ibuf;
00229                 const char_type* fromEnd   = this->pptr();
00230                 const char_type* fromNext  = fromBegin;
00231                 extern_type* toBegin       = _ebuf + _ebufsize;
00232                 extern_type* toEnd         = _ebuf + _ebufmax;
00233                 extern_type* toNext        = toBegin;
00234 
00235                 typename CodecType::result res = CodecType::noconv;
00236                 if(_codec)
00237                     res = _codec->out(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
00238 
00239                 if(res == CodecType::noconv)
00240                 {
00241                     size_t fromSize = fromEnd - fromBegin;
00242                     size_t toSize   = toEnd - toBegin;
00243                     size_t size     = toSize < fromSize ? toSize : fromSize;
00244 
00245                     this->copyChars( toBegin, fromBegin, size );
00246 
00247                     fromNext += size;
00248                     toNext += size;
00249                 }
00250 
00251                 _ebufsize += toNext - toBegin;
00252                 size_t leftover = fromEnd - fromNext;
00253                 if(leftover && fromNext > fromBegin)
00254                 {
00255                     std::char_traits<char_type>::move(_ibuf, fromNext, leftover);
00256                 }
00257 
00258                 this->setp( _ibuf, _ibuf + _ibufmax );
00259                 this->pbump( leftover );
00260 
00261                 if(res == CodecType::error)
00262                     throw ConversionError("character conversion failed");
00263 
00264                 if(res == CodecType::partial && _ebufsize == 0)
00265                     break;
00266 
00267                 _ebufsize -= _target->rdbuf()->sputn(_ebuf, _ebufsize);
00268                 if(_ebufsize)
00269                     return traits_type::eof();
00270             }
00271 
00272             if( ! traits_type::eq_int_type(ch, traits_type::eof()) )
00273             {
00274                 *( this->pptr() ) = traits_type::to_char_type(ch);
00275                 this->pbump(1);
00276             }
00277 
00278             return traits_type::not_eof(ch);
00279         }
00280 
00281 
00282         // inheritdoc
00283         virtual int_type underflow()
00284         {
00285             if( ! _target )
00286                 return traits_type::eof();
00287 
00288             if( this->gptr() < this->egptr() )
00289                 return traits_type::to_int_type( *this->gptr() );
00290 
00291             return do_underflow(_ebufmax).first;
00292         }
00293 
00294 
00295         std::pair<int_type, std::streamsize> do_underflow(std::streamsize size)
00296         {
00297             typedef std::pair<int_type, std::streamsize> ret_type;
00298 
00299             std::streamsize n = 0;
00300 
00301             if( this->pptr() )
00302             {
00303                 if( -1 == this->terminate() )
00304                     return ret_type(traits_type::eof(), 0);
00305             }
00306 
00307             if( ! this->gptr() )
00308             {
00309                 this->setg(_ibuf, _ibuf, _ibuf);
00310             }
00311 
00312             if( this->gptr() - this->eback() > _pbmax)
00313             {
00314                 std::streamsize movelen = this->egptr() - this->gptr() + _pbmax;
00315                 std::char_traits<char_type>::move( _ibuf,
00316                                                    this->gptr() - _pbmax,
00317                                                    movelen );
00318                 this->setg(_ibuf, _ibuf + _pbmax, _ibuf + movelen);
00319             }
00320 
00321             bool atEof = false;
00322             const std::streamsize bufavail = _ebufmax - _ebufsize;
00323             size = bufavail < size ? bufavail : size;
00324             if(size)
00325             {
00326                 n = _target->rdbuf()->sgetn( _ebuf + _ebufsize, size );
00327                 _ebufsize += n;
00328                 if(n == 0)
00329                     atEof = true;
00330             }
00331 
00332             const extern_type* fromBegin = _ebuf;
00333             const extern_type* fromEnd   = _ebuf + _ebufsize;
00334             const extern_type* fromNext  = fromBegin;
00335             char_type* toBegin           = this->egptr();
00336             char_type* toEnd             = _ibuf + _ibufmax;
00337             char_type* toNext            = toBegin;
00338 
00339             typename CodecType::result r = CodecType::noconv;
00340             if(_codec)
00341                 r = _codec->in(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
00342 
00343             if(r == CodecType::noconv)
00344             {
00345                 // copy characters and advance fromNext and toNext
00346                 int n =_ebufsize > _ibufmax ? _ibufmax : _ebufsize ;
00347                 this->copyChars(toBegin, fromBegin, n);
00348                 _ebufsize -= n;
00349                 fromNext += n;
00350                 toNext += n;
00351             }
00352 
00353             std::streamsize consumed = fromNext - fromBegin;
00354             if(consumed)
00355             {
00356                 std::char_traits<extern_type>::move( _ebuf, _ebuf + consumed, _ebufsize );
00357                 _ebufsize -= consumed;
00358             }
00359 
00360             std::streamsize generated = toNext - toBegin;
00361             if(generated)
00362             {
00363                 this->setg(this->eback(),              // start of read buffer
00364                            this->gptr(),               // gptr position
00365                            this->egptr() + generated ); // end of read buffer
00366             }
00367 
00368             if(r == CodecType::error)
00369                 throw ConversionError("character conversion failed");
00370 
00371             if( this->gptr() < this->egptr() )
00372                 return ret_type(traits_type::to_int_type( *this->gptr() ), n);
00373 
00374             // fail if partial charactes are at the end of the stream
00375             if(r == CodecType::partial && atEof)
00376                 throw ConversionError("character conversion failed");
00377 
00378             return ret_type(traits_type::eof(), 0);
00379         }
00380 
00381         template <typename T>
00382         void copyChars(T* s1, const T* s2, size_t n)
00383         {
00384             std::char_traits<T>::copy(s1, s2, n);
00385         }
00386 
00387         //TODO: signature like codecvt with ptr refs
00388         template <typename A, typename B>
00389         void copyChars(A* s1, const B* s2, size_t n)
00390         {
00391             while(n-- > 0)
00392             {
00393                 *s1 = *s2;
00394                 ++s1;
00395                 ++s2;
00396             }
00397         }
00398 };
00399 
00400 
00406 class CXXTOOLS_API TextBuffer : public BasicTextBuffer<cxxtools::Char, char>
00407 {
00408     public:
00409         typedef TextCodec<cxxtools::Char, char> Codec;
00410 
00411     public:
00418         TextBuffer(std::ios* buffer, Codec* codec);
00419 };
00420 
00421 } // namespace cxxtools
00422 
00423 #endif
00424 
Copyright © 2008 The Tntnet Development Team
Tntnet 1.6