This Codec class is able to convert from UTF-8 to UTF-32 and from UTF-32 to UTF-8. More...

#include <cxxtools/utf8codec.h>

Inheritance diagram for cxxtools::Utf8Codec:
cxxtools::TextCodec< Char, char > std::codecvt< Char, char, cxxtools::MBState > std::codecvt_base

Public Member Functions

 Utf8Codec (size_t ref=0)
 Constructs a new Utf8Codec object which converts UTF-8 to UTF-32 and UTF-32 to UTF-8.
virtual ~Utf8Codec ()
 Empty destructor.
- Public Member Functions inherited from cxxtools::TextCodec< Char, char >
 TextCodec (size_t ref=0)
 Constructs a new TextCodec object.
virtual ~TextCodec ()
 Empty destructor.
size_t refs () const
- Public Member Functions inherited from std::codecvt< Char, char, cxxtools::MBState >
 codecvt (size_t ref=0)
virtual ~codecvt ()
codecvt_base::result out (StateT &state, const InternT *from, const InternT *from_end, const InternT *&from_next, ExternT *to, ExternT *to_end, ExternT *&to_next) const
codecvt_base::result unshift (StateT &state, ExternT *to, ExternT *to_end, ExternT *&to_next) const
codecvt_base::result in (StateT &state, const ExternT *from, const ExternT *from_end, const ExternT *&from_next, InternT *to, InternT *to_end, InternT *&to_next) const
int encoding () const
bool always_noconv () const
int length (StateT &state, const ExternT *from, const ExternT *end, size_t max) const
int max_length () const
- Public Member Functions inherited from std::codecvt_base
virtual ~codecvt_base ()

Static Public Member Functions

static String decode (const char *data, unsigned size)
 shortcut for converting utf-8 encoded data to unicode string
static String decode (const std::string &data)
 shortcut for converting utf-8 encoded std::string to unicode string
static std::string encode (const Char *data, unsigned size)
 shortcut for converting unicode data to utf-8 encoded std::string
static std::string encode (const String &data)
 shortcut for converting unicode string to utf-8 encoded std::string

Protected Member Functions

virtual result do_in (MBState &s, const char *fromBegin, const char *fromEnd, const char *&fromNext, Char *toBegin, Char *toEnd, Char *&toNext) const
 Decodes UTF-8 to UTF-32.
virtual result do_out (MBState &s, const Char *fromBegin, const Char *fromEnd, const Char *&fromNext, char *toBegin, char *toEnd, char *&toNext) const
 Encodes UTF-32 to UTF-8.
virtual bool do_always_noconv () const throw ()
virtual int do_length (MBState &s, const char *fromBegin, const char *fromEnd, size_t max) const
virtual int do_max_length () const throw ()
std::codecvt_base::result do_unshift (cxxtools::MBState &, char *, char *, char *&) const
int do_encoding () const throw ()

Additional Inherited Members

- Public Types inherited from cxxtools::TextCodec< Char, char >
typedef Char InternT
typedef char ExternT

Detailed Description

This Codec class is able to convert from UTF-8 to UTF-32 and from UTF-32 to UTF-8.

The method do_in() converts an array of char containing UTF-8-encoded data into an array of cxxtools::Char, which is UTF-32-encoded; that is, each element is a directly readable 32-bit representation of a character.

The method do_out() converts an array of cxxtools::Char objects (UTF-32/Unicode) into an array of char which contains the same sequence of characters in UTF-8-encoding.

Constructor & Destructor Documentation

cxxtools::Utf8Codec::Utf8Codec ( size_t  ref = 0)
explicit

Constructs a new Utf8Codec object which converts UTF-8 to UTF-32 and UTF-32 to UTF-8.

The internal type is cxxtools::Char and the external type is char.

Parameters
ref: This optional parameter is passed to std::codecvt. When ref == 0, the locale takes care of deleting the facet. If ref == 1, the locale does not destroy the facet. The default value is 0.
virtual cxxtools::Utf8Codec::~Utf8Codec ( )
inline virtual

Empty destructor.

Member Function Documentation

static String cxxtools::Utf8Codec::decode ( const char *  data,
unsigned  size 
)
inline static

shortcut for converting utf-8 encoded data to unicode string

Example:

cxxtools::String data = cxxtools::Utf8Codec::decode(utfdataptr, utfdatasize);
static String cxxtools::Utf8Codec::decode ( const std::string &  data)
inline static

shortcut for converting utf-8 encoded std::string to unicode string

virtual bool cxxtools::Utf8Codec::do_always_noconv ( ) const throw ()
protected virtual
int cxxtools::Utf8Codec::do_encoding ( ) const throw ()
inline protected virtual
virtual result cxxtools::Utf8Codec::do_in ( MBState &  s,
const char *  fromBegin,
const char *  fromEnd,
const char *&  fromNext,
Char *  toBegin,
Char *  toEnd,
Char *&  toNext 
) const
protected virtual

Decodes UTF-8 to UTF-32.

virtual int cxxtools::Utf8Codec::do_length ( MBState &  s,
const char *  fromBegin,
const char *  fromEnd,
size_t  max 
) const
protected virtual
virtual int cxxtools::Utf8Codec::do_max_length ( ) const throw ()
protected virtual
virtual result cxxtools::Utf8Codec::do_out ( MBState &  s,
const Char *  fromBegin,
const Char *  fromEnd,
const Char *&  fromNext,
char *  toBegin,
char *  toEnd,
char *&  toNext 
) const
protected virtual

Encodes UTF-32 to UTF-8.

std::codecvt_base::result cxxtools::Utf8Codec::do_unshift ( cxxtools::MBState &  ,
char *  ,
char *  ,
char *&   
) const
inline protected virtual
static std::string cxxtools::Utf8Codec::encode ( const Char *  data,
unsigned  size 
)
inline static

shortcut for converting unicode data to utf-8 encoded std::string

static std::string cxxtools::Utf8Codec::encode ( const String &  data)
inline static

shortcut for converting unicode string to utf-8 encoded std::string


The documentation for this class was generated from the following file: