Listing 4
// Filtering stream buffer that sits in front of an external, byte-oriented
// stream buffer and converts between TChar code points and UTF-8 octets.
// Characters written through overflow() are encoded as UTF-8 and forwarded
// to the external buffer one octet at a time; underflow() and the get_*
// helpers are declared for the input direction.
//
// Template parameters:
//   TChar       - character type seen by users of this buffer
//   TCharTraits - character traits for TChar
//   TConvTraits - conversion traits; must provide to_ulong(TChar), which
//                 yields the numeric code point of a character
//
// The buffer does not own the external stream buffer; the caller must keep
// it alive for as long as this object is in use.
template<class TChar = wchar_t,
         class TCharTraits = std::char_traits<TChar>,
         class TConvTraits = Detail::ConversionTraits<TChar> >
class UTF8Streambuf: public std::basic_streambuf<TChar, TCharTraits>
{
public:
    // Result of the most recent conversion step.
    //   OK               - no error recorded
    //   RD_*             - input-side errors (set by code outside this
    //                      listing; presumably invalid lead/continuation
    //                      octet, non-shortest form, invalid code point,
    //                      overflow and end of input -- TODO confirm)
    //   WR_UNICODE_INVAL - the character cannot be encoded as UTF-8
    //   WR_EOF           - the external buffer refused an octet
    enum BufferState { OK,
        RD_LEAD_INVAL, RD_CONT_INVAL, RD_NONSHORT,
        RD_UNICODE_INVAL, RD_OVERFLOW, RD_EOF,
        WR_UNICODE_INVAL, WR_EOF
    };

    typedef std::basic_streambuf<TChar, TCharTraits> base_type;
    typedef typename base_type::int_type int_type;

    // Wraps pExternBuf, the byte-oriented buffer that receives (and
    // supplies) the UTF-8 octets. All members are initialized so that no
    // indeterminate value can ever be read from readBuf_.
    UTF8Streambuf(std::streambuf* pExternBuf)
        : readBuf_(), pExternBuf_(pExternBuf), state_(OK) { }

    // Returns the last recorded state; const so it can be queried through
    // a const reference or pointer as well.
    BufferState get_state() const { return state_; }

private:
    TChar readBuf_;               // last input-char
    std::streambuf* pExternBuf_;  // ext. stream buffer (not owned)
    BufferState state_;           // last error-code

    // Helpers; definitions that are not part of this listing are declared
    // here without further description.
    bool is_valid_unicode(unsigned long unicodeChar) const;
    // Number of UTF-8 octets needed for unicodeChar, 0 if not encodable.
    int utf8_octets_needed(unsigned long unicodeChar) const;
    int get_utf8_lead_octet(unsigned long& leadOctet);
    bool get_utf8_cont_octet(unsigned long& contOctet);
    // Writes one octet to the external buffer; false (WR_EOF) on failure.
    bool put_utf8_octet(unsigned long utf8Octet);
    bool get_next_char(TChar& nextChar);
    // Encodes nextChar as UTF-8 and writes it; false on failure.
    bool put_next_char(TChar nextChar);

    // Overrides of the basic_streambuf virtual functions.
    virtual int_type underflow();
    virtual int_type overflow(int_type ch = TCharTraits::eof());
};
// Returns the number of UTF-8 octets required to encode the code point
// unicodeChar, or 0 when the value cannot be encoded.
template<class TChar, class TCharTraits, class TConvTraits>
int UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
utf8_octets_needed(unsigned long unicodeChar) const
{
// how many UTF-8 octets are needed for this UTF-32 char?
// Values below 0x80 fit into a single octet.
if(unicodeChar < 0x00000080) return 1;
// Values below 0x800 fit into a two-octet sequence.
if(unicodeChar < 0x00000800) return 2;
// NOTE(review): the range checks for longer sequences are elided in this
// listing; the exact upper bounds used there cannot be seen from here.
...
// Anything not matched by a range check above is reported as not encodable.
return 0; // invalid code-point
}
// Writes a single UTF-8 octet to the external stream buffer.
//
// utf8Octet - octet value to write; only the low 8 bits are meaningful.
//
// Returns true when the external buffer accepted the octet. On failure,
// which includes the case that no external buffer was supplied, state_ is
// set to WR_EOF and false is returned.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
put_utf8_octet(unsigned long utf8Octet)
{
    // The external buffer is a narrow-character buffer, so its end-of-file
    // marker is the one defined by char_traits<char>.
    typedef std::char_traits<char> octet_traits;

    // A null external buffer is treated like a buffer that rejects every
    // write instead of being dereferenced.
    if(!pExternBuf_ ||
       octet_traits::eq_int_type(
           pExternBuf_->sputc(static_cast<char>(utf8Octet)),
           octet_traits::eof()))
    {
        state_ = WR_EOF;
        return false;
    }
    return true;
}
// Encodes one character as a UTF-8 octet sequence and writes the octets to
// the external stream buffer via put_utf8_octet().
//
// nextChar - character to write; converted to its numeric code point with
//            TConvTraits::to_ulong().
//
// Returns true when every octet was written. Returns false as soon as
// put_utf8_octet() fails, or, with state_ set to WR_UNICODE_INVAL, when
// utf8_octets_needed() reports a length not handled by the switch.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
put_next_char(TChar nextChar)
{
// write next UTF-32 char
// u0 is the lead octet, u1..u3 the continuation octets; u2 and u3 are not
// used by the cases visible in this listing.
unsigned long u0, u1, u2, u3;
unsigned long ucode = TConvTraits::to_ulong(nextChar);
switch(utf8_octets_needed(ucode))
{
// One octet: the code point is written unchanged.
case 1: if(!put_utf8_octet(ucode)) return false;
break;
// Two octets: the low 6 bits go into the continuation octet (10xxxxxx),
// the next 5 bits into the lead octet (110xxxxx).
case 2: u1 = (ucode & 0x3F) + 0x80; ucode >>= 6;
u0 = (ucode & 0x1F) + 0xC0;
// The lead octet is written first, then the continuation octet.
if(!put_utf8_octet(u0)) return false;
if(!put_utf8_octet(u1)) return false;
break;
// NOTE(review): the three- and four-octet cases are elided in this listing.
case 3: ...
case 4: ...
// Any other length means the code point cannot be encoded.
default: state_ = WR_UNICODE_INVAL; return false;
}
return true;
}
template<class TChar, class TCharTraits, class TConvTraits>
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::int_type
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::overflow(
int_type ch)
{
// protected, virtual function to write next UTF-32 char
if(TCharTraits::eq_int_type(ch, TCharTraits::eof()))
return TCharTraits::not_eof(ch);
if(!put_next_char(TCharTraits::to_char_type(ch)))
return TCharTraits::eof();
return ch;
}