Listing 4

// UTF8Streambuf -- filtering stream buffer that sits between a stream of
// TChar characters and an external narrow-character stream buffer holding
// UTF-8 octets.
//
// Template parameters:
//   TChar       - character type seen by the owning stream (default wchar_t).
//   TCharTraits - character traits for TChar.
//   TConvTraits - conversion traits for TChar; put_next_char() uses its
//                 to_ulong() to obtain the numeric code point.
//
// The class declares helpers for both directions (get_* / underflow for
// reading, put_* / overflow for writing).  After a failed operation the
// reason can be queried with get_state().
//
// The external buffer pointer is not owned: the class declares no destructor
// that would release it, so the caller must keep the external buffer alive
// for as long as this object is used.
template<class TChar = wchar_t,
         class TCharTraits = std::char_traits<TChar>,
         class TConvTraits = Detail::ConversionTraits<TChar> >
class UTF8Streambuf: public std::basic_streambuf<TChar, TCharTraits>
{
public:
  // Last error recorded by the buffer.  OK means no error has been recorded.
  // WR_EOF is set when the external buffer rejects an octet and
  // WR_UNICODE_INVAL when a character has no valid UTF-8 length.
  // NOTE(review): the RD_* values are presumably set by the read helpers,
  // whose definitions are not part of this listing -- confirm there.
  enum BufferState {  OK,
    RD_LEAD_INVAL,    RD_CONT_INVAL, RD_NONSHORT,
    RD_UNICODE_INVAL, RD_OVERFLOW,   RD_EOF,
    WR_UNICODE_INVAL, WR_EOF
  };

  typedef std::basic_streambuf<TChar, TCharTraits> base_type;
  typedef typename base_type::int_type int_type;

  // Wraps the given external (narrow-character) stream buffer.
  // readBuf_ is value-initialized so that it never holds an indeterminate
  // value.
  UTF8Streambuf(std::streambuf* pExternBuf)
      : readBuf_(), pExternBuf_(pExternBuf), state_(OK) { }

  // Returns the most recently recorded state; does not modify the buffer.
  BufferState get_state() const { return state_; }

private:
  TChar readBuf_;               // last input-char
  std::streambuf* pExternBuf_;  // ext. stream buffer (not owned)
  BufferState state_;           // last error-code

  // Code-point classification helpers.
  bool is_valid_unicode(unsigned long unicodeChar) const;
  int utf8_octets_needed(unsigned long unicodeChar) const;

  // Octet-level access to the external buffer.
  int get_utf8_lead_octet(unsigned long& leadOctet);
  bool get_utf8_cont_octet(unsigned long& contOctet);
  bool put_utf8_octet(unsigned long utf8Octet);

  // Character-level conversion in each direction.
  bool get_next_char(TChar& nextChar);
  bool put_next_char(TChar nextChar);

  // basic_streambuf hooks.
  virtual int_type underflow();
  virtual int_type overflow(int_type ch = TCharTraits::eof());
};

// Returns the number of UTF-8 octets needed to encode unicodeChar, or 0 when
// the value falls through every range check (treated as an invalid
// code point).
//
// NOTE(review): the `...` line in the body is an elision in this listing; the
// range checks for the longer encodings are not shown.  put_next_char()
// handles results 1 through 4 and treats anything else as invalid, so the
// hidden checks presumably return 3 and 4 -- confirm against the full source.
template<class TChar, class TCharTraits, class TConvTraits>
int UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
utf8_octets_needed(unsigned long unicodeChar) const
{
  // how many UTF-8 octets are needed for this UTF-32 char?
  // Values below 0x80 fit in 7 bits: a single octet.
  if(unicodeChar < 0x00000080) return 1;
  // Values below 0x800 fit in 11 bits: a lead octet plus one continuation.
  if(unicodeChar < 0x00000800) return 2;
  ...
  return 0;  // invalid code-point
}
// Writes a single UTF-8 octet to the external stream buffer.
//
// utf8Octet: the octet value to emit; it is narrowed to char before being
//            handed to the external buffer.
// Returns true when the octet was accepted.  When no external buffer is
// attached, or the external buffer reports end-of-file, state_ is set to
// WR_EOF and false is returned.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
put_utf8_octet(unsigned long utf8Octet)
{
  // The external buffer is a narrow-character buffer, so its failure value is
  // char_traits<char>::eof() -- not TCharTraits::eof() of this class.
  typedef std::char_traits<char> ExternTraits;

  // The constructor accepts any pointer, including null; treat a missing
  // sink like a sink that cannot take more output instead of dereferencing it.
  if(pExternBuf_ == 0 ||
     ExternTraits::eq_int_type(
         pExternBuf_->sputc(static_cast<char>(utf8Octet)),
         ExternTraits::eof()))
  { state_ = WR_EOF; return false; }
  return true;
}
// Encodes one character as UTF-8 and writes the resulting octets, lead octet
// first, through put_utf8_octet().
//
// nextChar: the character to write; it is converted to its numeric code
//           point with TConvTraits::to_ulong().
// Returns true when every octet was written.  Returns false as soon as
// put_utf8_octet() fails, or -- after setting state_ to WR_UNICODE_INVAL --
// when utf8_octets_needed() yields a length other than 1..4.
//
// NOTE(review): the bodies of `case 3` and `case 4` are elided (`...`) in
// this listing; u2 and u3 are presumably used there -- confirm against the
// full source.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
put_next_char(TChar nextChar)
{
  // write next UTF-32 char
  unsigned long u0, u1, u2, u3;  // u0 = lead octet, u1.. = following octets
  unsigned long ucode = TConvTraits::to_ulong(nextChar);
  switch(utf8_octets_needed(ucode))
  {
  // One octet: the code point is written unchanged.
  case 1:  if(!put_utf8_octet(ucode)) return false;
           break;
  // Two octets: the low 6 bits go into the continuation octet (0x80 marker),
  // the next 5 bits into the lead octet (0xC0 marker).
  case 2:  u1 = (ucode & 0x3F) + 0x80; ucode >>= 6;
           u0 = (ucode & 0x1F) + 0xC0;
           if(!put_utf8_octet(u0)) return false;
           if(!put_utf8_octet(u1)) return false;
           break;
  case 3:  ...
  case 4:  ...
  // No valid UTF-8 length for this value: record the error and give up.
  default: state_ = WR_UNICODE_INVAL; return false;
  }
  return true;
}
template<class TChar, class TCharTraits, class TConvTraits>
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::int_type
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::overflow(
  int_type ch)
{
  // protected, virtual function to write next UTF-32 char
  if(TCharTraits::eq_int_type(ch, TCharTraits::eof()))
    return TCharTraits::not_eof(ch);
  if(!put_next_char(TCharTraits::to_char_type(ch)))
    return TCharTraits::eof();
  return ch;
}