Listing 5

// Filtering stream buffer for character input.
// Reports whether `unicodeChar` is acceptable as a decoded code point.
//
// Visible rules:
//   * values below 0xD800 are accepted;
//   * values in [0xD800, 0xE000) are rejected (this is the UTF-16
//     surrogate range, which is not a valid scalar value);
//   * anything not accepted by an earlier check is rejected.
// NOTE(review): the checks for values >= 0xE000 are elided ("...") in this
// listing, so the full accepted set cannot be confirmed from here.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
is_valid_unicode(unsigned long unicodeChar) const
{
  // everything below the surrogate block is valid
  if(unicodeChar < 0x00D800) return true;
  // 0xD800..0xDFFF: rejected
  if(unicodeChar < 0x00E000) return false;
  // (further range checks elided in the listing)
  ...
  // default: not recognised as valid
  return false;
}
// Consumes one octet from the external stream buffer and classifies it as
// the leading octet of a UTF-8 sequence.
//
// leadOctet - receives the octet value (0..255) when one could be read; it
//             is left untouched when the external buffer reports EOF.
// Returns the total number of octets in the sequence that this lead octet
// introduces (1 for values below 0x80, 2 for 0xC0..0xDF, ...), or 0 on
// failure. On failure state_ is set to RD_EOF (no more input) or
// RD_LEAD_INVAL (the octet cannot start a sequence).
// The octet is always consumed (sbumpc), even when it is rejected.
// NOTE(review): the classification of octets >= 0xE0 is elided ("...") in
// this listing.
template<class TChar, class TCharTraits, class TConvTraits>
int UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
get_utf8_lead_octet(unsigned long& leadOctet)
{ 
  // read leading UTF-8 octet from ext. stream buffer
  streambuf::int_type tmp = pExternBuf_->sbumpc();
  if(tmp == EOF) { state_ = RD_EOF; return 0; }
  // go through unsigned char so that octets >= 0x80 are not sign-extended
  leadOctet = static_cast<unsigned char>(tmp);
  // 0x00..0x7F: single-octet sequence
  if(leadOctet < 0x80) return 1;
  // 0x80..0xBF: bit pattern 10xxxxxx is a continuation octet, not a lead
  if(leadOctet < 0xC0) { state_ = RD_LEAD_INVAL; return 0; }
  // 0xC0..0xDF: two-octet sequence
  if(leadOctet < 0xE0) return 2;
  // (longer sequence lengths elided in the listing)
  ...
  // no range matched: not a legal lead octet
  state_ = RD_LEAD_INVAL;
  return 0;
}
// Fetches the next continuation octet (bit pattern 10xxxxxx) of a UTF-8
// sequence from the external stream buffer.
//
// contOctet - receives the octet that was looked at (0..255); it is written
//             whenever an octet is available, even if that octet is then
//             rejected. It is left untouched on EOF.
// Returns true when a valid continuation octet was read and consumed.
// Returns false with state_ == RD_EOF when no input is left, or with
// state_ == RD_CONT_INVAL when the next octet is not a continuation octet.
// In the latter case the offending octet stays in the external buffer.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
get_utf8_cont_octet(unsigned long& contOctet)
{
  // peek only: the octet is consumed further down, once it is accepted
  const streambuf::int_type peeked = pExternBuf_->sgetc();
  if(peeked == EOF)
  {
    state_ = RD_EOF;
    return false;
  }

  // go through unsigned char so that octets >= 0x80 are not sign-extended
  contOctet = static_cast<unsigned char>(peeked);

  // the two top bits of a continuation octet must be "10"
  const bool isContinuation = ((contOctet & 0xC0) == 0x80);
  if(!isContinuation)
  {
    state_ = RD_CONT_INVAL;
    return false;
  }

  // accepted: now advance the external buffer past the octet
  pExternBuf_->sbumpc();
  return true;
}
// Decodes the next UTF-8 sequence from the external stream buffer into one
// character of type TChar.
//
// nextChar - receives the converted character on success.
// Returns true on success. Returns false on any failure, with state_
// describing the cause:
//   * whatever get_utf8_lead_octet / get_utf8_cont_octet stored
//     (RD_EOF, RD_LEAD_INVAL, RD_CONT_INVAL),
//   * RD_NONSHORT       - the sequence is longer than needed for its value,
//   * RD_UNICODE_INVAL  - is_valid_unicode() rejected the decoded value,
//   * RD_OVERFLOW       - TConvTraits::to_char_type() reported failure.
// NOTE(review): the handling of 3- to 6-octet sequences is elided ("...")
// in this listing.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
get_next_char(TChar& nextChar)
{
  // read next UTF-32 char
  // c1 is the lead octet, c2..c6 the continuation octets; ucode accumulates
  // the decoded code point
  unsigned long c1, c2, c3, c4, c5, c6, ucode;
  // dispatch on the sequence length announced by the lead octet
  switch(get_utf8_lead_octet(c1))
  {
  // one octet: the octet value is the code point
  case 1:  ucode = c1;
           break;
  // two octets: 5 payload bits from the lead, 6 from the continuation
  case 2:  if(!get_utf8_cont_octet(c2)) return false;
           ucode = c1 & 0x1F; ucode <<= 6; ucode += c2 & 0x3F;
           // a value below 0x80 fits in one octet, so this two-octet
           // form is not the shortest encoding
           if(ucode < 0x80)
           { state_ = RD_NONSHORT; return false; }
           break;
  // (longer sequences elided in the listing)
  case 3:  ...
  case 4:  ...
  case 5:  ...
  case 6:  ...
  // 0: reading the lead octet failed; state_ was set by the callee
  default: return false;
  }
  // reject decoded values that are not acceptable code points
  if(!is_valid_unicode(ucode))
  { state_ = RD_UNICODE_INVAL; return false; }
  // convert to the target character type; a false result is recorded as
  // overflow
  if(!TConvTraits::to_char_type(nextChar, ucode))
  { state_ = RD_OVERFLOW; return false; }
  return true;
}
template<class TChar, class TCharTraits, class TConvTraits>
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::int_type
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::underflow()
{
  // protected, virtual function to read next UTF-32 char
  if(this->gptr() == &readBuf_)
    return TCharTraits::to_int_type(readBuf_);
  TChar nextChar;
  if(!get_next_char(nextChar)) return TCharTraits::eof();
  readBuf_ = nextChar;
  this->setg(&readBuf_, &readBuf_, &readBuf_ + 1);
  return TCharTraits::to_int_type(nextChar);
}