Listing 5
// Filtering stream buffer for character input.
// Reports whether unicodeChar is acceptable as a Unicode code point.
// Returns true for an accepted value and false for a rejected one.
// NOTE: only the first two range checks are shown here; the rest of the
// body is elided ("...") in this listing.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
is_valid_unicode(unsigned long unicodeChar) const
{
// is this a valid Unicode code-point?
// Everything below 0xD800 is accepted.
if(unicodeChar < 0x00D800) return true;
// 0xD800..0xDFFF is rejected (this is the UTF-16 surrogate range).
if(unicodeChar < 0x00E000) return false;
// Further range checks are elided in this listing.
...
// Anything not accepted by an earlier check is rejected.
return false;
}
// Reads and consumes one octet from the external stream buffer and
// classifies it as the leading octet of a UTF-8 sequence.
// leadOctet receives the octet value; it is left untouched on end of input.
// Returns a non-zero sequence-length code on success (get_next_char
// switches on it), or 0 on failure with state_ set to the reason:
//   RD_EOF        - the external buffer returned EOF
//   RD_LEAD_INVAL - the octet is not an acceptable leading octet
// NOTE: the classification of octets >= 0xE0 is elided ("...") in this listing.
template<class TChar, class TCharTraits, class TConvTraits>
int UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
get_utf8_lead_octet(unsigned long& leadOctet)
{
// read leading UTF-8 octet from ext. stream buffer
// sbumpc() consumes the octet, so it is not re-read after an error.
streambuf::int_type tmp = pExternBuf_->sbumpc();
if(tmp == EOF) { state_ = RD_EOF; return 0; }
leadOctet = static_cast<unsigned char>(tmp);
// 0x00..0x7F: single-octet sequence.
if(leadOctet < 0x80) return 1;
// 0x80..0xBF: has the continuation-octet bit pattern, invalid as a lead.
if(leadOctet < 0xC0) { state_ = RD_LEAD_INVAL; return 0; }
// 0xC0..0xDF: two-octet sequence.
if(leadOctet < 0xE0) return 2;
// Longer sequence lengths are elided in this listing.
...
// No earlier check accepted the octet: report an invalid lead octet.
state_ = RD_LEAD_INVAL;
return 0;
}
// Fetches one UTF-8 continuation octet (bit pattern 10xxxxxx) from the
// external stream buffer.
// contOctet receives the peeked octet whenever input is available, even if
// the octet turns out not to be a continuation octet.
// Returns true when a continuation octet was read and consumed. Returns
// false otherwise, with state_ set to RD_EOF (no more input) or
// RD_CONT_INVAL (the next octet is not a continuation octet).
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
get_utf8_cont_octet(unsigned long& contOctet)
{
    // Peek only: an octet that is not a continuation octet must stay in
    // the external buffer, so it is consumed further down on success only.
    const streambuf::int_type peeked = pExternBuf_->sgetc();
    if(peeked == EOF)
    {
        state_ = RD_EOF;
        return false;
    }

    contOctet = static_cast<unsigned char>(peeked);

    // A continuation octet has its two top bits equal to 10.
    const bool isContinuation = ((contOctet & 0xC0) == 0x80);
    if(isContinuation)
    {
        pExternBuf_->sbumpc();  // accept the octet: advance past it
        return true;
    }

    state_ = RD_CONT_INVAL;
    return false;
}
// Decodes the next UTF-8 sequence from the external stream buffer into one
// character of type TChar.
// nextChar receives the converted character on success.
// Returns true on success. Returns false on failure, with state_ holding
// the reason: set by get_utf8_lead_octet / get_utf8_cont_octet, or here to
// RD_NONSHORT, RD_UNICODE_INVAL or RD_OVERFLOW.
// NOTE: the decoding of 3- to 6-octet sequences is elided ("...") in this listing.
template<class TChar, class TCharTraits, class TConvTraits>
bool UTF8Streambuf<TChar, TCharTraits, TConvTraits>::
get_next_char(TChar& nextChar)
{
// read next UTF-32 char
// c1 is the leading octet, c2..c6 are continuation octets, ucode is the
// assembled code point.
unsigned long c1, c2, c3, c4, c5, c6, ucode;
switch(get_utf8_lead_octet(c1))
{
// Single octet: the octet value is the code point.
case 1: ucode = c1;
break;
// Two octets: low 5 bits of the lead octet followed by low 6 bits of the
// continuation octet.
case 2: if(!get_utf8_cont_octet(c2)) return false;
ucode = c1 & 0x1F; ucode <<= 6; ucode += c2 & 0x3F;
// A value below 0x80 fits in one octet, so a two-octet encoding of it is
// rejected as not being the shortest form.
if(ucode < 0x80)
{ state_ = RD_NONSHORT; return false; }
break;
// Longer sequences are elided in this listing.
case 3: ...
case 4: ...
case 5: ...
case 6: ...
// Any other result (including 0 = lead octet failure): give up; in the
// visible paths get_utf8_lead_octet has already set state_.
default: return false;
}
// Reject values that is_valid_unicode does not accept.
if(!is_valid_unicode(ucode))
{ state_ = RD_UNICODE_INVAL; return false; }
// Convert to TChar; a failed conversion is reported as RD_OVERFLOW.
if(!TConvTraits::to_char_type(nextChar, ucode))
{ state_ = RD_OVERFLOW; return false; }
return true;
}
template<class TChar, class TCharTraits, class TConvTraits>
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::int_type
UTF8Streambuf<TChar, TCharTraits, TConvTraits>::underflow()
{
// protected, virtual function to read next UTF-32 char
if(this->gptr() == &readBuf_)
return TCharTraits::to_int_type(readBuf_);
TChar nextChar;
if(!get_next_char(nextChar)) return TCharTraits::eof();
readBuf_ = nextChar;
this->setg(&readBuf_, &readBuf_, &readBuf_ + 1);
return TCharTraits::to_int_type(nextChar);
}