****************************** UTF8File.h ******************************
// Supports CStringW only (i.e. Unicode builds, where CString is CStringW).
#pragma once
#include "afx.h"
// CFile specialization for text files that start with the UTF-8 byte-order
// mark (EF BB BF). Positions and lengths reported by this class are measured
// from the first byte after the mark, and text is converted between UTF-8 on
// disk and CString in memory.
class CUTF8File : public CFile
{
public:
	// Starts with no byte-order mark accounted for.
	CUTF8File();

	// Opens an existing file for reading and writing and checks that it begins
	// with the UTF-8 byte-order mark. Returns false (with the file closed) when
	// the file cannot be opened or the mark is missing.
	bool Open(LPCTSTR lpszFileName);

	// Current position, not counting the byte-order mark.
	ULONGLONG GetPosition() const;

	// File length, not counting the byte-order mark.
	ULONGLONG GetLength() const;

	// Moves the file pointer; lOff and the returned position are relative to
	// the text that follows the byte-order mark. nFrom is one of CFile::begin,
	// CFile::current or CFile::end.
	ULONGLONG Seek(LONGLONG lOff, UINT nFrom);

	// Reads one line into rString, decoding UTF-8. Carriage returns are dropped
	// and the terminating line feed is not stored. Returns false when no byte
	// could be read at all, i.e. the end of the file has been reached.
	bool ReadString(CString& rString);

	// Writes lpsz as UTF-8, followed by CR LF unless LastLine is 1.
	// Returns false if the write failed.
	bool WriteString(LPCTSTR lpsz, BYTE LastLine);

private:
	// Number of leading bytes hidden from callers: 0 initially, 3 once Open()
	// has found the byte-order mark.
	BYTE m_offset;
};
****************************** UTF8File.cpp ******************************
#include "StdAfx.h"
#include "UTF8File.h"
// Default constructor: no byte-order mark has been seen yet, so nothing is
// subtracted from positions and lengths.
CUTF8File::CUTF8File()
	: m_offset(0)
{
}
// Opens an existing file for reading and writing (binary mode, other writers
// denied) and verifies that it starts with the UTF-8 byte-order mark EF BB BF.
//
// lpszFileName  Path of the file to open.
//
// Returns true when the file is open and positioned just after the mark.
// Returns false when the file could not be opened, is shorter than the mark,
// does not start with it, or reading the header failed; in all those cases the
// file is closed again and no header offset is recorded.
bool CUTF8File::Open(LPCTSTR lpszFileName)
{
	// Forget the header size of any file this object handled earlier, so a
	// failed or repeated Open() never leaves a stale offset behind.
	m_offset = 0;

	if(!__super::Open(lpszFileName, modeReadWrite|typeBinary|shareDenyWrite))
	{
		return false;
	}

	bool hasBom = false;
	try
	{
		// Compare byte by byte rather than through a WORD so the check does not
		// depend on the byte order of the machine.
		BYTE header[3] = { 0, 0, 0 };
		if(Read(header, sizeof(header)) == sizeof(header))
		{
			hasBom = (header[0] == 0xEF) && (header[1] == 0xBB) && (header[2] == 0xBF);
		}
	}
	catch(CException* e)
	{
		// MFC throws heap-allocated exception objects; release it.
		e->Delete();
	}
	catch(...)
	{
	}

	if(hasBom)
	{
		m_offset = 3;
		return true;
	}

	// Abort() closes the handle without throwing, unlike Close().
	Abort();
	return false;
}
// Returns the current file position with the header bytes (m_offset)
// subtracted, so that 0 is the first byte after the byte-order mark.
ULONGLONG CUTF8File::GetPosition() const
{
	const ULONGLONG rawPosition = CFile::GetPosition();
	return rawPosition - m_offset;
}
// Returns the file length with the header bytes (m_offset) subtracted, i.e.
// the number of bytes that follow the byte-order mark.
ULONGLONG CUTF8File::GetLength() const
{
	const ULONGLONG rawLength = CFile::GetLength();
	return rawLength - m_offset;
}
// Moves the file pointer within the text that follows the byte-order mark.
//
// lOff   Offset to apply, interpreted according to nFrom.
// nFrom  CFile::begin   - lOff is the absolute target position;
//        CFile::current - lOff is added to the current position;
//        CFile::end     - the target is |lOff| bytes before the end. Both the
//                         negative offsets used by CFile and the positive
//                         offsets this class has always accepted work.
//        Any other value is treated like CFile::begin.
//
// The target is clamped to the range [0, GetLength()], so a seek can neither
// reach the byte-order mark nor go past the end of the file.
//
// Returns the new position, relative to the first byte after the mark.
ULONGLONG CUTF8File::Seek(LONGLONG lOff, UINT nFrom)
{
	const ULONGLONG pos = GetPosition();
	const ULONGLONG len = GetLength();

	// Split the signed offset into direction and magnitude. The magnitude is
	// computed in unsigned arithmetic, so mixing it with the unsigned
	// position/length can never turn a negative target into a huge one.
	const bool backward = (lOff < 0);
	const ULONGLONG distance = backward
		? (0 - static_cast<ULONGLONG>(lOff))
		: static_cast<ULONGLONG>(lOff);

	ULONGLONG target = 0;
	switch(nFrom)
	{
	case current:
		if(backward)
		{
			target = (distance >= pos) ? 0 : (pos - distance);
		}
		else
		{
			target = (pos >= len || distance >= len - pos) ? len : (pos + distance);
		}
		break;

	case end:
		target = (distance >= len) ? 0 : (len - distance);
		break;

	case begin:
	default:
		target = backward ? 0 : distance;
		break;
	}

	if(target > len)
	{
		target = len;
	}

	// Translate to a raw file offset, seek, and translate the result back.
	return CFile::Seek(static_cast<LONGLONG>(target + m_offset), begin) - m_offset;
}
// Reads one line of UTF-8 text from the current position and stores it,
// decoded, in rString.
//
// rString  Receives the line. It is emptied first. Carriage returns are
//          dropped and the terminating line feed is not stored.
//
// Decoding rules:
//  - 1-, 2- and 3-byte sequences become one character each.
//  - 4-byte sequences (U+10000..U+10FFFF) become a UTF-16 surrogate pair.
//  - An invalid lead byte, a stray continuation byte, or a sequence cut short
//    by a byte that is not a continuation byte becomes '?'. The offending byte
//    is left in the file so it is decoded on its own (a line feed following a
//    broken sequence still ends the line).
//  - A sequence cut short by the end of the file is dropped.
//
// Returns true when at least one byte was consumed (so an empty line still
// yields true), and false at the end of the file or when reading failed.
bool CUTF8File::ReadString(CString &rString)
{
	rString = _TEXT("");
	bool consumedAny = false;
	try
	{
		BYTE lead;
		while( Read(&lead, sizeof(lead)) == sizeof(lead) )
		{
			consumedAny = true;

			// Classify the lead byte: payload bits and number of trail bytes.
			DWORD codePoint = _TEXT('?');
			int trailCount = 0;
			if( !(lead & 0x80) )              // 0xxxxxxx
			{
				codePoint = lead;
			}
			else if( (lead & 0xE0) == 0xC0 )  // 110xxxxx 10xxxxxx
			{
				codePoint = lead & 0x1F;
				trailCount = 1;
			}
			else if( (lead & 0xF0) == 0xE0 )  // 1110xxxx 10xxxxxx 10xxxxxx
			{
				codePoint = lead & 0x0F;
				trailCount = 2;
			}
			else if( (lead & 0xF8) == 0xF0 )  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			{
				codePoint = lead & 0x07;
				trailCount = 3;
			}

			bool truncated = false;
			bool malformed = false;
			for(int i = 0; i < trailCount; ++i)
			{
				BYTE trail;
				if( Read(&trail, sizeof(trail)) != sizeof(trail) )
				{
					truncated = true;
					break;
				}
				if( (trail & 0xC0) != 0x80 )
				{
					// Not a continuation byte: push it back so that it is
					// decoded as the start of the next character.
					CFile::Seek(-1, current);
					malformed = true;
					break;
				}
				codePoint = (codePoint << 6) | (trail & 0x3F);
			}
			if( truncated )
			{
				break;
			}
			if( malformed || (trailCount == 3 && (codePoint < 0x10000 || codePoint > 0x10FFFF)) )
			{
				codePoint = _TEXT('?');
			}

			if( codePoint == '\r' )
			{
				continue;
			}
			if( codePoint == '\n' )
			{
				break;
			}

			if( codePoint >= 0x10000 )
			{
				// Outside the Basic Multilingual Plane: store as a surrogate pair.
				const DWORD scaled = codePoint - 0x10000;
				rString += static_cast<TCHAR>(0xD800 + (scaled >> 10));
				rString += static_cast<TCHAR>(0xDC00 + (scaled & 0x3FF));
			}
			else
			{
				rString += static_cast<TCHAR>(codePoint);
			}
		}
	}
	catch(CException* e)
	{
		// MFC throws heap-allocated exception objects; release it. Reporting
		// failure (not success) keeps read loops from spinning on an error.
		e->Delete();
		return false;
	}
	catch(...)
	{
		return false;
	}
	return consumedAny;
}
// Writes a string to the file at the current position, encoded as UTF-8.
//
// lpsz      Text to write (a null pointer is treated as an empty string).
// LastLine  When 1, the text is written as is; for any other value a CR LF
//           pair is appended.
//
// Returns true on success, false when building or writing the data failed.
// On failure part of the text may already have been written.
//
// NOTE(review): every 16-bit unit is encoded on its own, so a UTF-16 surrogate
// pair is written as two 3-byte sequences rather than one 4-byte sequence.
// This matches the previous output and is read back correctly by
// ReadString(), but strict UTF-8 consumers may reject it.
bool CUTF8File::WriteString(LPCTSTR lpsz, BYTE LastLine)
{
	try
	{
		CString text(lpsz);
		if( LastLine != 1 )
		{
			text += _TEXT("\r\n");
		}

		// Encode into a small buffer and hand it to Write() in chunks instead
		// of issuing one call per byte.
		BYTE buffer[256];
		UINT used = 0;

		// int index: a 16-bit counter would wrap on strings of 65536+ units.
		const int length = text.GetLength();
		for(int i = 0; i < length; ++i)
		{
			if( used + 3 > sizeof(buffer) )
			{
				Write(buffer, used);
				used = 0;
			}

			const DWORD unit = static_cast<WORD>(text[i]);
			if( unit < 0x80 )        // 0xxxxxxx
			{
				buffer[used++] = static_cast<BYTE>(unit);
			}
			else if( unit < 0x800 )  // 110xxxxx 10xxxxxx
			{
				buffer[used++] = static_cast<BYTE>(0xC0 | (unit >> 6));
				buffer[used++] = static_cast<BYTE>(0x80 | (unit & 0x3F));
			}
			else                     // 1110xxxx 10xxxxxx 10xxxxxx (up to and including U+FFFF)
			{
				buffer[used++] = static_cast<BYTE>(0xE0 | (unit >> 12));
				buffer[used++] = static_cast<BYTE>(0x80 | ((unit >> 6) & 0x3F));
				buffer[used++] = static_cast<BYTE>(0x80 | (unit & 0x3F));
			}
		}

		if( used > 0 )
		{
			Write(buffer, used);
		}
		return true;
	}
	catch(CException* e)
	{
		// MFC throws heap-allocated exception objects; release it.
		e->Delete();
		return false;
	}
	catch(...)
	{
		return false;
	}
}