// Check whether a file is UTF-8 encoded:
/**
 * Checks whether a byte buffer is structurally well-formed UTF-8.
 *
 * Every lead byte must be followed by the number of continuation bytes
 * (10xxxxxx) that its bit pattern announces. This is a structural check only:
 * overlong encodings, UTF-16 surrogate code points and code points above
 * U+10FFFF are not rejected.
 *
 * @param pBuffer  Start of the bytes to inspect.
 * @param size     Number of bytes available at pBuffer.
 * @return true if no malformed sequence was found (pure ASCII and empty
 *         buffers included); false on a malformed sequence or when pBuffer is
 *         null while size is positive. A multi-byte sequence cut off by the
 *         end of the buffer is tolerated and does not make the result false.
 */
bool IsUTF8Text(const void* pBuffer, long size)
{
    if (size <= 0)
    {
        // Nothing to inspect, so nothing can be malformed.
        return true;
    }
    if (!pBuffer)
    {
        return false;
    }

    const unsigned char* start = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const end = start + size;

    while (start < end)
    {
        const unsigned char lead = *start;
        int trailing = 0; // continuation bytes announced by the lead byte

        if (lead < 0x80) // 0xxxxxxx: single-byte (ASCII) character
        {
            trailing = 0;
        }
        else if (lead < 0xC0) // 10xxxxxx: continuation byte without a lead byte
        {
            return false;
        }
        else if (lead < 0xE0) // 110xxxxx: 2-byte UTF-8 character
        {
            trailing = 1;
        }
        else if (lead < 0xF0) // 1110xxxx: 3-byte UTF-8 character
        {
            trailing = 2;
        }
        else if (lead < 0xF8) // 11110xxx: 4-byte UTF-8 character
        {
            trailing = 3;
        }
        else // 11111xxx never starts a UTF-8 sequence
        {
            return false;
        }

        if (end - start <= trailing)
        {
            // The sequence is cut off by the end of the buffer; the bytes seen
            // so far are not treated as an error.
            break;
        }

        for (int i = 1; i <= trailing; ++i)
        {
            if ((start[i] & 0xC0) != 0x80)
            {
                return false;
            }
        }
        start += trailing + 1;
    }
    return true;
}
/**
 * Reads a whole file in binary mode and checks its bytes with IsUTF8Text().
 *
 * @param pFileName  Path of the file to inspect.
 * @return The result of IsUTF8Text() on the bytes read; false if pFileName is
 *         null, the file cannot be opened, its size cannot be determined, or
 *         reading it fails.
 */
bool IsUTF8File(const char* pFileName)
{
    if (NULL == pFileName)
    {
        return false;
    }

    FILE* f = NULL;
    if (fopen_s(&f, pFileName, "rb") != 0 || NULL == f)
    {
        return false;
    }

    // Determine the file size by seeking to the end; ftell() reports -1 on failure.
    long lSize = -1;
    if (fseek(f, 0, SEEK_END) == 0)
    {
        lSize = ftell(f);
    }
    if (lSize < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
        fclose(f);
        return false;
    }

    char* pBuff = NULL;
    try
    {
        pBuff = new char[static_cast<size_t>(lSize) + 1];
    }
    catch (...)
    {
        // Do not leak the file handle when the allocation fails.
        fclose(f);
        throw;
    }

    // Element size 1 makes fread() report the number of bytes actually read.
    const size_t bytesRead = fread(pBuff, 1, static_cast<size_t>(lSize), f);
    const bool readFailed = (ferror(f) != 0);
    fclose(f);

    bool bIsUTF8 = false;
    if (!readFailed)
    {
        // Only the bytes that were really read are inspected.
        bIsUTF8 = IsUTF8Text(pBuff, static_cast<long>(bytesRead));
    }

    delete[] pBuff;
    return bIsUTF8;
}
// Read a file:
/**
 * Reads a whole file and converts its bytes to a wide-character CString.
 *
 * A leading UTF-8 byte order mark (EF BB BF) is skipped before conversion,
 * whatever CodePage is given.
 *
 * @param filename  Path of the file to read.
 * @param CodePage  Code page passed to MultiByteToWideChar() to interpret the
 *                  file's bytes.
 * @return The converted text, cut at the first NUL character if the file
 *         contains one. An empty string is returned when the file cannot be
 *         opened, is empty, is larger than 0x7FFFFFFF bytes, memory cannot be
 *         allocated, or the conversion fails.
 */
CString GetFile(CString filename, UINT CodePage)
{
    CString strFile = L"";

    CFile fileR;
    if (!fileR.Open(filename, CFile::modeRead | CFile::typeBinary))
    {
        return strFile;
    }

    // MultiByteToWideChar() takes the byte count as an int, so larger files
    // cannot be converted in a single call.
    const ULONGLONG kMaxBytes = 0x7FFFFFFF;
    const ULONGLONG FileSize = fileR.GetLength();
    if (FileSize == 0 || FileSize > kMaxBytes)
    {
        fileR.Close();
        return strFile;
    }

    char* pContent = static_cast<char*>(calloc(static_cast<size_t>(FileSize) + 1, sizeof(char)));
    if (pContent == NULL)
    {
        fileR.Close();
        return strFile;
    }

    // NOTE(review): per the MFC documentation CFile::Read can throw
    // CFileException; pContent would leak in that case - confirm whether
    // callers expect that exception to propagate.
    const UINT bytesRead = fileR.Read(pContent, static_cast<UINT>(FileSize));
    fileR.Close();

    // Skip a UTF-8 byte order mark when one is present.
    const char* pText = pContent;
    int textLen = static_cast<int>(bytesRead);
    if (bytesRead >= 3
        && static_cast<BYTE>(pContent[0]) == 0xEF
        && static_cast<BYTE>(pContent[1]) == 0xBB
        && static_cast<BYTE>(pContent[2]) == 0xBF)
    {
        pText += 3;
        textLen -= 3;
    }

    if (textLen > 0)
    {
        // First call only measures how many wide characters are needed.
        const int n = MultiByteToWideChar(CodePage, 0, pText, textLen, NULL, 0);
        if (n > 0)
        {
            // One extra zeroed element keeps the converted text NUL-terminated.
            wchar_t* pWideChar = static_cast<wchar_t*>(calloc(static_cast<size_t>(n) + 1, sizeof(wchar_t)));
            if (pWideChar != NULL)
            {
                MultiByteToWideChar(CodePage, 0, pText, textLen, pWideChar, n);
                strFile = CString(pWideChar);
                free(pWideChar);
            }
        }
    }

    free(pContent);
    return strFile;
}
|