判断文件编码(UTF8,8BOM,16LE,16BE,ANSI)附string互转wstring、String互转Wchar_t、获取错误文本,分割string到vector

柳萱公子 · · 编程&逆向技术交流
0

本文共计3840个字,预计阅读时长15.4分钟。

拿走回个1

#pragma once
#ifndef MYTOOL_H
#define MYTOOL_H

#include <stdio.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <Windows.h>
#include <winbase.h>

// MYTOOL: a bag of static Win32 helper routines (string conversion, clipboard,
// text-file reading and encoding detection). It holds no data members; every
// operation is a static member function.
class MYTOOL {
public:
    MYTOOL() = default;
    ~MYTOOL() = default;

    // ---- narrow <-> wide string conversion -------------------------------

    // Converts a narrow string to a wide string using the CP_ACP code page.
    static std::wstring string2wstring(std::string str);
    // Converts a wide string to a narrow string using the CP_ACP code page.
    static std::string wstring2string(std::wstring wstr);
    // Writes the narrow form of `wchar` into `szDst`.
    // NOTE(review): definition not visible here - target code page unconfirmed.
    static void Wchar_tToString(std::string& szDst, wchar_t* wchar);
    // NOTE(review): definition not visible here - confirm who owns (and must
    // release) the returned wchar_t buffer.
    static wchar_t* StringToWchar_t(std::string& str);

    // ---- general utilities -----------------------------------------------

    // Presumably splits `str` on `pattern` into a vector of pieces; the
    // definition in this file is incomplete - verify.
    static std::vector<std::string> split(std::string str, std::string pattern);
    // Presumably returns the message text for a Win32 error code. The default
    // argument calls GetLastError() at the call site.
    static std::string get_last_error(DWORD errCode = GetLastError());
    // Places the NUL-terminated string `data` on the clipboard as CF_TEXT.
    static BOOL 复制到剪切板(const char* data);

    // ---- encoding detection ----------------------------------------------

    // Result is used as a boolean "looks like UTF-8" by the detection code.
    // NOTE(review): definition not visible here.
    static int IsUTF8(const void* pBuffer, long size);
    // Read a file into `str` and return a numeric encoding code:
    // 0 read failure, 1 UTF-8, 2 UTF-16LE, 3 UTF-16BE, 4 UTF-8 with BOM,
    // 5 none of the above.
    static int CalculateFileEncoding(LPCSTR filePath, std::string& str);
    static int CalculateFileEncodingW(LPCWSTR filePath, std::string& str);
    // Same detection, but the result is returned as a label such as "UTF-8".
    static std::string 判断文件编码(LPCSTR filePath, std::string& 读到的文本);
    static std::string 判断文件编码W(LPCWSTR filePath, std::string& 读到的文本);

    // ---- file reading ----------------------------------------------------

    // Open the file in "r,ccs=UTF-8" text mode and return its content as a
    // narrow string; an empty string is returned when the file cannot be opened.
    static std::string 读取文件UTF8(std::string file);
    static std::string 读取文件UTF8W(const wchar_t* file);
};

// Copies the NUL-terminated string `data` to the clipboard as CF_TEXT.
//
// @param data  Text to copy; a null pointer is rejected.
// @return TRUE when the clipboard now holds the text, FALSE on any failure.
//
// Once OpenClipboard succeeds, the clipboard is closed again on every path.
// The global memory block is released here unless SetClipboardData accepted
// it, in which case the system owns it.
BOOL MYTOOL::复制到剪切板(const char* data) {
    if (data == NULL) return FALSE;

    const size_t contentSize = strlen(data) + 1;  // include the terminator
    if (!OpenClipboard(NULL)) return FALSE;

    BOOL copied = FALSE;
    HGLOBAL hMemory = NULL;
    if (EmptyClipboard()) {
        hMemory = GlobalAlloc(GMEM_MOVEABLE, contentSize);
    }
    if (hMemory != NULL) {
        void* lpMemory = GlobalLock(hMemory);
        if (lpMemory != NULL) {
            memcpy_s(lpMemory, contentSize, data, contentSize);
            GlobalUnlock(hMemory);
            if (SetClipboardData(CF_TEXT, hMemory) != NULL) {
                copied = TRUE;
                hMemory = NULL;  // ownership passed to the system
            }
        }
    }

    // Still ours: locking or SetClipboardData failed, so free it ourselves.
    if (hMemory != NULL) GlobalFree(hMemory);
    CloseClipboard();
    return copied;
}

std::string MYTOOL::读取文件UTF8(std::string file) {
    FILE* fp;
    auto err = _wfopen_s(&fp, string2wstring(file).c_str(), L"r,ccs=UTF-8");
    if (fp == NULL) {
        return "";
    }
    if (err != 0) {
        return "";
    }
    std::string sum;
    wchar_t str[1024] = { 0 };
    while (fgetws(str, 1024, fp) != NULL) {
        std::string 当前行内容;
        Wchar_tToString(当前行内容, str);
        sum += 当前行内容;
    }
    fclose(fp);
    return sum;
}

// Reads a whole text file opened in "r,ccs=UTF-8" mode and returns it as a
// narrow string.
//
// @param file  Wide-character path of the file; a null pointer yields "".
// @return The concatenation of every chunk read, each converted with
//         Wchar_tToString; "" when the file cannot be opened.
std::string MYTOOL::读取文件UTF8W(const wchar_t* file) {
    // _wfopen_s treats a null file name as an invalid parameter, so reject it
    // up front instead of handing it to the CRT.
    if (file == NULL) {
        return "";
    }

    // Initialised so the check below never reads an indeterminate pointer if
    // the open call fails without storing anything.
    FILE* fp = NULL;
    auto err = _wfopen_s(&fp, file, L"r,ccs=UTF-8");
    if (err != 0 || fp == NULL) {
        return "";
    }

    std::string sum;
    wchar_t chunk[1024] = { 0 };
    const int chunkCapacity = static_cast<int>(sizeof(chunk) / sizeof(chunk[0]));
    // fgetws stops at a newline or when the buffer is full, so a line longer
    // than the buffer simply arrives in several consecutive chunks.
    while (fgetws(chunk, chunkCapacity, fp) != NULL) {
        std::string converted;
        Wchar_tToString(converted, chunk);
        sum += converted;
    }
    fclose(fp);
    return sum;
}

// Converts a narrow string in the system ANSI code page (CP_ACP) to a wide
// string.
//
// @param str  Narrow input; all str.size() bytes are converted, embedded NUL
//             bytes included.
// @return The converted wide string, or an empty string when the input is
//         empty or the conversion fails.
std::wstring MYTOOL::string2wstring(std::string str) {
    std::wstring result;
    if (str.empty()) {
        return result;
    }

    const int srcLen = static_cast<int>(str.size());
    // First call only measures: with no output buffer it returns the number of
    // wide characters required.
    const int len = MultiByteToWideChar(CP_ACP, 0, str.c_str(), srcLen, NULL, 0);
    if (len <= 0) {
        return result;
    }

    // Convert directly into the result's storage. Building the result from a
    // C-string copy would cut it off at the first embedded NUL.
    result.resize(static_cast<size_t>(len));
    const int written = MultiByteToWideChar(CP_ACP, 0, str.c_str(), srcLen, &result[0], len);
    if (written <= 0) {
        result.clear();
    } else {
        result.resize(static_cast<size_t>(written));
    }
    return result;
}

// Converts a wide string to a narrow string in the system ANSI code page
// (CP_ACP).
//
// @param wstr  Wide input; all wstr.size() characters are converted, embedded
//              NUL characters included.
// @return The converted narrow string, or an empty string when the input is
//         empty or the conversion fails.
std::string MYTOOL::wstring2string(std::wstring wstr) {
    std::string result;
    if (wstr.empty()) {
        return result;
    }

    const int srcLen = static_cast<int>(wstr.size());
    // First call only measures: with no output buffer it returns the number of
    // bytes required.
    const int len = WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), srcLen, NULL, 0, NULL, NULL);
    if (len <= 0) {
        return result;
    }

    // Convert directly into the result's storage. Building the result from a
    // C-string copy would cut it off at the first embedded NUL.
    result.resize(static_cast<size_t>(len));
    const int written = WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), srcLen, &result[0], len, NULL, NULL);
    if (written <= 0) {
        result.clear();
    } else {
        result.resize(static_cast<size_t>(written));
    }
    return result;
}

// NOTE(review): this span is damaged and does not compile as written.
// The `for` line below is not valid C++, and the statements after it use names
// this function never declares (pFile, buffer, fileSize) and return int codes
// from a function declared to return std::vector<std::string>. Those trailing
// statements match the end of CalculateFileEncodingW almost line for line, so
// they presumably belong to CalculateFileEncoding. Everything between `i <` in
// split() and `> 0);` appears to have been lost - presumably the rest of
// split() plus the definitions of the members the class declares but this file
// never defines (Wchar_tToString, StringToWchar_t, get_last_error, IsUTF8 and
// the start of CalculateFileEncoding). Restore from the original source.
std::vector<std::string>MYTOOL::split(std::string str, std::string pattern) {
    int pos;
    std::vector<std::string>result;
    // The delimiter is appended to the input before scanning.
    str += pattern;
    int size = (int)str.size();
    for (int i = 0; i 0);
    CloseHandle(pFile);
    
    str.clear();
    str += buffer;
    // The leading bytes are compared against the byte-order marks; each test
    // appears twice, once with a (char) cast and once as a negative literal.
    if (buffer[0] == (char)0xFF && buffer[1] == (char)0xFE) {
        return 2;//UTF16_LE
    } else if (buffer[0] == -1 && buffer[1] == -2) {
        return 2;//UTF16_LE
    } else if (buffer[0] == (char)0xFE && buffer[1] == (char)0xFF) {
        return 3;//UTF16_BE
    } else if (buffer[0] == -2 && buffer[1] == -1) {
        return 3;//UTF16_BE
    } else if (buffer[0] == (char)0xEF && buffer[1] == (char)0xBB && buffer[2] == (char)0xBF) {
        return 4;//UTF8_BOM
    } else if (buffer[0] == -17 && buffer[1] == -69 && buffer[2] == -65) {
        return 4;//UTF8_BOM
    } else if (IsUTF8(buffer, fileSize + 1)) {
        return 1;//UTF-8
    } else {
        return 5;//none of the above; possibly ANSI
    }
}

// Reads the file at `filePath` into `str` and guesses its text encoding.
//
// @param filePath  Wide-character path of the file to inspect.
// @param str       Receives the raw bytes read from the file (NUL bytes
//                  included). Left untouched when 0 is returned.
// @return 0  the file could not be opened, sized, buffered or read
//         1  UTF-8 (no BOM; accepted by IsUTF8)
//         2  UTF-16LE (BOM FF FE)
//         3  UTF-16BE (BOM FE FF)
//         4  UTF-8 with BOM (EF BB BF)
//         5  none of the above, possibly ANSI
//
// Only the low 32 bits of the file size are queried, so files of 4 GiB or
// more are not handled.
int MYTOOL::CalculateFileEncodingW(LPCWSTR filePath, std::string& str) {
    HANDLE hFile = CreateFileW(filePath, FILE_GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile == INVALID_HANDLE_VALUE) {
        // Nothing was opened, so there is no handle to close.
        return 0;
    }

    const DWORD fileSize = GetFileSize(hFile, NULL);
    if (fileSize == INVALID_FILE_SIZE) {
        CloseHandle(hFile);
        return 0;
    }

    // One extra, zeroed byte keeps the buffer NUL-terminated.
    const size_t bufferSize = static_cast<size_t>(fileSize) + (size_t)1;
    char* buffer = (char*)malloc(bufferSize);
    if (buffer == NULL) {
        CloseHandle(hFile);
        return 0;
    }
    ZeroMemory(buffer, bufferSize);

    // ReadFile may deliver fewer bytes than requested, so keep reading until
    // the expected size is reached or the file ends early.
    DWORD totalRead = 0;
    BOOL readOk = TRUE;
    while (totalRead < fileSize) {
        DWORD chunk = 0;
        if (!ReadFile(hFile, buffer + totalRead, fileSize - totalRead, &chunk, NULL)) {
            // Report before any other API call can overwrite the last-error code.
            printf("ReadFile failed : %s", get_last_error().c_str());
            readOk = FALSE;
            break;
        }
        if (chunk == 0) break;  // end of file reached early
        totalRead += chunk;
    }
    CloseHandle(hFile);

    if (!readOk) {
        free(buffer);
        return 0;
    }

    // Copy by length: UTF-16 text is full of NUL bytes, and a C-string append
    // would stop at the first one.
    str.assign(buffer, static_cast<size_t>(totalRead));

    // Compare as unsigned bytes so the result does not depend on whether plain
    // char is signed.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(buffer);
    int encoding;
    if (totalRead >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE) {
        encoding = 2;//UTF16_LE
    } else if (totalRead >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF) {
        encoding = 3;//UTF16_BE
    } else if (totalRead >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
        encoding = 4;//UTF8_BOM
    } else if (IsUTF8(buffer, fileSize + 1)) {
        // NOTE(review): the size passed includes the trailing NUL byte, as the
        // previous code did - confirm that IsUTF8 expects this.
        encoding = 1;//UTF-8
    } else {
        encoding = 5;//none of the above; possibly ANSI
    }

    free(buffer);
    return encoding;
}

// Detects the encoding of the file at `filePath` and returns it as a label.
//
// @param filePath    Narrow path of the file to inspect.
// @param 读到的文本   Passed through to CalculateFileEncoding, which fills it.
// @return "READ_FAIL", "UTF-8", "UTF-16LE", "UTF-16BE", "UTF8-BOM" or "ANSI"
//         for codes 0 to 5 respectively; "ERROR" for any other code.
std::string MYTOOL::判断文件编码(LPCSTR filePath, std::string& 读到的文本) {
    // Labels indexed by the numeric code returned by CalculateFileEncoding.
    static const char* const kLabels[] = {
        "READ_FAIL",  // 0: the file could not be read
        "UTF-8",      // 1
        "UTF-16LE",   // 2
        "UTF-16BE",   // 3
        "UTF8-BOM",   // 4
        "ANSI",       // 5: none of the above, possibly ANSI
    };
    const int labelCount = static_cast<int>(sizeof(kLabels) / sizeof(kLabels[0]));

    const int code = CalculateFileEncoding(filePath, 读到的文本);
    if (code < 0 || code >= labelCount) {
        return "ERROR";
    }
    return kLabels[code];
}

// Detects the encoding of the file at `filePath` and returns it as a label.
//
// @param filePath    Wide-character path of the file to inspect.
// @param 读到的文本   Passed through to CalculateFileEncodingW, which fills it.
// @return "READ_FAIL", "UTF-8", "UTF-16LE", "UTF-16BE", "UTF8-BOM" or "ANSI"
//         for codes 0 to 5 respectively; "ERROR" for any other code.
std::string MYTOOL::判断文件编码W(LPCWSTR filePath, std::string& 读到的文本) {
    // Labels indexed by the numeric code returned by CalculateFileEncodingW.
    static const char* const kLabels[] = {
        "READ_FAIL",  // 0: the file could not be read
        "UTF-8",      // 1
        "UTF-16LE",   // 2
        "UTF-16BE",   // 3
        "UTF8-BOM",   // 4
        "ANSI",       // 5: none of the above, possibly ANSI
    };
    const int labelCount = static_cast<int>(sizeof(kLabels) / sizeof(kLabels[0]));

    const int code = CalculateFileEncodingW(filePath, 读到的文本);
    if (code < 0 || code >= labelCount) {
        return "ERROR";
    }
    return kLabels[code];
}

#endif
最后于 2023-2-4 被柳萱公子编辑 ,原因: 代码格式怎么变了,原先的三个点换成XML了

最新回复 ( 0 )
全部楼主
  • 暂无评论