通过C#将GB18030编码的文件转换为UTF-8

发布于:2025-04-13 ⋅ 阅读:(54) ⋅ 点赞:(0)

使用C#代码,将GB18030编码的文本文件批量转换为UTF-8(无BOM)格式。

using System.Text;

/// <summary>
/// Re-encodes text files under a folder as UTF-8 without a byte order mark.
/// The source encoding of each file is detected from its BOM or, when there is
/// none, by strictly validating the whole content as UTF-8 and then as GB18030.
/// </summary>
public class FileEncodingConverter
{
	// File extensions eligible for conversion (extend as needed).
	private static readonly string[] SupportedExtensions = { ".sln", ".cpp", ".h", ".txt", ".csv", ".log", ".json", ".xml", ".html", ".css", ".js" };

	// GB18030 is only resolvable once the code-pages provider is registered.
	// Registering here keeps ConvertFolderToUTF8 usable when Main is not the entry point.
	static FileEncodingConverter()
	{
		Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
	}

	/// <summary>
	/// Converts every file with a supported extension under <paramref name="folderPath"/>
	/// (including subfolders) to UTF-8 without BOM, in place.
	/// </summary>
	/// <param name="folderPath">Root folder to scan.</param>
	/// <remarks>
	/// Never throws: a failure on one file is reported on the console and the scan
	/// continues; a failure of the scan itself is reported and ends the method.
	/// </remarks>
	public static void ConvertFolderToUTF8(string folderPath)
	{
		try
		{
			// Walk the folder and all of its subfolders.
			foreach (var file in Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories))
			{
				// Extensions are identifiers, not linguistic text: compare ordinally, ignoring case.
				var ext = Path.GetExtension(file);
				if (!SupportedExtensions.Contains(ext, StringComparer.OrdinalIgnoreCase))
				{
					continue;
				}

				try
				{
					ConvertFileToUTF8(file);
				}
				catch (Exception ex)
				{
					// Best effort: one unreadable file must not abort the whole batch.
					Console.WriteLine($"Error processing {file}: {ex.Message}");
				}
			}
			Console.WriteLine("所有支持的文件已转换为 UTF-8 编码!");
		}
		catch (Exception ex)
		{
			Console.WriteLine($"程序异常:{ex.Message}");
		}
	}

	/// <summary>
	/// Rewrites a single file as UTF-8 without BOM.
	/// </summary>
	/// <param name="filePath">File to convert in place.</param>
	/// <exception cref="InvalidOperationException">
	/// The content is neither BOM-marked nor valid UTF-8 nor valid GB18030;
	/// the file is left untouched in that case.
	/// </exception>
	private static void ConvertFileToUTF8(string filePath)
	{
		// Read once; detection and decoding both work on the same bytes.
		byte[] raw = File.ReadAllBytes(filePath);
		Encoding detectedEncoding = DetectEncoding(raw);

		// Skip the BOM (if the detected encoding has one and the file starts with it)
		// so it does not end up in the text as U+FEFF.
		byte[] preamble = detectedEncoding.GetPreamble();
		int bomLength = HasPrefix(raw, preamble) ? preamble.Length : 0;
		string content = detectedEncoding.GetString(raw, bomLength, raw.Length - bomLength);

		// Save as UTF-8 without BOM.
		File.WriteAllText(filePath, content, new UTF8Encoding(false));
		Console.WriteLine($"成功转换:{filePath}");
	}

	/// <summary>
	/// Determines the encoding of <paramref name="data"/>.
	/// </summary>
	/// <param name="data">Complete file content.</param>
	/// <returns>
	/// The BOM-indicated encoding when a BOM is present; otherwise strict UTF-8 if the
	/// bytes are valid UTF-8, otherwise strict GB18030 if they are valid GB18030.
	/// </returns>
	/// <exception cref="InvalidOperationException">None of the above matched.</exception>
	private static Encoding DetectEncoding(byte[] data)
	{
		// BOM checks. HasPrefix is length-safe, so empty and 1-byte files are fine.
		if (HasPrefix(data, 0xEF, 0xBB, 0xBF))
		{
			return Encoding.UTF8;
		}

		// UTF-32 LE must be tested before UTF-16 LE: its BOM starts with FF FE too.
		if (HasPrefix(data, 0xFF, 0xFE, 0x00, 0x00))
		{
			return Encoding.UTF32;
		}

		if (HasPrefix(data, 0x00, 0x00, 0xFE, 0xFF))
		{
			return new UTF32Encoding(bigEndian: true, byteOrderMark: true);
		}

		if (HasPrefix(data, 0xFF, 0xFE))
		{
			return Encoding.Unicode;
		}

		if (HasPrefix(data, 0xFE, 0xFF))
		{
			return Encoding.BigEndianUnicode;
		}

		// No BOM. UTF-8 is tried first because it is the stricter format: most UTF-8
		// multi-byte sequences also form valid GB18030 byte pairs, but not vice versa.
		// Both decoders use exception fallback; the default replacement fallback
		// would silently accept any input and never signal a mismatch.
		Encoding strictUtf8 = new UTF8Encoding(false, true);
		if (CanDecode(strictUtf8, data))
		{
			return strictUtf8;
		}

		Encoding strictGb18030 = Encoding.GetEncoding(
			"GB18030",
			EncoderFallback.ExceptionFallback,
			DecoderFallback.ExceptionFallback);
		if (CanDecode(strictGb18030, data))
		{
			return strictGb18030;
		}

		// Refuse to guess: a lossy decode would destroy the original file content.
		throw new InvalidOperationException("无法检测文件编码");
	}

	// Returns true when data begins with the given bytes (an empty prefix always matches).
	private static bool HasPrefix(byte[] data, params byte[] prefix)
	{
		if (data.Length < prefix.Length)
		{
			return false;
		}

		for (int i = 0; i < prefix.Length; i++)
		{
			if (data[i] != prefix[i])
			{
				return false;
			}
		}

		return true;
	}

	// Returns true when the whole buffer decodes without error; the encoding must use exception fallback.
	private static bool CanDecode(Encoding strictEncoding, byte[] data)
	{
		try
		{
			strictEncoding.GetString(data);
			return true;
		}
		catch (DecoderFallbackException)
		{
			return false;
		}
	}

	// Program entry point.
	public static void Main()
	{
		// Step 1: register the encoding provider (must happen before GB18030 is first requested).
		Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
		// Step 2: obtain the GB18030 encoding.
		Encoding gb18030 = Encoding.GetEncoding("GB18030");
		#region Provider smoke test
		// Round-trip a sample string to confirm the provider is loaded.
		byte[] bytes = gb18030.GetBytes("你好,世界!");
		string text = gb18030.GetString(bytes);
		Console.WriteLine(text);
		#endregion
		string folderPath = "dir";
		ConvertFolderToUTF8(folderPath);
	}
}


网站公告

今日签到

点亮在社区的每一天
去签到