Unicode 字体字符集可视化工具 - 代码介绍
项目概述
这个工具是一个用于分析和可视化字体文件中包含的 Unicode 字符的实用程序,能够扫描指定字体文件,提取其中包含的所有 Unicode 字符,并按 Unicode 区块分类生成 PDF 文档,直观展示字体支持的所有字符。
核心功能
- 字体扫描:自动扫描系统字体目录,识别 TrueType (.ttf) 和 OpenType (.otf) 字体文件
- 字符提取:使用 FreeType 库提取字体中支持的所有 Unicode 字符
- 区块分类:按照 Unicode 标准区块对字符进行分类
- PDF 生成:使用 Haru PDF 库生成包含所有字符的可视化文档
- 智能排版:自动处理多页文档、分栏显示和换行
主要组件
1. UnicodeBlock 结构体
定义 Unicode 区块的起始码点、结束码点和名称;配套的区块表按 Unicode 15.0 的区块划分列出基本多文种平面(BMP,U+0000–U+FFFF)中的区块,辅助平面(U+10000 及以上)的区块尚未收录。
2. FontUtils 类
封装 FreeType 库操作,提供以下功能:
- 初始化 FreeType 库
- 从字体文件提取 Unicode 字符集
- Unicode 码点到 UTF-8 编码的转换
- 按 Unicode 区块过滤字符
3. PDFDocument 类
封装 Haru PDF 库操作,提供以下功能:
- PDF 文档初始化
- 字体加载和编码设置
- 字符页面生成(支持多页)
- 彩色标题和统计信息添加
- 文档保存
4. FontScanner 类
负责扫描字体目录,提供以下功能:
- 递归扫描指定目录中的字体文件
- 按扩展名和文件名模式过滤字体
- 支持排除特定样式的字体(如粗体、斜体等)
技术特点
- 跨平台支持:使用标准 C++17 和跨平台库
- Unicode 区块支持:覆盖 Unicode 15.0 基本多文种平面(BMP)内的标准区块,辅助平面的区块可按需扩展
- 高效处理:智能分页和区块分割,避免内存问题
- 美观输出:彩色渐变标题、清晰的字符网格布局
- 灵活配置:可调整的页面布局参数(边距、字体大小等)
使用示例
// Usage example: scan a font directory and write one PDF per font file found.
int main() {
try {
FontUtils font_utils;
fs::path font_dir = "C:/Windows/Fonts"; // directory that is scanned for font files
fs::path output_dir = "output_pdfs"; // directory that receives the generated PDFs
FontScanner scanner(font_dir);
auto font_files = scanner.GetTTfFont(); // files left after GetTTfFont()'s suffix and style-name exclusions
for (const auto& path : font_files) {
std::cout << "处理字体文件: " << path << std::endl;
PDFDocument doc(path, output_dir);
doc.GenerateWithUnicodeChars(font_utils);
doc.Save();
}
}
// Any std::exception escaping the loop ends the run with exit code 1.
catch (const std::exception& ex) {
std::cerr << "错误: " << ex.what() << std::endl;
return 1;
}
return 0;
}
依赖库
- FreeType:用于字体解析和字符提取
- Haru PDF:用于 PDF 文档生成
- C++17 标准库:文件系统、字符串处理等
- STL:容器、算法等
输出示例
生成的 PDF 文档包含:
- 彩色渐变字体标题
- 字体统计信息(总字符数)
- 按 Unicode 区块组织的字符网格
- 自动分页的多页文档
应用场景
- 字体设计师验证字体覆盖范围
- 开发人员检查字体对特定语言的支持
- 多语言项目中的字体兼容性测试
- 字体文档自动生成
- 学术研究中的字符集分析
扩展性
项目可轻松扩展以支持:
- 自定义 Unicode 范围过滤
- 额外的输出格式(如 HTML、CSV)
- 字符属性显示(如编码点、名称)
- 字体特性分析(如连字、变体)
#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstring>
#include <filesystem>
#include <iostream>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <hpdf.h>

#include <ft2build.h>
#include FT_FREETYPE_H
#include <freetype/ftadvanc.h>
namespace fs = std::filesystem;
// Layout constants for the generated character pages.
constexpr size_t CHARS_PER_LINE = 40; // characters drawn in one row before wrapping
// NOTE(review): LINES_PER_PAGE and CHARS_PER_PAGE are not referenced by the page-break
// logic in this file (AddCharactersPage breaks when y drops below 50) — confirm whether they are still needed.
constexpr size_t LINES_PER_PAGE = 120; // nominal rows per page
constexpr size_t CHARS_PER_PAGE = CHARS_PER_LINE * LINES_PER_PAGE; // nominal characters per page
constexpr size_t MAX_CHARS_PER_BLOCK = 300; // upper bound on characters collected from one Unicode block
constexpr int FONT_SIZE = 13; // font size passed to libharu for the character grid
constexpr float CHAR_WIDTH = 13.0f; // horizontal step between two grid cells
constexpr float LINE_HEIGHT = 20.0f; // vertical step between two rows
constexpr float LEFT_MARGIN = 10.0f; // x coordinate at which every row starts
constexpr float TOP_MARGIN = 790.0f; // y coordinate of the first text line on a page
// One contiguous Unicode block: an inclusive code-point range plus its name.
struct UnicodeBlock {
    unsigned int start;  // first code point of the block (inclusive)
    unsigned int end;    // last code point of the block (inclusive)
    std::string name;    // block name as listed in the Unicode Blocks.txt data file
};

// Blocks of the Basic Multilingual Plane (U+0000-U+FFFF) in ascending,
// non-overlapping order. The surrogate range U+D800-U+DFFF is deliberately
// absent because surrogates are not characters, and supplementary-plane
// blocks (U+10000 and above) are not listed here.
const std::vector<UnicodeBlock> UNICODE_BLOCKS = {
    {0x0000, 0x007F, "Basic Latin"},
    {0x0080, 0x00FF, "Latin-1 Supplement"},
    {0x0100, 0x017F, "Latin Extended-A"},
    {0x0180, 0x024F, "Latin Extended-B"},
    {0x0250, 0x02AF, "IPA Extensions"},
    {0x02B0, 0x02FF, "Spacing Modifier Letters"},
    {0x0300, 0x036F, "Combining Diacritical Marks"},
    {0x0370, 0x03FF, "Greek and Coptic"},
    {0x0400, 0x04FF, "Cyrillic"},
    {0x0500, 0x052F, "Cyrillic Supplement"},
    {0x0530, 0x058F, "Armenian"},
    {0x0590, 0x05FF, "Hebrew"},
    {0x0600, 0x06FF, "Arabic"},
    {0x0700, 0x074F, "Syriac"},
    {0x0750, 0x077F, "Arabic Supplement"},
    {0x0780, 0x07BF, "Thaana"},
    {0x07C0, 0x07FF, "NKo"},
    {0x0800, 0x083F, "Samaritan"},
    {0x0840, 0x085F, "Mandaic"},
    {0x0860, 0x086F, "Syriac Supplement"},
    {0x0870, 0x089F, "Arabic Extended-B"},  // was missing from the original table
    {0x08A0, 0x08FF, "Arabic Extended-A"},
    {0x0900, 0x097F, "Devanagari"},
    {0x0980, 0x09FF, "Bengali"},
    {0x0A00, 0x0A7F, "Gurmukhi"},
    {0x0A80, 0x0AFF, "Gujarati"},
    {0x0B00, 0x0B7F, "Oriya"},
    {0x0B80, 0x0BFF, "Tamil"},
    {0x0C00, 0x0C7F, "Telugu"},
    {0x0C80, 0x0CFF, "Kannada"},
    {0x0D00, 0x0D7F, "Malayalam"},
    {0x0D80, 0x0DFF, "Sinhala"},
    {0x0E00, 0x0E7F, "Thai"},
    {0x0E80, 0x0EFF, "Lao"},
    {0x0F00, 0x0FFF, "Tibetan"},
    {0x1000, 0x109F, "Myanmar"},
    {0x10A0, 0x10FF, "Georgian"},
    {0x1100, 0x11FF, "Hangul Jamo"},
    {0x1200, 0x137F, "Ethiopic"},
    {0x1380, 0x139F, "Ethiopic Supplement"},
    {0x13A0, 0x13FF, "Cherokee"},
    {0x1400, 0x167F, "Unified Canadian Aboriginal Syllabics"},
    {0x1680, 0x169F, "Ogham"},
    {0x16A0, 0x16FF, "Runic"},
    {0x1700, 0x171F, "Tagalog"},
    {0x1720, 0x173F, "Hanunoo"},
    {0x1740, 0x175F, "Buhid"},
    {0x1760, 0x177F, "Tagbanwa"},
    {0x1780, 0x17FF, "Khmer"},
    {0x1800, 0x18AF, "Mongolian"},
    {0x18B0, 0x18FF, "Unified Canadian Aboriginal Syllabics Extended"},
    {0x1900, 0x194F, "Limbu"},
    {0x1950, 0x197F, "Tai Le"},
    {0x1980, 0x19DF, "New Tai Lue"},
    {0x19E0, 0x19FF, "Khmer Symbols"},
    {0x1A00, 0x1A1F, "Buginese"},
    {0x1A20, 0x1AAF, "Tai Tham"},
    {0x1AB0, 0x1AFF, "Combining Diacritical Marks Extended"},
    {0x1B00, 0x1B7F, "Balinese"},
    {0x1B80, 0x1BBF, "Sundanese"},
    {0x1BC0, 0x1BFF, "Batak"},
    {0x1C00, 0x1C4F, "Lepcha"},
    {0x1C50, 0x1C7F, "Ol Chiki"},
    {0x1C80, 0x1C8F, "Cyrillic Extended-C"},
    {0x1C90, 0x1CBF, "Georgian Extended"},
    {0x1CC0, 0x1CCF, "Sundanese Supplement"},
    {0x1CD0, 0x1CFF, "Vedic Extensions"},
    {0x1D00, 0x1D7F, "Phonetic Extensions"},
    {0x1D80, 0x1DBF, "Phonetic Extensions Supplement"},
    {0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement"},
    {0x1E00, 0x1EFF, "Latin Extended Additional"},
    {0x1F00, 0x1FFF, "Greek Extended"},
    {0x2000, 0x206F, "General Punctuation"},
    {0x2070, 0x209F, "Superscripts and Subscripts"},
    {0x20A0, 0x20CF, "Currency Symbols"},
    {0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols"},
    {0x2100, 0x214F, "Letterlike Symbols"},
    {0x2150, 0x218F, "Number Forms"},
    {0x2190, 0x21FF, "Arrows"},
    {0x2200, 0x22FF, "Mathematical Operators"},
    {0x2300, 0x23FF, "Miscellaneous Technical"},
    {0x2400, 0x243F, "Control Pictures"},
    {0x2440, 0x245F, "Optical Character Recognition"},
    {0x2460, 0x24FF, "Enclosed Alphanumerics"},
    {0x2500, 0x257F, "Box Drawing"},
    {0x2580, 0x259F, "Block Elements"},
    {0x25A0, 0x25FF, "Geometric Shapes"},
    {0x2600, 0x26FF, "Miscellaneous Symbols"},
    {0x2700, 0x27BF, "Dingbats"},
    {0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A"},
    {0x27F0, 0x27FF, "Supplemental Arrows-A"},
    {0x2800, 0x28FF, "Braille Patterns"},
    {0x2900, 0x297F, "Supplemental Arrows-B"},
    {0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B"},
    {0x2A00, 0x2AFF, "Supplemental Mathematical Operators"},
    {0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows"},
    {0x2C00, 0x2C5F, "Glagolitic"},
    {0x2C60, 0x2C7F, "Latin Extended-C"},
    {0x2C80, 0x2CFF, "Coptic"},
    {0x2D00, 0x2D2F, "Georgian Supplement"},
    {0x2D30, 0x2D7F, "Tifinagh"},
    {0x2D80, 0x2DDF, "Ethiopic Extended"},
    {0x2DE0, 0x2DFF, "Cyrillic Extended-A"},
    {0x2E00, 0x2E7F, "Supplemental Punctuation"},
    {0x2E80, 0x2EFF, "CJK Radicals Supplement"},
    {0x2F00, 0x2FDF, "Kangxi Radicals"},
    {0x2FF0, 0x2FFF, "Ideographic Description Characters"},
    {0x3000, 0x303F, "CJK Symbols and Punctuation"},
    {0x3040, 0x309F, "Hiragana"},
    {0x30A0, 0x30FF, "Katakana"},
    {0x3100, 0x312F, "Bopomofo"},
    {0x3130, 0x318F, "Hangul Compatibility Jamo"},
    {0x3190, 0x319F, "Kanbun"},
    {0x31A0, 0x31BF, "Bopomofo Extended"},
    {0x31C0, 0x31EF, "CJK Strokes"},
    {0x31F0, 0x31FF, "Katakana Phonetic Extensions"},
    {0x3200, 0x32FF, "Enclosed CJK Letters and Months"},
    {0x3300, 0x33FF, "CJK Compatibility"},
    {0x3400, 0x4DBF, "CJK Unified Ideographs Extension A"},
    {0x4DC0, 0x4DFF, "Yijing Hexagram Symbols"},
    {0x4E00, 0x9FFF, "CJK Unified Ideographs"},
    {0xA000, 0xA48F, "Yi Syllables"},
    {0xA490, 0xA4CF, "Yi Radicals"},
    {0xA4D0, 0xA4FF, "Lisu"},
    {0xA500, 0xA63F, "Vai"},
    {0xA640, 0xA69F, "Cyrillic Extended-B"},
    {0xA6A0, 0xA6FF, "Bamum"},
    {0xA700, 0xA71F, "Modifier Tone Letters"},
    {0xA720, 0xA7FF, "Latin Extended-D"},
    {0xA800, 0xA82F, "Syloti Nagri"},
    {0xA830, 0xA83F, "Common Indic Number Forms"},
    {0xA840, 0xA87F, "Phags-pa"},
    {0xA880, 0xA8DF, "Saurashtra"},
    {0xA8E0, 0xA8FF, "Devanagari Extended"},
    {0xA900, 0xA92F, "Kayah Li"},
    {0xA930, 0xA95F, "Rejang"},
    {0xA960, 0xA97F, "Hangul Jamo Extended-A"},
    {0xA980, 0xA9DF, "Javanese"},
    {0xA9E0, 0xA9FF, "Myanmar Extended-B"},
    {0xAA00, 0xAA5F, "Cham"},
    {0xAA60, 0xAA7F, "Myanmar Extended-A"},
    {0xAA80, 0xAADF, "Tai Viet"},
    {0xAAE0, 0xAAFF, "Meetei Mayek Extensions"},
    {0xAB00, 0xAB2F, "Ethiopic Extended-A"},
    {0xAB30, 0xAB6F, "Latin Extended-E"},
    {0xAB70, 0xABBF, "Cherokee Supplement"},
    {0xABC0, 0xABFF, "Meetei Mayek"},
    {0xAC00, 0xD7AF, "Hangul Syllables"},
    {0xD7B0, 0xD7FF, "Hangul Jamo Extended-B"},
    {0xE000, 0xF8FF, "Private Use Area"},
    {0xF900, 0xFAFF, "CJK Compatibility Ideographs"},
    {0xFB00, 0xFB4F, "Alphabetic Presentation Forms"},
    {0xFB50, 0xFDFF, "Arabic Presentation Forms-A"},
    {0xFE00, 0xFE0F, "Variation Selectors"},
    {0xFE10, 0xFE1F, "Vertical Forms"},
    {0xFE20, 0xFE2F, "Combining Half Marks"},
    {0xFE30, 0xFE4F, "CJK Compatibility Forms"},
    {0xFE50, 0xFE6F, "Small Form Variants"},
    {0xFE70, 0xFEFF, "Arabic Presentation Forms-B"},
    {0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms"},
    {0xFFF0, 0xFFFF, "Specials"}
};
// Owns a FreeType library handle and extracts the code points a font file maps to glyphs.
class FontUtils {
public:
    // Initialises FreeType.
    // Throws std::runtime_error when FT_Init_FreeType reports an error.
    FontUtils() {
        if (FT_Init_FreeType(&library_)) {
            throw std::runtime_error("无法初始化FreeType库");
        }
    }

    ~FontUtils() {
        FT_Done_FreeType(library_);
    }

    // The destructor releases the handle unconditionally, so a copy would release it twice.
    FontUtils(const FontUtils&) = delete;
    FontUtils& operator=(const FontUtils&) = delete;

    // Returns every character code that the first face of `fontPath` maps to a
    // non-zero glyph index. On load failure an error is written to std::cerr
    // and an empty set is returned.
    std::set<unsigned int> GetUnicodeFromFont(const fs::path& fontPath) {
        std::set<unsigned int> unicodeSet;
        FT_Face face = nullptr;
        if (FT_New_Face(library_, fontPath.string().c_str(), 0, &face)) {
            std::cerr << "Error loading font file: " << fontPath << std::endl;
            return unicodeSet;
        }
        FT_UInt glyphIndex = 0;
        FT_ULong charCode = FT_Get_First_Char(face, &glyphIndex);
        // FreeType signals the end of the charmap with a glyph index of 0.
        while (glyphIndex != 0) {
            unicodeSet.insert(static_cast<unsigned int>(charCode));
            charCode = FT_Get_Next_Char(face, charCode, &glyphIndex);
        }
        FT_Done_Face(face);
        return unicodeSet;
    }

    // Encodes one code point as UTF-8.
    // Returns an empty vector for values that have no valid UTF-8 form:
    // surrogates (U+D800-U+DFFF) and anything above U+10FFFF.
    std::vector<unsigned char> unicode_code_point_to_utf8(unsigned int code_point) {
        std::vector<unsigned char> utf8_bytes;
        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
            return utf8_bytes;  // surrogates must never appear in UTF-8
        }
        if (code_point <= 0x7F) {
            utf8_bytes.push_back(static_cast<unsigned char>(code_point));
        }
        else if (code_point <= 0x7FF) {
            utf8_bytes.push_back(static_cast<unsigned char>(0xC0 | ((code_point >> 6) & 0x1F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | (code_point & 0x3F)));
        }
        else if (code_point <= 0xFFFF) {
            utf8_bytes.push_back(static_cast<unsigned char>(0xE0 | ((code_point >> 12) & 0x0F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | ((code_point >> 6) & 0x3F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | (code_point & 0x3F)));
        }
        else if (code_point <= 0x10FFFF) {
            utf8_bytes.push_back(static_cast<unsigned char>(0xF0 | ((code_point >> 18) & 0x07)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | ((code_point >> 12) & 0x3F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | ((code_point >> 6) & 0x3F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | (code_point & 0x3F)));
        }
        return utf8_bytes;
    }

    // Returns, as one UTF-8 string, the font's characters that fall inside
    // `block` (inclusive range), in ascending code-point order and limited to
    // MAX_CHARS_PER_BLOCK characters.
    //
    // The code-point set of the most recently queried font is kept in memory,
    // so asking for many blocks of the same font parses the file only once.
    std::string getFontCharactersForBlock(const fs::path& fontPath, const UnicodeBlock& block) {
        if (!cache_valid_ || cached_font_path_ != fontPath) {
            cached_code_points_ = GetUnicodeFromFont(fontPath);
            cached_font_path_ = fontPath;
            cache_valid_ = true;
        }

        std::string characters;
        size_t count = 0;
        // The set is ordered, so jump straight to the first code point of the block.
        for (auto it = cached_code_points_.lower_bound(block.start);
             it != cached_code_points_.end() && *it <= block.end && count < MAX_CHARS_PER_BLOCK;
             ++it) {
            const auto utf8_bytes = unicode_code_point_to_utf8(*it);
            if (utf8_bytes.empty()) {
                continue;  // not encodable, nothing to draw
            }
            characters.append(reinterpret_cast<const char*>(utf8_bytes.data()), utf8_bytes.size());
            ++count;
        }
        return characters;
    }

private:
    FT_Library library_ = nullptr;
    // Single-entry cache used by getFontCharactersForBlock.
    bool cache_valid_ = false;
    fs::path cached_font_path_;
    std::set<unsigned int> cached_code_points_;
};
class PDFDocument {
public:
PDFDocument(const fs::path& font_path, const fs::path& output_dir)
: pdf(HPDF_New(nullptr, nullptr)), output_dir(output_dir), font_path(font_path) {
if (!pdf) {
throw std::runtime_error("无法创建 PDF 文档");
}
font_name = font_path.stem().string();
pdf_filename = font_name + "_unicode_chars.pdf";
HPDF_SetErrorHandler(pdf, [](HPDF_STATUS error_no, HPDF_STATUS detail_no, void* user_data) {
std::cerr << "PDF错误: " << error_no << ", 详情: " << detail_no << std::endl;
});
HPDF_UseUTFEncodings(pdf);
HPDF_SetCurrentEncoder(pdf, "UTF-8");
std::string extension = font_path.extension().string();
const char* loaded_font = nullptr;
if (extension == ".ttc") {
loaded_font = HPDF_LoadTTFontFromFile2(pdf, font_path.string().c_str(), 0, HPDF_TRUE);
}
else if (extension == ".ttf" || extension == ".otf") {
loaded_font = HPDF_LoadTTFontFromFile(pdf, font_path.string().c_str(), HPDF_TRUE);
}
else {
HPDF_Free(pdf);
throw std::runtime_error("不支持的字体文件格式: " + extension);
}
if (!loaded_font) {
HPDF_Free(pdf);
throw std::runtime_error("无法从文件加载字体: " + font_path.string());
}
font = HPDF_GetFont(pdf, loaded_font, "UTF-8");
if (!font) {
HPDF_Free(pdf);
throw std::runtime_error("无法获取字体对象");
}
}
~PDFDocument() {
if (pdf) {
HPDF_Free(pdf);
}
}
void AddCharactersPage(const std::string& chars, bool is_first_page = false) {
HPDF_Page page = HPDF_AddPage(pdf);
HPDF_Page_SetSize(page, HPDF_PAGE_SIZE_A4, HPDF_PAGE_PORTRAIT);
float current_y = TOP_MARGIN; // 当前Y坐标
// 如果是第一页,添加彩色标题
// 如果是第一页,添加彩色渐变标题
if (is_first_page) {
std::string title = "the font name is " + font_name;
size_t title_len = title.length();
// 设置标题字体大小
HPDF_Page_SetFontAndSize(page, font, 30);
// 设置文字描边(模拟加粗)
HPDF_Page_SetLineWidth(page, 1); // 描边宽度
HPDF_Page_SetRGBStroke(page, 0, 0, 0); // 描边颜色(黑色)
// 2. 启用描边+填充模式
HPDF_Page_SetTextRenderingMode(page, HPDF_FILL_THEN_STROKE);
// 定位到标题起始位置
HPDF_Page_BeginText(page);
HPDF_Page_MoveTextPos(page, LEFT_MARGIN, current_y);
// 生成渐变彩色标题(红到蓝渐变)
for (size_t i = 0; i < title_len; i++) {
char buf[2] = { title[i], '\0' };
// 计算渐变颜色(从红色渐变到蓝色)
// 彩虹色渐变(红->黄->绿->青->蓝->紫)
float r = std::max(0.0f, 1.0f - std::abs((float)i / title_len - 0.5f) * 2.0f);
float g = std::max(0.0f, 1.0f - std::abs((float)i / title_len - 0.25f) * 4.0f);
float b = std::max(0.0f, 1.0f - std::abs((float)i / title_len - 0.75f) * 4.0f);
//// 金色渐变效果
//float r = 0.8f + 0.2f * sin((float)i / title_len * 3.14f);
//float g = 0.6f + 0.2f * cos((float)i / title_len * 3.14f);
//float b = 0.1f;
// 自定义双色渐变(如从蓝到紫)
//Color start(0.2, 0.4, 1.0); // 蓝色
//Color end(0.8, 0.2, 1.0); // 紫色
//float r = start.r + (end.r - start.r) * (i / title_len);
//float g = start.g + (end.g - start.g) * (i / title_len);
//float b = start.b + (end.b - start.b) * (i / title_len);
HPDF_Page_SetRGBFill(page, r, g, b); // 保持少量绿色分量
HPDF_Page_ShowText(page, buf);
}
HPDF_Page_EndText(page);
// 添加黑色统计信息
HPDF_Page_SetRGBFill(page, 0.0f, 0.0f, 0.0f); // 重置为黑色
// 4. 恢复默认填充模式(避免影响后续文本)
HPDF_Page_SetTextRenderingMode(page, HPDF_FILL);
HPDF_Page_BeginText(page);
HPDF_Page_SetFontAndSize(page, font, 12);
HPDF_Page_MoveTextPos(page, LEFT_MARGIN, current_y - 25);
std::string stats = "font count: " + std::to_string(chars.size());
HPDF_Page_ShowText(page, stats.c_str());
HPDF_Page_EndText(page);
// 调整字符显示的起始Y位置
current_y -= 40;
}
// 设置字符显示字体
HPDF_Page_SetFontAndSize(page, font, FONT_SIZE);
float x = LEFT_MARGIN;
float y = current_y; // 使用调整后的Y坐标
size_t char_count = 0;
size_t i = 0;
while (i < chars.size()) {
// 计算当前字符的UTF-8长度
size_t char_len = 1;
unsigned char c = chars[i];
if ((c & 0xE0) == 0xC0) char_len = 2;
else if ((c & 0xF0) == 0xE0) char_len = 3;
else if ((c & 0xF8) == 0xF0) char_len = 4;
// 提取当前字符
std::string current_char = chars.substr(i, char_len);
// 绘制字符
HPDF_Page_BeginText(page);
HPDF_Page_MoveTextPos(page, x, y);
HPDF_Page_ShowText(page, current_char.c_str());
HPDF_Page_EndText(page);
// 更新位置
x += CHAR_WIDTH;
char_count++;
// 换行处理
if (char_count % CHARS_PER_LINE == 0) {
x = LEFT_MARGIN;
y -= LINE_HEIGHT;
// 检查是否超出页面
if (y < 50) { // 底部边距
// 创建新页面继续输出
AddCharactersPage(chars.substr(i + char_len));
return;
}
}
i += char_len;
}
}
void GenerateWithUnicodeChars(FontUtils& font_utils) {
std::string all_chars;
size_t total_chars = 0;
// 收集所有区块的字符
for (const auto& block : UNICODE_BLOCKS) {
std::string block_chars = font_utils.getFontCharactersForBlock(font_path, block);
if (!block_chars.empty()) {
all_chars += block_chars;
total_chars += block_chars.size();
std::cout << "添加区块: " << block.name
<< " (U+" << std::hex << block.start << "-U+" << block.end << ")"
<< ", 字符数: " << block_chars.size() << std::endl;
}
}
std::cout << "总共收集 " << total_chars << " 个字符" << std::endl;
if (!all_chars.empty()) {
// 添加字符内容页(第一页包含标题)
AddCharactersPage(all_chars, true);
}
else {
std::cout << "字体中未找到任何Unicode字符" << std::endl;
}
}
void Save() {
if (!fs::exists(output_dir)) {
fs::create_directories(output_dir);
}
fs::path full_path = fs::path(output_dir) / pdf_filename;
HPDF_STATUS ret = HPDF_SaveToFile(pdf, full_path.string().c_str());
if (ret != HPDF_OK) {
throw std::runtime_error("保存PDF文件失败");
}
std::cout << "PDF已生成: " << full_path.string() << std::endl;
}
private:
HPDF_Doc pdf;
HPDF_Font font;
std::string font_name;
std::string pdf_filename;
fs::path output_dir;
fs::path font_path;
void AddTitlePage(size_t total_chars) {
HPDF_Page page = HPDF_AddPage(pdf);
HPDF_Page_SetSize(page, HPDF_PAGE_SIZE_A4, HPDF_PAGE_PORTRAIT);
// 添加标题
HPDF_Page_BeginText(page);
HPDF_Page_SetFontAndSize(page, font, 24);
HPDF_Page_MoveTextPos(page, LEFT_MARGIN, TOP_MARGIN);
std::string title = "font " + font_name + " support Unicode character";
HPDF_Page_ShowText(page, title.c_str());
HPDF_Page_EndText(page);
}
std::string to_hex(unsigned int value) {
std::stringstream ss;
ss << std::hex << std::uppercase << value;
return ss.str();
}
};
// Finds font files below a directory, with suffix and name-based exclusions.
class FontScanner {
public:
    FontScanner(fs::path font_dir) : font_dir_(font_dir) {}

    // Returns the .ttc files below the font directory whose file name contains
    // none of the style markers "Bold", "Italic", "Light", "Cond" or "-"
    // (compared case-insensitively).
    std::vector<fs::path> GetTTcFont() {
        return ScanFontsInDirectory(
            font_dir_,
            // Exclude every recognised font suffix except .ttc.
            { ".otf", ".ttf", ".woff", ".woff2", ".pfb", ".pfm" },
            { "Bold", "Italic", "Light", "Cond", "-" },
            false  // no verbose logging
        );
    }

    // Returns the .ttf files below the font directory whose file name contains
    // none of the style markers "Bold", "Italic", "Light", "Cond" or "-"
    // (compared case-insensitively).
    std::vector<fs::path> GetTTfFont() {
        return ScanFontsInDirectory(
            font_dir_,
            // Exclude every recognised font suffix except .ttf.
            { ".otf", ".ttc", ".woff", ".woff2", ".pfb", ".pfm" },
            { "Bold", "Italic", "Light", "Cond", "-" },
            false  // no verbose logging
        );
    }

    // Recursively collects font files below `directory`.
    //
    // A regular file is kept when its extension is one of .ttf, .otf, .ttc,
    // .woff, .woff2, .pfb, .pfm and its file name neither ends with an entry of
    // `excludeSuffixes` nor contains an entry of `excludeSubstrings`. All
    // comparisons ignore case. Subdirectories that cannot be opened because of
    // missing permissions are skipped.
    //
    // Returns an empty vector when `directory` does not exist or is not a
    // directory. Filesystem errors during traversal end the scan and the files
    // found so far are returned. With `verbose` set, every decision and error
    // is logged to std::cout / std::cerr.
    std::vector<fs::path> ScanFontsInDirectory(
        const fs::path& directory,
        const std::set<std::string>& excludeSuffixes,
        const std::set<std::string>& excludeSubstrings,
        bool verbose)
    {
        std::vector<fs::path> fontPaths;

        if (!fs::exists(directory)) {
            if (verbose) {
                std::cerr << "Error: Directory does not exist: "
                    << directory.string() << std::endl;
            }
            return fontPaths;
        }
        if (!fs::is_directory(directory)) {
            if (verbose) {
                std::cerr << "Error: Path is not a directory: "
                    << directory.string() << std::endl;
            }
            return fontPaths;
        }

        // Extensions (lower case) that count as font files at all.
        static const std::set<std::string> kFontExtensions = {
            ".ttf", ".otf", ".ttc", ".woff", ".woff2", ".pfb", ".pfm"
        };

        // Normalise the exclusion lists once so the loop only compares lower-case text.
        std::set<std::string> excludeSuffixesLower;
        for (const auto& suffix : excludeSuffixes) {
            excludeSuffixesLower.insert(ToLower(suffix));
        }
        std::set<std::string> excludeSubstringsLower;
        for (const auto& substr : excludeSubstrings) {
            excludeSubstringsLower.insert(ToLower(substr));
        }

        try {
            const fs::recursive_directory_iterator walker(
                directory, fs::directory_options::skip_permission_denied);
            for (const auto& entry : walker) {
                if (!entry.is_regular_file()) {
                    continue;
                }
                const fs::path& path = entry.path();

                if (kFontExtensions.count(ToLower(path.extension().string())) == 0) {
                    if (verbose) {
                        std::cout << "Skipping non-font file: "
                            << path.string() << std::endl;
                    }
                    continue;
                }

                const std::string filenameLower = ToLower(path.filename().string());

                const bool suffixExcluded = std::any_of(
                    excludeSuffixesLower.begin(), excludeSuffixesLower.end(),
                    [&](const std::string& suffix) { return EndsWithCI(filenameLower, suffix); });
                if (suffixExcluded) {
                    if (verbose) {
                        std::cout << "Excluding by suffix: "
                            << path.string() << std::endl;
                    }
                    continue;
                }

                const bool substringExcluded = std::any_of(
                    excludeSubstringsLower.begin(), excludeSubstringsLower.end(),
                    [&](const std::string& substr) {
                        return filenameLower.find(substr) != std::string::npos;
                    });
                if (substringExcluded) {
                    if (verbose) {
                        std::cout << "Excluding by substring: "
                            << path.string() << std::endl;
                    }
                    continue;
                }

                if (verbose) {
                    std::cout << "Adding font file: "
                        << path.string() << std::endl;
                }
                fontPaths.emplace_back(path);
            }
        }
        catch (const fs::filesystem_error& e) {
            // Best effort: keep what was found before the error.
            if (verbose) {
                std::cerr << "Filesystem error: " << e.what()
                    << " (path: " << e.path1().string() << ")" << std::endl;
            }
        }
        return fontPaths;
    }

private:
    // Lower-cases `text` byte by byte. The cast to unsigned char keeps
    // std::tolower defined for bytes outside the ASCII range.
    static std::string ToLower(std::string text) {
        std::transform(text.begin(), text.end(), text.begin(),
            [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
        return text;
    }

    // True when `str` ends with `suffix`, ignoring case.
    static bool EndsWithCI(const std::string& str, const std::string& suffix) {
        if (suffix.size() > str.size()) return false;
        return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin(),
            [](unsigned char a, unsigned char b) { return std::tolower(a) == std::tolower(b); });
    }

    fs::path font_dir_;
};
// Entry point: renders one PDF per .ttf font found by FontScanner::GetTTfFont().
//
// Usage: <program> [font_dir] [output_dir]
//   font_dir   directory to scan (default "C:/Windows/Fonts")
//   output_dir directory for the PDFs (default "D:/document_for_test/pdf_out")
//
// A font that cannot be processed is reported and skipped so the remaining
// fonts are still rendered. Returns 0 when every font succeeded, 1 otherwise.
int main(int argc, char* argv[]) {
    const fs::path font_dir = argc > 1 ? fs::path(argv[1]) : fs::path("C:/Windows/Fonts");
    const fs::path output_dir = argc > 2 ? fs::path(argv[2]) : fs::path("D:/document_for_test/pdf_out");
    try {
        FontUtils font_utils;
        FontScanner scanner(font_dir);
        const auto ttf_font_files = scanner.GetTTfFont();

        size_t failed = 0;
        for (const auto& path : ttf_font_files) {
            std::cout << "处理字体文件: " << path << std::endl;
            try {
                PDFDocument doc(path, output_dir);
                doc.GenerateWithUnicodeChars(font_utils);
                doc.Save();
            }
            catch (const std::exception& ex) {
                // Keep going: one broken font must not stop the whole batch.
                ++failed;
                std::cerr << "跳过字体 " << path << ": " << ex.what() << std::endl;
            }
        }
        return failed == 0 ? 0 : 1;
    }
    catch (const std::exception& ex) {
        // Failures outside the per-font loop (e.g. FreeType initialisation).
        std::cerr << "错误: " << ex.what() << std::endl;
        return 1;
    }
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.12)
project(FontUnicodeVisualizer VERSION 1.0 LANGUAGES CXX)

# Build as strict C++17 (the sources use std::filesystem).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# REQUIRED already stops configuration with a fatal error when a package is
# missing, so no additional *_FOUND checks are needed.
find_package(ZLIB REQUIRED)
find_package(PNG REQUIRED)
find_package(Freetype REQUIRED)

# libharu is located manually because a find module for it may not be available.
# Override with -DHARU_INCLUDE_DIR=... and -DHARU_LIBRARY=... if it lives in a
# non-standard place; the Homebrew Cellar path is kept as a search hint.
find_path(HARU_INCLUDE_DIR
    NAMES hpdf.h
    HINTS "/usr/local/Cellar/libharu/2.4.5/include"
)
find_library(HARU_LIBRARY
    NAMES hpdf libhpdf hpdfs libhpdfs
    HINTS "/usr/local/Cellar/libharu/2.4.5/lib"
)
if(NOT HARU_INCLUDE_DIR OR NOT HARU_LIBRARY)
    message(FATAL_ERROR
        "libharu (hpdf) not found; set HARU_INCLUDE_DIR and HARU_LIBRARY")
endif()

# Executable
add_executable(font_visualizer
    ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
)

# Header search path for libharu; the imported targets below carry their own.
target_include_directories(font_visualizer PRIVATE
    ${HARU_INCLUDE_DIR}
)

# Link order matters for static libraries: libharu depends on libpng, which
# depends on zlib.
target_link_libraries(font_visualizer PRIVATE
    Freetype::Freetype
    ${HARU_LIBRARY}
    PNG::PNG
    ZLIB::ZLIB
)

# Extra system library linked on Windows.
if(WIN32)
    target_link_libraries(font_visualizer PRIVATE
        gdi32
    )
endif()

# Install rule
install(TARGETS font_visualizer
    RUNTIME DESTINATION bin
)
另一个版本(遇到无法处理的字体时跳过,而不是崩溃)
#include <iostream>
#include <string>
#include <vector>
#include <filesystem>
#include <hpdf.h>
#include <cstring>
#include <set>
#include<sstream>
#include <ft2build.h>
#include <freetype/ftadvanc.h>
#include FT_FREETYPE_H
namespace fs = std::filesystem;
// Layout constants for the generated character pages.
constexpr size_t CHARS_PER_LINE = 40; // characters drawn in one row before wrapping
// NOTE(review): LINES_PER_PAGE and CHARS_PER_PAGE are not referenced in the part of
// this program visible here — confirm whether they are still needed.
constexpr size_t LINES_PER_PAGE = 120; // nominal rows per page
constexpr size_t CHARS_PER_PAGE = CHARS_PER_LINE * LINES_PER_PAGE; // nominal characters per page
constexpr size_t MAX_CHARS_PER_BLOCK = 300; // upper bound on characters collected from one Unicode block
constexpr int FONT_SIZE = 13; // presumably the font size of the character grid — TODO confirm against AddCharactersPage
constexpr float CHAR_WIDTH = 13.0f; // width reserved for each character
constexpr float LINE_HEIGHT = 20.0f; // line height
constexpr float LEFT_MARGIN = 10.0f; // x coordinate at which text starts
constexpr float TOP_MARGIN = 790.0f; // y coordinate of the first text line on a page
// One contiguous Unicode block: an inclusive code-point range plus its name.
struct UnicodeBlock {
    unsigned int start;  // first code point of the block (inclusive)
    unsigned int end;    // last code point of the block (inclusive)
    std::string name;    // block name as listed in the Unicode Blocks.txt data file
};

// Blocks of the Basic Multilingual Plane (U+0000-U+FFFF) in ascending,
// non-overlapping order. The surrogate range U+D800-U+DFFF is deliberately
// absent because surrogates are not characters, and supplementary-plane
// blocks (U+10000 and above) are not listed here.
const std::vector<UnicodeBlock> UNICODE_BLOCKS = {
    {0x0000, 0x007F, "Basic Latin"},
    {0x0080, 0x00FF, "Latin-1 Supplement"},
    {0x0100, 0x017F, "Latin Extended-A"},
    {0x0180, 0x024F, "Latin Extended-B"},
    {0x0250, 0x02AF, "IPA Extensions"},
    {0x02B0, 0x02FF, "Spacing Modifier Letters"},
    {0x0300, 0x036F, "Combining Diacritical Marks"},
    {0x0370, 0x03FF, "Greek and Coptic"},
    {0x0400, 0x04FF, "Cyrillic"},
    {0x0500, 0x052F, "Cyrillic Supplement"},
    {0x0530, 0x058F, "Armenian"},
    {0x0590, 0x05FF, "Hebrew"},
    {0x0600, 0x06FF, "Arabic"},
    {0x0700, 0x074F, "Syriac"},
    {0x0750, 0x077F, "Arabic Supplement"},
    {0x0780, 0x07BF, "Thaana"},
    {0x07C0, 0x07FF, "NKo"},
    {0x0800, 0x083F, "Samaritan"},
    {0x0840, 0x085F, "Mandaic"},
    {0x0860, 0x086F, "Syriac Supplement"},
    {0x0870, 0x089F, "Arabic Extended-B"},  // was missing from the original table
    {0x08A0, 0x08FF, "Arabic Extended-A"},
    {0x0900, 0x097F, "Devanagari"},
    {0x0980, 0x09FF, "Bengali"},
    {0x0A00, 0x0A7F, "Gurmukhi"},
    {0x0A80, 0x0AFF, "Gujarati"},
    {0x0B00, 0x0B7F, "Oriya"},
    {0x0B80, 0x0BFF, "Tamil"},
    {0x0C00, 0x0C7F, "Telugu"},
    {0x0C80, 0x0CFF, "Kannada"},
    {0x0D00, 0x0D7F, "Malayalam"},
    {0x0D80, 0x0DFF, "Sinhala"},
    {0x0E00, 0x0E7F, "Thai"},
    {0x0E80, 0x0EFF, "Lao"},
    {0x0F00, 0x0FFF, "Tibetan"},
    {0x1000, 0x109F, "Myanmar"},
    {0x10A0, 0x10FF, "Georgian"},
    {0x1100, 0x11FF, "Hangul Jamo"},
    {0x1200, 0x137F, "Ethiopic"},
    {0x1380, 0x139F, "Ethiopic Supplement"},
    {0x13A0, 0x13FF, "Cherokee"},
    {0x1400, 0x167F, "Unified Canadian Aboriginal Syllabics"},
    {0x1680, 0x169F, "Ogham"},
    {0x16A0, 0x16FF, "Runic"},
    {0x1700, 0x171F, "Tagalog"},
    {0x1720, 0x173F, "Hanunoo"},
    {0x1740, 0x175F, "Buhid"},
    {0x1760, 0x177F, "Tagbanwa"},
    {0x1780, 0x17FF, "Khmer"},
    {0x1800, 0x18AF, "Mongolian"},
    {0x18B0, 0x18FF, "Unified Canadian Aboriginal Syllabics Extended"},
    {0x1900, 0x194F, "Limbu"},
    {0x1950, 0x197F, "Tai Le"},
    {0x1980, 0x19DF, "New Tai Lue"},
    {0x19E0, 0x19FF, "Khmer Symbols"},
    {0x1A00, 0x1A1F, "Buginese"},
    {0x1A20, 0x1AAF, "Tai Tham"},
    {0x1AB0, 0x1AFF, "Combining Diacritical Marks Extended"},
    {0x1B00, 0x1B7F, "Balinese"},
    {0x1B80, 0x1BBF, "Sundanese"},
    {0x1BC0, 0x1BFF, "Batak"},
    {0x1C00, 0x1C4F, "Lepcha"},
    {0x1C50, 0x1C7F, "Ol Chiki"},
    {0x1C80, 0x1C8F, "Cyrillic Extended-C"},
    {0x1C90, 0x1CBF, "Georgian Extended"},
    {0x1CC0, 0x1CCF, "Sundanese Supplement"},
    {0x1CD0, 0x1CFF, "Vedic Extensions"},
    {0x1D00, 0x1D7F, "Phonetic Extensions"},
    {0x1D80, 0x1DBF, "Phonetic Extensions Supplement"},
    {0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement"},
    {0x1E00, 0x1EFF, "Latin Extended Additional"},
    {0x1F00, 0x1FFF, "Greek Extended"},
    {0x2000, 0x206F, "General Punctuation"},
    {0x2070, 0x209F, "Superscripts and Subscripts"},
    {0x20A0, 0x20CF, "Currency Symbols"},
    {0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols"},
    {0x2100, 0x214F, "Letterlike Symbols"},
    {0x2150, 0x218F, "Number Forms"},
    {0x2190, 0x21FF, "Arrows"},
    {0x2200, 0x22FF, "Mathematical Operators"},
    {0x2300, 0x23FF, "Miscellaneous Technical"},
    {0x2400, 0x243F, "Control Pictures"},
    {0x2440, 0x245F, "Optical Character Recognition"},
    {0x2460, 0x24FF, "Enclosed Alphanumerics"},
    {0x2500, 0x257F, "Box Drawing"},
    {0x2580, 0x259F, "Block Elements"},
    {0x25A0, 0x25FF, "Geometric Shapes"},
    {0x2600, 0x26FF, "Miscellaneous Symbols"},
    {0x2700, 0x27BF, "Dingbats"},
    {0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A"},
    {0x27F0, 0x27FF, "Supplemental Arrows-A"},
    {0x2800, 0x28FF, "Braille Patterns"},
    {0x2900, 0x297F, "Supplemental Arrows-B"},
    {0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B"},
    {0x2A00, 0x2AFF, "Supplemental Mathematical Operators"},
    {0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows"},
    {0x2C00, 0x2C5F, "Glagolitic"},
    {0x2C60, 0x2C7F, "Latin Extended-C"},
    {0x2C80, 0x2CFF, "Coptic"},
    {0x2D00, 0x2D2F, "Georgian Supplement"},
    {0x2D30, 0x2D7F, "Tifinagh"},
    {0x2D80, 0x2DDF, "Ethiopic Extended"},
    {0x2DE0, 0x2DFF, "Cyrillic Extended-A"},
    {0x2E00, 0x2E7F, "Supplemental Punctuation"},
    {0x2E80, 0x2EFF, "CJK Radicals Supplement"},
    {0x2F00, 0x2FDF, "Kangxi Radicals"},
    {0x2FF0, 0x2FFF, "Ideographic Description Characters"},
    {0x3000, 0x303F, "CJK Symbols and Punctuation"},
    {0x3040, 0x309F, "Hiragana"},
    {0x30A0, 0x30FF, "Katakana"},
    {0x3100, 0x312F, "Bopomofo"},
    {0x3130, 0x318F, "Hangul Compatibility Jamo"},
    {0x3190, 0x319F, "Kanbun"},
    {0x31A0, 0x31BF, "Bopomofo Extended"},
    {0x31C0, 0x31EF, "CJK Strokes"},
    {0x31F0, 0x31FF, "Katakana Phonetic Extensions"},
    {0x3200, 0x32FF, "Enclosed CJK Letters and Months"},
    {0x3300, 0x33FF, "CJK Compatibility"},
    {0x3400, 0x4DBF, "CJK Unified Ideographs Extension A"},
    {0x4DC0, 0x4DFF, "Yijing Hexagram Symbols"},
    {0x4E00, 0x9FFF, "CJK Unified Ideographs"},
    {0xA000, 0xA48F, "Yi Syllables"},
    {0xA490, 0xA4CF, "Yi Radicals"},
    {0xA4D0, 0xA4FF, "Lisu"},
    {0xA500, 0xA63F, "Vai"},
    {0xA640, 0xA69F, "Cyrillic Extended-B"},
    {0xA6A0, 0xA6FF, "Bamum"},
    {0xA700, 0xA71F, "Modifier Tone Letters"},
    {0xA720, 0xA7FF, "Latin Extended-D"},
    {0xA800, 0xA82F, "Syloti Nagri"},
    {0xA830, 0xA83F, "Common Indic Number Forms"},
    {0xA840, 0xA87F, "Phags-pa"},
    {0xA880, 0xA8DF, "Saurashtra"},
    {0xA8E0, 0xA8FF, "Devanagari Extended"},
    {0xA900, 0xA92F, "Kayah Li"},
    {0xA930, 0xA95F, "Rejang"},
    {0xA960, 0xA97F, "Hangul Jamo Extended-A"},
    {0xA980, 0xA9DF, "Javanese"},
    {0xA9E0, 0xA9FF, "Myanmar Extended-B"},
    {0xAA00, 0xAA5F, "Cham"},
    {0xAA60, 0xAA7F, "Myanmar Extended-A"},
    {0xAA80, 0xAADF, "Tai Viet"},
    {0xAAE0, 0xAAFF, "Meetei Mayek Extensions"},
    {0xAB00, 0xAB2F, "Ethiopic Extended-A"},
    {0xAB30, 0xAB6F, "Latin Extended-E"},
    {0xAB70, 0xABBF, "Cherokee Supplement"},
    {0xABC0, 0xABFF, "Meetei Mayek"},
    {0xAC00, 0xD7AF, "Hangul Syllables"},
    {0xD7B0, 0xD7FF, "Hangul Jamo Extended-B"},
    {0xE000, 0xF8FF, "Private Use Area"},
    {0xF900, 0xFAFF, "CJK Compatibility Ideographs"},
    {0xFB00, 0xFB4F, "Alphabetic Presentation Forms"},
    {0xFB50, 0xFDFF, "Arabic Presentation Forms-A"},
    {0xFE00, 0xFE0F, "Variation Selectors"},
    {0xFE10, 0xFE1F, "Vertical Forms"},
    {0xFE20, 0xFE2F, "Combining Half Marks"},
    {0xFE30, 0xFE4F, "CJK Compatibility Forms"},
    {0xFE50, 0xFE6F, "Small Form Variants"},
    {0xFE70, 0xFEFF, "Arabic Presentation Forms-B"},
    {0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms"},
    {0xFFF0, 0xFFFF, "Specials"}
};
// Owns a FreeType library handle and extracts the code points a font file maps to glyphs.
class FontUtils {
public:
    // Initialises FreeType.
    // Throws std::runtime_error when FT_Init_FreeType reports an error.
    FontUtils() {
        if (FT_Init_FreeType(&library_)) {
            throw std::runtime_error("无法初始化FreeType库");
        }
    }

    ~FontUtils() {
        FT_Done_FreeType(library_);
    }

    // The destructor releases the handle unconditionally, so a copy would release it twice.
    FontUtils(const FontUtils&) = delete;
    FontUtils& operator=(const FontUtils&) = delete;

    // Returns every character code that the first face of `fontPath` maps to a
    // non-zero glyph index. On load failure an error is written to std::cerr
    // and an empty set is returned.
    std::set<unsigned int> GetUnicodeFromFont(const fs::path& fontPath) {
        std::set<unsigned int> unicodeSet;
        FT_Face face = nullptr;
        if (FT_New_Face(library_, fontPath.string().c_str(), 0, &face)) {
            std::cerr << "Error loading font file: " << fontPath << std::endl;
            return unicodeSet;
        }
        FT_UInt glyphIndex = 0;
        FT_ULong charCode = FT_Get_First_Char(face, &glyphIndex);
        // FreeType signals the end of the charmap with a glyph index of 0.
        while (glyphIndex != 0) {
            unicodeSet.insert(static_cast<unsigned int>(charCode));
            charCode = FT_Get_Next_Char(face, charCode, &glyphIndex);
        }
        FT_Done_Face(face);
        return unicodeSet;
    }

    // Encodes one code point as UTF-8.
    // Returns an empty vector for values that have no valid UTF-8 form:
    // surrogates (U+D800-U+DFFF) and anything above U+10FFFF.
    std::vector<unsigned char> unicode_code_point_to_utf8(unsigned int code_point) {
        std::vector<unsigned char> utf8_bytes;
        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
            return utf8_bytes;  // surrogates must never appear in UTF-8
        }
        if (code_point <= 0x7F) {
            utf8_bytes.push_back(static_cast<unsigned char>(code_point));
        }
        else if (code_point <= 0x7FF) {
            utf8_bytes.push_back(static_cast<unsigned char>(0xC0 | ((code_point >> 6) & 0x1F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | (code_point & 0x3F)));
        }
        else if (code_point <= 0xFFFF) {
            utf8_bytes.push_back(static_cast<unsigned char>(0xE0 | ((code_point >> 12) & 0x0F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | ((code_point >> 6) & 0x3F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | (code_point & 0x3F)));
        }
        else if (code_point <= 0x10FFFF) {
            utf8_bytes.push_back(static_cast<unsigned char>(0xF0 | ((code_point >> 18) & 0x07)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | ((code_point >> 12) & 0x3F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | ((code_point >> 6) & 0x3F)));
            utf8_bytes.push_back(static_cast<unsigned char>(0x80 | (code_point & 0x3F)));
        }
        return utf8_bytes;
    }

    // Returns, as one UTF-8 string, the font's characters that fall inside
    // `block` (inclusive range), in ascending code-point order and limited to
    // MAX_CHARS_PER_BLOCK characters.
    //
    // The code-point set of the most recently queried font is kept in memory,
    // so asking for many blocks of the same font parses the file only once.
    std::string getFontCharactersForBlock(const fs::path& fontPath, const UnicodeBlock& block) {
        if (!cache_valid_ || cached_font_path_ != fontPath) {
            cached_code_points_ = GetUnicodeFromFont(fontPath);
            cached_font_path_ = fontPath;
            cache_valid_ = true;
        }

        std::string characters;
        size_t count = 0;
        // The set is ordered, so jump straight to the first code point of the block.
        for (auto it = cached_code_points_.lower_bound(block.start);
             it != cached_code_points_.end() && *it <= block.end && count < MAX_CHARS_PER_BLOCK;
             ++it) {
            const auto utf8_bytes = unicode_code_point_to_utf8(*it);
            if (utf8_bytes.empty()) {
                continue;  // not encodable, nothing to draw
            }
            characters.append(reinterpret_cast<const char*>(utf8_bytes.data()), utf8_bytes.size());
            ++count;
        }
        return characters;
    }

private:
    FT_Library library_ = nullptr;
    // Single-entry cache used by getFontCharactersForBlock.
    bool cache_valid_ = false;
    fs::path cached_font_path_;
    std::set<unsigned int> cached_code_points_;
};
class PDFDocument {
public:
PDFDocument(const fs::path& font_path, const fs::path& output_dir)
: pdf(nullptr), font(nullptr), font_path(font_path), output_dir(output_dir) {
try {
pdf = HPDF_New(nullptr, nullptr);
if (!pdf) {
throw std::runtime_error("无法创建 PDF 文档");
}
font_name = font_path.stem().string();
pdf_filename = font_name + "_unicode_chars.pdf";
HPDF_SetErrorHandler(pdf, [](HPDF_STATUS error_no, HPDF_STATUS detail_no, void* user_data) {
std::cerr << "PDF错误: " << error_no << ", 详情: " << detail_no << std::endl;
});
HPDF_UseUTFEncodings(pdf);
HPDF_SetCurrentEncoder(pdf, "UTF-8");
std::string extension = font_path.extension().string();
const char* loaded_font = nullptr;
if (extension == ".ttc") {
loaded_font = HPDF_LoadTTFontFromFile2(pdf, font_path.string().c_str(), 0, HPDF_TRUE);
}
else if (extension == ".ttf" || extension == ".otf") {
loaded_font = HPDF_LoadTTFontFromFile(pdf, font_path.string().c_str(), HPDF_TRUE);
}
else {
throw std::runtime_error("不支持的字体文件格式: " + extension);
}
if (!loaded_font) {
throw std::runtime_error("无法从文件加载字体: " + font_path.string());
}
font = HPDF_GetFont(pdf, loaded_font, "UTF-8");
if (!font) {
throw std::runtime_error("无法获取字体对象");
}
}
catch (const std::exception& e) {
if (pdf) {
HPDF_Free(pdf);
pdf = nullptr;
}
throw; // 重新抛出异常
}
}
~PDFDocument() {
if (pdf) {
HPDF_Free(pdf);
}
}
bool IsValid() const {
return pdf != nullptr && font != nullptr;
}
void AddCharactersPage(const std::string& chars, bool is_first_page = false) {
if (!IsValid()) return;
HPDF_Page page = HPDF_AddPage(pdf);
HPDF_Page_SetSize(page, HPDF_PAGE_SIZE_A4, HPDF_PAGE_PORTRAIT);
float current_y = TOP_MARGIN; // 当前Y坐标
// 如果是第一页,添加彩色标题
if (is_first_page) {
std::string title = "the font name is " + font_name;
size_t title_len = title.length();
// 设置标题字体大小
HPDF_Page_SetFontAndSize(page, font, 30);
// 设置文字描边(模拟加粗)
HPDF_Page_SetLineWidth(page, 1); // 描边宽度
HPDF_Page_SetRGBStroke(page, 0, 0, 0); // 描边颜色(黑色)
// 2. 启用描边+填充模式
HPDF_Page_SetTextRenderingMode(page, HPDF_FILL_THEN_STROKE);
// 定位到标题起始位置
HPDF_Page_BeginText(page);
HPDF_Page_MoveTextPos(page, LEFT_MARGIN, current_y);
// 生成渐变彩色标题(红到蓝渐变)
for (size_t i = 0; i < title_len; i++) {
char buf[2] = { title[i], '\0' };
// 计算渐变颜色(从红色渐变到蓝色)
float r = std::max(0.0f, 1.0f - std::abs((float)i / title_len - 0.5f) * 2.0f);
float g = std::max(0.0f, 1.0f - std::abs((float)i / title_len - 0.25f) * 4.0f);
float b = std::max(0.0f, 1.0f - std::abs((float)i / title_len - 0.75f) * 4.0f);
HPDF_Page_SetRGBFill(page, r, g, b);
HPDF_Page_ShowText(page, buf);
}
HPDF_Page_EndText(page);
// 添加黑色统计信息
HPDF_Page_SetRGBFill(page, 0.0f, 0.0f, 0.0f); // 重置为黑色
HPDF_Page_SetTextRenderingMode(page, HPDF_FILL);
HPDF_Page_BeginText(page);
HPDF_Page_SetFontAndSize(page, font, 12);
HPDF_Page_MoveTextPos(page, LEFT_MARGIN, current_y - 25);
std::string stats = "font count: " + std::to_string(chars.size());
HPDF_Page_ShowText(page, stats.c_str());
HPDF_Page_EndText(page);
// 调整字符显示的起始Y位置
current_y -= 40;
}
// 设置字符显示字体
HPDF_Page_SetFontAndSize(page, font, FONT_SIZE);
float x = LEFT_MARGIN;
float y = current_y; // 使用调整后的Y坐标
size_t char_count = 0;
size_t i = 0;
while (i < chars.size()) {
// 计算当前字符的UTF-8长度
size_t char_len = 1;
unsigned char c = chars[i];
if ((c & 0xE0) == 0xC0) char_len = 2;
else if ((c & 0xF0) == 0xE0) char_len = 3;
else if ((c & 0xF8) == 0xF0) char_len = 4;
// 提取当前字符
std::string current_char = chars.substr(i, char_len);
// 绘制字符
HPDF_Page_BeginText(page);
HPDF_Page_MoveTextPos(page, x, y);
HPDF_Page_ShowText(page, current_char.c_str());
HPDF_Page_EndText(page);
// 更新位置
x += CHAR_WIDTH;
char_count++;
// 换行处理
if (char_count % CHARS_PER_LINE == 0) {
x = LEFT_MARGIN;
y -= LINE_HEIGHT;
// 检查是否超出页面
if (y < 50) { // 底部边距
// 创建新页面继续输出
AddCharactersPage(chars.substr(i + char_len));
return;
}
}
i += char_len;
}
}
void GenerateWithUnicodeChars(FontUtils& font_utils) {
if (!IsValid()) return;
std::string all_chars;
size_t total_chars = 0;
// 收集所有区块的字符
for (const auto& block : UNICODE_BLOCKS) {
std::string block_chars = font_utils.getFontCharactersForBlock(font_path, block);
if (!block_chars.empty()) {
all_chars += block_chars;
total_chars += block_chars.size();
std::cout << "添加区块: " << block.name
<< " (U+" << std::hex << block.start << "-U+" << block.end << ")"
<< ", 字符数: " << block_chars.size() << std::endl;
}
}
std::cout << "总共收集 " << total_chars << " 个字符" << std::endl;
if (!all_chars.empty()) {
// 添加字符内容页(第一页包含标题)
AddCharactersPage(all_chars, true);
}
else {
std::cout << "字体中未找到任何Unicode字符" << std::endl;
}
}
bool Save() {
if (!IsValid()) return false;
try {
if (!fs::exists(output_dir)) {
fs::create_directories(output_dir);
}
fs::path full_path = fs::path(output_dir) / pdf_filename;
HPDF_STATUS ret = HPDF_SaveToFile(pdf, full_path.string().c_str());
if (ret != HPDF_OK) {
std::cerr << "保存PDF文件失败: " << full_path << std::endl;
return false;
}
std::cout << "PDF已生成: " << full_path.string() << std::endl;
return true;
}
catch (const std::exception& e) {
std::cerr << "保存PDF时出错: " << e.what() << std::endl;
return false;
}
}
private:
HPDF_Doc pdf;
HPDF_Font font;
std::string font_name;
std::string pdf_filename;
fs::path output_dir;
fs::path font_path;
};
/// Finds font files below a directory, filtered by extension and file-name pattern.
class FontScanner {
public:
    /// @param font_dir Root directory that the Get*Font() helpers scan recursively.
    FontScanner(fs::path font_dir) : font_dir_(font_dir) {}

    /// Returns the TrueType collection (.ttc) files below the font directory whose
    /// file name contains none of the style markers "Bold", "Italic", "Light",
    /// "Cond" or "-" (all matched case-insensitively).
    std::vector<fs::path> GetTTcFont() {
        return ScanFontsInDirectory(
            font_dir_,
            // Drop every recognised font extension except .ttc.
            { ".otf", ".ttf", ".woff", ".woff2", ".pfb", ".pfm" },
            // Drop styled variants.
            { "Bold", "Italic", "Light", "Cond", "-" },
            false // verbose output disabled
        );
    }

    /// Returns the TrueType (.ttf) files below the font directory whose file name
    /// contains none of the style markers "Bold", "Italic", "Light", "Cond" or "-"
    /// (all matched case-insensitively).
    std::vector<fs::path> GetTTfFont() {
        return ScanFontsInDirectory(
            font_dir_,
            // Drop every recognised font extension except .ttf.
            { ".otf", ".ttc", ".woff", ".woff2", ".pfb", ".pfm" },
            // Drop styled variants.
            { "Bold", "Italic", "Light", "Cond", "-" },
            false // verbose output disabled
        );
    }

    /// Recursively collects font files below `directory`.
    ///
    /// A regular file is kept when its extension is one of .ttf, .otf, .ttc, .woff,
    /// .woff2, .pfb or .pfm, its name ends with none of `excludeSuffixes`, and its
    /// name contains none of `excludeSubstrings`. All comparisons ignore case.
    ///
    /// @param directory         Root of the scan.
    /// @param excludeSuffixes   File-name endings (e.g. ".otf") to reject.
    /// @param excludeSubstrings File-name fragments (e.g. "Bold") to reject.
    /// @param verbose           When true, every decision and error is logged.
    /// @return Matching paths in directory-iteration order. Empty when `directory`
    ///         is missing or not a directory; holds the matches found so far when a
    ///         filesystem error interrupts the scan.
    std::vector<fs::path> ScanFontsInDirectory(
        const fs::path& directory,
        const std::set<std::string>& excludeSuffixes,
        const std::set<std::string>& excludeSubstrings,
        bool verbose)
    {
        std::vector<fs::path> fontPaths;

        // 1. Validate the root directory.
        if (!fs::exists(directory)) {
            if (verbose) {
                std::cerr << "Error: Directory does not exist: "
                    << directory.string() << std::endl;
            }
            return fontPaths;
        }
        if (!fs::is_directory(directory)) {
            if (verbose) {
                std::cerr << "Error: Path is not a directory: "
                    << directory.string() << std::endl;
            }
            return fontPaths;
        }

        // 2. Recognised font extensions (lower case).
        static const std::set<std::string> kFontExtensions = {
            ".ttf", ".otf", ".ttc", ".woff", ".woff2", ".pfb", ".pfm"
        };

        // 3. Normalise the exclusion lists to lower case once, up front.
        std::set<std::string> excludeSuffixesLower;
        for (const auto& suffix : excludeSuffixes) {
            excludeSuffixesLower.insert(ToLower(suffix));
        }
        std::set<std::string> excludeSubstringsLower;
        for (const auto& fragment : excludeSubstrings) {
            excludeSubstringsLower.insert(ToLower(fragment));
        }

        // 4. Walk the tree. Unreadable subdirectories are skipped so that one
        //    permission error does not end the whole scan.
        try {
            for (const auto& entry : fs::recursive_directory_iterator(
                directory, fs::directory_options::skip_permission_denied)) {
                if (!entry.is_regular_file()) {
                    continue;
                }
                const auto& path = entry.path();

                // 4.1 Extension must be a recognised font extension.
                const std::string ext = ToLower(path.extension().string());
                if (kFontExtensions.find(ext) == kFontExtensions.end()) {
                    if (verbose) {
                        std::cout << "Skipping non-font file: "
                            << path.string() << std::endl;
                    }
                    continue;
                }

                const std::string filenameLower = ToLower(path.filename().string());

                // 4.2 Reject by suffix.
                const bool bySuffix = std::any_of(
                    excludeSuffixesLower.begin(), excludeSuffixesLower.end(),
                    [&](const std::string& suffix) { return EndsWithCI(filenameLower, suffix); });
                if (bySuffix) {
                    if (verbose) {
                        std::cout << "Excluding by suffix: "
                            << path.string() << std::endl;
                    }
                    continue;
                }

                // 4.3 Reject by substring.
                const bool bySubstring = std::any_of(
                    excludeSubstringsLower.begin(), excludeSubstringsLower.end(),
                    [&](const std::string& fragment) {
                        return filenameLower.find(fragment) != std::string::npos;
                    });
                if (bySubstring) {
                    if (verbose) {
                        std::cout << "Excluding by substring: "
                            << path.string() << std::endl;
                    }
                    continue;
                }

                // 5. Accepted.
                if (verbose) {
                    std::cout << "Adding font file: "
                        << path.string() << std::endl;
                }
                fontPaths.emplace_back(path);
            }
        }
        catch (const fs::filesystem_error& e) {
            if (verbose) {
                std::cerr << "Filesystem error: " << e.what()
                    << " (path: " << e.path1().string() << ")" << std::endl;
            }
        }
        return fontPaths;
    }

private:
    /// Lower-cases `text` byte by byte. Bytes are passed to tolower as unsigned char
    /// because negative values (non-ASCII file names) are not valid arguments.
    static std::string ToLower(std::string text) {
        std::transform(text.begin(), text.end(), text.begin(),
            [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
        return text;
    }

    /// Returns true when `str` ends with `suffix`, ignoring case.
    static bool EndsWithCI(const std::string& str, const std::string& suffix) {
        if (suffix.size() > str.size()) return false;
        // Compare back to front over exactly suffix.size() characters.
        return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin(),
            [](unsigned char a, unsigned char b) { return ::tolower(a) == ::tolower(b); });
    }

    fs::path font_dir_;
};
/// Entry point: scans a font directory and writes one character-chart PDF per font.
///
/// Usage: program [font_dir [output_dir]]
/// Both arguments are optional; when omitted, the built-in default paths are used.
///
/// @return 0 when the run completes (individual fonts that fail are reported and
///         skipped); 1 when an exception escapes the overall setup.
int main(int argc, char* argv[]) {
    try {
        FontUtils font_utils;
        // Directory that is scanned for fonts.
        const fs::path font_dir = (argc > 1)
            ? fs::path(argv[1])
            : fs::path("/Users/admin/Desktop/CopiedFonts");
        // Directory that receives the generated PDFs.
        const fs::path output_dir = (argc > 2)
            ? fs::path(argv[2])
            : fs::path("/Users/admin/Mac_Font_For_Test_PeoPle/pdf_out");

        FontScanner scanner(font_dir);
        const auto ttc_font_files = scanner.GetTTcFont();
        const auto ttf_font_files = scanner.GetTTfFont();

        // Process collections first, then plain TrueType files.
        std::vector<fs::path> all_font_files;
        all_font_files.reserve(ttc_font_files.size() + ttf_font_files.size());
        all_font_files.insert(all_font_files.end(), ttc_font_files.begin(), ttc_font_files.end());
        all_font_files.insert(all_font_files.end(), ttf_font_files.begin(), ttf_font_files.end());

        if (all_font_files.empty()) {
            // The scanner is silent about a missing or empty directory, so say it here.
            std::cerr << "警告: 未找到任何字体文件: " << font_dir << std::endl;
        }

        for (const auto& path : all_font_files) {
            std::cout << "\n处理字体文件: " << path << std::endl;
            // A failure in one font must not stop the remaining fonts.
            try {
                PDFDocument doc(path, output_dir);
                if (!doc.IsValid()) {
                    std::cerr << "警告: 无法处理字体文件 " << path << ",跳过" << std::endl;
                    continue;
                }
                doc.GenerateWithUnicodeChars(font_utils);
                if (!doc.Save()) {
                    std::cerr << "警告: 无法保存PDF文件 " << path << ",跳过" << std::endl;
                }
            }
            catch (const std::exception& e) {
                std::cerr << "处理字体文件 " << path << " 时出错: " << e.what() << ",跳过" << std::endl;
            }
        }
    }
    catch (const std::exception& ex) {
        std::cerr << "错误: " << ex.what() << std::endl;
        return 1;
    }
    return 0;
}