// Java读取SpringBoot工程内所有汉字,并输出上下文、文件类型、汉字内容、文件路径
package com.bims;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.regex.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Command-line tool that scans a project directory for Chinese text and exports every
 * occurrence (file path, file type, matched text, surrounding context) to an .xlsx workbook.
 *
 * <p>Only files whose extension is registered in {@link #COMMENT_PATTERNS} are scanned.
 * Comments are stripped before matching, so Chinese text that appears only inside comments
 * is not reported.
 */
public class 读取工程内所有文字 {

    /** Maximum number of data rows written to a single sheet (the header row is extra). */
    private static final int MAX_ROWS_PER_SHEET = 1000000;

    /** Number of characters captured on each side of a match as its context. */
    private static final int CONTEXT_SIZE = 200;

    /** Upper bound on the number of matches recorded for one file. */
    private static final int MAX_MATCHES_PER_FILE = 100000;

    /** Longest text an .xlsx cell can hold; POI rejects longer values with an exception. */
    private static final int MAX_CELL_TEXT_LENGTH = 32767;

    /** Column titles of the header row, in column order. */
    private static final String[] HEADER_TITLES = {"文件路径", "文件类型", "中文字符", "上下文代码"};

    /** Comment syntax per lower-case file extension; also defines which files get scanned. */
    private static final Map<String, CommentPattern> COMMENT_PATTERNS = new HashMap<>();

    static {
        COMMENT_PATTERNS.put(".java", new CommentPattern("//.*", "/\\*.*?\\*/", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".js", new CommentPattern("//.*", "/\\*.*?\\*/", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".html", new CommentPattern(null, "<!--.*?-->", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".xml", new CommentPattern(null, "<!--.*?-->", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".css", new CommentPattern(null, "/\\*.*?\\*/", Pattern.DOTALL));
    }

    /** Matches one or more consecutive characters in the range U+4E00..U+9FA5. */
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]+");

    /**
     * Prompts for the project directory and the output file name, scans the project and
     * writes the workbook. Failures are reported on stderr; nothing is rethrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String projectDir;
        String outputFile;
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.print("请输入Java工程路径: ");
            // Trim so that a pasted path with stray whitespace is still found.
            projectDir = scanner.nextLine().trim();
            System.out.print("请输入输出Excel文件名(默认:chinese_results.xlsx): ");
            // End of input here is treated like an empty answer, i.e. the default name.
            outputFile = scanner.hasNextLine() ? scanner.nextLine().trim() : "";
        }
        if (outputFile.isEmpty()) {
            outputFile = "chinese_results.xlsx";
        }

        try {
            List<ChineseOccurrence> results = scanProject(projectDir);
            saveToExcel(results, outputFile);
            System.out.println("扫描完成! 找到 " + results.size() + " 处中文内容");
            System.out.println("结果保存到: " + new File(outputFile).getAbsolutePath());
        } catch (IOException e) {
            System.err.println("处理过程中出错: " + e.getMessage());
            e.printStackTrace();
        } catch (Exception e) {
            System.err.println("发生错误: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Walks {@code projectDir} recursively and collects every Chinese occurrence found in
     * files with a registered extension.
     *
     * <p>Files are processed in parallel, but the result keeps the walk's encounter order,
     * so all occurrences of one file stay together and in source order.
     *
     * @param projectDir root directory (or single file) to scan
     * @return all occurrences found; empty if there are none
     * @throws IOException if {@code projectDir} does not exist or the walk cannot be started
     */
    private static List<ChineseOccurrence> scanProject(String projectDir) throws IOException {
        long startTime = System.currentTimeMillis();
        Path startPath = Paths.get(projectDir);
        if (!Files.exists(startPath)) {
            throw new IOException("路径不存在: " + projectDir);
        }

        List<ChineseOccurrence> results;
        // Files.walk holds open directory handles; it must be closed explicitly.
        try (Stream<Path> paths = Files.walk(startPath)) {
            results = paths.parallel()
                    .filter(Files::isRegularFile)
                    .filter(path -> COMMENT_PATTERNS.containsKey(lookupKey(extensionOf(path))))
                    .map(读取工程内所有文字::scanFile)
                    .flatMap(List::stream)
                    .collect(Collectors.toList());
        }

        long duration = System.currentTimeMillis() - startTime;
        System.out.println("扫描耗时: " + duration + "ms, 找到 " + results.size() + " 个结果");
        return results;
    }

    /**
     * Reads one file as UTF-8, strips its comments and returns the Chinese occurrences in it.
     * Any failure is logged to stderr and yields the occurrences gathered so far, so a single
     * bad file never aborts the whole scan.
     *
     * @param path file to scan
     * @return occurrences found in this file, in source order
     */
    private static List<ChineseOccurrence> scanFile(Path path) {
        List<ChineseOccurrence> occurrences = new ArrayList<>();
        try {
            String fileType = extensionOf(path);
            CommentPattern pattern = COMMENT_PATTERNS.get(lookupKey(fileType));
            if (pattern == null) {
                return occurrences;
            }
            String content = new String(Files.readAllBytes(path), StandardCharsets.UTF_8);
            String cleanContent = removeComments(content, pattern);
            findChineseInContent(cleanContent, path.toString(), fileType, occurrences);
        } catch (IOException e) {
            System.err.println("处理文件出错: " + path + " - " + e.getMessage());
        } catch (Exception e) {
            System.err.println("处理文件时发生错误: " + path + " - " + e.getMessage());
        }
        return occurrences;
    }

    /**
     * Returns the extension of the file name including the leading dot, in its original case,
     * or an empty string when the name contains no dot.
     */
    private static String extensionOf(Path path) {
        Path name = path.getFileName();
        if (name == null) {
            return "";
        }
        String fileName = name.toString();
        int dotIndex = fileName.lastIndexOf('.');
        return dotIndex == -1 ? "" : fileName.substring(dotIndex);
    }

    /** Normalizes an extension to the lower-case form used as key in {@link #COMMENT_PATTERNS}. */
    private static String lookupKey(String extension) {
        // Locale.ROOT keeps the mapping independent of the platform's default locale.
        return extension.toLowerCase(Locale.ROOT);
    }

    /**
     * Removes block comments first, then line comments, using the given regex patterns.
     *
     * <p>This is a regex approximation, not a parser: comment markers inside string literals
     * (for example the {@code //} of a URL) are treated as comments as well.
     *
     * @param content raw file content
     * @param pattern comment syntax of the file type; either pattern may be {@code null}
     * @return content with the matched comments deleted
     */
    private static String removeComments(String content, CommentPattern pattern) {
        String result = content;
        if (pattern.multiPattern != null) {
            result = pattern.multiPattern.matcher(result).replaceAll("");
        }
        if (pattern.singlePattern != null) {
            result = pattern.singlePattern.matcher(result).replaceAll("");
        }
        return result;
    }

    /**
     * Appends one {@link ChineseOccurrence} per Chinese run found in {@code content}, each with
     * up to {@link #CONTEXT_SIZE} characters of context on either side. Stops after
     * {@link #MAX_MATCHES_PER_FILE} matches and prints a warning to stderr.
     *
     * @param content  text to search (comments already removed by the caller)
     * @param filePath path recorded with every occurrence
     * @param fileType file type recorded with every occurrence
     * @param results  list the occurrences are appended to; it is not synchronized here, so
     *                 the caller must not share it between threads
     */
    private static void findChineseInContent(String content, String filePath, String fileType,
            List<ChineseOccurrence> results) {
        Matcher matcher = CHINESE_PATTERN.matcher(content);
        int count = 0;
        while (count < MAX_MATCHES_PER_FILE && matcher.find()) {
            int contextStart = Math.max(0, matcher.start() - CONTEXT_SIZE);
            int contextEnd = Math.min(content.length(), matcher.end() + CONTEXT_SIZE);
            String context = content.substring(contextStart, contextEnd);
            results.add(new ChineseOccurrence(filePath, fileType, matcher.group(), context));
            count++;
        }
        if (count >= MAX_MATCHES_PER_FILE) {
            System.err.println("警告: 文件 " + filePath + " 超过最大匹配限制 (" + MAX_MATCHES_PER_FILE + ")");
        }
    }

    /**
     * Writes the occurrences to an .xlsx file, splitting them over several sheets when there
     * are more than {@link #MAX_ROWS_PER_SHEET}. At least one sheet (header only) is always
     * created, because a workbook without any sheet is not a usable spreadsheet file.
     *
     * @param results    occurrences to export
     * @param outputFile path of the workbook to create or overwrite
     * @throws IOException if the file cannot be written
     */
    private static void saveToExcel(List<ChineseOccurrence> results, String outputFile) throws IOException {
        try (Workbook workbook = new XSSFWorkbook()) {
            int totalResults = results.size();
            int sheetCount = Math.max(1, (int) Math.ceil((double) totalResults / MAX_ROWS_PER_SHEET));
            System.out.println("总结果数: " + totalResults);
            System.out.println("需要创建 " + sheetCount + " 个Sheet页");

            // Styles are created once and shared by all cells of the workbook.
            CellStyle headerStyle = createHeaderStyle(workbook);
            CellStyle wrapTextStyle = createWrapTextStyle(workbook);
            CellStyle fileTypeStyle = createFileTypeStyle(workbook);

            for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
                int startIndex = sheetIndex * MAX_ROWS_PER_SHEET;
                // Computed from the remainder so the arithmetic cannot overflow.
                int rowsInSheet = Math.min(MAX_ROWS_PER_SHEET, totalResults - startIndex);

                String sheetName = sheetCount > 1 ? "结果_" + (sheetIndex + 1) : "结果";
                Sheet sheet = workbook.createSheet(sheetName);
                createHeaderRow(sheet, headerStyle);

                for (int i = 0; i < rowsInSheet; i++) {
                    // Row 0 is the header, data starts at row 1.
                    Row row = sheet.createRow(i + 1);
                    writeDataRow(row, results.get(startIndex + i), fileTypeStyle, wrapTextStyle);
                }

                sheet.autoSizeColumn(0);              // file path
                sheet.setColumnWidth(1, 10 * 256);    // file type (fixed width)
                sheet.autoSizeColumn(2);              // Chinese text
                sheet.setColumnWidth(3, 150 * 256);   // context
                System.out.println("Sheet " + (sheetIndex + 1) + " 完成: " + rowsInSheet + " 行");
            }

            try (OutputStream out = new FileOutputStream(outputFile)) {
                workbook.write(out);
            }
        }
    }

    /** Fills one data row: file path, file type, Chinese text and wrapped context. */
    private static void writeDataRow(Row row, ChineseOccurrence occ,
            CellStyle fileTypeStyle, CellStyle wrapTextStyle) {
        row.createCell(0).setCellValue(truncateForCell(occ.filePath));

        Cell typeCell = row.createCell(1);
        typeCell.setCellValue(truncateForCell(occ.fileType));
        typeCell.setCellStyle(fileTypeStyle);

        row.createCell(2).setCellValue(truncateForCell(occ.chinese));

        Cell contextCell = row.createCell(3);
        contextCell.setCellValue(truncateForCell(occ.context));
        contextCell.setCellStyle(wrapTextStyle);
    }

    /** Cuts a value down to {@link #MAX_CELL_TEXT_LENGTH} so one oversized cell cannot abort the export. */
    private static String truncateForCell(String value) {
        return value.length() <= MAX_CELL_TEXT_LENGTH ? value : value.substring(0, MAX_CELL_TEXT_LENGTH);
    }

    /** Builds the header style: bold white text, centered, on a solid dark-blue fill with thin borders. */
    private static CellStyle createHeaderStyle(Workbook workbook) {
        Font headerFont = workbook.createFont();
        headerFont.setBold(true);
        headerFont.setColor(IndexedColors.WHITE.getIndex());

        CellStyle headerStyle = workbook.createCellStyle();
        headerStyle.setFont(headerFont);
        headerStyle.setFillForegroundColor(IndexedColors.DARK_BLUE.getIndex());
        headerStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        headerStyle.setAlignment(HorizontalAlignment.CENTER);
        applyThinBorders(headerStyle);
        return headerStyle;
    }

    /** Builds the style of the context column: wrapped text with thin borders. */
    private static CellStyle createWrapTextStyle(Workbook workbook) {
        CellStyle style = workbook.createCellStyle();
        style.setWrapText(true);
        applyThinBorders(style);
        return style;
    }

    /** Builds the style of the file-type column: centered on a solid light-yellow fill with thin borders. */
    private static CellStyle createFileTypeStyle(Workbook workbook) {
        CellStyle style = workbook.createCellStyle();
        style.setAlignment(HorizontalAlignment.CENTER);
        style.setFillForegroundColor(IndexedColors.LIGHT_YELLOW.getIndex());
        style.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        applyThinBorders(style);
        return style;
    }

    /** Sets a thin border on all four sides of the style. */
    private static void applyThinBorders(CellStyle style) {
        style.setBorderBottom(BorderStyle.THIN);
        style.setBorderTop(BorderStyle.THIN);
        style.setBorderLeft(BorderStyle.THIN);
        style.setBorderRight(BorderStyle.THIN);
    }

    /** Creates row 0 of the sheet with one styled title cell per entry of {@link #HEADER_TITLES}. */
    private static void createHeaderRow(Sheet sheet, CellStyle headerStyle) {
        Row headerRow = sheet.createRow(0);
        for (int column = 0; column < HEADER_TITLES.length; column++) {
            Cell cell = headerRow.createCell(column);
            cell.setCellValue(HEADER_TITLES[column]);
            cell.setCellStyle(headerStyle);
        }
    }

    /** One run of Chinese characters found in a file, together with where it was found. */
    private static final class ChineseOccurrence {
        /** Path of the file, as produced by {@code Path.toString()}. */
        final String filePath;
        /** File extension including the leading dot, in its original case. */
        final String fileType;
        /** The matched Chinese text. */
        final String chinese;
        /** The match plus up to {@code CONTEXT_SIZE} characters on each side. */
        final String context;

        ChineseOccurrence(String filePath, String fileType, String chinese, String context) {
            this.filePath = filePath;
            this.fileType = fileType;
            this.chinese = chinese;
            this.context = context;
        }
    }

    /** Compiled comment patterns of one file type; a {@code null} pattern means "no such comment form". */
    private static final class CommentPattern {
        final Pattern singlePattern;
        final Pattern multiPattern;

        /**
         * @param singleLineRegex regex of a line comment, or {@code null}
         * @param multiLineRegex  regex of a block comment, or {@code null}
         * @param flags           {@link Pattern} flags; {@link Pattern#DOTALL} is applied to the
         *                        block-comment pattern only
         */
        CommentPattern(String singleLineRegex, String multiLineRegex, int flags) {
            // DOTALL must never reach the line-comment pattern: with it, "//.*" no longer
            // stops at the line break and deletes everything up to the end of the file.
            this.singlePattern = singleLineRegex != null
                    ? Pattern.compile(singleLineRegex, flags & ~Pattern.DOTALL)
                    : null;
            this.multiPattern = multiLineRegex != null
                    ? Pattern.compile(multiLineRegex, flags)
                    : null;
        }
    }
}