使用docx4j 实现word转pdf(linux乱码处理)

发布于:2025-06-25 ⋅ 阅读:(22) ⋅ 点赞:(0)

由于系统之前是使用itext进行转换的,而itext现在已经不再免费(商用需要收费),因此改用docx4j进行改造,具体处理如下。

<dependency>
			<groupId>org.docx4j</groupId>
			<artifactId>docx4j</artifactId>
			<version>6.1.2</version>
			<exclusions>
				<exclusion>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
				</exclusion>
				<exclusion>
					<groupId>xalan</groupId>
					<artifactId>xalan</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>org.docx4j</groupId>
			<artifactId>docx4j-export-fo</artifactId>
			<version>6.0.0</version>
			<exclusions>
				<exclusion>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
				</exclusion>
				<exclusion>
					<groupId>xalan</groupId>
					<artifactId>xalan</artifactId>
				</exclusion>
			</exclusions>
		</dependency>

以上排除了日志相关的依赖(以及xalan),因为和我自己项目中的依赖有冲突;各自的项目请视情况而定。


    /**
     * Fills a Word template with the supplied values and renders the result to a PDF file.
     *
     * <p>The template is loaded as a {@code CustomXWPFDocument}, placeholders in
     * paragraphs and tables are replaced via {@code WordUtils}, the document is
     * serialized to memory, reloaded as a docx4j {@code WordprocessingMLPackage}
     * and finally written to {@code outPath} with {@code Docx4J.toPDF}.
     *
     * @param inPath   path of the source .docx template
     * @param outPath  path of the PDF file to create
     * @param params   placeholder-to-value map handed to {@code WordUtils}
     * @param rootpath not used by this method; kept so existing callers still compile
     * @throws Exception if reading the template, replacing placeholders or the
     *                   PDF conversion fails; the original exception is propagated
     */
    public static void writeWordModle(String inPath, String outPath, Map<String, String> params,String rootpath) throws Exception {
        byte[] docBytes;

        // Stage 1: load the template, substitute variables, serialize to memory.
        // try-with-resources closes the file stream AND the document, even on failure.
        // NOTE(review): assumes CustomXWPFDocument extends POI's XWPFDocument and is
        // therefore Closeable — confirm against its declaration.
        try (FileInputStream is = new FileInputStream(new File(inPath));
             CustomXWPFDocument doc = new CustomXWPFDocument(is)) {

            WordUtils wordUtil = new WordUtils();
            wordUtil.replaceInPara(doc, params);  // variables inside paragraphs
            wordUtil.replaceInTable(doc, params); // variables inside tables

            // In-memory streams hold no OS resources, so they need no explicit close.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            doc.write(baos);
            docBytes = baos.toByteArray();
        }

        // Stage 2: reload the filled-in document with docx4j.
        WordprocessingMLPackage wordMLPackage =
                WordprocessingMLPackage.load(new ByteArrayInputStream(docBytes));

        // The mapper is created before PhysicalFonts is queried, as in the original flow.
        IdentityPlusMapper fontMapper = setFontMapper(new IdentityPlusMapper());

        // Alias the PMingLiU names to SimSun; per the original author this is the key
        // step that avoids garbled Chinese text on Linux. Skip the aliasing when SimSun
        // is not available so that no empty font entry gets registered.
        if (PhysicalFonts.get("SimSun") != null) {
            PhysicalFonts.put("PMingLiU", PhysicalFonts.get("SimSun"));
            PhysicalFonts.put("新細明體", PhysicalFonts.get("SimSun"));
        }
        wordMLPackage.setFontMapper(fontMapper);

        // Stage 3: run the conversion; the output stream is closed automatically.
        try (FileOutputStream targetpdf = new FileOutputStream(outPath)) {
            Docx4J.toPDF(wordMLPackage, targetpdf);
        }
    }

因为我是先通过word模版生成流,再通过流转换成pdf的;如果已经有现成的docx文件,可以使用File docFile = new File(docxPath);替代代码中的   inputStream = new ByteArrayInputStream(docBytes);,然后直接用WordprocessingMLPackage.load(docFile)加载。

 //解决宋体(正文)和宋体(标题)的乱码问题,以下是在linux的关键
            PhysicalFonts.put("PMingLiU", PhysicalFonts.get("SimSun"));
            PhysicalFonts.put("新細明體", PhysicalFonts.get("SimSun"));

这句很关键,我测试了很久找了很多资料,这个解决了linux环境乱码的问题。

/**
     * Registers a fixed set of Chinese font-name mappings on the given mapper.
     *
     * <p>Each entry maps a font name (the first column) to whatever
     * {@code PhysicalFonts.get} returns for the lookup name in the second column.
     *
     * @param fontMapper the mapper to populate
     * @return the same {@code fontMapper} instance that was passed in
     * @throws Exception declared by the signature; propagated from the docx4j calls
     */
    private static IdentityPlusMapper setFontMapper( IdentityPlusMapper fontMapper) throws Exception {
        // {name registered on the mapper, name looked up in PhysicalFonts}
        final String[][] fontAliases = {
            {"隶书", "LiSu"},
            {"宋体", "SimSun"},
            {"微软雅黑", "Microsoft Yahei"},
            {"黑体", "SimHei"},
            {"楷体", "KaiTi"},
            {"新宋体", "NSimSun"},
            {"华文行楷", "STXingkai"},
            {"华文仿宋", "STFangsong"},
            {"仿宋", "FangSong"},
            {"幼圆", "YouYuan"},
            {"华文宋体", "STSong"},
            {"华文中宋", "STZhongsong"},
            {"等线", "SimSun"},
            {"等线 Light", "SimSun"},
            {"华文琥珀", "STHupo"},
            {"华文隶书", "STLiti"},
            {"华文新魏", "STXinwei"},
            {"华文彩云", "STCaiyun"},
            {"方正姚体", "FZYaoti"},
            {"方正舒体", "FZShuTi"},
            {"华文细黑", "STXihei"},
            {"宋体扩展", "simsun-extB"},
            {"仿宋_GB2312", "FangSong_GB2312"},
            {"新細明體", "SimSun"},
        };

        // Apply the entries in table order, one lookup per registration.
        for (String[] alias : fontAliases) {
            fontMapper.put(alias[0], PhysicalFonts.get(alias[1]));
        }
        return fontMapper;
    }

设置字体,主要是windows使用,其实不设置也没问题。以上基本解决了代码的问题。

linux字体环境准备及处理

1、复制windows的C:\Windows\Fonts\下的所有字体库,放在桌面的fonts文件夹里

2、在linux目录/usr/share/fonts/下创建文件夹chinese(如果该文件夹不存在,手动创建)

3、把fonts拷贝上去,并给他授权chmod -R 755 /usr/share/fonts/chinese

4、# mkfontscale (如果提示 mkfontscale: command not found,需自行安装 # yum install mkfontscale )
# mkfontdir
# fc-cache -fv (如果提示 fc-cache: command not found,则需要安装 # yum install fontconfig )

5、执行 vi /etc/fonts/fonts.conf 修改配置,在已有的<dir>条目旁添加一行 <dir>/usr/share/fonts/chinese</dir>

6、最后执行,fc-cache和fc-list :lang=zh  

/usr/share/fonts/chinese/simsun.ttc: SimSun\-PUA,宋体\-PUA:style=Regular
/usr/share/fonts/chinese/simsun.ttc: NSimSun,新宋体:style=Regular
/usr/share/fonts/chinese/simsunb.ttf: SimSun\-ExtB:style=Regular,Normal,obyčejné,Standard,Κανονικά,Normaali,Normál,Normale,Standaard,Normalny,Обычный,Normálne,Navadno,Arrunta
/usr/share/fonts/chinese/SimsunExtG.ttf: SimSun\-ExtG:style=Regular
/usr/share/fonts/chinese/simsun.ttf: SimSun,宋体:style=Regular
/usr/share/fonts/chinese/simsun.ttc: SimSun,宋体:style=Regular

出现上面的类似即表示成功。


网站公告

今日签到

点亮在社区的每一天
去签到