SpringBoot集成阿里云文档格式转换实现pdf转换word,excel

发布于:2025-04-16 ⋅ 阅读:(19) ⋅ 点赞:(0)

一、前置条件

1.1 创建accessKey

如何申请:https://help.aliyun.com/zh/ram/user-guide/create-an-accesskey-pair

1.2 开通服务

官方地址:https://docmind.console.aliyun.com/doc-overview

未开通服务时需要点击开通按钮,然后才能调用相关api。
在这里插入图片描述

二、代码实现

2.1 引入依赖

<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>tea-openapi</artifactId>
    <version>0.2.5</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>docmind_api20220711</artifactId>
    <version>2.0.3</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>2.0.50</version>
</dependency>

2.2 pdf转换word

官方文档:https://help.aliyun.com/zh/document-mind/developer-reference/convertpdftoword

package net.lab1024.sa.admin.util;

import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
import com.aliyun.teautil.models.RuntimeOptions;

import java.io.FileInputStream;
import java.util.List;

public class PdfConvertUtil {

    private static final String OK = "200";

    private static final String ACCESS_KEY_ID = "xxx";

    private static final String ACCESS_KEY_SECRET = "xxx";

    public static void main(String[] args) throws Exception {
        String id = submitPdfToWord("C:\\Users\\admin\\Desktop\\example.pdf");

        // 10秒后再查询结果,等阿里云处理一会儿
        Thread.sleep(10000);

        List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> data = queryPdfToWord(id);
    }

    /**
     * 客户端
     *
     * @return
     * @throws Exception
     */
    private static Client getClient() throws Exception {
        Config config = new Config();
        config.setAccessKeyId(ACCESS_KEY_ID);
        config.setAccessKeySecret(ACCESS_KEY_SECRET);
        // 访问的域名,支持ipv4和ipv6两种方式,ipv6请使用docmind-api-dualstack.cn-hangzhou.aliyuncs.com
        config.setEndpoint("docmind-api.cn-hangzhou.aliyuncs.com");
        return new Client(config);
    }

    /**
     * 提交pdf转换word转换任务
     *
     * @return
     * @throws Exception
     */
    public static String submitPdfToWord(String filePath) throws Exception {
        Client client = getClient();

        // 请求参数
        SubmitConvertPdfToWordJobAdvanceRequest advanceRequest = new SubmitConvertPdfToWordJobAdvanceRequest();
        advanceRequest.setFileUrlObject(new FileInputStream(filePath));
        advanceRequest.setFileName("example.pdf");

        // 运行参数
        RuntimeOptions runtime = new RuntimeOptions();


        // 发送请求
        SubmitConvertPdfToWordJobResponse response = client.submitConvertPdfToWordJobAdvance(advanceRequest, runtime);

        // 处理结果
        SubmitConvertPdfToWordJobResponseBody body = response.getBody();
        if (!OK.equals(body.getCode())) {
            throw new RuntimeException("pdf转换word任务提交失败");
        }
        return body.getData().getId();
    }

    /**
     * 查询pdf转换word转换任务
     *
     * @param id
     * @return
     * @throws Exception
     */
    public static List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> queryPdfToWord(String id) throws Exception {
        Client client = getClient();

        // 请求参数
        GetDocumentConvertResultRequest resultRequest = new GetDocumentConvertResultRequest();
        resultRequest.setId(id);

        // todo 这里是简单处理 需要轮询120分钟,10秒一次

        GetDocumentConvertResultResponse response = client.getDocumentConvertResult(resultRequest);
        GetDocumentConvertResultResponseBody body = response.getBody();
        if (!OK.equals(body.getCode())) {
            throw new RuntimeException("pdf转换word任务查询失败");
        }

        Boolean completed = body.getCompleted();
        if (!completed) {
            throw new RuntimeException("pdf转换word任务未完成");
        }

        String status = body.getStatus();
        if (!"Success".equals(status)) {
            throw new RuntimeException("pdf转换word任务转换失败");
        }
        return body.getData();
    }

}

2.3 pdf转换excel

官方文档:https://help.aliyun.com/zh/document-mind/developer-reference/convertpdftoexcel

package net.lab1024.sa.admin.util;

import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
import com.aliyun.teautil.models.RuntimeOptions;

import java.io.FileInputStream;
import java.util.List;

public class PdfConvertUtil {

    private static final String OK = "200";

    private static final String ACCESS_KEY_ID = "xxx";

    private static final String ACCESS_KEY_SECRET = "xxx";

    public static void main(String[] args) throws Exception {
        String id = submitPdfToExcel("C:\\Users\\admin\\Desktop\\example.pdf");

        // 10秒后再查询结果,等阿里云处理一会儿
        Thread.sleep(10000);

        List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> data = queryPdfToExcel(id);
    }

    /**
     * 客户端
     *
     * @return
     * @throws Exception
     */
    private static Client getClient() throws Exception {
        Config config = new Config();
        config.setAccessKeyId(ACCESS_KEY_ID);
        config.setAccessKeySecret(ACCESS_KEY_SECRET);
        // 访问的域名,支持ipv4和ipv6两种方式,ipv6请使用docmind-api-dualstack.cn-hangzhou.aliyuncs.com
        config.setEndpoint("docmind-api.cn-hangzhou.aliyuncs.com");
        return new Client(config);
    }

    /**
     * 提交pdf转换excel转换任务
     * @return
     * @throws Exception
     */
    public static String submitPdfToExcel(String filePath) throws Exception {
        Client client = getClient();

        // 请求参数
        SubmitConvertPdfToExcelJobAdvanceRequest advanceRequest = new SubmitConvertPdfToExcelJobAdvanceRequest();
        advanceRequest.setFileUrlObject(new FileInputStream(filePath));
        advanceRequest.setFileName("example.pdf");
        // 合并为1个sheet
        advanceRequest.setForceMergeExcel(true);

        // 运行参数
        RuntimeOptions runtime = new RuntimeOptions();

        // 发送请求
        SubmitConvertPdfToExcelJobResponse response = client.submitConvertPdfToExcelJobAdvance(advanceRequest, runtime);

        // 处理结果
        SubmitConvertPdfToExcelJobResponseBody body = response.getBody();
        if (!OK.equals(body.getCode())) {
            throw new RuntimeException("pdf转换excel任务提交失败");
        }
        return body.getData().getId();
    }

    /**
     * 查询pdf转换excel转换任务
     * @param id
     * @return
     * @throws Exception
     */
    public static List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> queryPdfToExcel(String id) throws Exception {
        Client client = getClient();

        // 请求参数
        GetDocumentConvertResultRequest resultRequest = new GetDocumentConvertResultRequest();
        resultRequest.setId(id);

        // todo 这里是简单处理 需要轮询120分钟,10秒一次

        GetDocumentConvertResultResponse response = client.getDocumentConvertResult(resultRequest);
        GetDocumentConvertResultResponseBody body = response.getBody();
        if (!OK.equals(body.getCode())) {
            throw new RuntimeException("pdf转换excel任务查询失败");
        }

        Boolean completed = body.getCompleted();
        if (!completed) {
            throw new RuntimeException("pdf转换excel任务未完成");
        }

        String status = body.getStatus();
        if (!"Success".equals(status)) {
            throw new RuntimeException("pdf转换excel任务转换失败");
        }
        return body.getData();
    }

}