SpringBoot如何實現word文檔轉pdf

一.背景

項目中有個需求大體意思是,上傳一個word模板,根據word模板合成word文件,再將word文件轉為pdf。

二.方案選擇

1.Spire.Doc for Java方案

Spire.Doc for Java這個是商用收費的,不過API文檔豐富且集成簡單,免費版僅支持3頁轉換。類似的還有ITEXT,這個商用也是受限制的。

2.docx4j方案

開源可商用,僅支持docx格式的word。

3.jodconverter+LibreOffice 方案

開源可商用,調用本地office服務,進行pdf轉換,類似的還有jodconverter+openOffice。

4.其他

至於其他的由於不支持跨平臺不做考慮。

三.實操

1.docx4j

首先嘗試了docx4j,因為docx4j本身支持模板替換的操作,可一次性完成替換及文檔類型轉換;雖然它僅支持docx類型,但對於本次需求問題不大。

1.依賴僅需要一個即可

<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j-export-fo</artifactId>
    <version>6.1.0</version>
</dependency>

2.主要代碼

@Slf4j
public class PdfUtil {
    public static <T> void exportByLocalPath(HttpServletResponse response, String fileName, String path, Map<String,String> params){
        try (InputStream in = PdfUtil.class.getClassLoader().getResourceAsStream(path)) {
            convertDocxToPdf(in, response,fileName,params);
        } catch (Exception e) {
            log.error("docx文檔轉換為PDF失敗", e.getMessage());
        }
    }
    /**
     * docx文檔轉換為PDF
     * @param in
     * @param response
     * @return
     */
    public static void convertDocxToPdf(InputStream in, HttpServletResponse response, String fileName, Map<String,String> params) throws Exception {
        response.setContentType("application/pdf");
         String fullFileName = new String(fileName.getBytes(), StandardCharsets.ISO_8859_1);
        response.setHeader("Content-disposition", "attachment;filename=" + fullFileName + ".pdf");
        WordprocessingMLPackage wmlPackage = WordprocessingMLPackage.load(in);
        if (params!=null&&!params.isEmpty()) {
            MainDocumentPart documentPart = wmlPackage.getMainDocumentPart();
            cleanDocumentPart(documentPart);
            documentPart.variableReplace(params);
        }
        setFontMapper(wmlPackage);
        Docx4J.toPDF(wmlPackage,response.getOutputStream());
    }
    /**
     * 清除文檔空白占位符
     * @param documentPart
     * @return {@link boolean}
     */
    public static boolean cleanDocumentPart(MainDocumentPart documentPart) throws Exception {
        if (documentPart == null) {
            return false;
        }
        Document document = documentPart.getContents();
        String wmlTemplate =
                XmlUtils.marshaltoString(document, true, false, Context.jc);
        document = (Document) XmlUtils.unwrap(DocxVariableClearUtil.doCleanDocumentPart(wmlTemplate, Context.jc));
        documentPart.setContents(document);
        return true;
    }
    /**
     * 設置字體樣式
     * @param mlPackage
     */
    private static void setFontMapper(WordprocessingMLPackage mlPackage) throws Exception {
        Mapper fontMapper = new IdentityPlusMapper();
        fontMapper.put("隸書", PhysicalFonts.get("LiSu"));
        fontMapper.put("宋體", PhysicalFonts.get("SimSun"));
        fontMapper.put("微軟雅黑", PhysicalFonts.get("Microsoft Yahei"));
        fontMapper.put("黑體", PhysicalFonts.get("SimHei"));
        fontMapper.put("楷體", PhysicalFonts.get("KaiTi"));
        fontMapper.put("新宋體", PhysicalFonts.get("NSimSun"));
        fontMapper.put("華文行楷", PhysicalFonts.get("STXingkai"));
        fontMapper.put("華文仿宋", PhysicalFonts.get("STFangsong"));
        fontMapper.put("宋體擴展", PhysicalFonts.get("simsun-extB"));
        fontMapper.put("仿宋", PhysicalFonts.get("FangSong"));
        fontMapper.put("仿宋_GB2312", PhysicalFonts.get("FangSong_GB2312"));
        fontMapper.put("幼圓", PhysicalFonts.get("YouYuan"));
        fontMapper.put("華文宋體", PhysicalFonts.get("STSong"));
        fontMapper.put("華文中宋", PhysicalFonts.get("STZhongsong"));
        mlPackage.setFontMapper(fontMapper);
    }
}

清除工具類,用於處理占位符被XML標簽拆開而導致替換不生效的問題(實現參考自網上文章):

/**
 * Cleans up <code>${variable}</code> placeholders in a WordprocessingML string
 * whose characters have been separated by XML tags, so that each placeholder
 * becomes one contiguous piece of text again.
 */
public class DocxVariableClearUtil {
    /**
     * Matches any XML tag.
     */
    private static final Pattern XML_PATTERN = Pattern.compile("<[^>]*>");
    private DocxVariableClearUtil() {
    }
    /**
     * First character of a placeholder.
     */
    private static final char PREFIX = '$';
    /**
     * Opening brace of a placeholder.
     */
    private static final char LEFT_BRACE = '{';
    /**
     * Closing brace of a placeholder.
     */
    private static final char RIGHT_BRACE = '}';
    /**
     * Scanner state: not inside a placeholder.
     */
    private static final int NONE_START = -1;
    /**
     * Index value meaning "no placeholder start recorded".
     */
    private static final int NONE_START_INDEX = -1;
    /**
     * Scanner state: the prefix character has been seen.
     */
    private static final int PREFIX_STATUS = 1;
    /**
     * Scanner state: the opening brace has been seen after the prefix.
     */
    private static final int LEFT_BRACE_STATUS = 2;
    /**
     * Scanner state for the closing brace (not used by the scanner).
     */
    private static final int RIGHT_BRACE_STATUS = 3;

    /**
     * Normalizes the placeholders in the given WordprocessingML string and
     * unmarshals the result.
     *
     * @param wmlTemplate document XML as a string
     * @param jc          JAXB context used for unmarshalling
     * @return the object produced by {@code XmlUtils.unmarshalString} for the cleaned XML
     * @throws JAXBException if the cleaned XML cannot be unmarshalled
     */
    public static Object doCleanDocumentPart(String wmlTemplate, JAXBContext jc) throws JAXBException {
        return XmlUtils.unmarshalString(normalizePlaceholders(wmlTemplate), jc);
    }

    /**
     * Scans the template once and rewrites every span running from a prefix
     * character through the next closing brace that follows an opening brace.
     */
    private static String normalizePlaceholders(String wmlTemplate) {
        int curStatus = NONE_START;
        int keyStartIndex = NONE_START_INDEX;
        // End (exclusive) of the part of the template already copied to the output.
        int lastWriteIndex = 0;
        int length = wmlTemplate.length();
        StringBuilder newDocumentBuilder = new StringBuilder(length);
        for (int curIndex = 0; curIndex < length; curIndex++) {
            switch (wmlTemplate.charAt(curIndex)) {
                case PREFIX:
                    // Always restart at the latest prefix, whatever the current state;
                    // this means a variable name cannot itself contain the prefix character.
                    keyStartIndex = curIndex;
                    curStatus = PREFIX_STATUS;
                    break;
                case LEFT_BRACE:
                    if (curStatus == PREFIX_STATUS) {
                        curStatus = LEFT_BRACE_STATUS;
                    }
                    break;
                case RIGHT_BRACE:
                    if (curStatus == LEFT_BRACE_STATUS) {
                        int keyEndIndex = curIndex + 1;
                        // Copy the untouched text that precedes the placeholder.
                        newDocumentBuilder.append(wmlTemplate, lastWriteIndex, keyStartIndex);
                        newDocumentBuilder.append(rebuildPlaceholder(wmlTemplate.substring(keyStartIndex, keyEndIndex)));
                        lastWriteIndex = keyEndIndex;
                        curStatus = NONE_START;
                        keyStartIndex = NONE_START_INDEX;
                    }
                    break;
                default:
                    break;
            }
        }
        // Copy whatever follows the last rewritten placeholder.
        newDocumentBuilder.append(wmlTemplate, lastWriteIndex, length);
        return newDocumentBuilder.toString();
    }

    /**
     * Rebuilds one raw span (prefix through closing brace, possibly containing
     * XML tags) so that the placeholder text is contiguous while the tags are kept.
     */
    private static String rebuildPlaceholder(String rawKey) {
        // The placeholder with all XML tags removed.
        String mappingKey = XML_PATTERN.matcher(rawKey).replaceAll("");
        if (mappingKey.equals(rawKey)) {
            // No tags inside: nothing to repair.
            return rawKey;
        }
        if (!mappingKey.startsWith("${")) {
            // Other text sits between the prefix and the opening brace, so this is not a
            // placeholder; leave it untouched rather than stripping its '$', '{' and '}'.
            return rawKey;
        }
        // Keep the tags (original formatting) but drop the placeholder marker characters.
        StringBuilder rawStringBuilder = new StringBuilder(rawKey.length());
        for (int i = 0; i < rawKey.length(); i++) {
            char rawChar = rawKey.charAt(i);
            if (rawChar == PREFIX || rawChar == LEFT_BRACE || rawChar == RIGHT_BRACE) {
                continue;
            }
            rawStringBuilder.append(rawChar);
        }
        // The variable name itself must be contiguous (not split by tags) to be found.
        String variable = mappingKey.substring(2, mappingKey.length() - 1);
        int variableStart = rawStringBuilder.indexOf(variable);
        // Index 0 is a valid match (e.g. "${name</w:t>...<w:t>}"), so test for >= 0.
        // NOTE(review): indexOf may also match inside a tag for very short variable names.
        if (variableStart >= 0) {
            rawStringBuilder.replace(variableStart, variableStart + variable.length(), mappingKey);
        }
        return rawStringBuilder.toString();
    }
}

2.poi-tl+jodconverter+LibreOffice 方案

poi-tl這個是專門用來進行word模板合成的開源庫,文檔很詳細。

LibreOffice 下載最新的穩定版本即可。

1.maven依賴

		<!-- word合成 -->
		<!-- 這裡注意版本:poi-tl 1.5.x 依賴的是 poi 3.x 版本 -->
		<dependency>
			<groupId>com.deepoove</groupId>
			<artifactId>poi-tl</artifactId>
			<version>1.5.1</version>
		</dependency>
		<!-- jodconverter  word轉pdf -->
		<!-- jodconverter-core這個依賴,理論上不用加,jodconverter-local已經依賴了它,但測試時不顯式添加會找不到該依賴 -->
		<dependency>
			<groupId>org.jodconverter</groupId>
			<artifactId>jodconverter-core</artifactId>
			<version>4.2.0</version>
		</dependency>
		<dependency>
			<groupId>org.jodconverter</groupId>
			<artifactId>jodconverter-local</artifactId>
			<version>4.2.0</version>
		</dependency>
		<dependency>
			<groupId>org.jodconverter</groupId>
			<artifactId>jodconverter-spring-boot-starter</artifactId>
			<version>4.2.0</version>
		</dependency>
		<!--  工具類,非必須 -->
		<dependency>
			<groupId>cn.hutool</groupId>
			<artifactId>hutool-all</artifactId>
			<version>5.4.3</version>
		</dependency>

2.主要代碼

JodConverterConfig配置類

/**
 * Spring configuration that exposes a {@link DocumentConverter} bean built on
 * the injected {@link OfficeManager}.
 */
@Configuration
public class JodConverterConfig {

    /** Office manager injected by Spring and handed to the converter builder. */
    @Autowired
    private OfficeManager officeManager;

    /**
     * Builds the converter used for document format conversion.
     *
     * @return a {@code LocalConverter} configured with the injected office manager
     */
    @Bean
    public DocumentConverter documentConverter() {
        final DocumentConverter converter = LocalConverter.builder().officeManager(officeManager).build();
        return converter;
    }
}

yml配置文件

jodconverter:
  local:
    enabled: true
    office-home: "C:\\Program Files\\LibreOffice"

PdfService合成導出代碼

@Slf4j
@Component
public class PdfService {
    @Autowired
    private DocumentConverter documentConverter;
    /**
     * Converts docx content to PDF and writes it to the response as an attachment.
     *
     * <p>Conversion and I/O failures are logged and not rethrown.
     *
     * @param inputStream docx content to convert
     * @param response    HTTP response that receives the PDF
     * @param fileName    download file name without the {@code .pdf} extension
     */
    public  void docxToPDF(InputStream inputStream,HttpServletResponse response,String fileName) {
        response.setContentType("application/pdf");
        try {
            // Re-interpret the UTF-8 bytes as ISO-8859-1 so non-ASCII names survive the header;
            // the charset is explicit so the result does not depend on the platform default.
            String fullFileName = new String(fileName.getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
            // Wrap the file name in double quotes (the original emitted literal backslashes).
            response.setHeader("Content-disposition", "attachment;filename=\"" + fullFileName + ".pdf\"");
            documentConverter
                    .convert(inputStream)
                    .as(DefaultDocumentFormatRegistry.DOCX)
                    .to(response.getOutputStream())
                    .as(DefaultDocumentFormatRegistry.PDF)
                    .execute();
        } catch (OfficeException | IOException e) {
            // The trailing throwable makes the logger record the stack trace as well.
            log.error("word轉pdf失敗:{}", e.getMessage(), e);
        }
    }
    /**
     * Merges a classpath docx template with the given data, converts the merged
     * document to PDF and writes it to the response.
     *
     * <p>The merged docx is staged in a uniquely named temporary file that is
     * removed afterwards, whether or not the conversion succeeded. Failures are
     * logged (with stack trace) and not rethrown.
     *
     * @param response HTTP response that receives the PDF
     * @param fileName download file name without the {@code .pdf} extension
     * @param path     classpath location of the docx template
     * @param params   data passed to {@code WordUtil.replaceWord} for the template merge
     * @throws Exception declared for compatibility with existing callers
     */
    public void exportByLocalPath(HttpServletResponse response, String fileName, String path, Object params) throws Exception {
        File tempFile = null;
        try (InputStream in = PdfService.class.getClassLoader().getResourceAsStream(path)) {
            if (in == null) {
                // getResourceAsStream returns null for a missing resource; fail with a clear message.
                throw new IllegalArgumentException("Template not found on classpath: " + path);
            }
            // Unique file inside java.io.tmpdir: avoids collisions between concurrent
            // requests and keeps the caller-supplied file name out of the file system path.
            tempFile = File.createTempFile("docx-merge-", ".docx");
            // Merge the template into the temp file; closing the stream flushes it
            // before the file is read back.
            try (BufferedOutputStream outputStream = FileUtil.getOutputStream(tempFile)) {
                WordUtil.replaceWord(outputStream, in, params);
            }
            // Convert the merged docx to PDF.
            try (BufferedInputStream wordInputStream = FileUtil.getInputStream(tempFile)) {
                docxToPDF(wordInputStream, response, fileName);
            }
        } catch (Exception e) {
            // Pass the throwable itself: the message has no placeholder, so e.getMessage() would be dropped.
            log.error("docx文檔轉換為PDF失敗", e);
        } finally {
            // Remove the temp file on both success and failure.
            if (tempFile != null && !tempFile.delete()) {
                log.warn("Could not delete temporary file {}", tempFile.getAbsolutePath());
            }
        }
    }

四.結論

1.docx4j方案

  • 依賴少
  • 同時支持word合成及格式轉換
  • 轉化效率較差
  • 對於含樣式及圖片轉換不友好,容易排版混亂

2.jodconverter+LibreOffice 方案

  • 操作穩定
  • 轉換效率快
  • 集成依賴設置較多
  • 依賴本地服務
  • LibreOffice打開word可能排版樣式錯亂
最後考慮項目需求,最終選擇了jodconverter+LibreOffice方案。

以上為個人經驗,希望能給大家一個參考,也希望大家多多支持WalkonNet。 

推薦閱讀: