Java利用IText导出PDF（更新）

很久以前（还在上大学的时候）我写过一篇相关文章：https://www.cnblogs.com/LUA123/p/5108007.html ，今天心血来潮决定更新一波。

看了下官网（https://itextpdf.com/en），出来个IText 7，但是这个要收费的，怎么收费我也不清楚，为了避免不必要的麻烦，还是用IText5版本玩了。

正文

首先引入依赖：（目前最新版的）

<!-- iText 5 core library (com.itextpdf.text.*): https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13.1</version>
</dependency>
<!-- Asian (CJK) font resources for iText 5: https://mvnrepository.com/artifact/com.itextpdf/itext-asian -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>
<!-- XML Worker (com.itextpdf.tool.xml.*), used below for HTML/CSS to PDF: https://mvnrepository.com/artifact/com.itextpdf.tool/xmlworker -->
<dependency>
    <groupId>com.itextpdf.tool</groupId>
    <artifactId>xmlworker</artifactId>
    <version>5.5.13.1</version>
</dependency>

示例一：HTML文件转PDF

web.html

<!-- Sample input for the HTML-to-PDF examples: a heading followed by an English and a Chinese list. -->
<div style="text-align: center"><b><span style="font-size: large">Terms and Conditions</span></b></div>
<ul>
    <li>Prices are in AED</li>
    <li>All Credit Card transactions are subject to a 3.25% processing fee</li>
    <li>In the event production is required per customer request, 50% of the entire bill will be due prior to start of production, and the balance due upon delivery.</li>
    <li>All furniture will be delivered in A+ condition. In the event that the equipment is damaged, the renter shall be liable for all repair costs to restore the equipment to its state at the beginning of the rental period.</li>
    <li>Equipment shall be utilized for the stated purpose and at the stated location only.</li>
</ul>

<!-- The Chinese list names its font explicitly. When an external font file is registered in Java under an alias,
     font-family here must be changed to that alias (the article's createPdf4 registers the alias "girl"). -->
<ul class="chinese" style="font-family: SimSun;" >
    <li>价格以迪拉姆为单位</li>
    <li>所有信用卡交易都要支付3.25%的手续费</li>
    <li>如果客户要求生产，则应支付全部账单的50%</li>
    <li>在开始生产之前，以及交货时的余额。所有家具将以+状态交付。如果设备损坏，承租人应承担所有维修费用，以将设备恢复至租期。</li>
    <li>设备应仅用于规定用途和规定位置。</li>
</ul>

web.css

/* Text colour for items of every unordered list. */
ul li {
    color: #0ba79c;
}

/* Text colour for items inside the element carrying class "chinese". */
.chinese li {
    color: #ccc920;
}

代码

package com.demo.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Utilities;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.css.CssFile;
import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
import com.itextpdf.tool.xml.html.CssAppliers;
import com.itextpdf.tool.xml.html.CssAppliersImpl;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Converts {@link #HTML} to PDF with iText 5 and XML Worker in four different ways:
 * a plain conversion, a conversion with extra inline CSS, a conversion with an external
 * CSS file, and a conversion with an external CSS file plus an externally registered font.
 */
public class HTMLAndPDF {
    public static final String PDF = "pdf/web.pdf";
    public static final String PDF2 = "pdf/web2.pdf";
    public static final String PDF3 = "pdf/web3.pdf";
    public static final String PDF4 = "pdf/web4.pdf";
    public static final String HTML = "pdf/web.html";
    public static final String CSS = "pdf/web.css";

    /** Charset used to decode {@link #HTML}; always passed explicitly so results do not depend on the platform default. */
    private static final Charset UTF_8 = StandardCharsets.UTF_8;

    /**
     * Runs the four conversions, writing {@link #PDF}, {@link #PDF2}, {@link #PDF3} and {@link #PDF4}.
     *
     * @param args ignored
     * @throws IOException       if an input cannot be read or an output cannot be written
     * @throws DocumentException if iText fails to build a document
     */
    public static void main(String[] args) throws IOException, DocumentException {
        HTMLAndPDF app = new HTMLAndPDF();

        ensureParentDirectory(PDF);
        app.createPdf(PDF);

        ensureParentDirectory(PDF2);
        app.createPdf2(PDF2);

        ensureParentDirectory(PDF3);
        app.createPdf3(PDF3);

        ensureParentDirectory(PDF4);
        app.createPdf4(PDF4);
    }

    /** Creates the parent directory of {@code path} if the path has one. */
    private static void ensureParentDirectory(String path) {
        File parent = new File(path).getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
    }

    /**
     * Converts the HTML file as-is.
     *
     * @param file path of the PDF to create
     * @throws IOException       if the HTML cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf(String file) throws IOException, DocumentException {
        // try-with-resources releases both streams when any step below fails.
        try (FileInputStream htmlIn = new FileInputStream(HTML);
             FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12);
            // step 3
            document.open();
            // step 4
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlIn, UTF_8);
            // step 5: only reached on success, so a parse failure is not masked by a close failure
            document.close();
        }
    }

    /**
     * Converts the HTML file, then appends the same HTML again inside a one-column table,
     * this second time styled with additional inline CSS.
     *
     * @param file path of the PDF to create
     * @throws IOException       if the HTML cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf2(String file) throws IOException, DocumentException {
        try (FileInputStream htmlIn = new FileInputStream(HTML);
             FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12);
            // step 3
            document.open();
            // step 4
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlIn, UTF_8);

            // Decode explicitly as UTF-8 so the Chinese text survives on platforms whose default charset differs.
            String html = new String(Files.readAllBytes(Paths.get(HTML)), UTF_8);
            String css = "ul { list-style: disc } li { padding: 10px }";
            PdfPTable table = new PdfPTable(1);
            table.setSpacingBefore(20);
            PdfPCell cell = new PdfPCell();
            for (Element e : XMLWorkerHelper.parseToElementList(html, css)) {
                cell.addElement(e);
            }
            table.addCell(cell);
            document.add(table);
            // step 5
            document.close();
        }
    }

    /**
     * Converts the HTML file using the external style sheet {@link #CSS}.
     *
     * @param file path of the PDF to create
     * @throws IOException       if the HTML/CSS cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf3(String file) throws IOException, DocumentException {
        // null keeps the HtmlPipelineContext default CSS appliers, as in the original example.
        convertWithExternalCss(file, null);
    }

    /**
     * Converts the HTML file using the external style sheet {@link #CSS} and an external font file
     * (for the Chinese text). The font is registered under the alias "girl"; the HTML must use
     * that alias as its font-family for the font to be picked up.
     *
     * @param file path of the PDF to create
     * @throws IOException       if the HTML/CSS cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf4(String file) throws IOException, DocumentException {
        XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
        fontProvider.register("pdf/华庚少女字体.ttf", "girl");    // font alias, referenced from web.html
        CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
        convertWithExternalCss(file, cssAppliers);
    }

    /**
     * Shared pipeline of {@link #createPdf3} and {@link #createPdf4}: CSS resolver, HTML pipeline, PDF writer.
     *
     * @param file        path of the PDF to create
     * @param cssAppliers CSS appliers handed to the {@link HtmlPipelineContext}; may be {@code null}
     */
    private void convertWithExternalCss(String file, CssAppliers cssAppliers) throws IOException, DocumentException {
        try (FileInputStream htmlIn = new FileInputStream(HTML);
             FileInputStream cssIn = new FileInputStream(CSS);
             FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();

            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12.5f);

            // step 3
            document.open();

            // step 4

            // CSS
            CSSResolver cssResolver = new StyleAttrCSSResolver();
            CssFile cssFile = XMLWorkerHelper.getCSS(cssIn);
            cssResolver.addCss(cssFile);

            // HTML
            HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
            htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());

            // Pipelines (built back to front: CSS -> HTML -> PDF writer)
            PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
            HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
            CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);

            // XML Worker
            XMLWorker worker = new XMLWorker(css, true);
            XMLParser parser = new XMLParser(worker);
            parser.parse(htmlIn, UTF_8);

            // step 5
            document.close();
        }
    }

}

第一个输出：

Java利用IText导出PDF（更新）

第二个输出：

Java利用IText导出PDF（更新）

第三个输出：

Java利用IText导出PDF（更新）

第四个输出：

Java利用IText导出PDF（更新）

大家可以看到中文的问题，注意点有两个：html文件指定 font-family；如果引入外部字体文件，别名要与font-family一致。文件：https://github.com/Mysakura/DataFiles

第四个要想成功，需要将web.html文件里的font-family修改为注册字体时使用的别名（即代码中的 girl）： Java利用IText导出PDF（更新）

Java利用IText导出PDF（更新）

所以呢，如果你对字体没啥要求，那font-family就指定常用中文字体即可，宋体，雅黑什么的 Java利用IText导出PDF（更新）

这部分涉及的文件

Java利用IText导出PDF（更新）

注意！！！如果你的外部字体是ttc文件（字体集合），比如simsun.ttc，引入时要在文件路径后面加上",序号"来指定使用集合中的第几个字体，例如 simsun.ttc,1 ，如下：

Java利用IText导出PDF（更新）

合并PDF文件 & 嵌入外部字体

提示：如果你运行上面的例子，你可以发现我的输入输出文件都在工程根目录的pdf文件夹里。

Java利用IText导出PDF（更新）

代码

package com.demo.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.*;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * Demonstrates merging PDF documents and embedding a font, comparing the resulting files for
 * subset-embedded, fully embedded and (requested) non-embedded fonts. Text is written with
 * {@link BaseFont#IDENTITY_H} so Chinese content can be shown.
 */
public class MergeAndAddFont {
    public static final String FONT = "pdf/华庚少女字体.ttf";
    /** Number of files merged by the demo. Retained for compatibility; {@link #mergeFiles} no longer relies on it. */
    public static final Integer FILE_NUM = 2;
    public static final String[] FILE_A = {
            "pdf/testA0.pdf", "pdf/testA1.pdf"
    };
    public static final String[] FILE_B = {
            "pdf/testB0.pdf", "pdf/testB1.pdf"
    };
    public static final String[] FILE_C = {
            "pdf/testC0.pdf", "pdf/testC1.pdf"
    };
    public static final String[] CONTENT = {
            "琪亚娜·卡斯兰娜", "德丽莎·阿波卡利斯"
    };
    public static final String MERGED_A1 = "pdf/testA_merged1.pdf";
    public static final String MERGED_A2 = "pdf/testA_merged2.pdf";
    public static final String MERGED_B1 = "pdf/testB_merged1.pdf";
    public static final String MERGED_B2 = "pdf/testB_merged2.pdf";
    public static final String MERGED_C1 = "pdf/testC_merged1.pdf";
    public static final String MERGED_C2 = "pdf/testC_merged2.pdf";

    /**
     * Generates the A, B and C series of source files and their merged variants.
     *
     * @param args ignored
     * @throws DocumentException if iText fails to build a document
     * @throws IOException       if a file cannot be read or written
     */
    public static void main(String[] args) throws DocumentException, IOException {

        File file = new File(MERGED_A1);
        file.getParentFile().mkdirs();
        MergeAndAddFont app = new MergeAndAddFont();

        // Test 1: font embedded as a subset (only the glyphs used); merged without, then with, smart copy
        for (int i = 0; i < FILE_A.length; i++) {
            app.createPdf(FILE_A[i], CONTENT[i], true, true);
        }
        app.mergeFiles(FILE_A, MERGED_A1, false);
        app.mergeFiles(FILE_A, MERGED_A2, true);

        // Test 2: full font embedded; merged without, then with, smart copy
        for (int i = 0; i < FILE_B.length; i++) {
            app.createPdf(FILE_B[i], CONTENT[i], true, false);
        }
        app.mergeFiles(FILE_B, MERGED_B1, false);
        app.mergeFiles(FILE_B, MERGED_B2, true);

        // Test 3: embedding not requested, no subsetting; smart merge; then embed the font by hand
        for (int i = 0; i < FILE_C.length; i++) {
            app.createPdf(FILE_C[i], CONTENT[i], false, false);
        }
        app.mergeFiles(FILE_C, MERGED_C1, true);
        app.embedFont(MERGED_C1, FONT, MERGED_C2);
    }

    /**
     * Creates a one-paragraph PDF that renders {@code text} with {@link #FONT}.
     *
     * @param filename path of the PDF to create
     * @param text     paragraph content
     * @param embedded {@code true} to embed the font in the PDF, {@code false} not to
     * @param subset   {@code true} to include only the glyphs used, {@code false} to include the full font
     * @throws DocumentException if iText fails to build the document
     * @throws IOException       if the font cannot be read or the PDF cannot be written
     */
    public void createPdf(String filename, String text, boolean embedded, boolean subset) throws DocumentException, IOException {
        // Load the font first so a missing font file does not leave an empty PDF behind.
        // File size depends on the encoding: without Chinese text, BaseFont.WINANSI is much cheaper.
        // NOTE(review): iText appears to force embedding for IDENTITY_H regardless of `embedded` - confirm.
        BaseFont bf = BaseFont.createFont(FONT, BaseFont.IDENTITY_H, embedded);
        bf.setSubset(subset);
        Font font = new Font(bf, 12);

        // try-with-resources releases the file handle if any step below fails.
        try (FileOutputStream out = new FileOutputStream(filename)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter.getInstance(document, out);
            // step 3
            document.open();
            // step 4
            document.add(new Paragraph(text, font));
            // step 5
            document.close();
        }
    }

    /**
     * Concatenates the given PDF files into one document.
     *
     * @param files  paths of the PDFs to merge, in output order; must contain at least one entry
     * @param result path of the merged PDF to create
     * @param smart  {@code true} to merge with {@link PdfSmartCopy}, {@code false} to use plain {@link PdfCopy}
     * @throws IOException              if a file cannot be read or written
     * @throws DocumentException        if iText fails to build the document
     * @throws IllegalArgumentException if {@code files} is {@code null} or empty
     */
    public void mergeFiles(String[] files, String result, boolean smart) throws IOException, DocumentException {
        if (files == null || files.length == 0) {
            throw new IllegalArgumentException("files must contain at least one PDF to merge");
        }
        try (FileOutputStream out = new FileOutputStream(result)) {
            Document document = new Document();
            PdfCopy copy = smart ? new PdfSmartCopy(document, out) : new PdfCopy(document, out);
            document.open();
            // One reader at a time: works for any number of inputs, not just FILE_NUM.
            for (String path : files) {
                PdfReader reader = new PdfReader(path);
                try {
                    copy.addDocument(reader);
                    copy.freeReader(reader);
                } finally {
                    reader.close();
                }
            }
            document.close();
        }
    }

    /**
     * Embeds the font file into an existing PDF by attaching it as FontFile2 to every font
     * descriptor whose FontName equals the font's PostScript name.
     *
     * @param merged   path of the PDF to read; must differ from {@code result}
     * @param fontfile path of the TrueType font file to embed
     * @param result   path of the PDF to create
     * @throws IOException       if a file cannot be read or written
     * @throws DocumentException if iText fails to process the document
     */
    private void embedFont(String merged, String fontfile, String result) throws IOException, DocumentException {
        // the font file
        byte[] fontbytes;
        try (RandomAccessFile raf = new RandomAccessFile(fontfile, "r")) {
            fontbytes = new byte[(int) raf.length()];
            raf.readFully(fontbytes);
        }
        // create a new stream for the font file; Length1 records the uncompressed size
        PdfStream stream = new PdfStream(fontbytes);
        stream.flateCompress();
        stream.put(PdfName.LENGTH1, new PdfNumber(fontbytes.length));
        PdfName fontname = new PdfName(
                BaseFont.createFont(fontfile, BaseFont.WINANSI, BaseFont.NOT_EMBEDDED).getPostscriptFontName());

        // create a reader object
        PdfReader reader = new PdfReader(merged);
        try (FileOutputStream out = new FileOutputStream(result)) {
            PdfStamper stamper = new PdfStamper(reader, out);
            // The font stream is written once and shared by every matching descriptor.
            PdfIndirectReference fontFileRef = null;
            int n = reader.getXrefSize();
            for (int i = 0; i < n; i++) {
                PdfObject object = reader.getPdfObject(i);
                if (object == null || !object.isDictionary()) {
                    continue;
                }
                PdfDictionary descriptor = (PdfDictionary) object;
                if (PdfName.FONTDESCRIPTOR.equals(descriptor.get(PdfName.TYPE))
                        && fontname.equals(descriptor.get(PdfName.FONTNAME))) {
                    if (fontFileRef == null) {
                        fontFileRef = stamper.getWriter().addToBody(stream).getIndirectReference();
                    }
                    descriptor.put(PdfName.FONTFILE2, fontFileRef);
                }
            }
            stamper.close();
        } finally {
            reader.close();
        }
    }
}

运行之后会生成12个文件。

Java利用IText导出PDF（更新）

直观一点的（看文件体积）

Java利用IText导出PDF（更新）

首先看A系列，因为它在创建文件的时候就指定包含用到的字形，所以独立文件的文件属性都是

Java利用IText导出PDF（更新）

合并文件都是

Java利用IText导出PDF（更新）

再来看B系列，因为它指定包含完整字体，所以体积很大。不同的是，合并1是非智能的，所以体积是智能的2倍。独立文件和合并文件的文件属性都是（已嵌入）

Java利用IText导出PDF（更新）

最后看C系列【这里中英文的出入比较大】。如果你是中文PDF（使用IDENTITY_H编码），iText会忽略embedded参数、始终嵌入字体，所以文档属性都是已嵌入；再手动嵌入一次，字体就被写入了两份，因此手动嵌入的文件体积是其它的2倍。

Java利用IText导出PDF（更新）

如果你是英文文档，代码如下，只需要改动两处（1. 输入英文，中文不显示 2. 更改字体编码），生成的文件C系列大不一样。

package com.demo.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.*;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * English-text variant of the merge-and-embed-font demo: same steps, but the text is Latin-only,
 * the font is loaded with {@link BaseFont#WINANSI}, and all files go to {@code pdf/en/}.
 */
public class MergeAndAddFont3 {
    public static final String FONT = "pdf/华庚少女字体.ttf";
    /** Number of files merged by the demo. Retained for compatibility; {@link #mergeFiles} no longer relies on it. */
    public static final Integer FILE_NUM = 2;
    public static final String[] FILE_A = {
            "pdf/en/testA0.pdf", "pdf/en/testA1.pdf"
    };
    public static final String[] FILE_B = {
            "pdf/en/testB0.pdf", "pdf/en/testB1.pdf"
    };
    public static final String[] FILE_C = {
            "pdf/en/testC0.pdf", "pdf/en/testC1.pdf"
    };
    // English PDF content
    public static final String[] CONTENT = {
            "ABCD", "EFGK"
    };
    public static final String MERGED_A1 = "pdf/en/testA_merged1.pdf";
    public static final String MERGED_A2 = "pdf/en/testA_merged2.pdf";
    public static final String MERGED_B1 = "pdf/en/testB_merged1.pdf";
    public static final String MERGED_B2 = "pdf/en/testB_merged2.pdf";
    public static final String MERGED_C1 = "pdf/en/testC_merged1.pdf";
    public static final String MERGED_C2 = "pdf/en/testC_merged2.pdf";

    /**
     * Generates the A, B and C series of source files and their merged variants.
     *
     * @param args ignored
     * @throws DocumentException if iText fails to build a document
     * @throws IOException       if a file cannot be read or written
     */
    public static void main(String[] args) throws DocumentException, IOException {

        File file = new File(MERGED_A1);
        file.getParentFile().mkdirs();
        MergeAndAddFont3 app = new MergeAndAddFont3();

        // Test 1: font embedded as a subset (only the glyphs used); merged without, then with, smart copy
        for (int i = 0; i < FILE_A.length; i++) {
            app.createPdf(FILE_A[i], CONTENT[i], true, true);
        }
        app.mergeFiles(FILE_A, MERGED_A1, false);
        app.mergeFiles(FILE_A, MERGED_A2, true);

        // Test 2: full font embedded; merged without, then with, smart copy
        for (int i = 0; i < FILE_B.length; i++) {
            app.createPdf(FILE_B[i], CONTENT[i], true, false);
        }
        app.mergeFiles(FILE_B, MERGED_B1, false);
        app.mergeFiles(FILE_B, MERGED_B2, true);

        // Test 3: font not embedded, no subsetting; smart merge; then embed the font by hand
        for (int i = 0; i < FILE_C.length; i++) {
            app.createPdf(FILE_C[i], CONTENT[i], false, false);
        }
        app.mergeFiles(FILE_C, MERGED_C1, true);
        app.embedFont(MERGED_C1, FONT, MERGED_C2);
    }

    /**
     * Creates a one-paragraph PDF that renders {@code text} with {@link #FONT}.
     *
     * @param filename path of the PDF to create
     * @param text     paragraph content
     * @param embedded {@code true} to embed the font in the PDF, {@code false} not to
     * @param subset   {@code true} to include only the glyphs used, {@code false} to include the full font
     * @throws DocumentException if iText fails to build the document
     * @throws IOException       if the font cannot be read or the PDF cannot be written
     */
    public void createPdf(String filename, String text, boolean embedded, boolean subset) throws DocumentException, IOException {
        // Load the font first so a missing font file does not leave an empty PDF behind.
        // Latin encoding: file size depends on the encoding, and without Chinese text
        // BaseFont.WINANSI is much cheaper.
        BaseFont bf = BaseFont.createFont(FONT, BaseFont.WINANSI, embedded);
        bf.setSubset(subset);
        Font font = new Font(bf, 12);

        // try-with-resources releases the file handle if any step below fails.
        try (FileOutputStream out = new FileOutputStream(filename)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter.getInstance(document, out);
            // step 3
            document.open();
            // step 4
            document.add(new Paragraph(text, font));
            // step 5
            document.close();
        }
    }

    /**
     * Concatenates the given PDF files into one document.
     *
     * @param files  paths of the PDFs to merge, in output order; must contain at least one entry
     * @param result path of the merged PDF to create
     * @param smart  {@code true} to merge with {@link PdfSmartCopy}, {@code false} to use plain {@link PdfCopy}
     * @throws IOException              if a file cannot be read or written
     * @throws DocumentException        if iText fails to build the document
     * @throws IllegalArgumentException if {@code files} is {@code null} or empty
     */
    public void mergeFiles(String[] files, String result, boolean smart) throws IOException, DocumentException {
        if (files == null || files.length == 0) {
            throw new IllegalArgumentException("files must contain at least one PDF to merge");
        }
        try (FileOutputStream out = new FileOutputStream(result)) {
            Document document = new Document();
            PdfCopy copy = smart ? new PdfSmartCopy(document, out) : new PdfCopy(document, out);
            document.open();
            // One reader at a time: works for any number of inputs, not just FILE_NUM.
            for (String path : files) {
                PdfReader reader = new PdfReader(path);
                try {
                    copy.addDocument(reader);
                    copy.freeReader(reader);
                } finally {
                    reader.close();
                }
            }
            document.close();
        }
    }

    /**
     * Embeds the font file into an existing PDF by attaching it as FontFile2 to every font
     * descriptor whose FontName equals the font's PostScript name.
     *
     * @param merged   path of the PDF to read; must differ from {@code result}
     * @param fontfile path of the TrueType font file to embed
     * @param result   path of the PDF to create
     * @throws IOException       if a file cannot be read or written
     * @throws DocumentException if iText fails to process the document
     */
    private void embedFont(String merged, String fontfile, String result) throws IOException, DocumentException {
        // the font file
        byte[] fontbytes;
        try (RandomAccessFile raf = new RandomAccessFile(fontfile, "r")) {
            fontbytes = new byte[(int) raf.length()];
            raf.readFully(fontbytes);
        }
        // create a new stream for the font file; Length1 records the uncompressed size
        PdfStream stream = new PdfStream(fontbytes);
        stream.flateCompress();
        stream.put(PdfName.LENGTH1, new PdfNumber(fontbytes.length));
        PdfName fontname = new PdfName(
                BaseFont.createFont(fontfile, BaseFont.WINANSI, BaseFont.NOT_EMBEDDED).getPostscriptFontName());

        // create a reader object
        PdfReader reader = new PdfReader(merged);
        try (FileOutputStream out = new FileOutputStream(result)) {
            PdfStamper stamper = new PdfStamper(reader, out);
            // The font stream is written once and shared by every matching descriptor.
            PdfIndirectReference fontFileRef = null;
            int n = reader.getXrefSize();
            for (int i = 0; i < n; i++) {
                PdfObject object = reader.getPdfObject(i);
                if (object == null || !object.isDictionary()) {
                    continue;
                }
                PdfDictionary descriptor = (PdfDictionary) object;
                if (PdfName.FONTDESCRIPTOR.equals(descriptor.get(PdfName.TYPE))
                        && fontname.equals(descriptor.get(PdfName.FONTNAME))) {
                    if (fontFileRef == null) {
                        fontFileRef = stamper.getWriter().addToBody(stream).getIndirectReference();
                    }
                    descriptor.put(PdfName.FONTFILE2, fontFileRef);
                }
            }
            stamper.close();
        } finally {
            reader.close();
        }
    }
}

View Code