我很久以前(还是上大学的时候)写过一篇:https://www.cnblogs.com/LUA123/p/5108007.html ,今天心血来潮决定更新一波。
看了下官网(https://itextpdf.com/en),出了个 iText 7,但是这个要收费,具体怎么收费我也不清楚,为了避免不必要的麻烦,还是用 iText 5 版本玩了。
正文
首先引入依赖:(目前最新版的)
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf --> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.13.1</version> </dependency> <!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian --> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itext-asian</artifactId> <version>5.2.0</version> </dependency> <!-- https://mvnrepository.com/artifact/com.itextpdf.tool/xmlworker --> <dependency> <groupId>com.itextpdf.tool</groupId> <artifactId>xmlworker</artifactId> <version>5.5.13.1</version> </dependency>
示例一:HTML文件转PDF
web.html
<div style="text-align: center"><b><span style="font-size: large">Terms and Conditions</span></b></div> <ul> <li>Prices are in AED</li> <li>All Credit Card transactions are subject to a 3.25% processing fee</li> <li>In the event production is required per customer request, 50% of the entire bill will be due prior to start of production, and the balance due upon delivery.</li> <li>All furniture will be delivered in A+ condition. In the event that the equipment is damaged, the renter shall be liable for all repair costs to restore the equipment to its state at the beginning of the rental period.</li> <li>Equipment shall be utilized for the stated purpose and at the stated location only.</li> </ul> <ul class="chinese" style="font-family: SimSun;" > <li>价格以迪拉姆为单位</li> <li>所有信用卡交易都要支付3.25%的手续费</li> <li>如果客户要求生产,则应支付全部账单的50%</li> <li>在开始生产之前,以及交货时的余额。所有家具将以+状态交付。如果设备损坏,承租人应承担所有维修费用,以将设备恢复至租期。</li> <li>设备应仅用于规定用途和规定位置。</li> </ul>
web.css
ul li { color: #0ba79c; } .chinese li { color: #ccc920; }
代码
package com.demo.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Utilities;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.css.CssFile;
import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
import com.itextpdf.tool.xml.html.CssAppliers;
import com.itextpdf.tool.xml.html.CssAppliersImpl;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Converts an HTML file to PDF in four different ways using iText 5 and XML Worker.
 *
 * <p>All input and output files live in the {@code pdf} folder relative to the working
 * directory. Every stream opened here is closed via try-with-resources, and the HTML
 * source is always decoded as UTF-8 so the result does not depend on the platform
 * default charset.
 */
public class HTMLAndPDF {

    public static final String PDF = "pdf/web.pdf";
    public static final String PDF2 = "pdf/web2.pdf";
    public static final String PDF3 = "pdf/web3.pdf";
    public static final String PDF4 = "pdf/web4.pdf";
    public static final String HTML = "pdf/web.html";
    public static final String CSS = "pdf/web.css";

    /** Charset used to decode {@link #HTML}; it contains Chinese text. */
    private static final Charset HTML_CHARSET = StandardCharsets.UTF_8;

    /**
     * Generates the four sample PDFs.
     *
     * @param args unused
     * @throws IOException       if an input cannot be read or an output cannot be written
     * @throws DocumentException if iText fails to build a document
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // All four outputs share the same parent folder, so creating it once is enough.
        new File(PDF).getParentFile().mkdirs();

        HTMLAndPDF app = new HTMLAndPDF();
        app.createPdf(PDF);
        app.createPdf2(PDF2);
        app.createPdf3(PDF3);
        app.createPdf4(PDF4);
    }

    /**
     * Converts {@link #HTML} as-is, using XML Worker's default settings.
     *
     * @param file path of the PDF to create
     * @throws IOException       if the HTML cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf(String file) throws IOException, DocumentException {
        try (FileOutputStream out = new FileOutputStream(file);
             FileInputStream htmlIn = new FileInputStream(HTML)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12);
            // step 3
            document.open();
            // step 4
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlIn, HTML_CHARSET);
            // step 5
            document.close();
        }
    }

    /**
     * Converts {@link #HTML} once with the defaults, then appends the same HTML again,
     * rendered with an extra inline CSS snippet, inside a one-column table.
     *
     * @param file path of the PDF to create
     * @throws IOException       if the HTML cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf2(String file) throws IOException, DocumentException {
        try (FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12);
            // step 3
            document.open();
            // step 4
            try (FileInputStream htmlIn = new FileInputStream(HTML)) {
                XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlIn, HTML_CHARSET);
            }

            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            String html = new String(Files.readAllBytes(Paths.get(HTML)), HTML_CHARSET);
            String css = "ul { list-style: disc } li { padding: 10px }";
            PdfPTable table = new PdfPTable(1);
            table.setSpacingBefore(20);
            PdfPCell cell = new PdfPCell();
            for (Element e : XMLWorkerHelper.parseToElementList(html, css)) {
                cell.addElement(e);
            }
            table.addCell(cell);
            document.add(table);
            // step 5
            document.close();
        }
    }

    /**
     * Converts {@link #HTML} applying the external stylesheet {@link #CSS}.
     *
     * @param file path of the PDF to create
     * @throws IOException       if an input cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf3(String file) throws IOException, DocumentException {
        try (FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12.5f);
            // step 3
            document.open();
            // step 4: null CSS appliers, exactly as the pipeline context was built before
            parseStyledHtml(document, writer, null);
            // step 5
            document.close();
        }
    }

    /**
     * Converts {@link #HTML} applying the external stylesheet {@link #CSS} and an external
     * font file, so that Chinese text can be rendered.
     *
     * <p>The font is registered under the alias {@code girl}; the {@code font-family} used
     * in web.html has to match that alias for the font to be picked up.
     *
     * @param file path of the PDF to create
     * @throws IOException       if an input cannot be read or the PDF cannot be written
     * @throws DocumentException if iText fails to build the document
     */
    public void createPdf4(String file) throws IOException, DocumentException {
        try (FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12.5f);
            // step 3
            document.open();
            // step 4
            XMLWorkerFontProvider fontProvider =
                    new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
            fontProvider.register("pdf/华庚少女字体.ttf", "girl"); // font alias, used in web.html
            CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
            parseStyledHtml(document, writer, cssAppliers);
            // step 5
            document.close();
        }
    }

    /**
     * Builds the CSS -> HTML -> PDF pipeline and feeds {@link #HTML} through it.
     *
     * @param document    the opened document to write into
     * @param writer      the writer attached to {@code document}
     * @param cssAppliers CSS appliers (carrying the font provider) for the HTML context;
     *                    may be {@code null}
     * @throws IOException if the stylesheet or the HTML cannot be read
     */
    private void parseStyledHtml(Document document, PdfWriter writer, CssAppliers cssAppliers)
            throws IOException {
        // CSS
        CSSResolver cssResolver = new StyleAttrCSSResolver();
        try (FileInputStream cssIn = new FileInputStream(CSS)) {
            CssFile cssFile = XMLWorkerHelper.getCSS(cssIn);
            cssResolver.addCss(cssFile);
        }
        // HTML
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
        htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
        // Pipelines
        PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
        HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
        CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
        // XML Worker
        XMLWorker worker = new XMLWorker(css, true);
        XMLParser parser = new XMLParser(worker);
        try (FileInputStream htmlIn = new FileInputStream(HTML)) {
            parser.parse(htmlIn, HTML_CHARSET);
        }
    }
}
第一个输出:
第二个输出:
第三个输出:
第四个输出:
大家可以看到中文的问题,注意点有两个:html文件指定 font-family;如果引入外部字体文件,别名要与font-family一致。文件:https://github.com/Mysakura/DataFiles
第四个要想成功,需要将web.html文件里的font-family修改
所以呢,如果你对字体没啥要求,那font-family就指定常用中文字体即可,宋体,雅黑什么的
这部分涉及的文件
注意!!!如果你外部字体为ttc文件,比如simsun.ttc,在引入的地方就要注意写法:文件路径后面要加上字体在集合中的索引,也就是后面有个[,1],例如 simsun.ttc,1
合并PDF文件 & 嵌入外部字体
提示:如果你运行上面的例子,你可以发现我的输入输出文件都在工程根目录的pdf文件夹里。
代码
package com.demo.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.*;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * Merges PDF documents and embeds a font into an existing PDF.
 *
 * <p>Running {@link #main(String[])} produces three series of files (A, B, C), each made
 * of two single-paragraph PDFs plus two derived files, so that the resulting file sizes
 * can be compared.
 */
public class MergeAndAddFont {

    public static final String FONT = "pdf/华庚少女字体.ttf";
    /** Number of files merged per series in this demo. */
    public static final Integer FILE_NUM = 2;
    public static final String[] FILE_A = {
            "pdf/testA0.pdf", "pdf/testA1.pdf"
    };
    public static final String[] FILE_B = {
            "pdf/testB0.pdf", "pdf/testB1.pdf"
    };
    public static final String[] FILE_C = {
            "pdf/testC0.pdf", "pdf/testC1.pdf"
    };
    public static final String[] CONTENT = {
            "琪亚娜·卡斯兰娜", "德丽莎·阿波卡利斯"
    };
    public static final String MERGED_A1 = "pdf/testA_merged1.pdf";
    public static final String MERGED_A2 = "pdf/testA_merged2.pdf";
    public static final String MERGED_B1 = "pdf/testB_merged1.pdf";
    public static final String MERGED_B2 = "pdf/testB_merged2.pdf";
    public static final String MERGED_C1 = "pdf/testC_merged1.pdf";
    public static final String MERGED_C2 = "pdf/testC_merged2.pdf";

    /**
     * Generates all sample files.
     *
     * @param args unused
     * @throws DocumentException if iText fails to build a document
     * @throws IOException       if a file cannot be read or written
     */
    public static void main(String[] args) throws DocumentException, IOException {
        File file = new File(MERGED_A1);
        file.getParentFile().mkdirs();
        MergeAndAddFont app = new MergeAndAddFont();

        // Test 1: font embedded as a subset (used glyphs only);
        // merged without, then with, smart copy.
        for (int i = 0; i < FILE_A.length; i++) {
            app.createPdf(FILE_A[i], CONTENT[i], true, true);
        }
        app.mergeFiles(FILE_A, MERGED_A1, false);
        app.mergeFiles(FILE_A, MERGED_A2, true);

        // Test 2: full font embedded; merged without, then with, smart copy.
        for (int i = 0; i < FILE_B.length; i++) {
            app.createPdf(FILE_B[i], CONTENT[i], true, false);
        }
        app.mergeFiles(FILE_B, MERGED_B1, false);
        app.mergeFiles(FILE_B, MERGED_B2, true);

        // Test 3: font not requested to be embedded; smart merge;
        // then the font file is embedded manually.
        for (int i = 0; i < FILE_C.length; i++) {
            app.createPdf(FILE_C[i], CONTENT[i], false, false);
        }
        app.mergeFiles(FILE_C, MERGED_C1, true);
        app.embedFont(MERGED_C1, FONT, MERGED_C2);
    }

    /**
     * Creates a one-paragraph PDF rendered with {@link #FONT}.
     *
     * @param filename path of the PDF to create
     * @param text     paragraph text
     * @param embedded {@code true} to embed the font in the PDF, {@code false} otherwise
     * @param subset   {@code true} to include only the glyphs that are used,
     *                 {@code false} to include the complete font
     * @throws DocumentException if iText fails to build the document
     * @throws IOException       if the font cannot be read or the PDF cannot be written
     */
    public void createPdf(String filename, String text, boolean embedded, boolean subset)
            throws DocumentException, IOException {
        try (FileOutputStream out = new FileOutputStream(filename)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter.getInstance(document, out);
            // step 3
            document.open();
            // step 4
            // The encoding affects the size of the output; for text without Chinese
            // characters BaseFont.WINANSI is considerably cheaper.
            BaseFont bf = BaseFont.createFont(FONT, BaseFont.IDENTITY_H, embedded);
            bf.setSubset(subset);
            Font font = new Font(bf, 12);
            document.add(new Paragraph(text, font));
            // step 5
            document.close();
        }
    }

    /**
     * Concatenates the given PDF files into one document.
     *
     * <p>Any number of input files is accepted; each reader is closed as soon as its
     * pages have been copied.
     *
     * @param files  paths of the PDFs to merge, in output order
     * @param result path of the merged PDF to create
     * @param smart  {@code true} to copy with {@link PdfSmartCopy},
     *               {@code false} to copy with {@link PdfCopy}
     * @throws IOException       if a file cannot be read or written
     * @throws DocumentException if iText fails to build the document
     */
    public void mergeFiles(String[] files, String result, boolean smart)
            throws IOException, DocumentException {
        try (FileOutputStream out = new FileOutputStream(result)) {
            Document document = new Document();
            PdfCopy copy = smart ? new PdfSmartCopy(document, out) : new PdfCopy(document, out);
            document.open();
            for (String source : files) {
                PdfReader reader = new PdfReader(source);
                try {
                    copy.addDocument(reader);
                    copy.freeReader(reader);
                } finally {
                    reader.close();
                }
            }
            document.close();
        }
    }

    /**
     * Embeds a font file into an existing PDF by attaching it as {@code FontFile2} to
     * every font descriptor whose font name equals the font's PostScript name.
     *
     * @param merged   path of the PDF to read
     * @param fontfile path of the font file to embed
     * @param result   path of the PDF to create
     * @throws IOException       if a file cannot be read or written
     * @throws DocumentException if iText fails to process the document
     */
    private void embedFont(String merged, String fontfile, String result)
            throws IOException, DocumentException {
        // the font file
        byte[] fontbytes;
        try (RandomAccessFile raf = new RandomAccessFile(fontfile, "r")) {
            fontbytes = new byte[(int) raf.length()];
            raf.readFully(fontbytes);
        }
        // create a new stream for the font file
        PdfStream stream = new PdfStream(fontbytes);
        stream.flateCompress();
        stream.put(PdfName.LENGTH1, new PdfNumber(fontbytes.length));

        // create a reader object
        PdfReader reader = new PdfReader(merged);
        try (FileOutputStream out = new FileOutputStream(result)) {
            int n = reader.getXrefSize();
            PdfStamper stamper = new PdfStamper(reader, out);
            PdfName fontname = new PdfName(
                    BaseFont.createFont(fontfile, BaseFont.WINANSI, BaseFont.NOT_EMBEDDED)
                            .getPostscriptFontName());
            // Written to the output at most once and shared by all matching descriptors.
            PdfIndirectReference fontFileRef = null;
            for (int i = 0; i < n; i++) {
                PdfObject object = reader.getPdfObject(i);
                if (object == null || !object.isDictionary()) {
                    continue;
                }
                PdfDictionary font = (PdfDictionary) object;
                if (PdfName.FONTDESCRIPTOR.equals(font.get(PdfName.TYPE))
                        && fontname.equals(font.get(PdfName.FONTNAME))) {
                    if (fontFileRef == null) {
                        fontFileRef = stamper.getWriter().addToBody(stream).getIndirectReference();
                    }
                    font.put(PdfName.FONTFILE2, fontFileRef);
                }
            }
            stamper.close();
        } finally {
            reader.close();
        }
    }
}
运行之后会生成12个文件。
直观一点的(看文件体积)
首先看A系列,因为它在创建文件的时候就指定包含用到的字形,所以独立文件的文件属性都是
合并文件都是
再来看B系列,因为它指定包含完整字体,所以体积很大。不同的是,合并1是非智能的,所以体积是智能的2倍。独立文件和合并文件的文件属性都是(已嵌入)
最后看C系列【这里中英文的出入比较大】,如果你是中文PDF,那么文档属性都是已嵌入并且手动嵌入的体积是其它的2倍。
如果你是英文文档,代码如下,只需要改动两处(1. 输入英文,中文不显示 2. 更改字体编码),生成的文件C系列大不一样。
package com.demo.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.*;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * Merges PDF documents and embeds a font into an existing PDF, using English content and
 * the WinAnsi encoding.
 *
 * <p>Running {@link #main(String[])} produces three series of files (A, B, C) under
 * {@code pdf/en}, each made of two single-paragraph PDFs plus two derived files, so that
 * the resulting file sizes can be compared.
 */
public class MergeAndAddFont3 {

    public static final String FONT = "pdf/华庚少女字体.ttf";
    /** Number of files merged per series in this demo. */
    public static final Integer FILE_NUM = 2;
    public static final String[] FILE_A = {
            "pdf/en/testA0.pdf", "pdf/en/testA1.pdf"
    };
    public static final String[] FILE_B = {
            "pdf/en/testB0.pdf", "pdf/en/testB1.pdf"
    };
    public static final String[] FILE_C = {
            "pdf/en/testC0.pdf", "pdf/en/testC1.pdf"
    };
    // English PDF content
    public static final String[] CONTENT = {
            "ABCD", "EFGK"
    };
    public static final String MERGED_A1 = "pdf/en/testA_merged1.pdf";
    public static final String MERGED_A2 = "pdf/en/testA_merged2.pdf";
    public static final String MERGED_B1 = "pdf/en/testB_merged1.pdf";
    public static final String MERGED_B2 = "pdf/en/testB_merged2.pdf";
    public static final String MERGED_C1 = "pdf/en/testC_merged1.pdf";
    public static final String MERGED_C2 = "pdf/en/testC_merged2.pdf";

    /**
     * Generates all sample files.
     *
     * @param args unused
     * @throws DocumentException if iText fails to build a document
     * @throws IOException       if a file cannot be read or written
     */
    public static void main(String[] args) throws DocumentException, IOException {
        File file = new File(MERGED_A1);
        file.getParentFile().mkdirs();
        MergeAndAddFont3 app = new MergeAndAddFont3();

        // Test 1: font embedded as a subset (used glyphs only);
        // merged without, then with, smart copy.
        for (int i = 0; i < FILE_A.length; i++) {
            app.createPdf(FILE_A[i], CONTENT[i], true, true);
        }
        app.mergeFiles(FILE_A, MERGED_A1, false);
        app.mergeFiles(FILE_A, MERGED_A2, true);

        // Test 2: full font embedded; merged without, then with, smart copy.
        for (int i = 0; i < FILE_B.length; i++) {
            app.createPdf(FILE_B[i], CONTENT[i], true, false);
        }
        app.mergeFiles(FILE_B, MERGED_B1, false);
        app.mergeFiles(FILE_B, MERGED_B2, true);

        // Test 3: font not embedded; smart merge; then the font file is embedded manually.
        for (int i = 0; i < FILE_C.length; i++) {
            app.createPdf(FILE_C[i], CONTENT[i], false, false);
        }
        app.mergeFiles(FILE_C, MERGED_C1, true);
        app.embedFont(MERGED_C1, FONT, MERGED_C2);
    }

    /**
     * Creates a one-paragraph PDF rendered with {@link #FONT} in WinAnsi encoding.
     *
     * @param filename path of the PDF to create
     * @param text     paragraph text
     * @param embedded {@code true} to embed the font in the PDF, {@code false} otherwise
     * @param subset   {@code true} to include only the glyphs that are used,
     *                 {@code false} to include the complete font
     * @throws DocumentException if iText fails to build the document
     * @throws IOException       if the font cannot be read or the PDF cannot be written
     */
    public void createPdf(String filename, String text, boolean embedded, boolean subset)
            throws DocumentException, IOException {
        try (FileOutputStream out = new FileOutputStream(filename)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter.getInstance(document, out);
            // step 3
            document.open();
            // step 4: English (WinAnsi) encoding.
            // The encoding affects the size of the output; for text without Chinese
            // characters BaseFont.WINANSI is considerably cheaper.
            BaseFont bf = BaseFont.createFont(FONT, BaseFont.WINANSI, embedded);
            bf.setSubset(subset);
            Font font = new Font(bf, 12);
            document.add(new Paragraph(text, font));
            // step 5
            document.close();
        }
    }

    /**
     * Concatenates the given PDF files into one document.
     *
     * <p>Any number of input files is accepted; each reader is closed as soon as its
     * pages have been copied.
     *
     * @param files  paths of the PDFs to merge, in output order
     * @param result path of the merged PDF to create
     * @param smart  {@code true} to copy with {@link PdfSmartCopy},
     *               {@code false} to copy with {@link PdfCopy}
     * @throws IOException       if a file cannot be read or written
     * @throws DocumentException if iText fails to build the document
     */
    public void mergeFiles(String[] files, String result, boolean smart)
            throws IOException, DocumentException {
        try (FileOutputStream out = new FileOutputStream(result)) {
            Document document = new Document();
            PdfCopy copy = smart ? new PdfSmartCopy(document, out) : new PdfCopy(document, out);
            document.open();
            for (String source : files) {
                PdfReader reader = new PdfReader(source);
                try {
                    copy.addDocument(reader);
                    copy.freeReader(reader);
                } finally {
                    reader.close();
                }
            }
            document.close();
        }
    }

    /**
     * Embeds a font file into an existing PDF by attaching it as {@code FontFile2} to
     * every font descriptor whose font name equals the font's PostScript name.
     *
     * @param merged   path of the PDF to read
     * @param fontfile path of the font file to embed
     * @param result   path of the PDF to create
     * @throws IOException       if a file cannot be read or written
     * @throws DocumentException if iText fails to process the document
     */
    private void embedFont(String merged, String fontfile, String result)
            throws IOException, DocumentException {
        // the font file
        byte[] fontbytes;
        try (RandomAccessFile raf = new RandomAccessFile(fontfile, "r")) {
            fontbytes = new byte[(int) raf.length()];
            raf.readFully(fontbytes);
        }
        // create a new stream for the font file
        PdfStream stream = new PdfStream(fontbytes);
        stream.flateCompress();
        stream.put(PdfName.LENGTH1, new PdfNumber(fontbytes.length));

        // create a reader object
        PdfReader reader = new PdfReader(merged);
        try (FileOutputStream out = new FileOutputStream(result)) {
            int n = reader.getXrefSize();
            PdfStamper stamper = new PdfStamper(reader, out);
            PdfName fontname = new PdfName(
                    BaseFont.createFont(fontfile, BaseFont.WINANSI, BaseFont.NOT_EMBEDDED)
                            .getPostscriptFontName());
            // Written to the output at most once and shared by all matching descriptors.
            PdfIndirectReference fontFileRef = null;
            for (int i = 0; i < n; i++) {
                PdfObject object = reader.getPdfObject(i);
                if (object == null || !object.isDictionary()) {
                    continue;
                }
                PdfDictionary font = (PdfDictionary) object;
                if (PdfName.FONTDESCRIPTOR.equals(font.get(PdfName.TYPE))
                        && fontname.equals(font.get(PdfName.FONTNAME))) {
                    if (fontFileRef == null) {
                        fontFileRef = stamper.getWriter().addToBody(stream).getIndirectReference();
                    }
                    font.put(PdfName.FONTFILE2, fontFileRef);
                }
            }
            stamper.close();
        } finally {
            reader.close();
        }
    }
}