【问题标题】:compress pdf with large images via java(通过 Java 压缩带有大图像的 pdf)
【发布时间】:2014-01-04 01:06:33
【问题描述】:

正在寻找一种方法来压缩 pdf 中的图像并输出 pdf 以供存档。我不能在创建之前压缩图像,因为它会影响打印质量。

每个 pdf 的大小约为 8MB,其中大部分由 2 张图片组成。图像为 png 格式,并在生成过程中被带入 pdf(使用第三方生成器)

有没有办法在不使用第三方工具的情况下,用 Java 压缩这些文件?我已经尝试过 pdfbox、itext 和第三方 exe(neevia),第三方工具是迄今为止唯一有效果的(可压缩到大约 0.5 MB),但我不想把控制权交给一个 exe。示例代码如下。

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDStream;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Proof-of-concept that runs the same source PDF through three different
 * compressors (PDFBox, iText and the external Neevia command-line tool) and
 * writes one output file per attempt so the resulting sizes can be compared.
 */
public class compressPDF {

    /** PDF that every attempt reads. */
    private static final String SOURCE_PDF =
            "C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF";

    /** Highest deflate level; the valid range is 0 (none) to 9 (best). */
    private static final int BEST_COMPRESSION_LEVEL = 9;

    /**
     * Runs the three compression attempts one after another.
     *
     * @param args ignored
     * @throws IOException          if a PDF cannot be read or written
     * @throws DocumentException    if iText cannot build the output document
     * @throws COSVisitorException  if PDFBox cannot serialise the document
     */
    public static void main (String[] args) throws IOException, DocumentException, COSVisitorException {
        compressWithPdfBox();
        compressWithIText();
        compressWithNeevia();
    }

    /** Loads the source with PDFBox and saves it again. */
    private static void compressWithPdfBox() throws IOException, COSVisitorException {
        PDDocument doc = PDDocument.load(SOURCE_PDF);
        try {
            // NOTE(review): this stream is freshly created and never attached to a
            // page or resource of the document, so compressing it does not appear
            // to change anything in the saved file - confirm before relying on it.
            PDStream stream = new PDStream(doc);
            stream.addCompression();

            doc.save("C:/_dev_env_/TEMP/compressPDF/compressed_pdfBox.pdf");
        } finally {
            // Close even when save() fails so the document is not leaked.
            doc.close();
        }
    }

    /** Rewrites the source with iText using full (object-stream) compression. */
    private static void compressWithIText() throws IOException, DocumentException {
        PdfReader reader = new PdfReader(SOURCE_PDF);
        try {
            FileOutputStream out = new FileOutputStream("C:/_dev_env_/TEMP/compressPDF/compressed_Itext.pdf");
            try {
                PdfStamper stamper = new PdfStamper(reader, out, PdfWriter.VERSION_1_5);
                stamper.setFullCompression();
                // The original passed 50, which is outside the 0-9 range of
                // deflate levels; request the best level explicitly instead.
                stamper.getWriter().setCompressionLevel(BEST_COMPRESSION_LEVEL);
                int pageCount = reader.getNumberOfPages();
                for (int page = 1; page <= pageCount; page++) {
                    // Re-setting the content makes iText re-encode the page stream.
                    reader.setPageContent(page, reader.getPageContent(page));
                }
                stamper.close();
            } finally {
                // Harmless if the stamper already closed it; required if it never got that far.
                out.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Runs the external Neevia compressor and echoes its console output.
     * Failures are reported on stdout rather than thrown, as in the original
     * best-effort code, but success is only announced for a zero exit code.
     */
    private static void compressWithNeevia() {
        String[] command = {
            "C:/Program Files (x86)/neeviaPDF.com/PDFcompress/cmdLine/CLcompr.exe",
            SOURCE_PDF,
            "C:/_dev_env_/TEMP/compressPDF/compressed_Neevia.pdf"
        };
        try {
            ProcessBuilder builder = new ProcessBuilder(command);
            // Merge stderr into stdout so an unread error pipe cannot block the child.
            builder.redirectErrorStream(true);
            Process process = builder.start();

            // Report the command that was actually run (the original printed main's args).
            System.out.printf("Output of running %s is:", Arrays.toString(command));

            BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                br.close();
            }

            int exitCode = process.waitFor();
            if (exitCode == 0) {
                System.out.println("Created!!");
            } else {
                System.out.println("CLcompr.exe exited with code " + exitCode);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever is running this thread.
            Thread.currentThread().interrupt();
            System.out.println(e);
        } catch (IOException e) {
            System.out.println(e);
        }
    }

}

【问题讨论】:

  • 您将图像保存为什么?您是否考虑过像 png 这样的无损格式?
  • 图像以 png 格式存储,并使用名为 doc1(第 3 方)的生成器带入文档。感谢您的快速回复:)
  • 如果 pdf 的大部分体积来自这些图像,而它们又已经被压缩过,那么您可能已经没有多少余地了。我在此期间做了一些研究,pdf 会直接存储压缩后的图像:en.wikipedia.org/wiki/Pdf#Raster_images,因此您可能需要调整 pdf 的组装方式。一旦 pdf 已经组装好,除非删除并重新添加图像,否则恐怕能做的事情不多。
  • 感谢 Taylor,我使用 tinyPNG 压缩了 pdf。我认为这将减少 50% 左右。这可能还不够,所以我可能需要使用 3rd 方工具。如果有人知道如何提取和重新添加图像,我也会有兴趣尝试一下......
  • 谢谢布鲁诺。这让我走上了正确的道路。通过对该代码进行一些小的修改,我得到了我所需要的。我能够将 7546KB 降低到 408KB。结果!我现在将发布修改后的代码:)

标签: java image pdf itext pdfbox


【解决方案1】:

我使用下面的代码来证明概念...工作很愉快 :) 感谢 Bruno 让我走上了正确的道路 :)

package compressPDF;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import javax.imageio.ImageIO;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PRStream;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.parser.PdfImageObject;

/**
 * Shrinks a PDF by re-sampling every image XObject at a fraction of its pixel
 * dimensions and storing the result as a JPEG (DCTDecode) RGB image.
 */
public class ResizeImage {

    /** The default multiplication factor applied to each image's width and height. */
    public static float FACTOR = 0.5f;

    /** Dictionary entry written onto every image stream this class replaces. */
    private static final PdfName MARKER_KEY = new PdfName("ITXT_SpecialId");
    private static final PdfName MARKER_VALUE = new PdfName("123456789");

    /**
     * Manipulates a PDF file src with the file dest as result, scaling every
     * image by {@link #FACTOR}.
     *
     * @param src the original PDF
     * @param dest the resulting PDF
     * @throws IOException if a file cannot be read or written, or an image cannot be re-encoded
     * @throws DocumentException if iText cannot build the output document
     */
    public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
        manipulatePdf(src, dest, FACTOR);
    }

    /**
     * Manipulates a PDF file src with the file dest as result, scaling every
     * image by the given factor.
     *
     * @param src the original PDF
     * @param dest the resulting PDF
     * @param factor multiplier for image width and height; must be greater than zero
     * @throws IllegalArgumentException if factor is not a positive number
     * @throws IOException if a file cannot be read or written, or an image cannot be re-encoded
     * @throws DocumentException if iText cannot build the output document
     */
    public void manipulatePdf(String src, String dest, float factor) throws IOException, DocumentException {
        if (!(factor > 0f)) { // also rejects NaN
            throw new IllegalArgumentException("factor must be positive: " + factor);
        }
        PdfReader reader = new PdfReader(src);
        try {
            // Walk the cross-reference table and rewrite every image stream in place.
            int n = reader.getXrefSize();
            for (int i = 0; i < n; i++) {
                PdfObject object = reader.getPdfObject(i);
                if (object == null || !object.isStream()) {
                    continue;
                }
                PRStream stream = (PRStream) object;
                if (PdfName.IMAGE.equals(stream.get(PdfName.SUBTYPE))) {
                    shrinkImage(stream, factor);
                }
            }
            // Save altered PDF
            FileOutputStream out = new FileOutputStream(dest);
            try {
                PdfStamper stamper = new PdfStamper(reader, out);
                stamper.close();
            } finally {
                // Harmless if the stamper already closed it; required if it never got that far.
                out.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Replaces the content of one image stream with a down-scaled JPEG.
     * The stream is left untouched when the image cannot be decoded.
     */
    private static void shrinkImage(PRStream stream, float factor) throws IOException {
        PdfImageObject image = new PdfImageObject(stream);
        BufferedImage original = image.getBufferedImage();
        if (original == null) {
            return;
        }
        // Clamp to one pixel: a truncated size of 0 makes BufferedImage throw.
        int width = Math.max(1, (int) (original.getWidth() * factor));
        int height = Math.max(1, (int) (original.getHeight() * factor));
        BufferedImage scaled = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = scaled.createGraphics();
        try {
            g.drawRenderedImage(original, AffineTransform.getScaleInstance(factor, factor));
        } finally {
            g.dispose();
        }
        ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
        // Encode before touching the stream so a failure leaves the original image intact.
        if (!ImageIO.write(scaled, "JPG", imgBytes)) {
            throw new IOException("No JPEG writer available to re-encode image");
        }
        // NOTE(review): clear() wipes the whole stream dictionary, presumably
        // including any /SMask or /Mask entry, so transparency is not carried over.
        stream.clear();
        // The bytes are already JPEG-compressed, so they are stored without extra deflate.
        stream.setData(imgBytes.toByteArray(), false, PRStream.BEST_COMPRESSION);
        stream.put(PdfName.TYPE, PdfName.XOBJECT);
        stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
        stream.put(MARKER_KEY, MARKER_VALUE);
        stream.put(PdfName.FILTER, PdfName.DCTDECODE);
        stream.put(PdfName.WIDTH, new PdfNumber(width));
        stream.put(PdfName.HEIGHT, new PdfNumber(height));
        stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
        stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
    }

    /**
     * Main method.
     *
     * @param    args    no arguments needed
     * @throws DocumentException if iText cannot build the output document
     * @throws IOException if a file cannot be read or written
     */
    public static void main(String[] args) throws IOException, DocumentException {
        new ResizeImage().manipulatePdf("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF", "C:/_dev_env_/TEMP/compressPDF/compressTest.pdf");
    }

}

【讨论】:

  • (注:您实际上可以通过单击您自己答案的投票数下方的复选标记来接受您自己的答案。)
  • 我找不到这两个类 BufferedImage ,Graphics2D
【解决方案2】:

只是为了更新@Daniel 的出色答案,我更新了他的代码以与 iText7 兼容。

package opencde.builder.compresspdf;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;

import javax.imageio.ImageIO;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
import com.itextpdf.layout.element.Image;

public class ResizeImageV7 {
    
    // Logging
    private static Logger logger = LoggerFactory.getLogger(ResizeImageV7.class);

    /**
     * Manipulates a PDF file src with the file dest as result
     * 
     * @param src  the original PDF
     * @param dest the resulting PDF
     * @param resizeFactor factor to multiplicate to resize image
     * @throws IOException
     */
    public void manipulatePdf(String src, String dest,Float resizeFactor) throws IOException {
        
        //Get source pdf
        PdfDocument pdfDoc = new PdfDocument(new PdfReader(src), new PdfWriter(dest));

        // Iterate over all pages to get all images.
        for (int i = 1; i <= pdfDoc.getNumberOfPages(); i++)
        {
            PdfPage page = pdfDoc.getPage(i);
            PdfDictionary pageDict = page.getPdfObject();
            PdfDictionary resources = pageDict.getAsDictionary(PdfName.Resources);
            // Get images
            PdfDictionary xObjects = resources.getAsDictionary(PdfName.XObject);
            for (Iterator<PdfName> iter = xObjects.keySet().iterator() ; iter.hasNext(); ) {
                // Get image
                PdfName imgRef = iter.next();
                PdfStream stream = xObjects.getAsStream(imgRef);
                PdfImageXObject image = new PdfImageXObject(stream);
                BufferedImage bi = image.getBufferedImage();
                if (bi == null)
                    continue;
                
                // Create new image
                int width = (int) (bi.getWidth() * resizeFactor);
                int height = (int) (bi.getHeight() * resizeFactor);
                BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
                AffineTransform at = AffineTransform.getScaleInstance(resizeFactor, resizeFactor);
                Graphics2D g = img.createGraphics();
                g.drawRenderedImage(bi, at);
                ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
                
                // Write new image
                ImageIO.write(img, "JPG", imgBytes);
                Image imgNew =new Image(ImageDataFactory.create(imgBytes.toByteArray()));
                
                // Replace the original image with the resized image
                xObjects.put(imgRef, imgNew.getXObject().getPdfObject());
            }          
        }
        
        pdfDoc.close();
    }

    /**
     * Main method.
     *
     * @param src  the original PDF
     * @param dest the resulting PDF
     * @param resizeFactor factor to multiplicate to resize image
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        //Get input parametres
        if (args.length<3 ) {
            System.out.println("Source PDF, Destination PDF and Resize Factor must be provided as parametres");
        } else {
            String sourcePDF=args[0];
            String destPDF=args[1];
            Float resizeFactor=Float.valueOf(new String(args[2]));
            logger.info("Inovking Resize with args, source:" + sourcePDF
                    + " destination:" + destPDF 
                    + " factor:" + resizeFactor);
            //Call method to resize images
            new ResizeImageV7().manipulatePdf(sourcePDF,destPDF,resizeFactor);
            logger.info("PDF resized");
        }
    }

}

【讨论】:

    猜你喜欢
    • 2019-08-23
    • 1970-01-01
    • 2023-01-16
    • 1970-01-01
    • 2020-04-27
    • 2011-07-12
    • 2017-05-16
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多