【发布时间】:2019-02-18 09:27:19
【问题描述】:
我需要在 PDF 文件中找到(光栅)图像并调整它们的大小(即改变它们的分辨率)。
我的代码基于示例 PrintImageLocations。对于已经可行的部分,我扩展了 PDFStreamEngine 并处理 “Do” 运算符:
我使用运算符的第一个操作数和资源来获取原始的 PDImageXObject。
然后我从中创建一个 BufferedImage,并对其进行处理以改变像素数。
然后我通过 LosslessFactory 从该 BufferedImage 创建一个新的 PDImageXObject。
最后我使用原始对象的名称,把新对象放入资源中以替换原始对象。
我尝试对内联图像做类似的处理,并且已经做到了得到一个 BufferedImage 这一步,但我不知道如何用它替换原来的内联图像。
也可以用 XObject 替换内联图像,但同样,我不知道如何完成这种替换……
下面是我的代码;有趣的部分是函数“processOperator”。
// WIP!
// find raster images inside a pdf
// if their resolution is more than 900dpi
// then resize them
// reducing the resolution to 200dpi
// NB bug: fails on pdf files with more than one page
// ...DEBUG ScratchFileBuffer:516 - ScratchFileBuffer not closed!
// also fails on pdf with included pdf
// (e.g. latex \includegraphics{x.pdf})
// # to compile:
// apt install libpdfbox2-java
// export CLASSPATH=.:/usr/share/java/pdfbox2.jar:/usr/share/java/commons-logging.jar
// javac ReplaceBigImages.java
// # to run:
// java ReplaceBigImages x.pdf
// see
// https://pdfbox.apache.org/2.0/examples.html
// https://pdfbox.apache.org/docs/2.0.11/javadocs/
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.awt.image.BufferedImage;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.Color;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class ReplaceBigImages extends PDFStreamEngine
{
private static Log log = LogFactory.getLog(ReplaceBigImages.class);
public ReplaceBigImages() throws IOException
{
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
}
public static PDDocument document;
public static void main( String[] args ) throws IOException
{
if( args.length != 1 )
{
usage();
}
else
{
try
{
document = PDDocument.load(new File(args[0]));
ReplaceBigImages printer = new ReplaceBigImages();
int pageNum = 0;
for( PDPage page : document.getPages() )
{
pageNum++;
log.info( "Processing page: " + pageNum );
printer.processPage(page);
}
}
finally {
if( document != null )
{
document.save(args[0].replace(".pdf", "_test.pdf"));
document.close();
}
}
}
}
protected void processOperator( Operator operator, List<COSBase> operands) throws IOException
{
String operation = operator.getName();
// log.debug(String.format("Operator %s", operation));
if( "Do".equals(operation) ) {
log.debug("### Found Do operator");
COSName objectName = (COSName) operands.get( 0 );
PDXObject xobject = getResources().getXObject( objectName );
// log.debug(String.format("%s isa %s", objectName, xobject.getClass().getSimpleName()));
if( xobject instanceof PDImageXObject)
{
log.debug(String.format("Looking at %s (%s)", objectName.getName(), xobject));
PDImageXObject image = (PDImageXObject)xobject;
BufferedImage scaledImage = changeImageResolution(image);
if (scaledImage != null) {
log.debug(String.format("Replacing with %s", scaledImage));
PDImageXObject replacement_img = LosslessFactory.createFromImage(document, scaledImage);
PDPage currentPage = getCurrentPage();
PDResources resources = currentPage.getResources();
resources.put(objectName, replacement_img);
}
}else if(xobject instanceof PDFormXObject)
{
PDFormXObject form = (PDFormXObject)xobject;
showForm(form);
}
} else if ("BI".equals(operation)) {
PDPage currentPage = getCurrentPage();
log.debug("### Found BI operator");
PDResources resources = currentPage.getResources();
PDInlineImage image = new PDInlineImage(operator.getImageParameters(),
operator.getImageData(),
resources);
BufferedImage scaledImage = changeImageResolution(image);
if (scaledImage != null) {
log.debug(String.format("Replacing with %s", scaledImage));
PDImageXObject replacement_img = LosslessFactory.createFromImage(document, scaledImage);
// ARGH!!! How do I replace the inline image???
resources.add(replacement_img, "pippo");
// operator.setImageParameters(scaledImage???)
// operator.setImageData(scaledImage???)
}
} else {
super.processOperator( operator, operands);
}
}
protected BufferedImage changeImageResolution( PDImage image)
throws IOException
{
int imageWidth = image.getWidth();
int imageHeight = image.getHeight();
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
float imageXScale = Math.abs(ctmNew.getScalingFactorX());
float imageYScale = Math.abs(ctmNew.getScalingFactorY());
float resolution = imageWidth / ( imageXScale / 72 );
String stencil = "";
if (image.isStencil()) {
stencil = " (stencil)";
}
// TODO: take into consideration the size at which this file is included by TeX
log.debug("size: ("+imageWidth+","+imageHeight+")@("+imageXScale+","+imageYScale+") resolution = "+resolution+stencil);
// if ( resolution > 899f ) {
if ( resolution > 200f ) {
// what do the following two lines mean???
BufferedImage bImage = new BufferedImage(imageWidth,
imageHeight,
BufferedImage.TYPE_INT_ARGB);
if (image.isStencil()) {
log.warn("Is stencil; painting black.");
bImage = image.getStencilImage(Color.black);
} else {
bImage = image.getImage();
}
int desiredResolution = 200;
float xFactor = (imageXScale / 72) * desiredResolution / imageWidth;
float yFactor = (imageYScale / 72) * desiredResolution / imageHeight;
log.info("Scaling x to "+xFactor);
int dWidth = (int) (xFactor * imageWidth);
int dHeight = (int) (yFactor * imageHeight);
// the image type is from
// https://docs.oracle.com/javase/6/docs/api/constant-values.html#java.awt.image.
log.debug(String.format("Destination: %d x %d [%s]",
dWidth,
dHeight,
bImage.getType()));
BufferedImage scaledImage = new BufferedImage(dWidth,
dHeight,
bImage.getType());
Graphics2D graphics2D = scaledImage.createGraphics();
graphics2D.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
RenderingHints.VALUE_INTERPOLATION_BILINEAR);
graphics2D.setRenderingHint(RenderingHints.KEY_RENDERING,
RenderingHints.VALUE_RENDER_QUALITY);
graphics2D.setRenderingHint(RenderingHints.KEY_ANTIALIASING,
RenderingHints.VALUE_ANTIALIAS_ON);
graphics2D.drawImage(bImage, 0, 0, dWidth, dHeight, null);
graphics2D.dispose();
// see https://pdfbox.apache.org/docs/2.0.11/javadocs/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObject.html#createFromByteArray-org.apache.pdfbox.pdmodel.PDDocument-byte:A-java.lang.String-
return scaledImage;
}
return null;
}
private static void usage()
{
System.err.println( "Usage: java " + ReplaceBigImages.class.getName() + " <input-pdf>" );
}
}
【问题讨论】:
-
你可以这样做:用新图像创建一个 PDImageXObject,然后获取它的原始(未解码)流,并把这些字节传给 setImageData()。还要确保使用相同的过滤器,但要用缩写名称(例如用 FLATE_DECODE_ABBREVIATION 而不是 FLATE_DECODE)。或者使用解码后的流,并从目标字典中删除所有过滤器(文件会更大,但可以用来测试这种做法是否可行)。
-
PDInlineImageTest.java 中的代码可能有帮助,可以在源代码下载包中搜索这个文件。它会完全从头创建一个未压缩的图像。