按原样回答问题
我使用带有您的实现代码和坐标输出的渲染侦听器,解析了我手头的任意一个文件(来自 this question 的示例 PDF)的页面内容:
/// <summary>
/// Opens the sample PDF and passes the content of every page to an
/// <see cref="ImageCoordinatesRenderListener"/>, which reports each image it
/// encounters on the console.
/// </summary>
public void ExtractImageCoordinatesFromArchmodels()
{
    using (PdfReader pdfReader = new PdfReader(@"EVERMOTION ARCHMODELS VOL.78.pdf"))
    {
        var contentParser = new PdfReaderContentParser(pdfReader);
        var imageListener = new ImageCoordinatesRenderListener();

        // Walk the pages from 1 through NumberOfPages inclusive, reusing the
        // same listener instance for all of them.
        int pageNumber = 1;
        while (pageNumber <= pdfReader.NumberOfPages)
        {
            contentParser.ProcessContent(pageNumber, imageListener);
            pageNumber++;
        }
    }
}
/// <summary>
/// Render listener that ignores all text events and, for each image event,
/// writes the I31 and I32 entries of the image's transformation matrix
/// (treated here as the x and y coordinates) to the console.
/// </summary>
internal class ImageCoordinatesRenderListener : IRenderListener
{
    /// <summary>Text blocks are not of interest; intentionally does nothing.</summary>
    public void BeginTextBlock()
    {
    }

    /// <summary>Text blocks are not of interest; intentionally does nothing.</summary>
    public void EndTextBlock()
    {
    }

    /// <summary>Text is not of interest; intentionally does nothing.</summary>
    /// <param name="renderInfo">Information about the rendered text (unused).</param>
    public void RenderText(TextRenderInfo renderInfo)
    {
    }

    /// <summary>
    /// Prints the position of the image as taken from its transformation matrix.
    /// </summary>
    /// <param name="renderInfo">Information about the rendered image.</param>
    public void RenderImage(ImageRenderInfo renderInfo)
    {
        Matrix imageCtm = renderInfo.GetImageCTM();

        float x = imageCtm[Matrix.I31];
        float y = imageCtm[Matrix.I32];

        // Formatting uses the current culture, so the decimal separator in the
        // output depends on the locale the program runs under.
        Console.WriteLine("Image at {0}, {1}.", x, y);
    }
}
输出是
Image at 6,00029, 52,15466.
Image at 19,84251, 363,4501.
Image at 294,091, 361,5604.
Image at 300,0336, 81,089.
Image at 15,59055, 72,94052.
Image at 5,322647, 340,7029.
Image at 288,5311, 386,0621.
Image at 291,7613, 69,35573.
Image at 28,50845, 53,13286.
Image at 41,2021, 380,3172.
Image at 290,8796, 368,9564.
Image at 295,8532, 50,71478.
Image at 19,13385, 49,21146.
Image at 25,5118, 385,9343.
Image at 282,4584, 379,8427.
Image at 293,5927, 65,19702.
Image at 4,535416, 60,35075.
Image at 3,364258, 374,4344.
Image at 288,0557, 373,5591.
Image at 299,9102, 59,13971.
Image at 11,33858, 66,10181.
Image at 11,66959, 380,3134.
Image at 297,1836, 378,4615.
Image at 299,9689, 66,74164.
Image at 10,62991, 53,18137.
Image at 5,180252, 377,7065.
Image at 279,9567, 377,9544.
Image at 289,9219, 69,23323.
Image at 6,400314, 68,17795.
Image at 11,33858, 361,2458.
Image at 297,1935, 373,4553.
Image at 299,8854, 68,30142.
Image at 7,086609, 68,13367.
Image at 3,82518, 352,3451.
Image at 287,9208, 373,4846.
Image at 294,6425, 68,3132.
Image at 41,2271, 68,15968.
Image at 5,709488, 356,2161.
Image at 304,9857, 373,593.
Image at 282,4557, 48,97745.
Image at 5,669281, 53,65367.
Image at 27,34265, 382,0123.
Image at 297,1409, 373,494.
Image at 300,0584, 50,23624.
Image at 7,245102, 68,23528.
Image at 8,503922, 380,1963.
Image at 290,1901, 355,9355.
Image at 287,2598, 60,53516.
Image at 5,102356, 68,01541.
Image at 17,00786, 378,9057.
Image at 296,8928, 373,5667.
Image at 299,9655, 68,04535.
(我的语言环境使用逗号作为小数分隔符。)
所以我无法重现您的主张
X/Y 坐标始终为 0
因此,您观察到的结果要么源于您其余代码中的某些问题,要么源于您所有测试 PDF 的特殊性:可能它们确实都把图像定位在 0,0。
来自评论的澄清
同时,OP 已经在评论中澄清,感兴趣的图像位于注释(annotation)的外观流中,而不是页面(page)的内容流中。
其中的坐标位于相应的注释外观流坐标系中,该坐标系由外观的边界框(其BBox条目)隐含。这个边界框然后可选地由外观矩阵(它的 Matrix 条目)转换。然后将生成的四边区域缩放并移动到注释的矩形中(其 Rect 条目)。并且根据页面旋转和注释属性,此矩形可能会相对于页面坐标旋转 90° 的倍数。
因此,将这些坐标转换为页面的默认用户空间坐标系的通用解决方案需要一些数学运算。
不过,注释外观中的位图通常会(几乎)完全填充边界框。通常没有外观矩阵。并且注释经常随页面旋转。
因此,一个通常很好的近似方法是简单地使用注释矩形。这也是 OP 现在使用的。