【问题标题】:iText7 / iText7.pdfhtml - generating accessible tagged PDF from HTML results in incorrect tab order(iText7 / iText7.pdfhtml - 从 HTML 生成可访问的带标签 PDF 时,Tab 键顺序不正确)
【发布时间】:2020-01-10 14:29:34
【问题描述】:

这是一个 HTML 片段(snippet):

    <html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <style type="text/css">
        /* Base typography; the body width is fixed at 595pt. */
        body {
            font-family: Calibri,Arial,Tahoma,Helvetica,sans-serif;
            color: black;
            font-size: 12pt;
            width: 595pt;
        }

        h1 {
            color: #553c74;
            font-size: 18pt;
            font-weight: bold;
        }

            /* Variant for h1 elements carrying class "title": inline, 28pt. */
            h1.title {
                display: inline;
                font-size: 28pt;
                font-weight: bold;
                color: #553c74;
            }

        h2 {
            color: #553c74;
            font-size: 16pt;
            font-weight: bold;
        }

        h3 {
            color: #00759b;
            font-size: 12pt;
            font-weight: bold;
        }

            h3.requiredAction {
                color: black;
                font-weight: bold;
                margin-top: 21px;
            }

        /* Label/value rows: the heading and its value are both floated left. */
        .headingLeft {
            float: left;
        }

        .contentRight {
            float: left;
            margin-left: 10px;
            margin-top: 17px;
        }

        .topSection {
            margin-top: 22px;
        }

        /* Used on the empty divs in the body that follow each floated pair. */
        .clearBoth {
            clear: both;
        }

        /* Symbol character set in the webdings font, same colour as .standardmet. */
        .symbolstandardmet {
            color: #009639;
            font-family: webdings;
            font-size: 18pt;
        }

        .standardmet {
            color: #009639;
        }

        .standard {
            font-size: 16pt;
            font-weight: bold;
            margin-top: 0;
        }

        /* First of two .pageBreakBefore rules: removes the top margin. */
        .pageBreakBefore {
            margin-top: 0px;
        }

        .premisesAddress {
            font-size: 18pt;
            color: #553c74;
            line-height: 0px;
        }

        /* Second .pageBreakBefore rule: forces a page break before the element. */
        .pageBreakBefore {
            page-break-before: always;
        }

        #contentWrapperBody {
            padding-left: 8pt;
            padding-right: 45pt;
        }

        /* Later h1 rule: adds display: inline-block to the h1 declarations above. */
        h1 {
            display: inline-block;
        }

        /* Top margin of 170pt for the first page. */
        @page:first {
            margin-top: 170pt;
        }
    </style>

    <title>Report</title>

</head>
<body id="contentWrapperBody">
    <!-- Name label followed by the address span. -->
    <h1>Name:</h1>
    <span id="lblAddress" class="premisesAddress">Address Line 1, Address line 2, City, Postcode</span>

    <div class="clearBoth"></div>

    <!-- Floated label/value pairs, each followed by an empty clearBoth div. -->
    <h2 class="headingLeft">Reference:</h2>
    <div class="contentRight topSection">123456789</div>

    <div class="clearBoth"></div>

    <h2 class="headingLeft">Type:</h2>
    <div class="contentRight topSection">The type</div>

    <div class="clearBoth"></div>

    <h2 class="headingLeft">Date:</h2>
    <div class="contentRight topSection">12/04/2019</div>

    <div class="clearBoth"></div>

    <!-- Non-floated section. -->
    <h2>Context</h2>
    <div>
        <p>A paragraph of context info here.</p>
    </div>


    <!-- The "a" character below is styled with the webdings font via .symbolstandardmet. -->
    <h2>Overall outcome</h2>
    <h3>
        <span id="lblOverallStandardsMet" class="standardmet"><span class="symbolstandardmet">a</span>Standards met</span>
    </h3>


    <h3 class="headingLeft requiredAction">Required Action:</h3>
    <div class="contentRight topSection"><span id="lblRequiredAction">None</span></div>
    <div class="clearBoth"></div>
    <!-- In-document link; no element with id "tbldefinitions" appears in this snippet. -->
    <p>Follow this link to <a href="#tbldefinitions">find out what the possible outcomes mean</a></p>

</body>
</html>

代码如下:

 /// <summary>
 /// Converts HTML markup into a tagged PDF using iText 7 and pdfHTML.
 /// </summary>
 public static class DocConverter
{
    // Individual font files shipped with the site; not referenced by the code in this class.
    private const string fontWebdings = "~/content/fonts/webdings.ttf";
    private const string fontCalibri = "~/content/fonts/calibrib.ttf";

    // Virtual directory whose fonts are registered with iText before conversion.
    private const string fontDirectory = "~/content/fonts/";

    /// <summary>
    /// Bit flags describing optional page decorations for the generated document.
    /// </summary>
    [Flags]
    public enum DocOptions
    {
        None = 0,
        DisplayTitle = 1,
        AddHeaderPageOne = 2,
        AddHeaderAllPages = 4,
        AddLineBottomEachPage = 8
    }

    /// <summary>
    /// Renders <paramref name="html"/> into a tagged PDF 2.0 document and returns its bytes.
    /// </summary>
    /// <param name="html">The HTML markup to convert.</param>
    /// <param name="title">
    /// Document title written to the PDF document info. The viewer preferences ask the
    /// viewer to display the document title, so it has to be present in the metadata.
    /// Ignored when null or empty.
    /// </param>
    /// <param name="docOptions">
    /// Integer value of a <see cref="DocOptions"/> combination, as a string. Null, empty or
    /// non-numeric input falls back to <see cref="DocOptions.None"/>.
    /// </param>
    /// <returns>The bytes of the generated PDF.</returns>
    public static byte[] ConvertToPdfWithTags(string html, string title, string docOptions)
    {
        // Parsed for the (currently disabled) header/footer event handler below.
        DocOptions documentOptions = DocOptions.None;
        if (!string.IsNullOrEmpty(docOptions))
        {
            int options;
            // The value is machine-readable, so parse it independently of the current culture.
            if (int.TryParse(docOptions, System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture, out options))
            {
                documentOptions = (DocOptions)options;
            }
        }

        // Resolve the font directory once and reuse it for both registrations.
        string fontPath = System.Web.Hosting.HostingEnvironment.MapPath(fontDirectory);

        PdfFontFactory.RegisterDirectory(fontPath);

        FontProvider fp = new FontProvider();
        fp.AddDirectory(fontPath);

        ConverterProperties props = new ConverterProperties();
        props.SetFontProvider(fp);
        props.SetTagWorkerFactory(new DefaultTagWorkerFactory());

        using (var workStream = new MemoryStream())
        {
            WriterProperties writerProperties = new WriterProperties()
                .AddUAXmpMetadata()
                .SetPdfVersion(PdfVersion.PDF_2_0)
                .SetFullCompressionMode(true);

            using (var pdfWriter = new PdfWriter(workStream, writerProperties))
            {
                PdfDocument pdfDoc = new PdfDocument(pdfWriter);
                pdfDoc.GetCatalog().SetLang(new PdfString("en-GB"));
                pdfDoc.GetCatalog().SetViewerPreferences(new PdfViewerPreferences().SetDisplayDocTitle(true));

                //This event handler used for adding background images.  Also where I've tried setting the tab order on pdfPage
                //if (documentOptions > 0)
                //    pdfDoc.AddEventHandler(PdfDocumentEvent.END_PAGE, new PublicReportHeaderFooter(documentOptions, title));

                //Set meta tags
                var pdfMetaData = pdfDoc.GetDocumentInfo();
                pdfMetaData.AddCreationDate();
                pdfMetaData.SetCreator("iText Software");
                if (!string.IsNullOrEmpty(title))
                {
                    // DisplayDocTitle is enabled above, so give the viewer a title to show.
                    pdfMetaData.SetTitle(title);
                }

                //Set the document to be tagged
                pdfDoc.SetTagged();

                using (var document = HtmlConverter.ConvertToDocument(html, pdfDoc, props))
                {
                    //Can do more with document here if necessary
                }

                // The document has been disposed at this point, so the buffer holds the PDF output.
                return workStream.ToArray();
            }
        }
    }
}

如果我在 Acrobat Pro 中打开生成的 PDF,生成可访问性报告并检查阅读顺序,它会从第一行的“Name”直接跳到更下方的“Context”,而不是按从左上角开始的正确顺序:

这似乎是由某些元素上的“float:left”样式造成的,但我不知道如何绕过它。我尝试过调用 page.Put(PdfName.Tag, PdfName.S) 和 page.SetTabOrder(PdfName.S),并传入不同的参数(例如 C 或 R),但这些似乎都没有效果。我使用的是 C# 版 iText7 7.1.9 和 iText7.pdfhtml 2.1.6。任何帮助都不胜感激。

【问题讨论】:

    标签: pdf itext accessibility tagging


    【解决方案1】:

    我看的文档越多,就越认为这只是一个 bug。默认情况下,可访问 PDF 中的阅读顺序应与 HTML 文档的结构顺序一致,但 float:left 样式似乎会打乱这一顺序。我没有找到在 GitHub 上为该项目提交 bug 的途径,不过我为遇到同样问题的人找到了一个变通方法:只需根据需要给所有其他元素也加上“float:left”,以及

    <div style="clear: both;"></div>
    

    在实际上不需要浮动的元素之间。这样,可访问标签的顺序就正确了。但是,这又引出了另一个问题:同时设置了“page-break-before:always”的“float:left”元素不再从新的一页开始。我不得不在这些元素之前再添加一个空的 div 元素,并在该 div 上设置 page-break-before 样式来解决这个问题。希望 iText 的人能看到这一点,并修复这些问题(如果它们确实是 bug);如果只是我弄错了,也希望他们能给出更好的答案。

    【讨论】:

    • 嗨!我是“iText 人”,所以我会跟进
    猜你喜欢
    • 1970-01-01
    • 2021-10-23
    • 2019-05-12
    • 2021-08-18
    • 2020-01-29
    • 1970-01-01
    • 1970-01-01
    • 2022-11-25
    • 1970-01-01
    相关资源
    最近更新 更多