pdf-extract-001

Tests that cx:pdf-extract can extract a single page (page 2) from a two-page PDF document.

Test is expected to pass.

The pipeline

<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:cx="http://xmlcalabash.com/ns/extensions"
                xmlns:t="http://xproc.org/ns/testsuite/3.0"
                name="main"
                version="3.0">
   <p:import href="https://xmlcalabash.com/ext/library/pdf-steps.xpl"/>

   <p:output port="result"/>

   <!-- Baseline: describe the unmodified source PDF. -->
   <cx:pdf-info name="longer">
      <p:with-input>
         <p:document href="../documents/longer.pdf"/>
      </p:with-input>
   </cx:pdf-info>

   <!-- Step under test: request page 2 of the same source PDF. -->
   <cx:pdf-extract pages="2">
      <p:with-input>
         <p:document href="../documents/longer.pdf"/>
      </p:with-input>
   </cx:pdf-extract>

   <!-- Describe the extracted PDF (read implicitly from the preceding step),
        asking for page text so the checks can tell which page was kept. -->
   <cx:pdf-info name="extracted" page-text="true"/>

   <!-- Collect both reports, baseline first, under one wrapper element. -->
   <p:wrap-sequence wrapper="wrapper">
      <p:with-input>
         <p:pipe step="longer"/>
         <p:pipe step="extracted"/>
      </p:with-input>
   </p:wrap-sequence>
</p:declare-step>

Result

<wrapper xmlns:t="http://xproc.org/ns/testsuite/3.0">
   <cx:pdf-info xmlns:cx="http://xmlcalabash.com/ns/extensions"
                href="file:/woodpecker/src/codeberg.org/xmlcalabash/xmlcalabash3/tests/extra-suite/test-suite/documents/longer.pdf">
      <cx:version>1.3</cx:version>
      <cx:form>none</cx:form>
      <cx:pages>2</cx:pages>
      <cx:encrypted>false</cx:encrypted>
      <cx:page-size width="595" height="842" units="px">A4</cx:page-size>
      <cx:title>Longer Example PDF Document</cx:title>
      <cx:creator>Bear</cx:creator>
      <cx:producer>macOS Version 26.5 (Build 25F71) Quartz PDFContext</cx:producer>
      <cx:author>Norm Tovey-Walsh</cx:author>
      <cx:creation-date>2026-06-10T14:32:06Z</cx:creation-date>
      <cx:modification-date>2026-06-10T14:32:06Z</cx:modification-date>
      <cx:subject>Sample documents</cx:subject>
      <cx:keyword>xmlcalabash</cx:keyword>
      <cx:keyword>testsuite</cx:keyword>
      <cx:file-size>70017</cx:file-size>
   </cx:pdf-info>
   <cx:pdf-info xmlns:cx="http://xmlcalabash.com/ns/extensions">
      <cx:version>1.6</cx:version>
      <cx:form>none</cx:form>
      <cx:pages>1</cx:pages>
      <cx:encrypted>false</cx:encrypted>
      <cx:page-size width="595" height="842" units="px">A4</cx:page-size>
      <cx:title>Longer Example PDF Document</cx:title>
      <cx:creator>Bear</cx:creator>
      <cx:producer>macOS Version 26.5 (Build 25F71) Quartz PDFContext</cx:producer>
      <cx:author>Norm Tovey-Walsh</cx:author>
      <cx:creation-date>2026-06-10T14:32:06Z</cx:creation-date>
      <cx:modification-date>2026-06-10T14:32:06Z</cx:modification-date>
      <cx:subject>Sample documents</cx:subject>
      <cx:keyword>xmlcalabash</cx:keyword>
      <cx:keyword>testsuite</cx:keyword>
      <cx:file-size>14808</cx:file-size>
      <cx:page-details>
         <cx:page page-number="1">
            <cx:text>aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum 
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui 
officia deserunt mollit anim id est laborum.
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore 
et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut 
aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum 
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui 
officia deserunt mollit anim id est laborum.
There. That gets us onto a second page, which was all that was really needed.
</cx:text>
         </cx:page>
      </cx:page-details>
   </cx:pdf-info>
</wrapper>

Schematron checks

<!-- Schematron checks for the wrapped output: one wrapper element holding
     two cx:pdf-info reports (source document first, extracted document second). -->
<s:schema xmlns:s="http://purl.oclc.org/dsdl/schematron"
          xmlns:t="http://xproc.org/ns/testsuite/3.0" queryBinding="xslt2">
   <s:ns prefix="cx"
         uri="http://xmlcalabash.com/ns/extensions"/>
   <!-- The result must be rooted at the element created by p:wrap-sequence. -->
   <s:pattern>
      <s:rule context="/">
         <s:assert test="wrapper">Wrong document element</s:assert>
      </s:rule>
   </s:pattern>
   <!-- Exactly two reports are expected: one per cx:pdf-info step. -->
   <s:pattern>
      <s:rule context="/wrapper">
         <s:assert test="count(cx:pdf-info) = 2">Wrong number of PDF outputs</s:assert>
      </s:rule>
   </s:pattern>
   <!-- First report: the source document has two pages. -->
   <s:pattern>
      <s:rule context="/wrapper/cx:pdf-info[1]">
         <s:assert test="cx:pages = 2">Wrong number of pages in source document</s:assert>
      </s:rule>
   </s:pattern>
   <!-- Second report: the extracted document has one page, and its text
        starts with the words expected for the requested page. -->
   <s:pattern>
      <s:rule context="/wrapper/cx:pdf-info[2]">
         <s:assert test="cx:pages = 1">Wrong number of pages in extracted document</s:assert>
         <s:assert test="starts-with(cx:page-details/cx:page/cx:text, 'aliquip ex ea')">Wrong page</s:assert>
      </s:rule>
   </s:pattern>
</s:schema>

Revision history

11 Jun 2026, Norm Tovey-Walsh
Created test.