tesseract-003

Tests that cx:tesseract produces TSV output when output-format="tsv" is specified.

Test is expected to pass.

The pipeline

<p:declare-step name="main" version="3.0"
                xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:cx="http://xmlcalabash.com/ns/extensions"
                xmlns:t="http://xproc.org/ns/testsuite/3.0">
   <!-- Pipeline for tesseract-003: feeds ../documents/example.pdf through
        cx:pdf-to-images and cx:tesseract (output-format="tsv"), then casts
        the result to application/xml. -->

   <!-- Extension step libraries; presumably these declare the cx:* steps
        used below (confirm against the library sources). -->
   <p:import href="https://xmlcalabash.com/ext/library/pdf-steps.xpl"/>
   <p:import href="https://xmlcalabash.com/ext/library/tesseract.xpl"/>

   <p:output port="result"/>

   <!-- The source PDF is resolved relative to this test's location. -->
   <cx:pdf-to-images dpi="300">
      <p:with-input port="source" href="../documents/example.pdf"/>
   </cx:pdf-to-images>

   <!-- NOTE(review): debug-output="/dev/null" is a POSIX path; confirm how
        this behaves on platforms without /dev/null (e.g. Windows). -->
   <cx:tesseract debug-output="/dev/null"
                 language="eng"
                 output-format="tsv"/>

   <!-- Cast to XML so the Schematron checks can address the output as
        fn:array / fn:string elements. -->
   <p:cast-content-type content-type="application/xml"/>
</p:declare-step>

Result

<array xmlns="http://www.w3.org/2005/xpath-functions"
       xmlns:t="http://xproc.org/ns/testsuite/3.0">
   <array>
      <string>level</string>
      <string>page_num</string>
      <string>block_num</string>
      <string>par_num</string>
      <string>line_num</string>
      <string>word_num</string>
      <string>left</string>
      <string>top</string>
      <string>width</string>
      <string>height</string>
      <string>conf</string>
      <string>text</string>
   </array>
   <array>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>2480</string>
      <string>3507</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>200</string>
      <string>237</string>
      <string>386</string>
      <string>64</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>3</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>0</string>
      <string>200</string>
      <string>237</string>
      <string>386</string>
      <string>64</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>4</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>200</string>
      <string>237</string>
      <string>386</string>
      <string>64</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>200</string>
      <string>237</string>
      <string>173</string>
      <string>63</string>
      <string>96.414978</string>
      <string>PDF</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>2</string>
      <string>404</string>
      <string>237</string>
      <string>182</string>
      <string>64</string>
      <string>96.832092</string>
      <string>Text</string>
   </array>
   <array>
      <string>2</string>
      <string>1</string>
      <string>2</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>191</string>
      <string>387</string>
      <string>651</string>
      <string>44</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>3</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>0</string>
      <string>0</string>
      <string>191</string>
      <string>387</string>
      <string>651</string>
      <string>44</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>4</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>191</string>
      <string>387</string>
      <string>651</string>
      <string>44</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>191</string>
      <string>387</string>
      <string>80</string>
      <string>34</string>
      <string>96.752213</string>
      <string>This</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>2</string>
      <string>289</string>
      <string>389</string>
      <string>27</string>
      <string>32</string>
      <string>96.293854</string>
      <string>is</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>3</string>
      <string>347</string>
      <string>398</string>
      <string>5</string>
      <string>23</string>
      <string>96.293854</string>
      <string>a</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>4</string>
      <string>369</string>
      <string>387</string>
      <string>143</string>
      <string>44</string>
      <string>96.305077</string>
      <string>sample</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>5</string>
      <string>531</string>
      <string>389</string>
      <string>82</string>
      <string>32</string>
      <string>96.379326</string>
      <string>PDF</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>2</string>
      <string>1</string>
      <string>1</string>
      <string>6</string>
      <string>629</string>
      <string>387</string>
      <string>213</string>
      <string>34</string>
      <string>95.736130</string>
      <string>document.</string>
   </array>
   <array>
      <string>2</string>
      <string>1</string>
      <string>3</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>206</string>
      <string>578</string>
      <string>466</string>
      <string>478</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>3</string>
      <string>1</string>
      <string>3</string>
      <string>1</string>
      <string>0</string>
      <string>0</string>
      <string>206</string>
      <string>578</string>
      <string>466</string>
      <string>478</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>4</string>
      <string>1</string>
      <string>3</string>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>206</string>
      <string>578</string>
      <string>466</string>
      <string>478</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>3</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>206</string>
      <string>578</string>
      <string>466</string>
      <string>478</string>
      <string>95.000000</string>
      <string/>
   </array>
   <array>
      <string>2</string>
      <string>1</string>
      <string>4</string>
      <string>0</string>
      <string>0</string>
      <string>0</string>
      <string>190</string>
      <string>1212</string>
      <string>305</string>
      <string>45</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>3</string>
      <string>1</string>
      <string>4</string>
      <string>1</string>
      <string>0</string>
      <string>0</string>
      <string>190</string>
      <string>1212</string>
      <string>305</string>
      <string>45</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>4</string>
      <string>1</string>
      <string>4</string>
      <string>1</string>
      <string>1</string>
      <string>0</string>
      <string>190</string>
      <string>1212</string>
      <string>305</string>
      <string>45</string>
      <string>-1</string>
      <string/>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>4</string>
      <string>1</string>
      <string>1</string>
      <string>1</string>
      <string>190</string>
      <string>1212</string>
      <string>93</string>
      <string>34</string>
      <string>95.616150</string>
      <string>With</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>4</string>
      <string>1</string>
      <string>1</string>
      <string>2</string>
      <string>301</string>
      <string>1223</string>
      <string>44</string>
      <string>23</string>
      <string>95.616150</string>
      <string>an</string>
   </array>
   <array>
      <string>5</string>
      <string>1</string>
      <string>4</string>
      <string>1</string>
      <string>1</string>
      <string>3</string>
      <string>364</string>
      <string>1214</string>
      <string>131</string>
      <string>43</string>
      <string>95.988602</string>
      <string>image.</string>
   </array>
   <array>
      <string/>
   </array>
</array>

Schematron checks

<s:schema xmlns:s="http://purl.oclc.org/dsdl/schematron"
          xmlns:t="http://xproc.org/ns/testsuite/3.0" queryBinding="xslt2">
   <!-- Validates the XML form of the TSV output: an outer fn:array whose
        children are fn:array rows of fn:string cells. Only structure and the
        header row are checked; coordinates and confidence values are not
        asserted here. -->
   <s:ns prefix="fn"
         uri="http://www.w3.org/2005/xpath-functions"/>
   <s:pattern>
      <s:rule context="/">
         <s:assert test="fn:array">Wrong document element</s:assert>
      </s:rule>
   </s:pattern>
   <s:pattern>
      <s:rule context="/fn:array">
         <!-- The first row must be the complete TSV header, not just its
              first cell, so a truncated or reordered header is caught. -->
         <s:assert test="string-join(fn:array[1]/fn:string, ' ') = 'level page_num block_num par_num line_num word_num left top width height conf text'">Wrong output: first row is not the expected 12-column TSV header</s:assert>
         <!-- At least one row after the header must carry all 12 fields;
              a header-only result would otherwise pass. -->
         <s:assert test="fn:array[position() gt 1][count(fn:string) = 12]">Wrong output: no 12-column data row follows the header</s:assert>
      </s:rule>
   </s:pattern>
</s:schema>

Revision history

12 Jun 2026, Norm Tovey-Walsh
Created test.