tesseract-003
Tests that the cx:tesseract step can produce TSV output.
Test is expected to pass.
The pipeline
<p:declare-step name="main" version="3.0"
                xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:cx="http://xmlcalabash.com/ns/extensions"
                xmlns:t="http://xproc.org/ns/testsuite/3.0">
  <!-- Pipeline under test: turn a PDF into images, run OCR on them with
       output-format="tsv", and cast what comes out to application/xml. -->

  <!-- Extension step libraries (pdf-steps and tesseract). -->
  <p:import href="https://xmlcalabash.com/ext/library/pdf-steps.xpl"/>
  <p:import href="https://xmlcalabash.com/ext/library/tesseract.xpl"/>

  <!-- Single output port; it receives the output of the last step. -->
  <p:output port="result"/>

  <!-- Step 1: read the sample PDF (href is relative to this pipeline)
       and convert it with dpi="300". -->
  <cx:pdf-to-images dpi="300">
    <p:with-input port="source" href="../documents/example.pdf"/>
  </cx:pdf-to-images>

  <!-- Step 2: OCR with the "eng" language, requesting TSV output.
       NOTE(review): debug-output is pointed at /dev/null, presumably to
       discard diagnostics; that path is POSIX-specific. Confirm this is
       acceptable on every platform the test suite runs on. -->
  <cx:tesseract debug-output="/dev/null"
                language="eng"
                output-format="tsv"/>

  <!-- Step 3: cast the OCR output to application/xml so that the
       Schematron checks can inspect it as an XML document. -->
  <p:cast-content-type content-type="application/xml"/>
</p:declare-step>
Result
<array xmlns="http://www.w3.org/2005/xpath-functions"
xmlns:t="http://xproc.org/ns/testsuite/3.0">
<array>
<string>level</string>
<string>page_num</string>
<string>block_num</string>
<string>par_num</string>
<string>line_num</string>
<string>word_num</string>
<string>left</string>
<string>top</string>
<string>width</string>
<string>height</string>
<string>conf</string>
<string>text</string>
</array>
<array>
<string>1</string>
<string>1</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>2480</string>
<string>3507</string>
<string>-1</string>
<string/>
</array>
<array>
<string>2</string>
<string>1</string>
<string>1</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>200</string>
<string>237</string>
<string>386</string>
<string>64</string>
<string>-1</string>
<string/>
</array>
<array>
<string>3</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>0</string>
<string>0</string>
<string>200</string>
<string>237</string>
<string>386</string>
<string>64</string>
<string>-1</string>
<string/>
</array>
<array>
<string>4</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>0</string>
<string>200</string>
<string>237</string>
<string>386</string>
<string>64</string>
<string>-1</string>
<string/>
</array>
<array>
<string>5</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>200</string>
<string>237</string>
<string>173</string>
<string>63</string>
<string>96.414978</string>
<string>PDF</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>2</string>
<string>404</string>
<string>237</string>
<string>182</string>
<string>64</string>
<string>96.832092</string>
<string>Text</string>
</array>
<array>
<string>2</string>
<string>1</string>
<string>2</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>191</string>
<string>387</string>
<string>651</string>
<string>44</string>
<string>-1</string>
<string/>
</array>
<array>
<string>3</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>0</string>
<string>0</string>
<string>191</string>
<string>387</string>
<string>651</string>
<string>44</string>
<string>-1</string>
<string/>
</array>
<array>
<string>4</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>0</string>
<string>191</string>
<string>387</string>
<string>651</string>
<string>44</string>
<string>-1</string>
<string/>
</array>
<array>
<string>5</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>191</string>
<string>387</string>
<string>80</string>
<string>34</string>
<string>96.752213</string>
<string>This</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>2</string>
<string>289</string>
<string>389</string>
<string>27</string>
<string>32</string>
<string>96.293854</string>
<string>is</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>3</string>
<string>347</string>
<string>398</string>
<string>5</string>
<string>23</string>
<string>96.293854</string>
<string>a</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>4</string>
<string>369</string>
<string>387</string>
<string>143</string>
<string>44</string>
<string>96.305077</string>
<string>sample</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>5</string>
<string>531</string>
<string>389</string>
<string>82</string>
<string>32</string>
<string>96.379326</string>
<string>PDF</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>2</string>
<string>1</string>
<string>1</string>
<string>6</string>
<string>629</string>
<string>387</string>
<string>213</string>
<string>34</string>
<string>95.736130</string>
<string>document.</string>
</array>
<array>
<string>2</string>
<string>1</string>
<string>3</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>206</string>
<string>578</string>
<string>466</string>
<string>478</string>
<string>-1</string>
<string/>
</array>
<array>
<string>3</string>
<string>1</string>
<string>3</string>
<string>1</string>
<string>0</string>
<string>0</string>
<string>206</string>
<string>578</string>
<string>466</string>
<string>478</string>
<string>-1</string>
<string/>
</array>
<array>
<string>4</string>
<string>1</string>
<string>3</string>
<string>1</string>
<string>1</string>
<string>0</string>
<string>206</string>
<string>578</string>
<string>466</string>
<string>478</string>
<string>-1</string>
<string/>
</array>
<array>
<string>5</string>
<string>1</string>
<string>3</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>206</string>
<string>578</string>
<string>466</string>
<string>478</string>
<string>95.000000</string>
<string/>
</array>
<array>
<string>2</string>
<string>1</string>
<string>4</string>
<string>0</string>
<string>0</string>
<string>0</string>
<string>190</string>
<string>1212</string>
<string>305</string>
<string>45</string>
<string>-1</string>
<string/>
</array>
<array>
<string>3</string>
<string>1</string>
<string>4</string>
<string>1</string>
<string>0</string>
<string>0</string>
<string>190</string>
<string>1212</string>
<string>305</string>
<string>45</string>
<string>-1</string>
<string/>
</array>
<array>
<string>4</string>
<string>1</string>
<string>4</string>
<string>1</string>
<string>1</string>
<string>0</string>
<string>190</string>
<string>1212</string>
<string>305</string>
<string>45</string>
<string>-1</string>
<string/>
</array>
<array>
<string>5</string>
<string>1</string>
<string>4</string>
<string>1</string>
<string>1</string>
<string>1</string>
<string>190</string>
<string>1212</string>
<string>93</string>
<string>34</string>
<string>95.616150</string>
<string>With</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>4</string>
<string>1</string>
<string>1</string>
<string>2</string>
<string>301</string>
<string>1223</string>
<string>44</string>
<string>23</string>
<string>95.616150</string>
<string>an</string>
</array>
<array>
<string>5</string>
<string>1</string>
<string>4</string>
<string>1</string>
<string>1</string>
<string>3</string>
<string>364</string>
<string>1214</string>
<string>131</string>
<string>43</string>
<string>95.988602</string>
<string>image.</string>
</array>
<array>
<string/>
</array>
</array>
Schematron checks
<s:schema queryBinding="xslt2"
          xmlns:s="http://purl.oclc.org/dsdl/schematron"
          xmlns:t="http://xproc.org/ns/testsuite/3.0">
  <!-- Validates the pipeline result. The fn prefix used in the rule
       contexts and tests below is bound to the XPath functions namespace. -->
  <s:ns uri="http://www.w3.org/2005/xpath-functions" prefix="fn"/>

  <!-- Check 1: the document element must be fn:array. -->
  <s:pattern>
    <s:rule context="/">
      <s:assert test="fn:array">Wrong document element</s:assert>
    </s:rule>
  </s:pattern>

  <!-- Check 2: the first string of the first nested array must be
       'level'. No other cell of the result is inspected. -->
  <s:pattern>
    <s:rule context="/fn:array">
      <s:assert test="fn:array[1]/fn:string[1] = 'level'">Wrong output</s:assert>
    </s:rule>
  </s:pattern>
</s:schema>
Revision history
- 12 Jun 2026, Norm Tovey-Walsh
- Created test.