資源簡介
pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies. It aims to provide an accurate rendering while remaining optimized for Web display.
pdf2htmlEX is best for text-based PDF files, for example scientific papers with complicated formulas and figures. Text, fonts and formats are natively preserved in HTML such that you can still search and copy. The generated HTML file is static, with optional features powered by JavaScript.
代碼片段和文件信息
#!/usr/bin/env?python
import?os
import?subprocess
import?shutil
import?unittest
from?PIL?import?Image?ImageChops
from?test?import?Common
class?BrowserTests(Common):
????TEST_DATA_DIR?=?os.path.join(Common.TEST_DIR?‘browser_tests‘)
????DEFAULT_PDF2HTMLEX_ARGS?=?[
????????‘--fit-width‘?800
????????‘--last-page‘?1
????????‘--embed‘?‘fi‘?#?avoid?base64?to?make?it?faster
????]
????BROWSER_WIDTH=800
????BROWSER_HEIGHT=1200
????@classmethod
????def?setUpClass(cls):
????????pass
????@classmethod
????def?tearDownClass(cls):
????????pass
????def?run_test_case(self?filename?pdf2htmlEX_args=[]?page_must_load=True):
????????basefilename?extension?=?os.path.splitext(filename)
????????htmlfilename?=?basefilename?+?‘.html‘
????????ref_htmlfolder?=?os.path.join(self.TEST_DATA_DIR?basefilename)
????????ref_htmlfilename?=?os.path.join(ref_htmlfolder?htmlfilename)
????????out_htmlfilename?=?os.path.join(self.cur_output_dir?htmlfilename)
????????self.assertEquals(extension.lower()?‘.pdf‘?‘Input?file?is?not?PDF‘)
????????pdf2htmlEX_args?=?self.DEFAULT_PDF2HTMLEX_ARGS?\
????????????+?list(pdf2htmlEX_args)?+?[
????????????????os.path.join(self.TEST_DATA_DIR?filename)
????????????????htmlfilename
????????????]
????????result?=?self.run_pdf2htmlEX(pdf2htmlEX_args)
????????self.assertIn(htmlfilename?result[‘output_files‘]?‘HTML?file?is?not?generated‘)
????????if?self.GENERATING_MODE:
????????????#?copy?generated?html?files
????????????shutil.rmtree(ref_htmlfolder?True)
????????????shutil.copytree(self.cur_output_dir?ref_htmlfolder)
????????????return
????????png_out_dir?=?os.path.join(self.cur_temp_dir?‘png_out‘)
????????os.mkdir(png_out_dir)
????????pngfilename_out_fullpath?=?os.path.join(png_out_dir?basefilename?+?‘.out.png‘)
????????self.generate_image(out_htmlfilename?pngfilename_out_fullpath)
????????out_img?=?Image.open(pngfilename_out_fullpath)
????????pngfilename_ref_fullpath?=?os.path.join(png_out_dir?basefilename?+?‘.ref.png‘)
????????self.generate_image(ref_htmlfilename?pngfilename_ref_fullpath?page_must_load=page_must_load)
????????ref_img?=?Image.open(pngfilename_ref_fullpath)
????????diff_img?=?ImageChops.difference(ref_img?out_img);
????????diff_bbox?=?diff_img.getbbox()
????????if?diff_bbox?is?not?None:
????????????diff_size?=?(diff_bbox[2]?-?diff_bbox[0])?*?(diff_bbox[3]?-?diff_bbox[1])
????????????img_size?=?ref_img.size[0]?*?ref_img.size[1]
????????????if?self.SAVE_TMP:
????????????????#?save?the?diff?image
????????????????#?http://stackoverflow.com/questions/15721484/saving-in-png-using-pil-library-after-taking-imagechops-difference-of-two-png
????????????????diff_img.convert(‘RGB‘).save(os.path.join(png_out_dir?basefilename?+?‘.diff.png‘))
????????????self.fail(‘PNG?files?differ?by?<=?%d?pixels?(%f%%?of?%d?pixels?in?total)‘?%?(diff_size?1.0*diff_size/img_size?img_size))
????@unittest.skipIf(Common.GENERATING_MODE?‘Do?not?auto?generate?reference?for?test_fail‘)
????def?test_fail(self):
????????#?The
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        1019  2015-06-22 10:30  AUTHORS
     文件        4550  2015-06-22 10:30  ChangeLog
     目錄           0  2015-06-22 10:30  data\
     文件        2389  2015-06-22 10:30  data\ba
     文件       29208  2015-06-22 10:30  data\pdf2htmlEX.js.in
     文件        3422  2015-06-22 10:30  data\pdf2htmlEX-64x64.png
     文件         940  2015-06-22 10:30  data\build_js.sh
     文件       29099  2015-06-22 10:30  data\pdf2htmlEX.js
     文件       10516  2015-06-22 10:30  data\pdf2htmlEX.min.js
     文件        5457  2015-06-22 10:30  data\ba
     文件        2554  2015-06-22 10:30  data\manifest
     文件         638  2015-06-22 10:30  data\build_css.sh
     文件        1195  2015-06-22 10:30  data\LICENSE
     文件        4156  2015-06-22 10:30  data\fancy.css.in
     文件        3189  2015-06-22 10:30  data\fancy.min.css
     文件        4101  2015-06-22 10:30  data\fancy.css
     文件        5113  2015-06-22 10:30  data\ba
     文件         967  2015-06-22 10:31  LICENSE
     文件       35147  2015-06-22 10:31  LICENSE_GPLv3
     文件     7536128  2015-06-22 16:40  pdf2htmlEX.exe
     文件        4010  2015-06-22 10:31  README.md
     目錄           0  2015-06-22 10:31  test\
     文件          27  2015-06-22 10:31  test\.gitattributes
     文件        4437  2015-06-22 10:31  test\test_remote_browser.py
     目錄           0  2015-06-22 10:31  test\browser_tests\
     目錄           0  2015-06-22 10:31  test\browser_tests\basic_text\
     文件        1620  2015-06-22 10:31  test\browser_tests\basic_text\f2.woff
     文件        7055  2015-06-22 10:31  test\browser_tests\basic_text\basic_text.html
     文件        3492  2015-06-22 10:31  test\browser_tests\basic_text\f1.woff
     文件      118971  2015-06-22 10:31  test\browser_tests\invalid_unicode_issue477.pdf
     文件      137669  2015-06-22 10:31  test\browser_tests\fontfile3_opentype.pdf
............此處省略64個文件信息
- 上一篇:用Django 技術搭建的彈球游戲網站
- 下一篇:html學校網站
評論
共有 條評論