資源簡介
針對網上下載的《OpenCV 3.x with Python By Example, 2nd Edition》PDF 文件,採用 Python 和 PyPDF2 編程,去除該 PDF 上的文字水印。去除水印的方法隨加水印的方法不同而不同:本例所採用的方法對某些特定的 PDF 有效,對其他 PDF 則可能完全不適用。本例程僅供業餘時間消遣之用。
代碼片段和文件信息
# -*- coding: utf-8 -*-
# 去除原PDF文件中的水印,可從簡書下載
# OpenCV 3.x with Python By Example 2nd
# https://www.jianshu.com/p/a4d306dc3b5b
# Remove watermark with PyPDF2
# http://blog.uorz.me/2018/06/19/removeing-watermark-with-PyPDF2.html
# Goshin/Remove-PDF-Watermark
# https://github.com/Goshin/Remove-PDF-Watermark
import argparse
import logging

import numpy as np
from PyPDF2 import PdfFileMerger
from PyPDF2.generic import NameObject, NumberObject
from PyPDF2.pdf import ContentStream
from PyPDF2.utils import b_
class?PdfFileModifyMerger(PdfFileMerger):
    # 書籤向前差了一頁,退回原位
????def?bookmarks_plus_one(self?bookmarks=None):
????????if?bookmarks?==?None:
????????????bookmarks?=?self.bookmarks
????????for?b?in?bookmarks:
????????????if?isinstance(b?list):
????????????????self.bookmarks_plus_one(b)
????????????????continue
????????????bp?=?b[‘/Page‘]
????????????if?isinstance(bp?Numberobject):
????????????????#b[‘/Page‘]?=?Numberobject(bp.as_numeric()?-?1)
????????????????b.__setitem__(Nameobject(‘/Page‘)?Numberobject(bp.as_numeric()?-?1))
????????????????continue
????def?merge(self?*args?after_page_append=[]?**kwargs):
????????super(PdfFileModifyMerger?self).merge(*args?**kwargs)
????????pdfr?=?self.inputs[-1][1]
????????position?=?args[0]
????????pages?=?kwargs.get(‘pages‘?None)
????????if?pages?is?None:
????????????pages?=?(0?pdfr.getNumPages())
????????elif?isinstance(pages?PageRange):
????????????pages?=?pages.indices(pdfr.getNumPages())
????????elif?not?isinstance(pages?tuple):
????????????raise?TypeError(‘“pages“?must?be?a?tuple?of?(start?stop[?step])‘)
???????
評論
共有 條評論