資源簡介
一個單文件爬蟲,實現監聽頁面變化並發送郵件。
代碼片段和文件信息
import?urllib
from?urllib?import?request
from?bs4?import?BeautifulSoup
from?datetime?import?datetime
import?random
import?time
import?pymysql
import?smtplib
import?sys
import?requests
import?json
from?email.header?import?Header
from?email.mime.text?import?MIMEText
# Connect to MySQL
def get_mysql():
    """Open a connection to the local MySQL ``test`` database.

    All connection parameters are hard-coded: host ``localhost``, port
    ``3306``, user ``root``, password ``root``, database ``test`` and
    charset ``utf8``. The connection is created with
    ``pymysql.cursors.DictCursor`` as its cursor class.

    Returns:
        tuple: ``(db, cursor)`` -- the open connection and a cursor created
        from it. Neither is closed here; that is left to the caller.
    """
    db = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='root',
        db='test',
        charset='utf8',
        # NOTE(review): callers index rows by column name (e.g. row['title']),
        # which presumably relies on DictCursor -- confirm before changing.
        cursorclass=pymysql.cursors.DictCursor,
    )
    cursor = db.cursor()
    return db, cursor
#?爬蟲
def?get_spiderMsg():
????host?=?{}
????title?=?{}
????lastest_title?=?{}
????host[0]?=?‘http://cjxy.hebtu.edu.cn/a/zxks/tzgg/index.html‘
????title[0]?=?‘自考實踐通知‘
????#?獲取數據庫中保存的最新標題
????db?mysql?=?get_mysql()
????select_sql?=?“SELECT?title?FROM?test?where?url?=?‘“+host[0]+“‘ORDER?BY?updated_at?desc“
????mysql.execute(select_sql)
????res?=?mysql.fetchone()
????lastest_title[0]?=?‘‘
????if?res:
????????lastest_title[0]?=?res[‘title‘];
????#?email相關
????mail_host?=?“smtp.163.com“??????#?SMTP服務器
????mail_user?=?“xxxxxxx“??????????????????#?用戶名
????mail_pass?=?“xxxxxxxxx“???????????????#?授權密碼,非登錄密碼
????sender?=?‘xxxxxx@163.com‘????#?發件人郵箱(最好寫全?不然會失敗)
????receivers?=?[‘1111111111@qq.com‘]??#?接收郵件,可設置為你的QQ郵箱或者其他郵箱
????header_list?=?[‘Mozilla/5.0?(Windows?NT?6.1;?WOW64;?rv:54.0)?Gecko/20100101?Firefox/54.000‘
???????????????????‘Mozilla/5.0?(Windows?NT?6.1;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/57.0.2987.133?Safari/537.36‘
???????????????????‘Mozilla/5.0?(Windows?NT?6.1;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/53.0.2785.104?Safari/537.36?Core/1.53.2759.400?QQBrowser/9.6.11220.400‘
???????????????????‘Mozilla/5.0?(Windows?NT?6.1;?WOW64;?Trident/7.0;?rv:11.0)?like?Gecko‘]
????switch?=?True
????while?switch:
????????time.sleep(3)
????????sys.stdout.flush()
????????i?=?0
????????range_header?=?random.randint(0?3)
????????user_agent?=?header_list[range_header]
????????accept?=?‘text/htmlapplication/xhtml+xmlapplication/xml;q=0.9image/
評論
共有 條評論