資源簡介
爬取新浪微博上妹紙照片的爬蟲程序,照片來源於中戲北電的美女們的微博,大學校花們的萬張照片唾手可得。程序可複用。

代碼片段和文件信息
# -*- encoding:utf-8 -*-
'''
獲取“北電中戲的美女們”此微博下的美女照片
輕輕鬆鬆得到萬張清純妹紙的照片
'''
import base64
import json
import random
import re
import sqlite3
import time
import urllib2
from urllib import quote_plus

import requests
# Default request headers shared by the HTTP calls in this file; the login
# helpers take a copy of this dict and add per-request fields (Host, Referer).
headers = {
    # Adjacent string literals are joined at compile time. A backslash
    # continuation inside a single literal would embed the next line's
    # indentation into the header value.
    'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Ubuntu Chromium/57.0.2987.98 '
                   'Chrome/57.0.2987.98 Safari/537.36'),
    'Connection': 'keep-alive',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    # 'Accept-Encoding': 'gzip, deflate, br',
    # 'Accept': '*/*',
}
def login_pre(username, Session):
    '''
    Run the pre-login check and, if the server asks for one, fetch a captcha.

    Sends the base64-encoded user name to the prelogin endpoint. When the
    reply has ``showpin == 1`` the captcha image is downloaded, written to
    ``../../data/weibo_data/captcha.png`` and the user is prompted on the
    console to type the code shown in it.

    Args:
        username: account name to log in with.
        Session: object exposing ``get(url, params=..., headers=...)``
            (presumably a ``requests.Session`` -- confirm against the caller).

    Returns:
        ``(captcha, pcid)`` when a captcha was requested and entered,
        otherwise the empty string ``''`` (no captcha needed, or any
        failure while parsing the reply or fetching the captcha).
    '''
    # The user name is URL-quoted and then base64-encoded before it is sent.
    su = base64.b64encode(quote_plus(username).encode('utf-8')).decode('utf-8')
    # int() keeps the random suffix integral so the callback name is made of
    # digits only (math.floor returns a float on Python 2, which str() would
    # render in exponent notation).
    callback = 'jsonpcallback' + str(
        int(time.time() * 1000) + int(random.random() * 100000))
    pre_Data = {
        "checkpin": '1',
        "entry": 'mweibo',
        "su": su,
        "callback": callback,
    }
    pre_headers = headers.copy()
    pre_headers['Host'] = 'login.sina.com.cn'
    pre_headers['Referer'] = 'https://passport.weibo.cn/signin/login'
    pre_url = 'https://login.sina.com.cn/sso/prelogin.php'
    pre_text = Session.get(pre_url, params=pre_Data, headers=pre_headers).text
    try:
        # A JSONP callback is requested, so the reply is presumably of the
        # form ``callback({...})``; unwrap the parentheses before parsing and
        # fall back to the raw text if there is no wrapper.
        wrapped = re.search(r'\((.*)\)', pre_text, re.S)
        pre_json = json.loads(wrapped.group(1) if wrapped else pre_text)
        if pre_json['showpin'] != 1:
            # No captcha required: return '' explicitly instead of None so
            # callers comparing against '' take the no-captcha path.
            return ''
        pre_headers['Host'] = 'passport.weibo.cn'
        capt_json = Session.get(
            'https://passport.weibo.cn/captcha/image',
            headers=pre_headers).json()
        # Keep what follows the 'base64' marker and drop the separator comma
        # (assumes a data-URI style value -- TODO confirm).
        capt_base64 = capt_json["data"]["image"].split('base64')[1].lstrip(',')
        with open('../../data/weibo_data/captcha.png', 'wb') as f:
            # Decode (not encode) so the file holds the raw image bytes.
            f.write(base64.b64decode(capt_base64))
        # raw_input: this file targets Python 2 (it imports urllib2).
        captcha = raw_input('input captcha:\n>')
        return captcha, capt_json["data"]["pcid"]
    except Exception:
        # Deliberate best-effort: any failure here is treated as "no captcha".
        return ''
def?login(username?password?Session?pincode):
????‘‘‘
????模擬登錄手機端微博
????‘‘‘
????login_url?=?“https://passport.weibo.cn/signin/login“
????data?=?{
????????‘username‘:?username
????????‘password‘:?password
????????‘savestate‘:?‘1‘
????????‘r‘:?‘http://m.weibo.cn/‘
????????‘ec‘:?‘0‘
????????‘pagerefer‘:?login_url
????????‘entry‘:?‘mweibo‘
????????‘wentry‘:?‘‘
????????‘loginfrom‘:?‘‘
????????‘client_id‘:?‘‘
????????‘code‘:?‘‘
????????‘qq‘:?‘‘
????????‘mainpageflag‘:?‘1‘
????????‘hff‘:?‘‘
????????‘hfp‘:?‘‘
????}
????login_headers?=?headers.copy()
????login_headers[‘Host‘]?=?‘passport.weibo.cn‘
????login_headers[‘Accept-Encoding‘]?=?‘gzip?deflate?br‘
????login_headers[‘Accept‘]?=?‘*/*‘
????login_headers[‘Origin‘]?=?‘https://passport.weibo.cn‘
????login_headers[‘Referer‘]?=?login_url
????login_headers[‘Content-Type‘]?=?‘application/x-www-form-urlencoded‘
????if?pincode?==?‘‘:
?
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        5550  2017-06-20 17:12  crwler.py
     文件      724119  2017-06-20 17:20  結果.png
- 上一篇:火影忍者網站的設計與制作
- 下一篇:SIM900A通過51單片機撥打電話程序
評論
共有 條評論