資源簡介
實現對正方教務系統成績的爬取。
實現對正方教務系統成績的爬取。
代碼片段和文件信息
# -*- coding: gb2312 -*-
# Scraper for the grade pages of a Zhengfang academic affairs system.
# Written for Python 2 (urllib2 / cookielib / reload(sys)).
import urllib
import urllib2
import cookielib
import re
import os
import string
from bs4 import BeautifulSoup
# from PIL import Image
import sys

# Python 2 hack: make implicit str <-> unicode conversions use GB2312
# instead of ASCII. reload() is needed because site.py removes
# sys.setdefaultencoding at start-up.
reload(sys)
sys.setdefaultencoding('gb2312')

# Root of the target site; the page names below are appended to it.
baseUrl = 'http://222.24.19.201/'
codeUrl = 'CheckCode.aspx'    # captcha image
loginUrl = 'default2.aspx'    # login form target
scoreUrl = 'xscjcx.aspx'      # grade query page
def downImg(url, name):
    '''
    Download the captcha image and store it in the current working directory.

    This is best effort: any error is printed to stdout and the function
    returns normally.

    :param url: URL the captcha image is fetched from
    :param name: file name the image is saved under
    :return: None
    '''
    try:
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        # Binary mode: the payload is raw image data.
        with open(os.path.join(os.getcwd(), name), 'wb') as image_file:
            image_file.write(content)
    except Exception as e:
        print('Error : %s' % e)
def setCookie():
    '''
    Create a cookie jar and make every later urllib2 request use it.

    A cookie-aware opener is installed globally, then the base URL is
    requested once so that any cookies the server sets land in the jar.

    :return: the cookielib.LWPCookieJar that collects the cookies
    '''
    cookie = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    # Installed globally so plain urllib2.urlopen() calls share the jar.
    urllib2.install_opener(opener)
    # Only the Set-Cookie side effect matters; close the response right away.
    opener.open(baseUrl).close()
    return cookie
def login(username, password, cookie):
    '''
    Log in to the academic affairs system.

    Fetches the login page, saves the captcha image as ``code.png``, asks the
    user to type the captcha, and posts the login form. On a wrong captcha or
    any other login failure a message is printed and the process exits with
    status 1.

    :param username: user name to log in with
    :param password: password for that user
    :param cookie: cookie jar returned by setCookie()
    :return: dict with ``name`` (the text of the ``xhxm`` element, GB2312
             encoded, with the trailing honorific removed) and
             ``session_id`` (value of the ASP.NET_SessionId cookie)
    :raises KeyError: if the jar holds no ASP.NET_SessionId cookie
    '''
    request = urllib2.Request(baseUrl)
    text = urllib2.urlopen(request).read()
    downImg(baseUrl + codeUrl, 'code.png')
    # The captcha is typed in by hand; no OCR is wired up.
    code = raw_input('请输入验证码:')
    soup = BeautifulSoup(text, 'html.parser')
    # NOTE(review): assumes the first <input> on the login page is the hidden
    # __VIEWSTATE field -- confirm against the live page.
    _VIEWSTATE = soup.find_all('input')[0].get('value')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1',
        'Referer': baseUrl,
    }
    postData = {
        '__VIEWSTATE': _VIEWSTATE,
        'txtUserName': username,
        'TextBox2': password,
        'txtSecretCode': code,
        'RadioButtonList1': '学生',
        'Button1': '',
        'lbLanguage': '',
        'hidPdrs': '',
        'hidsc': '',
    }
    postData = urllib.urlencode(postData)
    request = urllib2.Request(baseUrl + loginUrl, postData, headers)
    response = urllib2.urlopen(request)
    text = response.read()

    if '验证码不正确' in text:
        print('验证码错误')
        sys.exit(1)

    # The element with id "xhxm" carries the user's name; if it is missing
    # the returned page is not the logged-in one, so the login failed.
    soup = BeautifulSoup(text, 'html.parser')
    name_tag = soup.find(id='xhxm')
    if name_tag is None or name_tag.string is None:
        print('登录失败')
        sys.exit(1)

    result = {}
    name = name_tag.string.encode('gb2312')
    result['name'] = name.replace('同学', '')

    # Use the jar's public iteration API rather than the private _cookies
    # mapping with a hard-coded host name.
    session_id = None
    for item in cookie:
        if item.name == 'ASP.NET_SessionId':
            session_id = item.value
            break
    if session_id is None:
        raise KeyError('ASP.NET_SessionId')
    result['session_id'] = session_id
    return result
def?getScore(username?name?session_id?ddlXN?ddlXQ):
- 上一篇:PyQt5初級教程
- 下一篇:python實現三次自然樣條插值
評論
共有 條評論