資源簡介
租房爬蟲實戰
代碼片段和文件信息
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
# HTTP request headers passed to requests.get() by get_url(); the values
# mimic a desktop Firefox 39 browser on Ubuntu.
# NOTE(review): the commas inside the Accept* values were lost in the
# extracted source and are restored here to the standard Firefox defaults.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:39.0) Gecko/20100101 Firefox/39.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
}

# District identifiers in romanized form ("xingzhengqu" = administrative
# district). Presumably used as URL path segments -- confirm against caller.
xingzhengqu = [
    'tianhe',
    'yuexiu',
    'liwan',
    'haizhu',
    'panyu',
    'baiyun',
    'huangpugz',
    'zengcheng',
    'huadou',
    'nansha',
]

# Chinese display names for the districts. Presumably index-aligned with
# `xingzhengqu` (both lists have ten entries) -- confirm against caller.
xingzhengqu_cn = [
    '天河',
    '越秀',
    '荔灣',
    '海珠',
    '番禺',
    '白云',
    '黃埔',
    '增城',
    '花都',
    '南沙',
]
def?get_url(url?page):
????html?=?requests.get(url?+?‘/pg%s‘?%?str(page)?headers=headers).text
????table?=?BeautifulSoup(html?‘lxml‘).find(‘div‘?{‘class‘:?‘con-box‘}).find_all(‘
評論
共有 條評論