資源簡介
使用的庫有:requests、re、xlsxwriter、beautifulsoup

代碼片段和文件信息
import requests
import re
import xlsxwriter
from bs4 import BeautifulSoup
# Fetch a page and parse it
def get_soup(url, param):
    """Request ``url`` with query parameters ``param`` and parse the response.

    Args:
        url: Address to request.
        param: Mapping forwarded to ``requests.get`` as ``params``.

    Returns:
        A ``BeautifulSoup`` tree built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, params=param, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error page as data.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
# Get the number of result pages for an area
def get_page_num(s):
    """Read the page count from the pager of a parsed listing page.

    The pager is the first ``div`` whose ``class`` matches the regex
    ``page``. The count is the text of the first sibling element that
    follows the pager's ``strong`` element, converted to ``int``.

    Args:
        s: Parsed page exposing ``find_all`` (a ``BeautifulSoup`` tree).

    Returns:
        The integer read from the pager, or ``0`` when the page has no
        pager div, the pager has no ``strong`` element, or that element has
        no following sibling.

    Raises:
        ValueError: If the sibling's text is not an integer.
    """
    pagers = s.find_all(name="div", attrs={"class": re.compile(r'page')})
    if not pagers:
        # No pager at all: treat like a pager without a page indicator.
        return 0

    current = pagers[0].find("strong")
    if current is None:
        return 0

    siblings = current.find_next_siblings()
    if not siblings:
        return 0
    return int(siblings[0].get_text())
# Build the URL query parameters
def get_param(grade, area, page):
    """Build the query-parameter mapping for a school-list request.

    Args:
        grade: Value sent as ``grade_type``.
        area: Value sent as ``area_type``.
        page: Value sent as ``page``.

    Returns:
        A dict with the keys ``grade_type``, ``area_type`` and ``page``.
    """
    # ``grade`` used to be ignored in favour of a hard-coded '1'; honour it
    # so the helper works for other grade types as well.
    return {'grade_type': grade, 'area_type': area, "page": page}
# Collect the area names
def get_area(s):
    """Collect area names from the ``li`` tabs of a parsed listing page.

    Every ``li`` element with ``role="presentation"`` is inspected; the
    ``string`` of its first ``a`` element is taken as the area name. The
    entry labelled '全部' is left out.

    Args:
        s: Parsed page exposing ``find_all`` (a ``BeautifulSoup`` tree).

    Returns:
        A list of area-name strings in document order. Items without an
        ``a`` element, or whose link has no single string, are skipped.
    """
    res_areas = []
    for item in s.find_all(name='li', attrs={"role": "presentation"}):
        link = item.find('a')
        if link is None:
            # A tab without a link carries no area name.
            continue
        label = link.string
        # ``.string`` is None when the link wraps nested markup; a None
        # entry would later be sent as an area filter, so drop it here.
        if label is not None and label != '全部':
            res_areas.append(label)
    return res_areas
def?main():
????url?=?“http://hzjiaoyufb.hangzhou.com.cn/school_list.php“
????soup?=?get_soup(url?{‘grade_type‘:?‘1‘})
????#?初始化xlsx
????print(‘初始化xlsx...‘)
????workbook?=?xlsxwriter.Workbook(‘school.xlsx‘)
????worksheet?=?workbook.add_worksheet()
????bold?=?workbook.add_format({‘bold‘:?True})
????worksheet.write(‘A1‘?‘學(xué)校名稱‘?bold)
????worksheet.write(‘B1‘?‘學(xué)校地址‘?bold)
????worksheet.write(‘C1‘?‘學(xué)校網(wǎng)址‘?bold)
????worksheet.write(‘D1‘?‘學(xué)校電話‘?bold)
????worksheet.write(‘E1‘?‘學(xué)校微信‘?bold)
????worksheet.write(‘F1‘?‘學(xué)校微博‘?bold)
????worksheet.write(‘G1‘?‘班級(jí)數(shù)目‘?bold)
????worksheet.write(‘H1‘?‘學(xué)校類型‘?bold)
????worksheet.write(‘I1‘?‘學(xué)校層次‘?bold)
????worksheet.write(‘J1‘?‘地區(qū)‘?bold)
????#?根據(jù)地點(diǎn)和分頁的遍歷獲取所有的子葉超鏈接,保存在arr中
????arr?=?[]??#?存儲(chǔ)鏈接地址
????area?=?[]??#?存儲(chǔ)地址信息
????school_name?=?[]
????school_location?=?[]
????school_website?=?[]
????school_tel?=?[]
????school_wx?=?[]
????school_nature?=?[]
????school_class?=?[]
????school_pic?=?[]
????school_wb?=?[]
????school_type?=?[]
????school_level?=?[]
????print(‘獲取所有區(qū)域...‘)
????for?res_area?in?get_area(soup):
????????soup?=?get_soup(url?get_param(‘1‘?res_area?‘1‘))
????????for?num?in?range(get_page_num(soup)):
????????????soup?=?get_soup(url?get_param(‘1‘?res_area?num?-?1))
????????????schools?=?soup.find_all(‘div‘?class_=“pInfo“)
????????????for?school?in?schools:
????????????????arr.append(‘http://hzjiaoyufb.hangzhou.com.cn/‘?+?school.find(‘a(chǎn)‘).attrs[‘href‘])
????????????????area.append(res_area)
????#?遍歷arr中url,獲取子葉信息
????print(‘獲取所有學(xué)校數(shù)據(jù)...‘)
????for?item?in?enumerate(arr):
????????response?=?requests.get(item[1])
????????soup?=?BeautifulSoup(response.text?‘html.parser‘)
????????panel1?=?soup.find(‘h2‘).text
????????panel2?=?soup.find_all(name=‘div‘?attrs=‘panel-body‘)
????????school_name.append(panel1)
????????array?=?[]
????????for?panel?in?panel2:
????????????if?panel.find(‘h6‘)?is?not?None:
????????????????array.append(panel.find(‘h6‘).text.strip())
????????school_location.append(array[1])
????????school_website.append(array[4])
????????school_tel.append(ar
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        511  2018-09-23 16:50  getlist1\.idea\getlist1.iml
     文件        472  2018-09-25 09:57  getlist1\.idea\inspectionProfiles\Project_Default.xml
     文件        188  2018-09-23 16:50  getlist1\.idea\misc.xml
     文件        275  2018-09-23 16:44  getlist1\.idea\modules.xml
     文件      11876  2018-10-10 21:03  getlist1\.idea\workspace.xml
     文件       4150  2018-09-25 15:04  getlist1\main.py
     文件        896  2018-09-25 09:02  getlist1\rubbish.html
     文件      46030  2018-09-25 09:59  getlist1\school.xlsx
     文件       1666  2018-09-25 09:14  getlist1\test.py
     文件         55  2018-09-23 16:44  getlist1\venv\Lib\site-packages\easy-install.pth
     文件          1  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\dependency_li
     文件         98  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\entry_points.txt
     文件          2  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\not-zip-safe
     文件       2972  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\PKG-INFO
     文件         74  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\requires.txt
     文件      12502  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\SOURCES.txt
     文件          4  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\EGG-INFO\top_level.txt
     文件      14014  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\ba
     文件       8764  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\ba
     文件       2773  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\build_env.py
     文件       7023  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\cache.py
     文件      16679  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\cmdoptions.py
     文件       1500  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\check.py
     文件       3018  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\completion.py
     文件       7343  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\configuration.py
     文件       9092  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\download.py
     文件       3320  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\freeze.py
     文件       1729  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\hash.py
     文件       1079  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\help.py
     文件      20270  2018-09-23 16:44  getlist1\venv\Lib\site-packages\pip-10.0.1-py3.7.egg\pip\_internal\commands\install.py
............此處省略383個文件信息
評論
共有 條評論