資源簡介
了解 Google 類似的搜索引擎是怎么實現的;攫取搜索真相。
原理描述請見:http://gaolizhong666.blog.163.com/blog/static/11561504220136242819683/
代碼片段和文件信息
'''
Created on 2013-7-2
@author: glz.shinow
'''
# The search engine is divided into 3 modules: web crawl, build and use of index, page rank
#----------------------------web_crawl--------------------------------
def get_page(url):
    """Fetch *url* and return the response body as text.

    This is a best-effort fetch: any failure (malformed URL, network error,
    unsupported scheme, ...) yields the empty string rather than raising.

    Args:
        url: The address to download.

    Returns:
        The response body, or "" when the page could not be retrieved.
        On Python 3 the raw bytes are decoded so that the result is always
        text, matching the "" failure value.
        NOTE(review): UTF-8 with replacement characters is assumed for the
        decode -- confirm this suits the pages being crawled.
    """
    try:
        # urlopen lives in urllib.request on Python 3 and directly in urllib
        # on Python 2 (the location the original code relied on).
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen

        response = urlopen(url)
        try:
            body = response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
    except Exception:
        # Deliberately broad: the caller treats "" as "nothing to crawl".
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return ""

    # On Python 2, bytes is str, so this branch is skipped and the body is
    # returned unchanged.
    if isinstance(body, bytes) and not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body
????
def get_next_target(page):
    """Extract the first double-quoted link target found in *page*.

    Looks for the first anchor marker and returns the text between the next
    pair of double quotes that follows it.

    Args:
        page: The page text to scan.

    Returns:
        A ``(url, end_quote)`` tuple: the extracted text and the index in
        *page* of its closing double quote. ``(None, 0)`` is returned when
        no marker is present, or when the marker is not followed by a
        complete pair of double quotes.
    """
    # NOTE(review): the search literal was lost in the original text (it looks
    # like it was swallowed as an HTML tag); '<a href=' is a reconstruction
    # -- confirm against the author's source.
    start_link = page.find('<a href=')
    if start_link == -1:
        return None, 0

    # Without these guards a failed find() returns -1, and the slice below
    # would silently produce an unrelated chunk of the page as the "url".
    start_quote = page.find('"', start_link)
    if start_quote == -1:
        return None, 0
    end_quote = page.find('"', start_quote + 1)
    if end_quote == -1:
        return None, 0

    return page[start_quote + 1:end_quote], end_quote
def?get_all_links(page):
????links?=?[]
????while?True:
????????url?endpos?=?get_next_target(page)
????????if?url:
????????????links.append(url)
????????????
評論
共有 條評論