資源簡介
Python爬蟲項目開發實戰:高清PDF加源代碼,包含各章節的源代碼,適合新手入門練習,包會。
代碼片段和文件信息
#coding:utf-8
'''
第一種方式:使用os模塊中的fork方式實現多進程
import os
if __name__ == '__main__':
    print 'current Process (%s) start ...'%(os.getpid())
    pid = os.fork()
    if pid < 0:
        print 'error in fork'
    elif pid == 0:
        print 'I am child process(%s) and my parent process is (%s)'%(os.getpid(),os.getppid())
    else:
        print 'I(%s) created a chlid process (%s).'%(os.getpid(),pid)
'''
'''
第二種方法:使用multiprocessing模塊創建多進程
import os
from multiprocessing import Process
# 子進程要執行的代碼
def run_proc(name):
    print 'Child process %s (%s) Running...' % (name, os.getpid())
if __name__ == '__main__':
    print 'Parent process %s.' % os.getpid()
    for i in range(5):
        p = Process(target=run_proc, args=(str(i),))
        print 'Process will start.'
        p.start()
    p.join()
    print 'Process end.'
'''
'''
multiprocessing模塊提供了一個Pool類來代表進程池對象
from multiprocessing import Pool
import os, time, random
def run_task(name):
    print 'Task %s (pid = %s) is running...' % (name, os.getpid())
    time.sleep(random.random() * 3)
    print 'Task %s end.' % name
if __name__=='__main__':
    print 'Current process %s.' % os.getpid()
    p = Pool(processes=3)
    for i in range(5):
        p.apply_async(run_task, args=(i,))
    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
    print 'All subprocesses done.'
'''
'''
Queue進程間通信
from multiprocessing import Process, Queue
import os, time, random
# 寫數據進程執行的代碼:
def proc_write(q,urls):
    print('Process(%s) is writing...' % os.getpid())
    for url in urls:
        q.put(url)
        print('Put %s to queue...' % url)
        time.sleep(random.random())
# 讀數據進程執行的代碼:
def proc_read(q):
    print('Process(%s) is reading...' % os.getpid())
    while True:
        url = q.get(True)
        print('Get %s from queue.' % url)
if __name__=='__main__':
    # 父進程創建Queue,并傳給各個子進程:
    q = Queue()
    proc_writer1 = Process(target=proc_write, args=(q,['url_1', 'url_2', 'url_3']))
    proc_writer2 = Process(target=proc_write, args=(q,['url_4','url_5','url_6']))
    proc_reader = Process(target=proc_read, args=(q,))
    # 啟動子進程proc_writer,寫入:
    proc_writer1.start()
    proc_writer2.start()
    # 啟動子進程proc_reader,讀取:
    proc_reader.start()
    # 等待proc_writer結束:
    proc_writer1.join()
    proc_writer2.join()
    # proc_reader進程里是死循環,無法等待其結束,只能強行終止:
    proc_reader.terminate()
'''
'''
pipe進程間通信
import multiprocessing
import random
import time,os
def proc_send(pipe,urls):
    for url in urls:
        print "Process(%s) send: %s" %(os.getpid(),url)
        pipe.send(url)
        time.sleep(random.random())
def proc_recv(pipe):
    while True:
        print "Process(%s) rev:%s" %(os.getpid(),pipe.recv())
        time.sleep(random.random())
'''
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????文件???110524328??2018-12-20?18:51??《Python爬虫开发与项目实战》@www.java1234.com.pdf
?????目錄???????????0??2018-12-20?19:03??__MACOSX\
?????文件?????????610??2018-12-20?18:51??__MACOSX\._《Python爬虫开发与项目实战》@www.java1234.com.pdf
?????目錄???????????0??2018-12-20?19:02??SpiderBook-master\
?????目錄???????????0??2017-07-13?02:08??SpiderBook-master\ch07\
?????目錄???????????0??2017-07-13?02:08??SpiderBook-master\ch07\SpiderNode\
?????文件????????2062??2017-07-13?02:08??SpiderBook-master\ch07\SpiderNode\SpiderWork.py
?????目錄???????????0??2018-12-20?19:03??__MACOSX\SpiderBook-master\
?????目錄???????????0??2018-12-20?19:03??__MACOSX\SpiderBook-master\ch07\
?????目錄???????????0??2018-12-20?19:03??__MACOSX\SpiderBook-master\ch07\SpiderNode\
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\SpiderNode\._SpiderWork.py
?????文件????????1977??2017-07-13?02:08??SpiderBook-master\ch07\SpiderNode\HtmlParser.py
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\SpiderNode\._HtmlParser.py
?????文件???????????0??2017-07-13?02:08??SpiderBook-master\ch07\SpiderNode\__init__.py
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\SpiderNode\.___init__.py
?????文件?????????404??2017-07-13?02:08??SpiderBook-master\ch07\SpiderNode\HtmlDownloader.py
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\SpiderNode\._HtmlDownloader.py
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\._SpiderNode
?????目錄???????????0??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\
?????文件????????4216??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\NodeManager.py
?????目錄???????????0??2018-12-20?19:03??__MACOSX\SpiderBook-master\ch07\ControlNode\
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\ControlNode\._NodeManager.py
?????文件???????????0??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\__init__.py
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\ControlNode\.___init__.py
?????文件???????52988??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\old_urls.txt
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\ControlNode\._old_urls.txt
?????文件????????1496??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\DataOutput.py
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\ControlNode\._DataOutput.py
?????文件?????1092419??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\baike_2016_11_10_20_38_05.html
?????文件?????????212??2017-07-13?02:08??__MACOSX\SpiderBook-master\ch07\ControlNode\._baike_2016_11_10_20_38_05.html
?????文件?????????976??2017-07-13?02:08??SpiderBook-master\ch07\ControlNode\new_urls.txt
............此處省略481個文件信息
評論
共有 條評論