91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 18.93MB
    文件類型: .zip
    金幣: 1
    下載: 0 次
    發布日期: 2023-07-07
  • 語言: 其他
  • 標簽: 學堂在線??python??

資源簡介

目標:爬取鏈家官方網站新房的數據(3-5頁即可,太多可能被封禁ip) 網址:https://bj.fang.lianjia.com/loupan/ 要求:將樓盤名稱、價格、平米數等(可以拓展)數據保存到一個json文件中。 交付:整個project的壓縮包(rar或zip格式)。壓縮包名要求為 "ID-作業序號"! 我的答案

資源截圖

代碼片段和文件信息

import?logging
import?re
from?collections?import?namedtuple
from?datetime?import?time

import?six
from?six.moves.urllib.parse?import?(ParseResult?quote?urlparse
????????????????????????????????????urlunparse)

logger?=?logging.getLogger(__name__)

_Rule?=?namedtuple(‘Rule‘?[‘field‘?‘value‘])
RequestRate?=?namedtuple(
????‘RequestRate‘?[‘requests‘?‘seconds‘?‘start_time‘?‘end_time‘])

_DISALLOW_DIRECTIVE?=?{‘disallow‘?‘dissallow‘?‘dissalow‘?‘disalow‘?‘diasllow‘?‘disallaw‘}
_ALLOW_DIRECTIVE?=?{‘allow‘}
_USER_AGENT_DIRECTIVE?=?{‘user-agent‘?‘useragent‘?‘user?agent‘}
_SITEMAP_DIRECTIVE?=?{‘sitemap‘?‘sitemaps‘?‘site-map‘}
_CRAWL_DELAY_DIRECTIVE?=?{‘crawl-delay‘?‘crawl?delay‘}
_REQUEST_RATE_DIRECTIVE?=?{‘request-rate‘?‘request?rate‘}
_HOST_DIRECTIVE?=?{‘host‘}

_WILDCARDS?=?{‘*‘?‘$‘}

_HEX_DIGITS?=?set(‘0123456789ABCDEFabcdef‘)

__all__?=?[‘RequestRate‘?‘Protego‘]


def?_is_valid_directive_field(field):
????return?any([field?in?_DISALLOW_DIRECTIVE
????????????????field?in?_ALLOW_DIRECTIVE
????????????????field?in?_USER_AGENT_DIRECTIVE
????????????????field?in?_SITEMAP_DIRECTIVE
????????????????field?in?_CRAWL_DELAY_DIRECTIVE
????????????????field?in?_REQUEST_RATE_DIRECTIVE
????????????????field?in?_HOST_DIRECTIVE])


def?_enforce_path(pattern):
????if?pattern.startswith(‘/‘):
????????return?pattern

????return?‘/‘?+?pattern


class?_URLPattern(object):
????“““Internal?class?which?represents?a?URL?pattern.“““

????def?__init__(self?pattern):
????????self._pattern?=?pattern
????????self.priority?=?len(pattern)
????????self._contains_asterisk?=?‘*‘?in?self._pattern
????????self._contains_dollar?=?self._pattern.endswith(‘$‘)

????????if?self._contains_asterisk:
????????????self._pattern_before_asterisk?=?self._pattern[:self._pattern.find(‘*‘)]
????????elif?self._contains_dollar:
????????????self._pattern_before_dollar?=?self._pattern[:-1]

????????self._pattern_compiled?=?False

????def?match(self?url):
????????“““Retun?True?if?pattern?matches?the?given?URL?otherwise?return?False.“““
????????#?check?if?pattern?is?already?compiled
????????if?self._pattern_compiled:
????????????return?self._pattern.match(url)

????????if?not?self._contains_asterisk:
????????????if?not?self._contains_dollar:
????????????????#?answer?directly?for?patterns?without?wildcards
????????????????return?url.startswith(self._pattern)

????????????#?pattern?only?contains?$?wildcard.
????????????return?url?==?self._pattern_before_dollar

????????if?not?url.startswith(self._pattern_before_asterisk):
????????????return?False

????????self._pattern?=?self._prepare_pattern_for_regex(self._pattern)
????????self._pattern?=?re.compile(self._pattern)
????????self._pattern_compiled?=?True
????????return?self._pattern.match(url)

????def?_prepare_pattern_for_regex(self?pattern):
????????“““Return?equivalent?regex?pattern?for?the?given?URL?pattern.“““
????????pattern?=?re.sub(r‘\*+‘?‘*‘?pattern)
????????s?=?re.split(r‘(\*|\$$)‘?pattern)
????????for?index?substr?in

?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2020-05-19?11:01??lianjia\
?????目錄???????????0??2020-05-19?11:02??lianjia\.idea\
?????目錄???????????0??2020-05-09?16:17??lianjia\.idea\inspectionProfiles\
?????文件?????????174??2020-05-09?11:03??lianjia\.idea\inspectionProfiles\profiles_settings.xml
?????文件?????????361??2020-05-09?11:03??lianjia\.idea\lianjia.iml
?????文件?????????198??2020-05-09?11:03??lianjia\.idea\misc.xml
?????文件?????????273??2020-05-09?11:03??lianjia\.idea\modules.xml
?????文件????????6342??2020-05-19?11:02??lianjia\.idea\workspace.xml
?????文件???????17790??2020-05-19?10:59??lianjia\MyData.json
?????目錄???????????0??2020-05-09?16:19??lianjia\venv\
?????目錄???????????0??2020-05-09?11:02??lianjia\venv\Include\
?????目錄???????????0??2020-05-09?16:17??lianjia\venv\Lib\
?????目錄???????????0??2020-05-09?16:19??lianjia\venv\Lib\site-packages\
?????目錄???????????0??2020-05-09?16:17??lianjia\venv\Lib\site-packages\attr\
?????目錄???????????0??2020-05-09?16:17??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\
?????文件???????????4??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\INSTALLER
?????文件????????1082??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\LICENSE
?????文件????????9022??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\metaDATA
?????文件????????2184??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\RECORD
?????文件???????????5??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\top_level.txt
?????文件?????????110??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attrs-19.3.0.dist-info\WHEEL
?????文件????????2141??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\converters.py
?????文件?????????351??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\converters.pyi
?????文件????????1635??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\exceptions.py
?????文件?????????458??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\exceptions.pyi
?????文件????????1098??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\filters.py
?????文件?????????214??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\filters.pyi
?????文件???????????0??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\py.typed
?????文件???????11460??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\validators.py
?????文件????????1868??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\validators.pyi
?????文件????????7326??2020-05-09?11:19??lianjia\venv\Lib\site-packages\attr\_compat.py
............此處省略4028個文件信息

評論

共有 條評論