91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

使用 Python 3.7 + Scrapy + MongoDB 框架爬取新浪微博的用戶信息以及微博動態，實現了高匿 IP 代理池和偽裝請求 User-Agent。教程貼請移步：https://blog.csdn.net/mengyanyuan8023/article/details/94017903

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy
from scrapy.loader.processors import TakeFirst


class SinaUserItem(scrapy.Item):
    """Profile data scraped for a single Sina Weibo user.

    Every active field declares ``TakeFirst()`` as its output processor, so
    when the item is populated through an ``ItemLoader`` each field is
    collapsed to a single value instead of a list.
    """

    # Unique identifier of the Weibo user
    user_id = scrapy.Field(output_processor=TakeFirst())
    # User nickname
    username = scrapy.Field(output_processor=TakeFirst())
    # Number of posts (field name kept as-is; other code may key on it)
    webo_num = scrapy.Field(output_processor=TakeFirst())
    # Number of accounts the user follows
    follow_num = scrapy.Field(output_processor=TakeFirst())
    # Number of followers (fans)
    fans_num = scrapy.Field(output_processor=TakeFirst())
    # Gender
    gender = scrapy.Field(output_processor=TakeFirst())
    # Region
    district = scrapy.Field(output_processor=TakeFirst())
    # Province
    province = scrapy.Field(output_processor=TakeFirst())
    # City
    city = scrapy.Field(output_processor=TakeFirst())
    # Birthday
    birthday = scrapy.Field(output_processor=TakeFirst())
    # Short self-introduction
    brief_intro = scrapy.Field(output_processor=TakeFirst())
    # Account verification info
    identify = scrapy.Field(output_processor=TakeFirst())
    # Desktop-site URL (field currently disabled)
    # internet_url = scrapy.Field(output_processor=TakeFirst())
    # Mobile-site URL (field currently disabled)
    # mobile_url = scrapy.Field(output_processor=TakeFirst())
    # Avatar image URL
    head_img = scrapy.Field(output_processor=TakeFirst())
    # Tags (field currently disabled)
    # tag = scrapy.Field()

    # Time at which the record was crawled
    crawl_time = scrapy.Field(output_processor=TakeFirst())


class WeiBoContentItem(scrapy.Item):
    """A single Weibo post scraped from a user's timeline.

    All active fields except ``weibo_images`` declare ``TakeFirst()`` as
    their output processor, so an ``ItemLoader`` collapses them to a single
    value. ``weibo_images`` declares no processor.
    """

    # ID of the user who owns the post
    user_id = scrapy.Field(output_processor=TakeFirst())
    # ID of the post
    weibo_id = scrapy.Field(output_processor=TakeFirst())
    # Visibility / permission (field currently disabled)
    # authority = scrapy.Field(output_processor=TakeFirst())
    # Text content of the post
    weibo_content = scrapy.Field(output_processor=TakeFirst())
    # Images attached to the post; no output processor is set here
    # (presumably so that multiple image URLs are kept -- confirm in the spider)
    weibo_images = scrapy.Field()
    # Local path where the images are saved (field currently disabled)
    # images_path = scrapy.Field()
    # Post type (original / repost)
    weibo_type = scrapy.Field(output_processor=TakeFirst())
    # Publication time
    post_time = scrapy.Field(output_processor=TakeFirst())
    # Number of likes
    like_count = scrapy.Field(output_processor=TakeFirst())
    # Number of comments
    comment_count = scrapy.Field(output_processor=TakeFirst())
    # Number of reposts
    retweet_count = scrapy.Field(output_processor=TakeFirst())
    # Client / device the post was published from
    terminal = scrapy.Field(output_processor=TakeFirst())


if __name__ == '__main__':
    # Quick manual check: look up a field that has not been set on a fresh
    # item and print whatever ``get`` returns for it.
    item = WeiBoContentItem()
    print(item.get('weibo_images'))

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件         90  2019-06-28 16:24  sina_crawl\.git\COMMIT_EDITMSG

     文件        311  2019-06-19 18:11  sina_crawl\.git\config

     文件         73  2019-06-13 16:42  sina_crawl\.git\description

     文件        102  2019-06-18 15:32  sina_crawl\.git\FETCH_HEAD

     文件         25  2019-06-19 18:09  sina_crawl\.git\HEAD

     文件        478  2019-06-13 16:42  sina_crawl\.git\hooks\applypatch-msg.sample

     文件        896  2019-06-13 16:42  sina_crawl\.git\hooks\commit-msg.sample

     文件       3327  2019-06-13 16:42  sina_crawl\.git\hooks\fsmonitor-watchman.sample

     文件        189  2019-06-13 16:42  sina_crawl\.git\hooks\post-update.sample

     文件        424  2019-06-13 16:42  sina_crawl\.git\hooks\pre-applypatch.sample

     文件       1638  2019-06-13 16:42  sina_crawl\.git\hooks\pre-commit.sample

     文件       1348  2019-06-13 16:42  sina_crawl\.git\hooks\pre-push.sample

     文件       4898  2019-06-13 16:42  sina_crawl\.git\hooks\pre-rebase.sample

     文件        544  2019-06-13 16:42  sina_crawl\.git\hooks\pre-receive.sample

     文件       1492  2019-06-13 16:42  sina_crawl\.git\hooks\prepare-commit-msg.sample

     文件       3610  2019-06-13 16:42  sina_crawl\.git\hooks\update.sample

     文件       3052  2019-06-28 16:24  sina_crawl\.git\index

     文件        240  2019-06-13 16:42  sina_crawl\.git\info\exclude

     文件       3602  2019-06-28 16:24  sina_crawl\.git\logs\HEAD

     文件       2383  2019-06-18 17:51  sina_crawl\.git\logs\refs\heads\master

     文件       1056  2019-06-28 16:24  sina_crawl\.git\logs\refs\heads\V1.0.619

     文件       2186  2019-06-18 17:51  sina_crawl\.git\logs\refs\remotes\origin\master

     文件        564  2019-06-28 16:24  sina_crawl\.git\logs\refs\remotes\origin\V1.0.619

     文件       1353  2019-06-18 17:51  sina_crawl\.git\objects\03\ba28b053d3d5240e547e4b4b2e085059f828cb

     文件       4208  2019-06-19 18:11  sina_crawl\.git\objects\04\3b0786233c2d8f36c77c4f2e010104e3eadeb1

     文件       4133  2019-06-18 15:36  sina_crawl\.git\objects\07\8f1dcdae600525ca47a610de0b2f1bb0aec835

     文件       4091  2019-06-28 16:24  sina_crawl\.git\objects\09\33f7f23e6f4eb016140a10b7bd04c85d3abc73

     文件       1943  2019-06-28 16:22  sina_crawl\.git\objects\0a\23b39cdfdfb9970825a275bff8bfe7be964285

     文件        228  2019-06-14 17:52  sina_crawl\.git\objects\0a\b84e0fced0f1e0528e97cfea25e0aa9a1870d1

     文件       1931  2019-06-28 16:22  sina_crawl\.git\objects\0b\bdbbf14ec7094db5e2c013380bb55be9f9eee7

............此處省略464個文件信息

評論

共有 條評論