91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6.08MB
    文件類型: .zip
    金幣: 2
    下載: 0 次
    發布日期: 2023-09-24
  • 語言: Python
  • 標簽:

資源簡介

Sequential event experiment based on travel notes crawled from Ctrip (XieCheng),基于50W攜程出行游記的采集與順承事件圖譜構建

資源截圖

代碼片段和文件信息

#!/usr/bin/env?python3
#?coding:?utf-8
#?File:?pattern.py
#?Author:?lhy
#?Date:?18-7-15

import?pymongo
import?re
import?jieba
from?sentence_parser?import?*

class?EventGraph:
def __init__(self):
    """Set up the database handles, the cue-word pattern and the parser.

    ``pymongo.MongoClient()`` is called without arguments, so PyMongo's
    default connection settings apply. Documents are read from
    ``travel.doc`` and extracted events are written to ``travel.events``.
    """
    conn = pymongo.MongoClient()
    # Three groups: text before the cue word, the cue word itself, and the
    # text after it. Both ``(.*)`` groups are greedy.
    # NOTE(review): the cue words are kept exactly as they appear in this
    # copy of the file; confirm they use the same script (simplified vs.
    # traditional Chinese) as the corpus being processed.
    self.pattern = re.compile(r'(.*)(其次|然后|接著|隨后|接下來)(.*)')
    self.col = conn['travel']['doc']
    self.col_insert = conn['travel']['events']
    self.parse_handler = LtpParser()

????‘‘‘長句切分‘‘‘
def seg_long_sents(self, content):
    """Split *content* into long sentences.

    ASCII spaces and ideographic spaces (U+3000) are removed first. The
    remaining text is split on sentence-level punctuation and line breaks,
    and only pieces longer than five characters are returned.

    Args:
        content: The raw text to split.

    Returns:
        A list of sentence strings, in their original order.
    """
    cleaned = content.replace(' ', '').replace('\u3000', '')
    # NOTE(review): the repeated characters in this class (``??``, ``!!``,
    # ``;;``, ``::``) are harmless duplicates as written; they may originally
    # have been full-width variants -- confirm against the upstream file.
    pieces = re.split(r'[??!!。;;::\n\r….·]', cleaned)
    return [sentence for sentence in pieces if len(sentence) > 5]

????‘‘‘短句切分‘‘‘
def process_subsent(self, content):
    """Split *content* into short clauses.

    The text is split on enumeration punctuation, a handful of
    single-character conjunctions, parentheses, ``~``, ``▲`` and ``.``.
    Pieces of length one or zero are discarded.

    Args:
        content: The text to split.

    Returns:
        A list of clause strings, in their original order.
    """
    # The delimiter class is reproduced exactly as it appears in this copy
    # of the file.
    pieces = re.split(r'[、,和與及且跟()~▲.]', content)
    return [s for s in pieces if len(s) > 1]

????‘‘‘處理數據庫中的文本‘‘‘
def process_doc(self):
    """Extract events from every stored document and persist them.

    Each document returned by ``self.col.find()`` has its ``'content'``
    field passed to ``collect_event``. When that yields a non-empty result,
    a ``{'events': ...}`` document is written to ``self.col_insert``;
    documents without events are skipped.
    """
    for item in self.col.find():
        events_all = self.collect_event(item['content'])
        if not events_all:
            continue
        # ``Collection.insert`` was deprecated in PyMongo 3 and removed in
        # PyMongo 4; ``insert_one`` is the single-document replacement.
        self.col_insert.insert_one({'events': events_all})

????‘‘‘統計收集EVENT‘‘‘
def collect_event(self, content):
    """Collect the events found in each long sentence of *content*.

    Args:
        content: The full text of one document.

    Returns:
        A list with one entry per sentence for which ``event_extract``
        returned a non-empty result; each entry is that result. The list
        is empty when no sentence yields events.
    """
    events_all = []
    for sent in self.seg_long_sents(content):
        events = self.event_extract(sent)
        if events:
            events_all.append(events)
    return events_all

????‘‘‘順承事件抽取‘‘‘
def event_extract(self, sent):
    """Extract sequential events from a single sentence.

    ``self.pattern`` is applied with ``findall``. For every match, the
    text before the cue word (group 0) and after it (group 2) are split
    into clauses with ``process_subsent``. A match contributes its clauses
    only when both sides produce at least one clause.

    Args:
        sent: One sentence.

    Returns:
        The result of ``extract_phrase`` over the collected clauses, or an
        empty list when the pattern does not match or no clauses were
        collected.
    """
    matches = self.pattern.findall(sent)
    if not matches:
        return []

    event_seqs = []
    for match in matches:
        pre_sents = self.process_subsent(match[0])
        post_sents = self.process_subsent(match[2])
        # A one-sided match has no "before -> after" relation to record.
        if pre_sents and post_sents:
            event_seqs += pre_sents
            event_seqs += post_sents

    if not event_seqs:
        return []
    # Turn the raw clauses into structured events.
    return self.extract_phrase(event_seqs)


????‘‘‘將一個長句中的句子進行分解,提取出其中的vob短語‘‘‘
def extract_phrase(self, event_seqs):
    """Gather the phrases that ``vob_exract`` returns for each clause.

    Args:
        event_seqs: An iterable of clause strings.

    Returns:
        A flat list containing the items of every non-empty
        ``vob_exract`` result, in clause order.
    """
    events = []
    for event in event_seqs:
        vobs = self.vob_exract(event)
        if vobs:
            events += vobs
    return events

????‘‘‘提取VOB關系‘‘‘
????def?vob_exract(self?content):
????????vobs?=?[]
????????words?=?list(jieba.cut(content))
????????if?len(words)?>=?300:
????????????return?[]
????????postags?=?self.parse_handler.get_postag(words)
????????tuples?child_dict_list?=?self.parse_handler.parser_main(words?postags)
????????for?tuple?in?tuples:
????????????rel?=?tuple[-1]
????????????pos_verb=?tuple[4][0]
????????????pos_object?=?tuple[2][0]
????????????if?rel?==?‘VOB‘?and?(pos_verb?pos_object)?in?[(‘v‘?‘n‘)?(‘v‘?‘i‘)]:
????????????????phrase?=?‘‘.

?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\
?????文件??????????93??2018-12-15?05:16??SequentialEventExtration-master\.gitattributes
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\.idea\
?????文件?????????398??2018-12-15?05:16??SequentialEventExtration-master\.idea\SequentialEventGraph.iml
?????文件?????????706??2018-12-15?05:16??SequentialEventExtration-master\.idea\misc.xml
?????文件?????????292??2018-12-15?05:16??SequentialEventExtration-master\.idea\modules.xml
?????文件?????????180??2018-12-15?05:16??SequentialEventExtration-master\.idea\vcs.xml
?????文件???????20948??2018-12-15?05:16??SequentialEventExtration-master\.idea\workspace.xml
?????文件????????7111??2018-12-15?05:16??SequentialEventExtration-master\README.md
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\event_graph\
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\dist\
?????文件???????30798??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\dist\vis.css
?????文件?????1532584??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\dist\vis.js
?????文件??????781766??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\dist\vis.map
?????文件???????22008??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\dist\vis.min.css
?????文件??????582497??2018-12-15?05:16??SequentialEventExtration-master\event_graph\VIS\dist\vis.min.js
?????文件????????3312??2018-12-15?05:16??SequentialEventExtration-master\event_graph\event_extract.py
?????文件????????4001??2018-12-15?05:16??SequentialEventExtration-master\event_graph\event_graph.py
?????文件????????7007??2018-12-15?05:16??SequentialEventExtration-master\event_graph\sentence_parser.py
?????文件????13643483??2018-12-15?05:16??SequentialEventExtration-master\event_graph\seq_events.txt
?????文件???????55935??2018-12-15?05:16??SequentialEventExtration-master\event_graph\travel_event_graph.html
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\image\
?????文件??????297401??2018-12-15?05:16??SequentialEventExtration-master\image\all.png
?????文件???????54270??2018-12-15?05:16??SequentialEventExtration-master\image\book.png
?????文件???????45649??2018-12-15?05:16??SequentialEventExtration-master\image\food.png
?????文件??????213664??2018-12-15?05:16??SequentialEventExtration-master\image\graph.png
?????文件???????86232??2018-12-15?05:16??SequentialEventExtration-master\image\plane.png
?????文件???????99399??2018-12-15?05:16??SequentialEventExtration-master\image\train.png
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\news_spider\
?????目錄???????????0??2018-12-15?05:16??SequentialEventExtration-master\news_spider\.idea\
............此處省略22個文件信息

評論

共有 條評論