資源簡介
本系統以SpringBoot基礎框架整合其他技術設計和搭建而成,選用webmagic框架實現單節點的網絡爬蟲系統,爬蟲的生命周期為鏈接提取、頁面下載、內容抽取、持久化,採用多線程抓取機制,以Redis隊列和集合實現網頁去重和增量抓取。搜索引擎的索引和搜索系統是利用全文搜索引擎框架(ElasticSearch)構建,由IK分詞器實現語句分詞的功能。ElasticSearch是一個企業級分布式、高擴展、高實時的搜索與數據分析處理引擎,可以用于搜索各種文檔,它提供可擴展的搜索,具有高效的海量數據搜索、分析和探索的能力。最后實現一個簡單的web搜索頁面,來模擬搜索引擎客戶端。

代碼片段和文件信息
/*
 * Copyright 2007-present the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import?java.net.*;
import?java.io.*;
import?java.nio.channels.*;
import?java.util.Properties;
public?class?MavenWrapperDownloader?{
????private?static?final?String?WRAPPER_VERSION?=?“0.5.6“;
????/**
?????*?Default?URL?to?download?the?maven-wrapper.jar?from?if?no?‘downloadUrl‘?is?provided.
?????*/
????private?static?final?String?DEFAULT_DOWNLOAD_URL?=?“https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/“
????????????+?WRAPPER_VERSION?+?“/maven-wrapper-“?+?WRAPPER_VERSION?+?“.jar“;
????/**
?????*?Path?to?the?maven-wrapper.properties?file?which?might?contain?a?downloadUrl?property?to
?????*?use?instead?of?the?default?one.
?????*/
????private?static?final?String?MAVEN_WRAPPER_PROPERTIES_PATH?=
????????????“.mvn/wrapper/maven-wrapper.properties“;
????/**
?????*?Path?where?the?maven-wrapper.jar?will?be?saved?to.
?????*/
????private?static?final?String?MAVEN_WRAPPER_JAR_PATH?=
????????????“.mvn/wrapper/maven-wrapper.jar“;
????/**
?????*?Name?of?the?property?which?should?be?used?to?override?the?default?download?url?for?the?wrapper.
?????*/
????private?static?final?String?PROPERTY_NAME_WRAPPER_URL?=?“wrapperUrl“;
????public?static?void?main(String?args[])?{
????????System.out.println(“-?Downloader?started“);
????????File?baseDirectory?=?new?File(args[0]);
????????System.out.println(“-?Using?base?directory:?“?+?baseDirectory.getAbsolutePath());
????????//?If?the?maven-wrapper.properties?exists?read?it?and?check?if?it?contains?a?custom
????????//?wrapperUrl?parameter.
????????File?mavenWrapperPropertyFile?=?new?File(baseDirectory?MAVEN_WRAPPER_PROPERTIES_PATH);
????????String?url?=?DEFAULT_DOWNLOAD_URL;
????????if?(mavenWrapperPropertyFile.exists())?{
????????????FileInputStream?mavenWrapperPropertyFileInputStream?=?null;
????????????try?{
????????????????mavenWrapperPropertyFileInputStream?=?new?FileInputStream(mavenWrapperPropertyFile);
????????????????Properties?mavenWrapperProperties?=?new?Properties();
????????????????mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
????????????????url?=?mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL?url);
????????????}?catch?(IOException?e)?{
????????????????System.out.println(“-?ERROR?loading?‘“?+?MAVEN_WRAPPER_PROPERTIES_PATH?+?“‘“);
????????????}?finally?{
????????????????try?{
????????????????????if?(mavenWrapperPropertyFileInputStream?!=?null)?{
?
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2020-06-14 12:54  search-engine\
     目錄           0  2020-06-14 12:53  search-engine\search-engine\
     文件         333  2020-04-23 10:57  search-engine\search-engine\.gitignore
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\
     文件         184  2020-04-29 21:00  search-engine\search-engine\.idea\.gitignore
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\artifacts\
     文件         485  2020-04-23 11:16  search-engine\search-engine\.idea\artifacts\search_engine_war.xm
     文件       21281  2020-05-05 00:31  search-engine\search-engine\.idea\artifacts\search_engine_war_exploded.xm
     文件         830  2020-04-23 11:16  search-engine\search-engine\.idea\compiler.xm
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\dataSources\
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\dataSources\7f4171d0-2398-4e1d-a9d2-9c84daaa0f0d\
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\dataSources\7f4171d0-2398-4e1d-a9d2-9c84daaa0f0d\storage_v2\
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\dataSources\7f4171d0-2398-4e1d-a9d2-9c84daaa0f0d\storage_v2\_src_\
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\dataSources\7f4171d0-2398-4e1d-a9d2-9c84daaa0f0d\storage_v2\_src_\schema\
     文件          76  2020-04-25 17:59  search-engine\search-engine\.idea\dataSources\7f4171d0-2398-4e1d-a9d2-9c84daaa0f0d\storage_v2\_src_\schema\information_schema.FNRwLQ.me
     文件       29120  2020-05-05 01:49  search-engine\search-engine\.idea\dataSources\7f4171d0-2398-4e1d-a9d2-9c84daaa0f0d.xm
     文件         984  2020-04-25 18:01  search-engine\search-engine\.idea\dataSources.local.xm
     文件         525  2020-04-25 17:58  search-engine\search-engine\.idea\dataSources.xm
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\dictionaries\
     文件         490  2020-04-30 18:16  search-engine\search-engine\.idea\dictionaries\qirui.xm
     文件         267  2020-04-29 21:01  search-engine\search-engine\.idea\encodings.xm
     文件         864  2020-04-29 21:08  search-engine\search-engine\.idea\jarRepositories.xm
     目錄           0  2020-06-14 12:53  search-engine\search-engine\.idea\libraries\
     文件         462  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__antlr_antlr_2_7_7.xm
     文件         568  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__ch_qos_logback_logback_classic_1_2_3.xm
     文件         547  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__ch_qos_logback_logback_core_1_2_3.xm
     文件         543  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__commons_codec_commons_codec_1_13.xm
     文件         616  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__commons_collections_commons_collections_3_2_2.xm
     文件         517  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__commons_io_commons_io_1_3_2.xm
     文件         514  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__com_alibaba_fastjson_1_2_28.xm
     文件         499  2020-04-29 21:08  search-engine\search-engine\.idea\libraries\Maven__com_carrotsearch_hppc_0_7_1.xm
............此處省略300個文件信息
- 上一篇:往年卷子.zip
- 下一篇:基于時間序列分析的故障診斷
評論
共有 條評論