資源簡介
C#源碼,演示字符串相似度算法:編輯距離、餘弦相似性、SimHash算法
代碼片段和文件信息
using?StringSimilarity.Algorithm;
using?StringSimilarity.Common;
using?System;
using?System.Collections.Generic;
using?System.ComponentModel;
using?System.Data;
using?System.Diagnostics;
using?System.Drawing;
using?System.Linq;
using?System.Text;
using?System.Threading.Tasks;
using?System.Windows.Forms;
namespace?StringSimilarity
{
????public?partial?class?MainForm?:?Form
????{
????????public?MainForm()
????????{
????????????InitializeComponent();
????????}
????????public?class?AddressInfo
????????{
????????????public?string?address?{?get;?set;?}
????????????public?string?rateBJ?{?get;?set;?}
????????????public?string?rateYX?{?get;?set;?}
????????????public?string?rateSim?{?get;?set;?}
????????????public?bool?isGet?{?get;?set;?}
????????}
????????public?DataTable?dtResult?=?new?DataTable();
????????private?void?btn_Deal_Click(object?sender?EventArgs?e)
????????{
????????????//計算結果
????????????if?(string.IsNullOrEmpty(txt_Str.Text))
????????????{
????????????????MessageBox.Show(“請輸入黑名單地址!“);
????????????????return;
????????????}
????????????else?if?(label3.Text?==?“--單擊選擇源數(shù)據(jù)文件(.xlsx文件)--“)
????????????{
????????????????MessageBox.Show(“請選擇源數(shù)據(jù)文件!“);
????????????????return;
????????????}
????????????else?if?(!ck_USE_YX.Checked?&&?!ck_USE_BJJL.Checked?&&?!ck_USE_Sim.Checked)
????????????{
????????????????MessageBox.Show(“請選擇算法!“);
????????????????return;
????????????}
????????????List?addressInfoList?=?new?List();
????????????foreach?(DataRow?dr?in?dt.Rows)
????????????{
????????????????AddressInfo?addressInfo?=?new?AddressInfo();
????????????????addressInfo.address?=?dr[“地址“].ToString();
????????????????addressInfoList.Add(addressInfo);
????????????}
????????????Stopwatch?stopwatch?=?new?Stopwatch();
????????????stopwatch.Start();
????????????//采用并行庫多線程處理
????????????System.Threading.Tasks.ParallelOptions?option?=?new?System.Threading.Tasks.ParallelOptions();
????????????option.MaxDegreeOfParallelism?=?1;//單線程處理
????????????System.Threading.Tasks.Parallel.ForEach(addressInfoList?option?addressInfo?=>
????????????{
????????????????if?(ck_USE_BJJL.Checked)
????????????????{
????????????????????bool?isFc?=?false;
????????????????????bool?removeNum?=?false;
????????????????????bool?removeLetter?=?false;
????????????????????bool?removeSymbol?=?false;
????????????????????//是否分詞
????????????????????if?(ck_FC_BJJL.Checked)
????????????????????{
????????????????????????isFc?=?true;
????????????????????}
????????????????????//剔除符號
????????????????????if?(ck_RemoveSymbol_BJJL.Checked)
????????????????????{
????????????????????????removeSymbol?=?true;
????????????????????}
????????????????????//剔除字母
????????????????????if?(ck_RemoveLetter_BJJL.Checked)
????????????????????{
????????????????????????removeLetter?=?true;
????????????????????}
????????????????????//剔除數(shù)字
????????????????????if?(ck_RemoveNum_BJJL.Checked)
????????????????????{
?????????
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件     401920  2017-09-21 18:04  StringSimilarity\packages\Excel\ErpCommon.dll
     文件     131072  2017-05-11 19:57  StringSimilarity\packages\Excel\ExcelLibrary.dll
     文件     200704  2017-05-11 19:57  StringSimilarity\packages\Excel\ICSharpCode.SharpZipLib.dll
     文件    1678848  2017-05-11 19:57  StringSimilarity\packages\Excel\NPOI.dll
     文件     494080  2017-05-11 19:57  StringSimilarity\packages\Excel\NPOI.OOxm
     文件      91136  2017-05-11 19:57  StringSimilarity\packages\Excel\NPOI.Openxm
     文件    2121728  2017-05-11 19:57  StringSimilarity\packages\Excel\NPOI.Openxm
     文件    2254144  2017-05-11 19:57  StringSimilarity\packages\Excel\NPOI.xm
     文件          0  2018-02-27 17:20  StringSimilarity\packages\jieba.NET.0.39.1\content\_._
     文件    5815323  2018-05-10 11:46  StringSimilarity\packages\jieba.NET.0.39.1\jieba.NET.0.39.1.nupkg
     文件      16896  2018-02-27 17:16  StringSimilarity\packages\jieba.NET.0.39.1\lib\net45\JiebaNet.Analyser.dll
     文件      33792  2018-02-27 17:16  StringSimilarity\packages\jieba.NET.0.39.1\lib\net45\JiebaNet.Segmenter.dll
     文件     956007  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\char_state_tab.json
     文件     910417  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\cn_synonym.txt
     文件    5420898  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\dict.txt
     文件    6200957  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\idf.txt
     文件    3147136  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\pos_prob_emit.json
     文件       6437  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\pos_prob_start.json
     文件     174313  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\pos_prob_trans.json
     文件    1251300  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\prob_emit.json
     文件        309  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\prob_trans.json
     文件       4482  2016-12-05 01:09  StringSimilarity\packages\jieba.NET.0.39.1\Resources\stopwords.txt
     文件     479744  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net20\Newtonsoft.Json.dll
     文件     552917  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net20\Newtonsoft.Json.xm
     文件     443392  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net35\Newtonsoft.Json.dll
     文件     495819  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net35\Newtonsoft.Json.xm
     文件     484864  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net40\Newtonsoft.Json.dll
     文件     514743  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net40\Newtonsoft.Json.xm
     文件     521216  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net45\Newtonsoft.Json.dll
     文件     514743  2016-01-09 13:50  StringSimilarity\packages\Newtonsoft.Json.8.0.2\lib\net45\Newtonsoft.Json.xm
............此處省略106個文件信息
評論
共有 條評論