資源簡介
采用TF余弦算法,對于word內容進行相似度的比較,并且對于圖片也有相似度的比較,顯示結果,按照相似度進行降序排序,展示出來相似度最高的2篇
代碼片段和文件信息
using?System;
using?System.Collections.Generic;
using?System.ComponentModel;
using?System.Data;
using?System.Drawing;
using?System.Linq;
using?System.Text;
using?System.Windows.Forms;
using?System.IO;
using?Word?=?Microsoft.Office.Interop.Word;
using?NPOI.XWPF.UserModel;
namespace?TFIDFMeasure
{
????public?partial?class?Form1?:?Form
????{
????????public?Form1()
????????{
????????????InitializeComponent();
????????????
????????}
????????//文件名
????????public?List?FieldName?=?new?List();
????????//文件路徑
????????public?List?FieldPath?=?new?List();
????????//是否上傳了文件
????????public?bool?IsUpLoad?=?false;
????????//相似度比較結果
????????public?List?str?=?new?List();
????????public?List?num?=?new?List();
????????public?List?num1?=?new?List();
????????///?
????????///?檢測按鈕事件
????????///?
????????///?
????????///?
????????private?void?button1_Click(object?sender?EventArgs?e)
????????{
????????????if?(IsUpLoad?==?true)
????????????{
????????????????List?note?=?Doc2Text(FieldPath);
????????????????for?(int?i?=?0;?i?????????????????{
????????????????????for?(int?j?=?i?+?1;?j?????????????????????{
????????????????????????string?name?=?note[i].Name?+?“&“?+?note[j].Name;
????????????????????????string?s1?=?note[i].Note;
????????????????????????string?s2?=?note[j].Note;
????????????????????????Similarity?sl?=?new?Similarity();
????????????????????????double?d?=?sl.getSimilarity(s1?s2);
????????????????????????IAnalyser?analyser?=?new?SimHashAnalyser();
????????????????????????var?l?=?analyser.GetLikenessValue(s1?s2);
????????????????????????double?likeness?=?d;
????????????????????????if?(System.Math.Abs(d?*?100?-?l?*?100)?20)
????????????????????????{
????????????????????????????likeness?=?l;
????????????????????????}
????????????????????????str.Add(name);
????????????????????????num.Add(d);
????????????????????????num1.Add(likeness);
????????????????????????this.textBox1.Text?+=?string.Format(“文章:{0},文章:{1},相似度:{2}“?name.Split(‘&‘)[0]?name.Split(‘&‘)[1]?d)+“\r\n“;
????????????????????????this.textBox2.Text?+=?string.Format(“文章:{0},文章:{1},相似度:{2}“?name.Split(‘&‘)[0]?name.Split(‘&‘)[1]?likeness)?+?“\r\n“;
????????????????????????if?(System.Math.Abs(d?*?100?-?likeness?*?100)?10)
????????????????????????{
????????????????????????????this.textBox3.Text?+=?string.Format(“文章:{0},文章:{1}相似度誤差小于10%,結(jié)果較為準(zhǔn)確!“?name.Split(‘&‘)[0]?name.Split(‘&‘)[1])?+?“\r\n“;
????????????????????????}
????????????????????????else
????????????????????????{
????????????????????????????this.textBox3.Text?+=?string.Format(“文章:{0},文章:{1}相似度最終結(jié)果為:{2}!“?name.Split(‘&‘)[0]?name.Split(‘&‘)[1](decimal)(d+likeness)/2)?+?“\r\n“;
????????????????????????}
????????????????????}
????????????????}
????????????}
????????????els
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????文件?????396288??2018-02-28?09:08??TFIDFMeasure\bin\Debug\NPOI.v2.0.OOxm
?????文件??????84480??2017-07-31?11:47??TFIDFMeasure\bin\Debug\NPOI.v2.0.Openxm
?????文件??????25600??2018-04-18?15:10??TFIDFMeasure\bin\Debug\TFIDFMeasure.exe
?????文件??????54784??2018-04-18?15:10??TFIDFMeasure\bin\Debug\TFIDFMeasure.pdb
?????文件??????11600??2018-04-18?15:21??TFIDFMeasure\bin\Debug\TFIDFMeasure.vshost.exe
?????文件????????490??2017-03-19?05:00??TFIDFMeasure\bin\Debug\TFIDFMeasure.vshost.exe.manifest
?????文件??????11513??2018-04-18?15:23??TFIDFMeasure\Form1.cs
?????文件???????7015??2018-04-17?18:31??TFIDFMeasure\Form1.Designer.cs
?????文件???????6020??2018-04-17?18:31??TFIDFMeasure\Form1.resx
?????文件???????5420??2018-04-08?14:58??TFIDFMeasure\obj\x86\Debug\DesignTimeResolveAssemblyReferences.cache
?????文件???????6650??2018-04-18?15:10??TFIDFMeasure\obj\x86\Debug\DesignTimeResolveAssemblyReferencesInput.cache
?????文件?????407040??2018-04-09?09:04??TFIDFMeasure\obj\x86\Debug\Interop.Microsoft.Office.Core.dll
?????文件?????434688??2018-04-09?08:58??TFIDFMeasure\obj\x86\Debug\Interop.Office.dll
?????文件????????722??2018-04-18?15:21??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csproj.FileListAbsolute.txt
?????文件????????975??2018-04-17?18:32??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csproj.GenerateResource.Cache
?????文件????????870??2018-04-09?09:04??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csproj.ResolveComReference.cache
?????文件??????37119??2018-04-18?15:02??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csprojResolveAssemblyReference.cache
?????文件??????25600??2018-04-18?15:10??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.exe
?????文件????????180??2018-04-17?18:32??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.Form1.resources
?????文件??????54784??2018-04-18?15:10??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.pdb
?????文件????????180??2018-04-17?18:21??TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.Properties.Resources.resources
?????文件????????493??2018-04-08?14:44??TFIDFMeasure\Program.cs
?????文件???????1356??2018-04-08?14:44??TFIDFMeasure\Properties\AssemblyInfo.cs
?????文件???????2876??2018-04-08?14:44??TFIDFMeasure\Properties\Resources.Designer.cs
?????文件???????5612??2018-04-08?14:44??TFIDFMeasure\Properties\Resources.resx
?????文件???????1099??2018-04-08?14:44??TFIDFMeasure\Properties\Settings.Designer.cs
?????文件????????249??2018-04-08?14:44??TFIDFMeasure\Properties\Settings.settings
?????文件???????4446??2018-04-17?18:06??TFIDFMeasure\SimHashAnalyser.cs
?????文件??????11097??2018-04-09?08:21??TFIDFMeasure\Similarity.cs
?????文件???????4621??2018-04-17?18:21??TFIDFMeasure\TFIDFMeasure.csproj
............此處省略19個文件信息
評論
共有 條評論