資源簡介
文件內容比較
代碼片段和文件信息
using?PanGu;
using?PanGu.Match;
using?SimilarCompareCore.Model;
using?System;
using?System.Collections.Generic;
using?System.Linq;
using?System.Text;
using?System.Text.Regularexpressions;
using?System.Threading.Tasks;
namespace?SimilarCompareCore
{
????public?class?CompareCore
????{
????????///?
????????///?默認最小匹配數
????????///?
????????private?int?DefaultMinSameCount?=?4;
????????///?
????????///?默認最小相似度
????????///?
????????private?double?DefaultMinMatchPercent?=?0.6;
????????///?
????????///?默認行長度比:對比的行長度/目標行長度
????????///?
????????private?double?DefaultMinLineLengthPercent?=?0.6;
????????///?
????????///?設置最小匹配字數
????????///?
????????///?
????????public?void?SetDefaultMinSameCount(int?value)
????????{
????????????if?(value?1)?value?=?1;
????????????DefaultMinSameCount?=?value;
????????}
????????///?
????????///?設置最小匹配相似度
????????///?
????????///?
????????public?void?SetDefaultMinMatchPercent(double?value)
????????{
????????????if?(value?<=0)?value?=?0.1;
????????????DefaultMinMatchPercent?=?value;
????????}
????????///?
????????///?設置默認行長度比
????????///?
????????///?
????????public?void?SetDefaultMinLineLengthPercent(double?value)
????????{
????????????if?(value?<=?0)?value?=?0.5;
????????????DefaultMinLineLengthPercent?=?value;
????????}
????????///?
????????///?通過分詞將內容拆分
????????///?
????????///?
????????///?
????????public?virtual?IEnumerable?GetWordList(string?content)
????????{
????????????//TODO?將分詞結果緩存起來
????????????var?matchOptions?=?new?MatchOptions();
????????????matchOptions.FrequencyFirst?=?true;
????????????Segment?segment?=?new?Segment();
????????????var?words?=?segment.DoSegment(content?matchOptions).Select(word?=>?word.Word);
????????????return?words;
????????}
????????///?
????????///?拆分內容成行
????????///?
????????///?
????????///?
????????///?
????????public??List?GetLines(string?content?string?pattern)
????????{
????????????/*
?????????????*?根據正則來拆分,同時,根據正則匹配的結果,把拆分的分隔符記錄下來
?????????????*?因為每次拆分都會是根據匹配的符號拆分,所以必定是拆分的每一段內容都有相應的分隔符
?????????????*?將內容和分割符單獨保存下來,方便后面重新將一句句的重新拼接起來
?????????????*?
?????????????*/
????????????var?regex?=?new?Regex(pattern);
????????????if?(!regex.IsMatch(content))?return?null;
????????????var?matches?=?regex.Matches(content);
????????????var?lines?=?regex.Split(content);
????????????var?LineList?=?new?List();
????????????for?(int?i?=?0;?i?????????????{
????????????????if?(string.IsNullOrEmpty(lines[i]))?continue;
????????????????var?lineModel?=?new?LineModel?{?Content?=?lines[i]?RedTagIDList?=?new?List()?SameWordCountList?=?new?List()?SimilarPercentList?=?new?List
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
????.......??????4979??2019-02-19?15:40??SimilarCompareCoreTest\Data\content.txt
????.......??????5571??2019-02-19?15:40??SimilarCompareCoreTest\Data\content2.txt
????.......??????1135??2019-02-19?15:40??SimilarCompareCoreTest\Data\htmltemplate.html
????.......????266012??2019-02-19?15:40??SimilarCompareCoreTest\Data\jquery.js
?????文件????????208??2021-01-29?15:03??SimilarCompareCoreTest\obj\Debug\.NETfr
????.......??????7163??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\DesignTimeResolveAssemblyReferencesInput.cache
????.......??????2333??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.csproj.FileListAbsolute.txt
?????文件??????26172??2021-01-29?15:03??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.csprojAssemblyReference.cache
????.......?????38181??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.csprojResolveAssemblyReference.cache
????.......??????7680??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.exe
????.......?????15872??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.pdb
????.......??????3034??2019-02-19?15:40??SimilarCompareCoreTest\Program.cs
????.......??????1332??2019-02-19?15:40??SimilarCompareCoreTest\Properties\AssemblyInfo.cs
????.......??????3700??2019-02-19?15:40??SimilarCompareCoreTest\SimilarCompareCoreTest.csproj
?????文件???????1515??2019-02-19?15:40??filediff.sln
?????文件??????54784??2019-02-19?15:40??filediffCore.v12.suo
????..A..H.?????33280??2021-01-29?15:03??.vs\filediff\v16\.suo
????.......?????60416??2019-02-19?15:40??.vs\SimilarCompareCore\v15\.suo
????.......?????????0??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\db.lock
????.......??????4096??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\storage.ide
????.......?????32768??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\storage.ide-shm
????.......???2954072??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\storage.ide-wal
????..A..H.?????75264??2021-01-29?15:03??.vs\SimilarCompareCore\v16\.suo
????.......?????10411??2019-02-19?15:40??SimilarCompareCore\CompareCore.cs
????.......????454656??2019-02-19?15:40??SimilarCompareCore\Libs\Lucene.Net.dll
????.......???3801600??2019-02-19?15:40??SimilarCompareCore\Libs\PanGu.dll
????.......?????12288??2019-02-19?15:40??SimilarCompareCore\Libs\PanGu.HighLight.dll
????.......??????7168??2019-02-19?15:40??SimilarCompareCore\Libs\PanGu.Lucene.Analyzer.dll
????.......???????275??2019-02-19?15:40??SimilarCompareCore\Model\ba
????.......???????292??2019-02-19?15:40??SimilarCompareCore\Model\CompareArticalResultModel.cs
............此處省略41個文件信息
評論
共有 條評論