91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 39.11MB
    文件類型: .zip
    金幣: 1
    下載: 0 次
    發布日期: 2023-07-24
  • 語言: Java
  • 標簽: DOC2VEC

資源簡介

Doc2vec 是一組用來產生詞向量的相關模型。這些模型是淺層的雙層神經網絡,經訓練後用來重新建構詞語的語言學上下文。

資源截圖

代碼片段和文件信息

package?com.ansj.vec;

import?java.io.BufferedOutputStream;
import?java.io.BufferedReader;
import?java.io.DataOutputStream;
import?java.io.File;
import?java.io.FileInputStream;
import?java.io.FileOutputStream;
import?java.io.IOException;
import?java.io.InputStreamReader;
import?java.util.ArrayList;
import?java.util.HashMap;
import?java.util.List;
import?java.util.Map;
import?java.util.Map.Entry;

import?love.cq.util.MapCount;

import?com.ansj.vec.domain.HiddenNeuron;
import?com.ansj.vec.domain.Neuron;
import?com.ansj.vec.domain.WordNeuron;
import?com.ansj.vec.util.Haffman;

public?class?Learn?{

private?Map?wordMap?=?new?HashMap<>();

/**
?*?訓練多少個特征
?*/
private?int?layerSize?=?200;

/**
?*?上下文窗口大小
?*/
private?int?window?=?5;

private?double?sample?=?1e-3;
private?double?alpha?=?0.025;
private?double?startingAlpha?=?alpha;

public?int?EXP_TABLE_SIZE?=?1000;

private?Boolean?isCbow?=?false;

private?double[]?expTable?=?new?double[EXP_TABLE_SIZE];

private?int?trainWordsCount?=?0;

private?int?MAX_EXP?=?6;

private?int?freqThresold?=?5;

/**
 * Creates a trainer, overriding the built-in default of every setting whose
 * argument is non-null. A {@code null} argument keeps the field's default.
 *
 * @param isCbow    training-mode flag, or {@code null} to keep the default
 * @param layerSize number of features to train, or {@code null} to keep the default
 * @param window    context window size, or {@code null} to keep the default
 * @param alpha     initial step size, or {@code null} to keep the default
 * @param sample    subsampling threshold, or {@code null} to keep the default
 */
public Learn(Boolean isCbow, Integer layerSize, Integer window,
        Double alpha, Double sample) {
    createExpTable();
    if (isCbow != null) {
        this.isCbow = isCbow;
    }
    if (layerSize != null) {
        this.layerSize = layerSize;
    }
    if (window != null) {
        this.window = window;
    }
    if (alpha != null) {
        this.alpha = alpha;
        // trainModel recomputes alpha as startingAlpha * (1 - progress); without this
        // assignment the caller's value would be replaced by the default-based rate
        // at the first progress update.
        this.startingAlpha = alpha;
    }
    if (sample != null) {
        this.sample = sample;
    }
}

/**
 * Creates a trainer that uses the default value of every setting.
 *
 * @throws IOException declared for compatibility with existing callers; the only call
 *         made here, createExpTable(), is also invoked from the other constructor,
 *         which declares no checked exceptions
 */
public Learn() throws IOException {
    createExpTable();
}

/**
?*?trainModel
?*?
?*?@throws?IOException
?*/
private?void?trainModel(File?file)?throws?IOException?{
try?(BufferedReader?br?=?new?BufferedReader(new?InputStreamReader(
new?FileInputStream(file))))?{
String?temp?=?null;
long?nextRandom?=?5;
int?wordCount?=?0;
int?lastWordCount?=?0;
int?wordCountActual?=?0;
while?((temp?=?br.readLine())?!=?null)?{
if?(wordCount?-?lastWordCount?>?10000)?{
System.out.println(“alpha:“
+?alpha
+?“\tProgress:?“
+?(int)?(wordCountActual
/?(double)?(trainWordsCount?+?1)?*?100)
+?“%“);
wordCountActual?+=?wordCount?-?lastWordCount;
lastWordCount?=?wordCount;
alpha?=?startingAlpha
*?(1?-?wordCountActual
/?(double)?(trainWordsCount?+?1));
if?(alpha? alpha?=?startingAlpha?*?0.0001;
}
}
String[]?strs?=?temp.split(“?“);
wordCount?+=?strs.length;
List?sentence?=?new?ArrayList();
for?(int?i?=?0;?i? Neuron?entry?=?wordMap.get(strs[i]);
if?(entry?==?null)?{
continue;
}
//?The?subsampling?randomly?discards?frequent?words?while
//?keeping?the?ranking?same
if?(sample?>?0)?{
double?ran?=?(Math.sqrt(entry.freq
/?(sample?*?trainWordsCount))?+?1)
*?(sample?*?trainWordsCount)?/?entry.freq;
nextRandom?=?nextRandom?*?25214903917L?+?11;
if?(ran?

?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\
?????文件?????????471??2015-07-23?07:26??doc2vec_java-master\.classpath
?????文件?????????371??2015-07-23?07:26??doc2vec_java-master\.project
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\.settings\
?????文件?????????658??2015-07-23?07:26??doc2vec_java-master\.settings\org.eclipse.jdt.core.prefs
?????文件?????????647??2015-07-23?07:26??doc2vec_java-master\README.md
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\com\
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\
?????文件???????11613??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\Learn.class
?????文件???????11686??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\LearnDocVec.class
?????文件????????9784??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\Word2VEC.class
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\domain\
?????文件?????????389??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\domain\HiddenNeuron.class
?????文件?????????718??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\domain\Neuron.class
?????文件????????1207??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\domain\WordEntry.class
?????文件????????1610??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\domain\WordNeuron.class
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\
?????文件????????1475??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\Haffman.class
?????文件????????2818??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\ModelFile.class
?????文件????????1050??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\ReadWriteFile.class
?????文件????????1516??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\WordKmeans$Classes$1.class
?????文件????????3258??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\WordKmeans$Classes.class
?????文件????????4510??2015-07-23?07:26??doc2vec_java-master\bin\com\ansj\vec\util\WordKmeans.class
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\bin\test\
?????文件????????2937??2015-07-23?07:26??doc2vec_java-master\bin\test\Doc2VecTest.class
?????文件????????1484??2015-07-23?07:26??doc2vec_java-master\bin\test\Word2VecTest.class
?????目錄???????????0??2015-07-23?07:26??doc2vec_java-master\file\
?????文件?????7680759??2015-07-23?07:26??doc2vec_java-master\file\amazon_docs.txt
?????文件????16492176??2015-07-23?07:26??doc2vec_java-master\file\clinical_doc_200_java.vec
............此處省略27個文件信息

評論

共有 條評論

相關資源