資源簡介
基於機器學習的情感分析,簡單實現,可顯示準確率、精確率、召回率、F1值
代碼片段和文件信息
import sys
import time

import numpy as np
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from yelp_utils import numLines, loadData
# Text sentiment classification
def classify(technique, posneg, percentData):
    """Train a text classifier on part of the review file and print its metrics.

    The first 70% of the selected lines are used for training and the rest
    for testing. Elapsed fit/predict time and the evaluation metrics are
    printed to stdout; nothing is returned.

    Args:
        technique: Classifier key: 'nb' (multinomial naive Bayes), 'svm'
            (linear SVM trained with SGD) or 'lr' (logistic regression).
        posneg: Forwarded unchanged to ``loadData``.
            NOTE(review): presumably toggles positive/negative labelling;
            confirm against yelp_utils.loadData.
        percentData: Percentage of the file's lines to use; converted with
            ``float()``, so both numbers and numeric strings are accepted.

    Raises:
        ValueError: If ``technique`` is not one of the supported keys.
    """
    # Choose the classifier (naive Bayes, SVM or logistic regression).
    # This happens before any data is read so that a bad key fails fast
    # instead of surfacing later as an unbound local variable.
    if technique == 'nb':
        clf_obj = MultinomialNB()
    elif technique == 'svm':
        # ``n_iter`` was removed from SGDClassifier; ``max_iter=5`` with
        # ``tol=None`` keeps the original behavior of exactly 5 epochs.
        clf_obj = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                                max_iter=5, tol=None, random_state=42)
    elif technique == 'lr':
        clf_obj = LogisticRegression()
    else:
        raise ValueError("unknown technique: %r" % (technique,))

    # Read the data, split proportionally into training and test sets
    # (the 70/30 ratio can be adjusted here).
    filename = 'Phoenix_reviews_filtered.json'
    num_lines = numLines(filename)
    linesToRead = int(num_lines * (float(percentData) / 100.0))
    # NOTE(review): train_end is a float and the test range starts at
    # train_end + 1; whether loadData treats its bounds as inclusive is not
    # visible here, so the original boundaries are kept unchanged.
    train_end = linesToRead * 0.7
    train_data, train_labels = loadData(filename, 0, train_end, posneg)
    test_data, test_labels = loadData(filename, train_end + 1, linesToRead, posneg)

    start_time = time.time()

    # Tokenize the raw reviews (dropping English stop words) and vectorize
    # them with TF-IDF before handing them to the classifier.
    text_clf = Pipeline([
        ('vect', CountVectorizer(stop_words='english')),
        ('tfidf', TfidfTransformer()),
        ('clf', clf_obj),
    ])

    # Train on the training set.
    text_clf = text_clf.fit(train_data, train_labels)

    # Predict on the test set.
    predicted = text_clf.predict(test_data)
    print("time: %s seconds" % (time.time() - start_time))

    # Show the evaluation results for the predictions.
    get_metrics(true_labels=test_labels, predicted_labels=predicted)
def get_metrics(true_labels, predicted_labels):
    """Print accuracy, precision, recall and F1 score rounded to 2 decimals.

    Precision, recall and F1 are computed with ``average='weighted'``.

    Args:
        true_labels: Ground-truth labels of the test samples.
        predicted_labels: Labels predicted by the classifier, in the same
            order as ``true_labels``.
    """
    accuracy = metrics.accuracy_score(true_labels, predicted_labels)
    print('Accuracy:', np.round(accuracy, 2))

    # The remaining metrics share the same call signature, so they are
    # driven from a table to keep label and scorer side by side.
    weighted_scorers = (
        ('Precision:', metrics.precision_score),
        ('Recall:', metrics.recall_score),
        ('F1 Score:', metrics.f1_score),
    )
    for label, scorer in weighted_scorers:
        score = scorer(true_labels, predicted_labels, average='weighted')
        print(label, np.round(score, 2))
def print_usage():
    """Print the command-line synopsis and an example invocation."""
    # The three positional arguments correspond to the parameters checked by
    # valid_args: technique, posneg and percentData.
    print("Usage: classify.py <technique> <posneg> <percentData>")
    print("e.g. classify.py nb True 85")
def valid_args(avail_techniques, technique, posneg, percentData):
    """Return True when the command-line arguments are acceptable.

    Args:
        avail_techniques: Container of supported technique keys.
        technique: Technique key supplied by the user.
        posneg: Must be the string 'True' or 'False'.
        percentData: Integer percentage (as string or number) in [0, 100].

    Returns:
        True if every argument is valid, otherwise False. A percentage that
        cannot be parsed as an integer is reported as invalid rather than
        raising.
    """
    if technique not in avail_techniques:
        return False
    if posneg not in ('True', 'False'):
        return False
    try:
        percent = int(percentData)
    except (TypeError, ValueError):
        # Non-numeric input is simply an invalid argument.
        return False
    return 0 <= percent <= 100
if?__name__?==?‘__main__‘:
????techniques?=?{‘nb‘:?‘Naive?Bayes‘?‘svm‘:?‘Support?Vector?Machines‘?‘lr‘:?‘Logistic?Regression‘}
????
????try:
????????technique?=?sys.arg
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-12-15 11:54  Sentiment-Analysis-Logistic\
     目錄           0  2018-12-15 11:56  Sentiment-Analysis-Logistic\.idea\
     文件         600  2018-12-15 10:25  Sentiment-Analysis-Logistic\.idea\Sentiment-Analysis-Logistic.iml
     目錄           0  2018-12-15 11:56  Sentiment-Analysis-Logistic\.idea\inspectionProfiles\
     目錄           0  2018-12-15 10:25  Sentiment-Analysis-Logistic\.idea\libraries\
     文件         128  2018-12-15 10:25  Sentiment-Analysis-Logistic\.idea\libraries\R_User_Library.xml
     文件         306  2018-12-15 10:21  Sentiment-Analysis-Logistic\.idea\modules.xml
     文件       12842  2018-12-15 11:56  Sentiment-Analysis-Logistic\.idea\workspace.xml
     文件        1081  2016-01-03 07:56  Sentiment-Analysis-Logistic\MIT-LICENSE
     文件    70445752  2018-12-06 23:54  Sentiment-Analysis-Logistic\Phoenix_reviews_filtered.json
     文件        2078  2016-01-03 07:56  Sentiment-Analysis-Logistic\README.md
     目錄           0  2018-12-15 10:28  Sentiment-Analysis-Logistic\__pycache__\
     文件         898  2018-12-15 10:28  Sentiment-Analysis-Logistic\__pycache__\yelp_utils.cpython-35.pyc
     文件        3855  2018-12-15 11:54  Sentiment-Analysis-Logistic\classify.py
     文件         643  2018-12-15 10:28  Sentiment-Analysis-Logistic\yelp_utils.py
評論
共有 條評論