91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 17.84MB
    文件類型: .rar
    金幣: 1
    下載: 0 次
    發布日期: 2023-06-30
  • 語言: 其他
  • 標簽: bayes??

資源簡介

內含手機中文評論數(shù)據(jù)集(商品編號和評論),貝葉斯算法中文評論分類代碼,數(shù)據(jù)集+代碼

資源截圖

代碼片段和文件信息

import jieba
import pandas as pd
from sklearn import metrics, naive_bayes

from sklearn.feature_extraction.text import CountVectorizer  # term-frequency counting

# Module-level handle on the review dump. The first line of the file is
# consumed here and kept in `rows`; the handle itself is left open.
file = open('商品編號和評論.txt', 'r', encoding='UTF-8')
rows = file.readline()



def main():
    """Train and evaluate a Bernoulli naive-Bayes classifier on the reviews.

    Steps:
      1. Open ``商品編號和評論.txt`` (UTF-8), skip its first line, and read
         at most 10001 further lines.  A line is kept only when it splits
         into exactly five fields; field 2 is the review text and field 3
         the raw score.
      2. Pull the first run of digits out of each raw score, drop rows that
         have none, and map the number to a class label with ``score_get``.
      3. Segment every review with jieba, remove punctuation tokens,
         re-segment the cleaned text and join the tokens with spaces.
      4. Vectorise with ``CountVectorizer``, fit ``BernoulliNB`` on all but
         the last 1000 rows and predict the last 1000.
      5. Print the predictions, the vocabulary and the accuracy.

    The file is opened inside this function (and closed on exit), so the
    function does not depend on any handle opened at import time.

    Raises:
        ValueError: if the number of usable rows does not exceed the
            1000-row hold-out, which would leave nothing to train on.
    """
    max_lines = 10001   # the original loop read lines for num = 0..10000
    test_size = 1000    # rows held out from the tail for evaluation

    reviews = []  # review texts
    score = []    # raw score fields

    with open('商品編號和評論.txt', 'r', encoding='UTF-8') as file:
        rows = file.readline()  # first line is consumed and not parsed
        if rows:
            for line_no, line in enumerate(file):
                if line_no >= max_lines:
                    break
                # NOTE(review): the delimiter literal was lost when this
                # snippet was extracted; ',' is assumed - confirm against
                # the data file.
                ls = line.split(',')
                if len(ls) == 5:
                    reviews.append(ls[2])
                    score.append(ls[3])

    # Passing `columns` fixes the column order: reviews first, score second.
    df = pd.DataFrame(
        {'reviews': reviews, 'score': score},
        columns=['reviews', 'score'],
    )
    print(df)

    # Keep only the first integer found in each score field.  Rows with no
    # digits yield NaN and would make astype(int) raise, so drop them first.
    df['score'] = df['score'].str.extract(r'(\d+)', expand=False)
    df = df.dropna(subset=['score']).reset_index(drop=True)
    df['score'] = df['score'].astype(int).apply(score_get)

    if len(df) <= test_size:
        raise ValueError(
            'need more than %d usable rows to train and test, got %d'
            % (test_size, len(df))
        )

    # Punctuation tokens removed before vectorising.
    # NOTE(review): extraction made full-width vs ASCII ambiguous for these
    # marks, so both forms are listed - confirm against the original script.
    stopwords = {',', '!', '。', '、', '?', '~', ',', '!', '?', '~'}

    result = []  # one space-joined token string per review
    for text in df.iloc[:, 0].values:
        kept = [
            seg for seg in jieba.lcut(text, cut_all=False)
            if seg not in stopwords
        ]
        # Re-segment the punctuation-free text, as the original did: token
        # boundaries can differ once the punctuation is gone.
        seglist = jieba.lcut(''.join(kept), cut_all=False)
        result.append(' '.join(seglist))

    # NOTE: CountVectorizer's default token_pattern only keeps tokens of two
    # or more word characters, so single-character words are not counted.
    vectorize = CountVectorizer()
    X = vectorize.fit_transform(result)  # term counts of the cleaned reviews
    X = X.toarray()                      # dense array

    x_train = X[:-test_size]
    y_train = df.iloc[:-test_size, 1]
    x_test = X[-test_size:]
    y_test = df.iloc[-test_size:, 1]

    nb = naive_bayes.BernoulliNB()
    nb.fit(x_train, y_train)
    nb_pre = nb.predict(x_test)
    print(nb_pre)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back only on releases that predate it.
    feature_names = getattr(vectorize, 'get_feature_names_out', None)
    if feature_names is None:
        feature_names = vectorize.get_feature_names
    for w in feature_names():
        print(w)

    accuracy = metrics.accuracy_score(y_test, nb_pre)
    print('分類準確率:', accuracy)


def score_get(x):
    """Map a numeric score to a binary class label.

    Args:
        x: numeric score.

    Returns:
        1 when ``x`` is 3 or lower, otherwise 2.
    """
    return 1 if x <= 3 else 2



# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件        135  2018-12-12 15:25  suanfa\.idea\encodings.xml

     文件        295  2018-12-12 15:25  suanfa\.idea\misc.xml

     文件        264  2018-12-12 15:25  suanfa\.idea\modules.xml

     文件        438  2018-12-12 15:25  suanfa\.idea\suanfa.iml

     文件       9188  2018-12-12 15:25  suanfa\.idea\workspace.xml

     文件       2112  2018-12-13 10:43  suanfa\suanfa.py

     文件   75224050  2018-12-13 09:29  suanfa\商品編號和評論.txt

     目錄          0  2018-12-13 10:44  suanfa\.idea

     目錄          0  2018-12-13 10:44  suanfa

----------- ---------  ---------- -----  ----

             75236482                    9


評論

共有 條評論