91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 5.05MB
    文件類型: .zip
    金幣: 2
    下載: 0 次
    發布日期: 2023-11-17
  • 語言: 其他
  • 標簽: KNN??

資源簡介

KNN文本聚類,下下來就能用,用于文本的無監督學習,注意內存

資源截圖

代碼片段和文件信息


#?coding:?utf-8

#?In[1]:

import?matplotlib.pyplot?as?plt
from?collections?import?defaultdict
from?collections?import?Counter
from?scipy.sparse?import?csr_matrix?find
import?numpy?as?np
import?random
from?sklearn.utils?import?shuffle
from?sklearn.metrics?import?calinski_harabaz_score


def csr_build(dataIndex, value, nnz, nrows):
    """Assemble a CSR matrix from per-row column indices and values.

    Args:
        dataIndex: Iterable of rows; each row is a sequence of column
            indices.
        value: Iterable of rows parallel to ``dataIndex``; each row holds the
            value stored at the corresponding column index.
        nnz: Total number of entries over all rows.
        nrows: Number of rows of the resulting matrix.

    Returns:
        A ``scipy.sparse.csr_matrix`` of dtype double with ``nrows`` rows and
        ``largest column index + 1`` columns (zero columns when ``nnz`` is
        0), with the column indices of every row sorted.
    """
    # ``np.int`` was removed in NumPy 1.24; use an explicit integer dtype.
    ind = np.zeros(nnz, dtype=np.int64)
    val = np.zeros(nnz, dtype=np.double)
    ptr = np.zeros(nrows + 1, dtype=np.int64)

    filled = 0  # number of entries written so far
    for row, (cols, vals) in enumerate(zip(dataIndex, value)):
        width = len(cols)
        ind[filled:filled + width] = cols
        val[filled:filled + width] = vals[:width]
        filled += width
        ptr[row + 1] = filled  # row pointer = running entry count

    # Guard the empty case: taking the max of an empty array raises.
    ncols = int(ind.max()) + 1 if nnz else 0
    mat = csr_matrix((val, ind, ptr), shape=(nrows, ncols), dtype=np.double)
    mat.sort_indices()

    return mat

# scale matrix and normalize its rows
def csr_idf(mat, copy=False, **kargs):
    """Scale the stored values of a CSR matrix by inverse document frequency.

    The document frequency of a column is the number of stored entries with
    that column index; its scaling factor is ``log(nrows / frequency)``.

    Args:
        mat: A CSR matrix. It is modified in place unless ``copy`` is True.
        copy: When exactly ``True``, work on (and return) a copy of ``mat``.
        **kargs: Accepted for call compatibility; not used.

    Returns:
        When ``copy`` is ``False``: a ``defaultdict`` mapping each column
        index that occurs in ``mat`` to its scaling factor. Otherwise: the
        scaled matrix (the scaling factors are not returned in that case).
    """
    if copy is True:
        mat = mat.copy()
    nrows = mat.shape[0]
    nnz = mat.nnz
    # Slices are views, so the in-place multiply below updates ``mat``.
    ind = mat.indices[:nnz]
    val = mat.data[:nnz]

    # document frequency per occurring column, computed in one pass
    cols, inverse, counts = np.unique(
        ind, return_inverse=True, return_counts=True
    )
    # inverse document frequency
    idf = np.log(nrows / counts.astype(np.double))
    # scale by idf; "unsafe" casting keeps the element-wise assignment
    # semantics of ``val[i] *= factor`` for non-float data
    np.multiply(val, idf[inverse], out=val, casting="unsafe")

    df = defaultdict(int)
    df.update(zip(cols, idf))

    return df if copy is False else mat

def csr_l2normalize(mat, copy=False, **kargs):
    """Normalize the rows of a CSR matrix by their L-2 norm.

    Rows whose sum of squares is zero (including empty rows) are left
    unchanged.

    Args:
        mat: A CSR matrix. It is modified in place unless ``copy`` is True.
        copy: When exactly ``True``, normalize and return a copy of ``mat``.
        **kargs: Accepted for call compatibility; not used.

    Returns:
        The normalized copy when ``copy`` is ``True``; otherwise ``None``
        (``mat`` itself has been normalized in place).
    """
    if copy is True:
        mat = mat.copy()
    nrows = mat.shape[0]
    nnz = mat.nnz
    ptr = mat.indptr
    # A slice is a view, so the in-place multiply below updates ``mat``.
    val = mat.data[:nnz]

    # Row number of every stored entry, derived from the row pointer.
    row_of_entry = np.repeat(np.arange(nrows), np.diff(ptr))
    sq_sums = np.bincount(row_of_entry, weights=val ** 2, minlength=nrows)

    # do not normalize rows whose squared sum is zero
    scale = np.ones(nrows, dtype=np.double)
    nonzero = sq_sums != 0.0
    scale[nonzero] = 1.0 / np.sqrt(sq_sums[nonzero])

    np.multiply(val, scale[row_of_entry], out=val, casting="unsafe")

    if copy is True:
        return mat



def initCentorids(x, k):
    """Pick ``k`` initial centroids from ``x``.

    ``x`` is shuffled with a fixed seed (``random_state=0``), so the same
    input always yields the same selection.

    Args:
        x: Collection accepted by ``sklearn.utils.shuffle`` that supports
            slicing along its first axis.
        k: Number of centroids to take.

    Returns:
        The first ``k`` items of the shuffled ``x``.
    """
    x_shuffle = shuffle(x, random_state=0)
    return x_shuffle[:k]


#?In[15]:

def sim(x1, x2):
    """Return the matrix of dot products between rows of ``x1`` and ``x2``.

    Args:
        x1: Matrix-like object providing ``.dot``.
        x2: Matrix-like object providing ``.T``, with the same number of
            columns as ``x1``.

    Returns:
        ``x1.dot(x2.T)``; entry ``(i, j)`` is the dot product of row ``i`` of
        ``x1`` with row ``j`` of ``x2``.
    """
    return x1.dot(x2.T)


#?In[16]:

def findCentroids(mat, centroids):
    """Assign every row of ``mat`` to its most similar centroid.

    Args:
        mat: Matrix whose rows are the points to assign.
        centroids: Matrix whose rows are the centroids.

    Returns:
        A list with one entry per row of ``mat``: the 1-based position of the
        centroid with the highest ``sim`` score for that row.

    Note:
        The result of ``sim(mat, centroids)`` must provide ``getrow`` (as
        scipy sparse matrices do).
    """
    simsMatrix = sim(mat, centroids)

    idx = []
    for i in range(simsMatrix.shape[0]):
        # Densify one row at a time rather than the whole similarity matrix.
        row = simsMatrix.getrow(i).toarray()[0].ravel()
        # argsort()[-1] (not argmax) keeps the established tie-breaking.
        best = row.argsort()[-1]
        idx.append(best + 1)  # cluster ids are 1-based
    return idx


def?computeMeans(mat?idx?k):
????centroids?=?list()
????for?i?in?range(1k+1):
????????indi?=?[j?for?j?x?in?enumerate(idx)?if?x?==?i]
????????members?=?mat[indi:]
????????if?

?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2017-05-09?23:43??Text_Clustering-master\
?????目錄???????????0??2017-05-09?23:43??Text_Clustering-master\.ipynb_checkpoints\
?????文件???????10433??2017-05-09?23:43??Text_Clustering-master\.ipynb_checkpoints\Text_clustering-checkpoint.ipynb
?????文件???????47137??2017-05-09?23:43??Text_Clustering-master\1.png
?????文件??????198806??2017-05-09?23:43??Text_Clustering-master\HW6.pdf
?????文件????????3302??2017-05-09?23:43??Text_Clustering-master\README.md
?????文件???????20494??2017-05-09?23:43??Text_Clustering-master\Text_clustering.ipynb
?????文件???????17159??2017-05-09?23:43??Text_Clustering-master\output.dat.txt
?????目錄???????????0??2017-05-09?23:43??Text_Clustering-master\report\
?????文件???????94769??2017-05-09?23:43??Text_Clustering-master\report\README.pdf
?????目錄???????????0??2017-05-09?23:43??Text_Clustering-master\src\
?????文件????????4998??2017-05-09?23:43??Text_Clustering-master\src\Text_clustering.py
?????文件?????7369300??2017-05-09?23:43??Text_Clustering-master\src\train.dat.txt
?????文件?????7369300??2017-05-09?23:43??Text_Clustering-master\train.dat.txt

評論

共有 條評論