資源簡介
recommendation_engines.py Recommenders.py 推薦系統.ipynb

代碼片段和文件信息
# coding: utf-8
# # Load Necessary Dependencies
# In[1]:
import pandas as pd
import numpy as np
import time
import sqlite3

# Directory prefix prepended to the dataset file names below.
data_home = './'

# # Load and Process the Datasets
# ### Get more information about the Millionsong project from https://labrosa.ee.columbia.edu/millionsong/
#
# #### Refer to Chapter 10: Section 'The Million Song Dataset Taste Profile' for more details
# ## Load Triplets data  [user song play_count]
# #### Get the data from http://labrosa.ee.columbia.edu/millionsong/sites/default/files/challenge/train_triplets.txt.zip
# In[2]:

# NOTE: the cells inside the string literal below are intentionally disabled.
# They scan train_triplets.txt line by line and write the two summary files
# 'user_playcount_df.csv' and 'song_playcount_df.csv' that the rest of this
# script reads back. Re-enable them (remove the triple quotes) to regenerate
# those files.
"""
triplet_dataset = pd.read_csv(filepath_or_buffer=data_home+'train_triplets.txt',
                              nrows=10000, sep='\t', header=None,
                              names=['user', 'song', 'play_count'])
# In[3]:
triplet_dataset.head(n=10)
# ## Get User and total play counts
# In[5]:
output_dict = {}
with open(data_home+'train_triplets.txt') as f:
    for line_number, line in enumerate(f):
        user = line.split('\t')[0]
        play_count = int(line.split('\t')[2])
        # Accumulate onto the running total for a user already seen.
        if user in output_dict:
            play_count += output_dict[user]
        output_dict.update({user: play_count})
output_list = [{'user': k, 'play_count': v} for k, v in output_dict.items()]
play_count_df = pd.DataFrame(output_list)
play_count_df = play_count_df.sort_values(by='play_count', ascending=False)
# In[ ]:
play_count_df.to_csv(path_or_buf='user_playcount_df.csv', index=False)
# ## Get Song and total play counts
# In[7]:
output_dict = {}
with open(data_home+'train_triplets.txt') as f:
    for line_number, line in enumerate(f):
        song = line.split('\t')[1]
        play_count = int(line.split('\t')[2])
        # Accumulate onto the running total for a song already seen.
        if song in output_dict:
            play_count += output_dict[song]
        output_dict.update({song: play_count})
output_list = [{'song': k, 'play_count': v} for k, v in output_dict.items()]
song_count_df = pd.DataFrame(output_list)
song_count_df = song_count_df.sort_values(by='play_count', ascending=False)
# In[ ]:
song_count_df.to_csv(path_or_buf='song_playcount_df.csv', index=False)
# ## View top users and songs
# In[14]:
"""
# Per-user play-count totals (columns: user, play_count).
play_count_df = pd.read_csv(filepath_or_buffer='user_playcount_df.csv')
play_count_df.head(n=10)  # notebook cell output; no effect when run as a script
# In[15]:
# Per-song play-count totals (columns: song, play_count).
song_count_df = pd.read_csv(filepath_or_buffer='song_playcount_df.csv')
song_count_df.head(10)  # notebook cell output; no effect when run as a script
# ## Subsetting the data
# In[15]:
total_play_count = song_count_df.play_count.sum()
# Percentage of all plays covered by the first 100,000 rows of the user table
# (notebook cell output; the value is not stored).
(float(play_count_df.head(n=100000).play_count.sum())/total_play_count)*100
play_count_subset = play_count_df.head(n=100000)
# In[17]:
# Percentage of all plays covered by the first 30,000 rows of the song table
# (notebook cell output; the value is not stored).
(float(song_count_df.head(n=30000).play_count.sum())/total_play_count)*100
# In[18]:
song_count_subset = song_count_df.head(n=30000)
# In[19]:
# Plain Python lists of the retained user and song identifiers.
user_subset = list(play_count_subset.user)
song_subset = list(song_count_subset.song)
# In[20]:
triplet_dataset?=?pd.read_csv(filepath_or_buffer=data_h
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       13987  2018-02-27 11:42  recommendation_engines.py
     文件      353123  2018-03-01 15:32  推薦系統.ipynb
     文件        9456  2018-02-27 11:42  Recommenders.py
- 上一篇:Demo Voltage Read.vi
- 下一篇:labvIEW英文文獻
評論
共有 條評論