91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

經典又兼具趣味性的案例:泰坦尼克號問題源碼。大家都熟悉的『Jack and Rose』的故事,豪華遊艇倒了,大家都驚恐逃生,可是救生艇的數量有限,無法人人都有,副船長發話了『lady and kid first!』,所以是否獲救其實並非隨機,而是基於一些背景有 rank 先後的。訓練和測試數據是一些乘客的個人信息以及存活狀況,要嘗試根據它生成合適的模型並預測其他人的存活狀況。對,這是一個二分類問題,很多分類算法都可以解決。

資源截圖

代碼片段和文件信息

import?re
import?numpy?as?np
import?pandas?as?pd
import?random?as?rd
from?sklearn?import?preprocessing
from?sklearn.cluster?import?KMeans
from?sklearn.ensemble?import?RandomForestRegressor
from?sklearn.decomposition?import?PCA

# Console display settings: widen NumPy/pandas output so large arrays and
# frames print in full, with 4 digits of precision.
np.set_printoptions(precision=4, threshold=10000, linewidth=160, edgeitems=999, suppress=True)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 160)
# Fully-qualified option names: abbreviated keys such as 'precision' are
# resolved by pattern matching and fail when they match more than one option.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 4)
????

def processCabin():
    """Derive cabin features on the module-level ``df`` in place.

    Steps, in order:
      * missing ``Cabin`` values are replaced with the placeholder ``'U0'``;
      * ``CabinLetter`` is the letter prefix of the cabin, factorized to
        integer codes;
      * when ``keep_binary`` is truthy, one ``CabinLetter_<code>`` indicator
        column per code is appended;
      * ``CabinNumber`` is the first number in the cabin string plus 1
        (so cabins without a number get 1);
      * when ``keep_scaled`` is truthy, ``CabinNumber_scaled`` holds the
        standardized ``CabinNumber``.

    ``df``, ``keep_binary`` and ``keep_scaled`` are module globals defined
    outside this function.
    """
    global df

    # Use .loc rather than chained indexing so the assignment is guaranteed
    # to write into df itself and not into a temporary copy.
    df.loc[df['Cabin'].isnull(), 'Cabin'] = 'U0'

    df['CabinLetter'] = df['Cabin'].map(getCabinLetter)
    df['CabinLetter'] = pd.factorize(df['CabinLetter'])[0]

    if keep_binary:
        cletters = pd.get_dummies(df['CabinLetter']).rename(
            columns=lambda x: 'CabinLetter_' + str(x))
        df = pd.concat([df, cletters], axis=1)

    # getCabinNumber yields a digit string or 0; astype(int) unifies both.
    df['CabinNumber'] = df['Cabin'].map(getCabinNumber).astype(int) + 1

    if keep_scaled:
        scaler = preprocessing.StandardScaler()
        # The scaler expects a 2-D (n_samples, n_features) input; flatten the
        # single-column result back to 1-D for the column assignment.
        scaled = scaler.fit_transform(df[['CabinNumber']].astype(float))
        df['CabinNumber_scaled'] = scaled.ravel()


def getCabinLetter(cabin):
    """Return the first run of ASCII letters found in ``cabin``.

    Returns ``'U'`` when the string contains no letters.
    """
    match = re.search(r"([a-zA-Z]+)", cabin)
    if match:
        return match.group()
    return 'U'


def getCabinNumber(cabin):
    """Return the first run of digits found in ``cabin``.

    The digits are returned as a string; when ``cabin`` contains no digits
    the integer ``0`` is returned instead, so callers should convert the
    result (e.g. with ``int``) before doing arithmetic.
    """
    match = re.search(r"([0-9]+)", cabin)
    if match:
        return match.group()
    return 0


def processTicket():
    """Derive ticket features on the module-level ``df`` in place.

    Steps, in order:
      * the upper-cased ticket prefix is extracted, stripped of ``.``, ``?``
        and ``/`` characters, and ``STON`` is rewritten to ``SOTON``;
      * ``TicketPrefixId`` holds the factorized integer code of that prefix;
      * when ``keep_binary`` is truthy, one ``TicketPrefix_<prefix>``
        indicator column per prefix is appended;
      * the intermediate ``TicketPrefix`` column is dropped;
      * ``TicketNumber`` is the trailing number of the ticket (0 if none),
        ``TicketNumberDigits`` its digit count and ``TicketNumberStart`` its
        leading digit;
      * when ``keep_scaled`` is truthy, ``TicketNumber_scaled`` holds the
        standardized ``TicketNumber``.

    ``df``, ``keep_binary`` and ``keep_scaled`` are module globals defined
    outside this function.
    """
    global df

    df['TicketPrefix'] = df['Ticket'].map(lambda x: getTicketPrefix(x.upper()))
    df['TicketPrefix'] = df['TicketPrefix'].map(lambda x: re.sub(r'[\.?\/?]', '', x))
    df['TicketPrefix'] = df['TicketPrefix'].map(lambda x: re.sub('STON', 'SOTON', x))
    df['TicketPrefixId'] = pd.factorize(df['TicketPrefix'])[0]

    if keep_binary:
        prefixes = pd.get_dummies(df['TicketPrefix']).rename(
            columns=lambda x: 'TicketPrefix_' + str(x))
        df = pd.concat([df, prefixes], axis=1)

    df.drop(['TicketPrefix'], axis=1, inplace=True)

    # TicketNumber is kept as a digit string until the length and the leading
    # digit have been derived from it.
    df['TicketNumber'] = df['Ticket'].map(lambda x: getTicketNumber(x))
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    df['TicketNumberDigits'] = df['TicketNumber'].map(lambda x: len(x)).astype(int)
    df['TicketNumberStart'] = df['TicketNumber'].map(lambda x: x[0:1]).astype(int)

    df['TicketNumber'] = df['TicketNumber'].astype(int)

    if keep_scaled:
        scaler = preprocessing.StandardScaler()
        # The scaler expects a 2-D (n_samples, n_features) input; flatten the
        # single-column result back to 1-D for the column assignment.
        scaled = scaler.fit_transform(df[['TicketNumber']].astype(float))
        df['TicketNumber_scaled'] = scaled.ravel()


def getTicketPrefix(ticket):
    """Return the first run of letters, dots and slashes found in ``ticket``.

    Returns ``'U'`` when the ticket contains none of those characters
    (for example a purely numeric ticket).
    """
    match = re.search(r"([a-zA-Z\.\/]+)", ticket)
    if match:
        return match.group()
    return 'U'

def getTicketNumber(ticket):
    """Return the run of digits at the very end of ``ticket`` as a string.

    Returns the string ``'0'`` when the ticket does not end in a digit.
    """
    match = re.search(r"([\d]+$)", ticket)
    if match:
        return match.group()
    return '0'


def?processFare():
????global?df???????????
????df[‘Fare‘][?np.isnan(df[‘Fare‘])?]?=?df[‘Fare‘].median()
????df[‘Fare‘][?np.where(df[‘Fare‘]==0)[0]?]?=?df[‘Fare‘][?df[‘Fare‘].nonzero()[0]?]

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2015-03-23 16:38  kaggle-Titanic\
     文件       14597  2015-03-23 16:38  kaggle-Titanic\dataProcess.py
     文件       72565  2015-03-23 16:38  kaggle-Titanic\figure_1.png
     文件        4216  2015-03-23 16:38  kaggle-Titanic\randomForest.py
     文件         200  2015-03-23 16:38  kaggle-Titanic\README.md
     文件        2839  2015-03-23 16:38  kaggle-Titanic\result.csv
     文件       28210  2015-03-23 16:38  kaggle-Titanic\test.csv
     文件       60302  2015-03-23 16:38  kaggle-Titanic\train.csv

評論

共有 條評論