91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

泰坦尼克 Python 數據分析,帶數據集和源代碼,強烈推薦。

資源截圖

代碼片段和文件信息

import random as rd
import re

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor

# Widen NumPy and pandas console output so arrays and frames print without
# truncation or line wrapping.
np.set_printoptions(precision=4, threshold=10000, linewidth=160, edgeitems=999, suppress=True)
# Fully-qualified option keys are used throughout: a bare 'precision' pattern
# matches more than one option in current pandas and raises OptionError.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 160)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 4)
????

def processCabin():
    """Derive cabin letter and cabin number features on the global ``df``.

    Missing ``Cabin`` values are replaced with ``'U0'``. ``CabinLetter`` is the
    factorized letter extracted by ``getCabinLetter``; ``CabinNumber`` is the
    number extracted by ``getCabinNumber`` plus one. Extra columns are added
    when the module-level flags ``keep_binary`` / ``keep_scaled`` are truthy
    (both are defined outside this excerpt).

    Rebinds the global ``df`` when dummy columns are concatenated.
    """
    global df
    # Assign through .loc: chained indexing (df['Cabin'][mask] = ...) may
    # write to a temporary copy and silently leave df unchanged.
    df.loc[df['Cabin'].isnull(), 'Cabin'] = 'U0'
    df['CabinLetter'] = df['Cabin'].map(getCabinLetter)
    df['CabinLetter'] = pd.factorize(df['CabinLetter'])[0]

    if keep_binary:
        cletters = pd.get_dummies(df['CabinLetter']).rename(columns=lambda x: 'CabinLetter_' + str(x))
        df = pd.concat([df, cletters], axis=1)

    df['CabinNumber'] = df['Cabin'].map(getCabinNumber).astype(int) + 1
    if keep_scaled:
        scaler = preprocessing.StandardScaler()
        # StandardScaler requires 2-D input; scale the single column as a
        # one-column frame and flatten the result back to 1-D.
        df['CabinNumber_scaled'] = scaler.fit_transform(df[['CabinNumber']]).ravel()


def getCabinLetter(cabin):
    """Return the first run of ASCII letters in *cabin*, or ``'U'`` if none.

    Args:
        cabin: Cabin string such as ``'C85'``.

    Returns:
        The first alphabetic run found in the string, or ``'U'`` when the
        string contains no letters.
    """
    match = re.search(r"([a-zA-Z]+)", cabin)
    if match:
        return match.group()
    return 'U'


def getCabinNumber(cabin):
    """Return the first run of digits in *cabin*, or ``0`` if there is none.

    Args:
        cabin: Cabin string such as ``'C85'``.

    Returns:
        The first digit run as a ``str`` (e.g. ``'85'``), or the ``int`` 0
        when the string contains no digits. The mixed return type is kept
        from the original; the visible caller converts with ``astype(int)``.
    """
    match = re.search(r"([0-9]+)", cabin)
    if match:
        return match.group()
    return 0


def processTicket():
    """Derive ticket prefix and ticket number features on the global ``df``.

    Adds ``TicketPrefixId`` (factorized, normalized prefix), ``TicketNumber``
    (trailing digits as int), ``TicketNumberDigits`` (digit count) and
    ``TicketNumberStart`` (leading digit). Extra columns are added when the
    module-level flags ``keep_binary`` / ``keep_scaled`` are truthy (both are
    defined outside this excerpt). The temporary ``TicketPrefix`` column is
    dropped before returning.

    Rebinds the global ``df`` when dummy columns are concatenated.
    """
    global df

    df['TicketPrefix'] = df['Ticket'].map(lambda x: getTicketPrefix(x.upper()))
    # Strip punctuation so variants such as 'A/5' and 'A.5.' share one prefix.
    df['TicketPrefix'] = df['TicketPrefix'].map(lambda x: re.sub(r'[.?/]', '', x))
    df['TicketPrefix'] = df['TicketPrefix'].map(lambda x: re.sub('STON', 'SOTON', x))
    df['TicketPrefixId'] = pd.factorize(df['TicketPrefix'])[0]

    if keep_binary:
        prefixes = pd.get_dummies(df['TicketPrefix']).rename(columns=lambda x: 'TicketPrefix_' + str(x))
        df = pd.concat([df, prefixes], axis=1)

    df.drop(['TicketPrefix'], axis=1, inplace=True)

    # getTicketNumber returns a digit string ('0' when absent), so len() and
    # the first-character slice below are always applied to a non-empty str.
    df['TicketNumber'] = df['Ticket'].map(getTicketNumber)
    # The np.int alias was removed from NumPy; the builtin int is equivalent.
    df['TicketNumberDigits'] = df['TicketNumber'].map(len).astype(int)
    df['TicketNumberStart'] = df['TicketNumber'].map(lambda x: x[0:1]).astype(int)

    df['TicketNumber'] = df['TicketNumber'].astype(int)

    if keep_scaled:
        scaler = preprocessing.StandardScaler()
        # StandardScaler requires 2-D input; scale the single column as a
        # one-column frame and flatten the result back to 1-D.
        df['TicketNumber_scaled'] = scaler.fit_transform(df[['TicketNumber']]).ravel()


def getTicketPrefix(ticket):
    """Return the first run of letters, dots and slashes in *ticket*.

    Args:
        ticket: Ticket string such as ``'A/5 21171'``.

    Returns:
        The first run of characters from ``[a-zA-Z./]``, or ``'U'`` when the
        ticket contains none of them.
    """
    match = re.search(r"([a-zA-Z./]+)", ticket)
    if match:
        return match.group()
    return 'U'

def getTicketNumber(ticket):
    """Return the trailing run of digits in *ticket* as a string.

    Args:
        ticket: Ticket string such as ``'A/5 21171'``.

    Returns:
        The digits at the very end of the string, or ``'0'`` when the ticket
        does not end in a digit.
    """
    match = re.search(r"(\d+$)", ticket)
    if match:
        return match.group()
    return '0'


def?processFare():
????global?df???????????
????df[‘Fare‘][?np.isnan(df[‘Fare‘])?]?=?df[‘Fare‘].median()
????df[‘Fare‘][?np.where(df[‘Fare‘]==0)[0]?]?=?df[‘Fare‘][?df[‘Fare‘].nonzero()[0]?]

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2015-03-23 08:38  kaggle-Titanic-master\
     文件         200  2015-03-23 08:38  kaggle-Titanic-master\README.md
     文件       14597  2015-03-23 08:38  kaggle-Titanic-master\dataProcess.py
     文件       72565  2015-03-23 08:38  kaggle-Titanic-master\figure_1.png
     文件        4216  2015-03-23 08:38  kaggle-Titanic-master\randomForest.py
     文件        2839  2015-03-23 08:38  kaggle-Titanic-master\result.csv
     文件       28210  2015-03-23 08:38  kaggle-Titanic-master\test.csv
     文件       60302  2015-03-23 08:38  kaggle-Titanic-master\train.csv

評論

共有 條評論