91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

kaggle入門賽房價預測,包括pandas數據預處理,使用sklearn線性回歸預測結果,輸出結果表格

資源截圖

代碼片段和文件信息

#coding=UTF-8
'''
    See the data-preprocessing tutorial from the Kaggle kernel; Chinese version: https://www.leiphone.com/news/201704/Py7Mu3TwRF97pWc7.html
'''

#invite people for the Kaggle party

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')


#bring in the six packs
df_train = pd.read_csv(r"train.csv")  # ../ file directory
df_test = pd.read_csv(r"test.csv")  # ../ file directory
print(df_test.shape)
print(df_train.shape)

#check the decoration
# print df_train.columns

#descriptive statistics summary
# print df_train['SalePrice'].describe()
#histogram
sns.distplot(df_train[u'SalePrice'])
# sns.plt.show()

# skewness and kurtosis of the sale price
print("skewness:{} Kurtosis:{}".format(df_train['SalePrice'].skew(), df_train['SalePrice'].kurt()))

# relationship between sale price and numeric variables
var = 'GrLivArea'
data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000))
plt.show()

var = 'TotalBsmtSF'
data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000))
plt.show()

# box plot of 'OverallQual' against 'SalePrice'
var = 'OverallQual'
data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
f, ax = plt.subplots(figsize=(8, 6))
fig = sns.boxplot(x=var, y="SalePrice", data=data)
fig.axis(ymin=0, ymax=800000)
plt.show()

# box plot of YearBuilt against SalePrice
var = 'YearBuilt'
data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
f, ax = plt.subplots(figsize=(16, 8))
fig = sns.boxplot(x=var, y="SalePrice", data=data)
fig.axis(ymin=0, ymax=800000)
plt.xticks(rotation=90)
# plt.show()

# correlation matrix
corrmat = df_train.corr()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, square=True)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

# correlation between SalePrice and the other variables
k = 10  # number of variables for heatmap
cols = corrmat.nlargest(k, 'SalePrice')['SalePrice'].index
cm = np.corrcoef(df_train[cols].values.T)
sns.set(font_scale=1.25)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10},
                 yticklabels=cols.values, xticklabels=cols.values)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

# scatter plots between SalePrice and the related variables

sns.set()
cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
# NOTE(review): newer seaborn releases name this parameter `height` -- confirm against the installed version
sns.pairplot(df_train[cols], size=2.5)
plt.show()

# missing data: per-column count and fraction of null values, largest first
total = df_train.isnull().sum().sort_values(ascending=False)
percent = (df_train.isnull().sum()/df_train.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
print(missing_data.head(20))

# handle missing data
print(missing_data[missing_data['Total'] > 1].index)
# del df_train[missing_data[missing_data['Total']>1].index]
# drop every column that has more than one missing value
df_train = df_train.drop((missing_data[missing_data['Total'] > 1]).index, axis=1)
#?df_train=?df_train.drop((missi

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件       8549  2017-12-06 15:08  房價預測\kernelhouse.py

     文件         79  2017-12-06 15:05  房價預測\readme.txt

     文件      29035  2017-11-19 20:27  房價預測\submission.csv

     文件     452864  2017-11-19 11:48  房價預測\test.csv

     文件     460676  2017-11-09 18:40  房價預測\train.csv

     目錄          0  2017-12-06 15:08  房價預測

----------- ---------  ---------- -----  ----

               951203                    6


評論

共有 條評論