91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

Kaggle房價預測代碼 score=12% import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-
"""
Kaggle house-price data exploration and cleaning script.

Created on Mon Jan 14 11:51:02 2019

@author: TangYingjie 2018140083
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Load the training and test sets; both CSV files are expected in the
# current working directory.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
# Widen the console output so wide frames are not wrapped.
pd.set_option('display.width', 700)
df_train.head(5)  # Peek at some of the factors that influence the house price.

# !!!---------------------- Data visualization ----------------------
# Dataset dimensions
print('The train data shape : {}'.format(df_train.shape))
print('The test data shape : {}'.format(df_test.shape))

# Column data types, followed by summary statistics (the describe() result
# is only displayed when run interactively).
print(df_train.dtypes)
df_train.describe()

# Histogram of the sale price.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 in favour of
# histplot/displot -- confirm the seaborn version this script targets.
plt.figure()  # own figure, so later plots do not draw over the histogram
sns.distplot(df_train['SalePrice'])

# Box plots of the sale price against CentralAir, OverallQual and YearBuilt.
# The three plots are identical apart from the column, so build them in a loop.
for var in ('CentralAir', 'OverallQual', 'YearBuilt'):
    data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
    plt.figure()  # fresh figure per variable instead of overlaying on one axes
    fig = sns.boxplot(x=var, y='SalePrice', data=data)
    # Same fixed price range on every plot so they are visually comparable.
    fig.axis(ymin=0, ymax=800000)

# Scatter plot of the sale price against YearBuilt.
var = 'YearBuilt'
data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000))

# !!!---------------------- Data cleaning ----------------------
# Remove the Id column from both frames.
df_train.drop('Id', inplace=True, axis=1)
df_test.drop('Id', inplace=True, axis=1)

# Count the missing values per column: total rows minus non-null rows.
count_nans = len(df_train) - df_train.count()
df_count_nans = count_nans.to_frame()
df_count_nans.columns = ['train_nan_count']
df_count_nans['%_train_nans'] = (
    df_count_nans['train_nan_count'] / df_train.shape[0]
) * 100
# Same statistics for the test data (aligned on the column-name index).
df_count_nans['test_nan_count'] = len(df_test) - df_test.count()
df_count_nans['%_test_nans'] = (
    df_count_nans['test_nan_count'] / df_test.shape[0]
) * 100

# Most-incomplete training columns first, then show only the columns that
# have missing values in either set (displayed when run interactively).
df_count_nans.sort_values("train_nan_count", ascending=False, inplace=True)
df_count_nans.query('train_nan_count > 0 or test_nan_count > 0')

# Combine the data sets.
# Keep the target aside as an array, then remove it from the training frame
# so that the training and test frames can be stacked.
y_train = df_train.SalePrice.values
print(y_train)
df_train.drop("SalePrice", inplace=True, axis=1)

# Stack training rows on top of test rows and renumber the index 0..n-1.
df_all_data = pd.concat([df_train, df_test])
df_all_data.reset_index(inplace=True, drop=True)
print(df_all_data.shape)
df_all_data.columns
df_all_data.head()

# Clean up missing data: drop these four columns entirely.
df_all_data.drop(['PoolQC', 'Alley', 'MiscFeature', 'Fence'], axis=1, inplace=True)

# FireplaceQu: a missing value means "no fireplace", so record it as the
# literal string "None". Assign the result back instead of calling
# fillna(inplace=True) on the selected column, which is chained assignment
# and is not guaranteed to modify df_all_data.
df_all_data["FireplaceQu"] = df_all_data["FireplaceQu"].fillna("None")

# The garage attributes are treated the same way. Selecting a list of columns
# returns a copy, so an in-place fillna on that selection would leave
# df_all_data unchanged; assign the filled columns back explicitly.
garage_cols = ["GarageCond", "GarageType", "GarageFinish", "GarageQual"]
df_all_data[garage_cols] = df_all_data[garage_cols].fillna("None")

#?把GarageYrBlt填上0.?假設(shè)車庫不是available?
df_all_

評論

共有 條評論