資源簡介
資源為今年八月份參加天池大數據競賽「A 股公司營收預測」時使用的預處理後的數據和對應的算法文件。

代碼片段和文件信息
# -*- encoding:utf-8 -*-
import sys

import numpy as np
import pandas as pd
# Layers are imported from the public ``keras.layers`` package rather than the
# legacy ``keras.layers.core`` / ``keras.layers.recurrent`` submodules.
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler

# Window length: how many lagged steps, and how many current/look-ahead steps,
# load_one() stacks side by side in each row.
LEN_SEQ = 2
# Seed NumPy's global random generator.
np.random.seed(0)
def load_one(data, x):
    """Build the lagged (sliding-window) feature table for one ticker.

    Selects the rows of ``data`` whose ``TICKER_SYMBOL`` equals ``x``, takes
    the columns at positions 1-4, and places time-shifted copies of them side
    by side: ``t-LEN_SEQ`` ... ``t-1``, then ``t`` ... ``t+LEN_SEQ-1``.
    Rows that contain any NaN introduced by shifting are dropped.

    Args:
        data: DataFrame with a ``TICKER_SYMBOL`` column and at least five
            columns in total. Positions 1-4 are presumably REVENUE, COGS,
            OPERATE_PROFIT and N_INCOME -- TODO confirm against datanew.csv.
        x: Ticker symbol to select.

    Returns:
        DataFrame with ``4 * 2 * LEN_SEQ`` columns. Column names are the
        original four names repeated once per shift (they are not relabelled).
    """
    ticker = data.loc[data['TICKER_SYMBOL'] == x]
    print('query the data')
    # Positional column selection; ``.iloc`` replaces the removed ``.ix``.
    arr = ticker.iloc[:, [1, 2, 3, 4]]

    # Oldest lag first, then the current step and the look-ahead steps.
    offsets = list(range(LEN_SEQ, 0, -1)) + [-i for i in range(LEN_SEQ)]
    frames = [arr.shift(offset) for offset in offsets]

    Train = pd.concat(frames, axis=1)
    Train.dropna(inplace=True)
    return Train
def build_model():
    """Create and compile the LSTM regressor.

    The network is an ``LSTM`` layer with 20 units over inputs of shape
    ``(1, 15)`` (one time step, fifteen features) followed by a single-unit
    ``Dense`` output. It is compiled with mean-absolute-error loss and the
    Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    model = Sequential()
    model.add(LSTM(20, input_shape=(1, 15)))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    return model
????
def predict_point_by_point(model, data):
    """Fit ``model`` on ``data``, using the last column as the target.

    Despite its name, this function only trains; it makes no predictions.

    Args:
        model: Object exposing ``fit(X, y, epochs=..., batch_size=...)``.
        data: Object with a 2-D ``.values`` array (e.g. a DataFrame). All
            columns but the last are features; the last column is the target.

    Returns:
        The same ``model`` object, after fitting.
    """
    values = data.values
    train_X, train_y = values[:, :-1], values[:, -1]
    # Insert a length-1 time-step axis: (samples, 1, features).
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    # The fit result is deliberately not bound to a local named ``LSTM``,
    # which would shadow the layer class.
    model.fit(train_X, train_y, epochs=20, batch_size=3)
    return model
# 在這份金融數據裡,沒有 2018 年 Q2 的數據,也就是 t+1 的 COGS、OPERATE_PROFIT、N_INCOME 都沒有,無法直接預測目標(也就是 REVENUE 的值)。所以我們需要先對每個單列做出預測:這裡仍然用 LSTM 對 2018 年 Q2 的這三列做預測,再加上向前的三次記錄,共十五列來做預測。
def create_trainX_trainy(data, look_back=1):
    """Turn a sequence into supervised (window, next value) training pairs.

    Sample ``i`` pairs the window ``data[i : i + look_back]`` with the target
    ``data[i + look_back]``. Every valid pair is produced, including the one
    whose target is the final element; the earlier loop bound
    ``len(data) - look_back - 1`` dropped that last pair.

    Args:
        data: Sequence of observations (list, NumPy array or pandas Series).
            It is converted with ``np.asarray`` so access is always
            positional, whatever index labels a Series carries.
        look_back: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(trainX, trainy)`` of NumPy arrays. For 1-D input with enough
        samples, ``trainX`` has shape ``(len(data) - look_back, look_back)``
        and ``trainy`` has shape ``(len(data) - look_back,)``.
    """
    values = np.asarray(data)
    n_samples = len(values) - look_back
    trainX = [values[i:i + look_back] for i in range(n_samples)]
    trainy = [values[i + look_back] for i in range(n_samples)]
    return np.array(trainX), np.array(trainy)
def predict_useone_column(column):
    """Train a single-feature LSTM that predicts a column's next value.

    Builds (previous value, next value) pairs from ``column`` with
    ``create_trainX_trainy`` at its default ``look_back`` of 1, then fits an
    ``LSTM(20)`` + ``Dense(1)`` network (MAE loss, Adam optimizer) for 20
    epochs with batch size 1.

    Args:
        column: One-dimensional sequence of observations for a single series.

    Returns:
        The fitted ``Sequential`` model.
    """
    trainX, trainy = create_trainX_trainy(column)
    # Insert a length-1 time-step axis: (samples, 1, look_back).
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))

    model = Sequential()
    model.add(LSTM(20, input_shape=(1, 1)))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    model.fit(trainX, trainy, epochs=20, batch_size=1)
    return model
def?main():
????dic?=?{}
????data?=?pd.read_csv(“./datanew.csv“?header=0)
????data.drop(‘END_DATE‘1inplace=True)
????#print(data.head()data.columns)
????
????data.fillna(0.00001inplace=True)
????
????#print(temp.isnull().count())
????#temp.fillna(0.000001inplace=True)
????#?做minmax
????#scaler?=?MinMaxScaler(feature_range=(01))
????#data_scaled?=?pd.Dataframe(scaler.fit_transform(temp)columns=[‘TICKER_SYMBOL‘‘REVENUE‘‘COGS‘‘OPERATE_PROFIT‘‘N_INCOME
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????文件?????3868151??2018-07-24?05:22??datanew.csv
?????文件????????5279??2018-07-31?09:22??main.py
評論
共有 條評論