# 拉取数据

``````# 导入tushare
import tushare as ts
# 初始化pro接口
pro = ts.pro_api('xxx')

# 拉取数据
df = pro.fx_daily(**{
"ts_code": "XAUUSD.FXCM",
"start_date": 20160910,
"end_date": 20210910,
"exchange": "FXCM",
"limit": "",
"offset": ""
}, fields=[
"ts_code",
"bid_open",
"bid_close",
"bid_high",
"bid_low",
"tick_qty"
])
print(df)
df.to_csv('黄金数据2016-9-10至2021-9-10.csv')
``````

# 数据预处理

LSTM模型的核心是用一段序列数据去预测未来的数据。序列数据的构造思路：构造一个定长队列，将每日的数据视为一个个体，当新的个体进入队列时，就会挤出队首的个体；在每个时刻都“拍照”记录下队列的情况，就可以得到一个三维数据 (len, mem_his_days, attribute)。其中 len 是序列的个数，假设数据集中有100条数据，我们将5天作为一个序列，那么就会有 100-5+1=96 个序列；mem_his_days 就是那个5天的5，表示序列的长度；attribute 则是原数据的协变量（就是数据的特征）。

mem_his_days就是序列的长度，pre_days就是预测未来几天的价格

``````def Stock_Price_LSTM_Data_Precesing(df,mem_his_days,pre_days):
df.dropna(inplace=True)
df.sort_index(inplace=True)
df.drop(columns='ts_code',inplace=True)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
sca_X = scaler.fit_transform(df.iloc[:,:-1])

from collections import deque
deq = deque(maxlen=mem_his_days)

X = []
for i in sca_X:
deq.append(list(i))
if len(deq)==mem_his_days:
X.append(list(deq))

X_lately = X[-pre_days:]
X = X[:-pre_days]

y = df['label'].values[mem_his_days-1:-pre_days]

import numpy as np
X = np.array(X)
y = np.array(y)

return X,y,X_lately
``````

# 训练模型

``````pre_days = 5
# mem_days = [5,10,15]
# lstm_layers = [1,2]
# dense_layers = [1,2,3]
# units = [16,32]
mem_days = [5]
lstm_layers = [3]
dense_layers = [2]
units = [64]
from tensorflow.keras.callbacks import ModelCheckpoint
for the_mem_days in mem_days:
for the_lstm_layers in lstm_layers:
for the_dense_layers in dense_layers:
for the_units in units:
filepath = './models_only_problem/{val_mape:.2f}_{epoch:02d}_'+f'men_{the_mem_days}_lstm_{the_lstm_layers}_dense_{the_dense_layers}_unit_{the_units}'
checkpoint = ModelCheckpoint(
filepath=filepath,
save_weights_only=False,
monitor='val_mape',
mode='min',
save_best_only=True)

X,y,X_lately = Stock_Price_LSTM_Data_Precesing(golden,the_mem_days,pre_days)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,shuffle=False,test_size=0.1)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
model = Sequential()

for i in range(the_lstm_layers):

for i in range(the_dense_layers):

loss='mse',
metrics=['mape'])

model.fit(X_train,y_train,batch_size=32,epochs=50,validation_data=(X_test,y_test),callbacks=[checkpoint])
``````

# 模型预测及查看效果

## 先看整体情况

``````from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt

pre = best_model.predict(X)
print(len(pre))
plt.plot(y,color='red',label='price')
plt.plot(pre,color='green',label='predict')
plt.show()
``````

## 选取特定的一小段查看

``````x_time1 = y[200:300]
pre_time1 = pre[200:300]
plt.plot(x_time1,color='red',label='price')
plt.plot(pre_time1,color='green',label='predict')
plt.legend()
plt.show()
``````

THE END