# Code For Better 谷歌开发者之声——使用TensorFlow的时间序列预测

## 一、 前言

### 1.1 开源地址

https://github.com/LongxingTan/Time-series-prediction

### 1.2 取得成绩

• Bert模型 获得KDD CUP2022百度风机功率预测第3名
• Seq2seq模型 获得阿里天池-AI earth人工智能气象挑战赛第4名

## 二、应用案例

### 2.3 特征工程

```python
def add_features(df, lags=(1, 2, 3, 4)):
    """Add hand-crafted per-breath features to a ventilator-style frame.

    The frame must contain the columns ``breath_id``, ``time_step``,
    ``u_in`` and ``u_out``.  New columns are appended in four steps, in a
    fixed order:

    1. element-wise products and their per-breath cumulative sums;
    2. per-breath lagged (``shift(k)``) and lead (``shift(-k)``) copies of
       ``u_in`` / ``u_out`` for every ``k`` in ``lags``;
    3. per-breath max / mean of ``u_in`` and their distance to ``u_in``;
    4. differences between the current value and each lagged value.

    Args:
        df: pandas DataFrame with the four columns listed above.
        lags: shift distances used for the lag / lead / diff features.
            The default reproduces the original hard-coded 1..4.

    Returns:
        A DataFrame holding the original and the new columns.  Because of
        the ``fillna(0)`` after step 2 the returned object is a new frame;
        the frame passed in is still modified in place by steps 1 and 2.
    """
    # Step 1: interaction terms and per-breath running totals.
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['time_step_cumsum'] = df.groupby('breath_id')['time_step'].cumsum()

    # Group the two signals once and reuse them for every shift below.
    u_in_by_breath = df.groupby('breath_id')['u_in']
    u_out_by_breath = df.groupby('breath_id')['u_out']
    df['u_in_cumsum'] = u_in_by_breath.cumsum()
    print("Step-1...Completed")

    # Step 2: lag (past) and lead (future) values inside each breath.
    for lag in lags:
        df['u_in_lag{}'.format(lag)] = u_in_by_breath.shift(lag)
        df['u_out_lag{}'.format(lag)] = u_out_by_breath.shift(lag)
        df['u_in_lag_back{}'.format(lag)] = u_in_by_breath.shift(-lag)
        df['u_out_lag_back{}'.format(lag)] = u_out_by_breath.shift(-lag)
    # Shifting leaves NaN at breath boundaries.  Note that this fills every
    # NaN in the frame with 0, not only those in the new lag columns.
    df = df.fillna(0)
    print("Step-2...Completed")

    # Step 3: per-breath statistics of u_in, computed on the filled frame.
    u_in_by_breath = df.groupby('breath_id')['u_in']
    breath_max = u_in_by_breath.transform('max')
    breath_mean = u_in_by_breath.transform('mean')
    df['breath_id__u_in__max'] = breath_max
    df['breath_id__u_in__mean'] = breath_mean
    df['breath_id__u_in__diffmax'] = breath_max - df['u_in']
    df['breath_id__u_in__diffmean'] = breath_mean - df['u_in']

    print("Step-3...Completed")

    # Step 4: change relative to each lagged value (the lag is 0 where the
    # shift ran past the start of the breath, because of the fill above).
    for lag in lags:
        df['u_in_diff{}'.format(lag)] = df['u_in'] - df['u_in_lag{}'.format(lag)]
        df['u_out_diff{}'.format(lag)] = df['u_out'] - df['u_out_lag{}'.format(lag)]
    print("Step-4...Completed")
    return df
```

### 2.4 模型

tfts 基于 TensorFlow，可以快速搭建不同的时序模型：只需要通过 `use_model` 传入对应的模型名字即可。

```python
def build_model(
    use_model,
    train_sequence_length,
    predict_sequence_length=288,
    target_aggs=1,
    short_feature_nums=10,
    long_feature_nums=1,
    custom_model_params=None,
):
    """Assemble a Keras model around the tfts model selected by name.

    Three symbolic inputs are created, passed through the ``KDD`` layer,
    and the result is fed together with a teacher input into the model
    returned by ``build_tfts_model``.

    Args:
        use_model: model name forwarded to ``build_tfts_model``.
        train_sequence_length: number of historical steps.
        predict_sequence_length: number of future steps.
        target_aggs: divisor applied to ``predict_sequence_length`` to get
            the length of the teacher input and of the model's prediction.
        short_feature_nums: feature count of the history-only input.
        long_feature_nums: feature count of the input that spans
            ``train_sequence_length + predict_sequence_length`` steps.
        custom_model_params: forwarded unchanged to ``build_tfts_model``.

    Returns:
        A ``tf.keras.Model`` whose inputs are the dict
        ``{'inputs': <3-tuple>, 'teacher': <teacher input>}``.
    """
    # NOTE(review): `Input`, `KDD` and `build_tfts_model` are defined outside
    # this excerpt; `Input` is presumably tf.keras.layers.Input -- confirm.
    inputs = (
        Input([1]),  # NOTE(review): meaning of this length-1 input is not shown here
        Input([train_sequence_length, short_feature_nums]),  # raw feature numbers
        Input([train_sequence_length + predict_sequence_length, long_feature_nums]),  # long version
    )
    # Both the teacher input and the prediction use the aggregated horizon.
    decoder_steps = predict_sequence_length // target_aggs
    teacher_inputs = Input([decoder_steps, 1])

    ts_inputs = KDD(train_sequence_length, predict_sequence_length)(inputs)
    outputs = build_tfts_model(
        use_model=use_model,
        predict_sequence_length=decoder_steps,
        custom_model_params=custom_model_params,
    )(ts_inputs, teacher_inputs)
    model = tf.keras.Model(inputs={'inputs': inputs, 'teacher': teacher_inputs}, outputs=outputs)
    return model
```

• RNN
• Seq2seq
• TCN
• wavenet
• bert
• transformer

### 2.5 训练与验证

```python
def run_train(cfg, args):
    """Build a trainer for the configured model, predict, save and plot.

    Args:
        cfg: configuration object; this excerpt reads ``use_model``,
            ``train_sequence_length``, ``predict_sequence_length``,
            ``target_aggs``, ``feature_column_short``,
            ``feature_column_long``, ``custom_model_params``,
            ``fit_params['batch_size']``, ``model_dir`` and
            ``checkpoint_dir`` from it.
        args: not used in the lines shown here.
    """
    # NOTE(review): this is an abridged excerpt. `optimizer` and
    # `valid_data_loader` are not defined in it, and no training call
    # appears before `predict` -- confirm against the full source.

    # Bind every model hyper-parameter now so the trainer can build the
    # model later by calling the partial.
    build_model_fn = functools.partial(
        build_model,
        cfg.use_model,
        train_sequence_length=cfg.train_sequence_length,
        predict_sequence_length=cfg.predict_sequence_length,
        target_aggs=cfg.target_aggs,
        short_feature_nums=len(cfg.feature_column_short),
        long_feature_nums=len(cfg.feature_column_long),
        custom_model_params=cfg.custom_model_params,
    )

    loss_fn = custom_loss  # alternative: tf.keras.losses.MeanSquaredError()

    trainer = KerasTrainer(build_model_fn, loss_fn=loss_fn, optimizer=optimizer, strategy=None)
    # Prediction runs with twice the training batch size.
    valid_pred = trainer.predict(valid_data_loader, batch_size=cfg.fit_params['batch_size'] * 2)  # 3746 * 288
    trainer.save_model(
        model_dir=cfg.model_dir + '/checkpoints/{}_day'.format(cfg.use_model),
        checkpoint_dir=cfg.checkpoint_dir + '/nn_day_{}.h5'.format(cfg.use_model),
    )
    trainer.plot()
```

• 区间预测
• 异常检测
• 分类

THE END