# 集成时间序列模型提高预测精度

``````bash
pip install --upgrade scalecast
conda install tensorflow
conda install shap
conda install -c conda-forge cmdstanpy
pip install prophet
``````

## 数据集

``````python
import pandas as pd
import numpy as np
from scalecast.Forecaster import Forecaster
from scalecast.util import metrics
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): the extracted text had lost the `def` line, the
# `pd.read_csv(` openers and the final call to this function. They are
# reconstructed here from the names the remaining lines use (`info`, `train`,
# `test`, `idx`, `cis`, `metrics`, `test_set`). Confirm the function name and
# the default values against the original notebook.
def read_data(idx='H1', cis=True, metrics=None):
    """Load one hourly series and wrap it in a Forecaster.

    Args:
        idx: Row label of the series in the info/train/test CSV files.
        cis: Forwarded to ``Forecaster(cis=...)``.
        metrics: Forwarded to ``Forecaster(metrics=...)``. ``None`` means
            ``['smape']``; a fresh list is built on each call so no default
            list is shared between calls.

    Returns:
        A tuple ``(f, test_values)`` where ``f`` is the Forecaster built from
        the ``idx`` row of ``Hourly-train.csv`` and ``test_values`` is the
        ``.values`` array of the ``idx`` row of ``Hourly-test.csv``.
    """
    metrics = ['smape'] if metrics is None else metrics

    info = pd.read_csv(
        'M4-info.csv',
        index_col=0,
        parse_dates=['StartingDate'],
        dayfirst=True,
    )
    train = pd.read_csv(
        'Hourly-train.csv',
        index_col=0,
    ).loc[idx]
    test = pd.read_csv(
        'Hourly-test.csv',
        index_col=0,
    ).loc[idx]

    y = train.values
    sd = info.loc[idx, 'StartingDate']
    fcst_horizon = info.loc[idx, 'Horizon']
    # Rebuild the timestamps: one hourly stamp per observed value, starting
    # at the series' recorded start date.
    cd = pd.date_range(
        start=sd,
        freq='H',
        periods=len(y),
    )
    f = Forecaster(
        y=y,  # observed values
        current_dates=cd,  # current dates
        future_dates=fcst_horizon,  # forecast length
        test_length=fcst_horizon,  # test-set length
        cis=cis,  # whether to evaluate intervals for each model
        metrics=metrics,  # what metrics to evaluate
    )

    return f, test.values


f, test_set = read_data()
f  # display the Forecaster object
``````

## 模型

``````python
f.set_estimator('naive')
f.manual_forecast(seasonal=True)
``````

ARIMA

ARIMA（Autoregressive Integrated Moving Average，自回归积分滑动平均）是一种流行而简单的时间序列技术，它利用序列的滞后值和误差项，以线性方式预测序列的未来值。通过 EDA，我们确定这个序列具有高度的季节性，因此最终选择了阶数为 (5,1,4) x (1,1,1,24) 的季节性 ARIMA 模型。

``````python
f.set_estimator('arima')
f.manual_forecast(
order = (5,1,4),
seasonal_order = (1,1,1,24),
call_me = 'manual_arima',
)
``````

LSTM

``````python
f.set_estimator('rnn')
f.manual_forecast(
lags=48,
layers_struct=[
('LSTM',{'units':100,'activation':'tanh'}),
('LSTM',{'units':100,'activation':'tanh'}),
('LSTM',{'units':100,'activation':'tanh'}),
],
epochs=15,
plot_loss=True,
validation_split=0.2,
call_me='rnn_tanh_activation',
)

f.manual_forecast(
lags=48,
layers_struct=[
('LSTM',{'units':100,'activation':'relu'}),
('LSTM',{'units':100,'activation':'relu'}),
('LSTM',{'units':100,'activation':'relu'}),
],
epochs=15,
plot_loss=True,
validation_split=0.2,
call_me='rnn_relu_activation',
)
``````

Prophet

``````python
f.set_estimator('prophet')
f.manual_forecast()
``````

``````python
results=f.export(determine_best_by='TestSetSMAPE')
ms=results['model_summaries']
ms[
[
'ModelNickname',
'TestSetLength',
'TestSetSMAPE',
'InSampleSMAPE',
]
]
``````

``````python
f.plot(order_by="TestSetSMAPE",ci=True)
plt.show()
``````

## 堆叠模型

``````python
f.add_signals(
f.history.keys(), # add signals from all previously evaluated models
)
f.set_estimator('catboost')
``````

``````python
f.manual_forecast(
Xvars='all',
call_me='catboost_all_reg',
verbose=False,
)
# Fit CatBoost using only the regressors whose names start with 'AR'.
f.manual_forecast(
    Xvars=[x for x in f.get_regressor_names() if x.startswith('AR')],
    call_me='catboost_lags_only',
    verbose=False,
)
# Fit CatBoost using every regressor whose name does NOT start with 'AR'.
f.manual_forecast(
    Xvars=[x for x in f.get_regressor_names() if not x.startswith('AR')],
    call_me='catboost_signals_only',
    verbose=False,
)
``````

``````python
test_results=pd.DataFrame(index=f.history.keys(),columns= ['smape','mase'])
# Score every evaluated model's forecast against the held-out test set and
# record both metrics in the row named after that model.
for k, v in f.history.items():
    test_results.loc[k, ['smape', 'mase']] = [
        metrics.smape(test_set, v['Forecast']),
        metrics.mase(test_set, v['Forecast'], m=24, obs=f.y),
    ]

test_results.sort_values('smape')
``````

``````python
fig, ax=plt.subplots(figsize=(12,6))
f.plot(
models= ['catboost_all_reg','catboost_signals_only'],
ci=True,
ax=ax
)
sns.lineplot(
x=f.future_dates,
y=test_set,
ax=ax,
label='held out actuals',
color='darkblue',
alpha=.75,
)
plt.show()
``````

## 哪些信号最重要?

``````python
f.export_feature_importance('catboost_all_reg')
``````

## 总结

https://avoid.overfit.cn/post/cd910a41e6b94852b762cd6f2abf8b16

THE END