from sktime.datasets import load_airline
from sktime.forecasting.model_evaluation import evaluate
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.utils.plotting import plot_series
import warnings
warnings.filterwarnings('ignore')
from sktime.datasets import load_longley
from sktime.forecasting.var import VAR
# Load the Longley macroeconomic dataset: 16 annual observations of 5
# variables (GNPDEFL, GNP, UNEMP, ARMED, POP). The first return value is
# discarded; we forecast the 5-column frame `y` jointly.
_, y = load_longley() # 16*5
y.head()
GNPDEFL | GNP | UNEMP | ARMED | POP | |
---|---|---|---|---|---|
Period | |||||
1947 | 83.0 | 234289.0 | 2356.0 | 1590.0 | 107608.0 |
1948 | 88.5 | 259426.0 | 2325.0 | 1456.0 | 108632.0 |
1949 | 88.2 | 258054.0 | 3682.0 | 1616.0 | 109773.0 |
1950 | 89.5 | 284599.0 | 3351.0 | 1650.0 | 110929.0 |
1951 | 96.2 | 328975.0 | 2099.0 | 3099.0 | 112075.0 |
import numpy as np
from sktime.performance_metrics.forecasting import MeanAbsoluteScaledError
# Multivariate forecaster: vector autoregression over all 5 series at once.
forecaster = VAR()
# Forecast horizon: 1, 2 and 3 steps ahead of each cutoff.
fh=np.arange(1, 4)
# Expanding-window CV: first fold trains on 8 points, each following fold
# keeps all earlier data and moves the cutoff forward by 2.
cv = ExpandingWindowSplitter(step_length=2, fh=fh, initial_window=8)
# multioutput='raw_values' yields one MASE score per variable per fold,
# rather than a single averaged score.
loss = MeanAbsoluteScaledError(multioutput = 'raw_values')
# implement cross validation
df = evaluate(forecaster=forecaster, y=y, cv=cv, strategy="refit", scoring = loss, return_data=True)
df
test_MeanAbsoluteScaledError | fit_time | pred_time | len_train_window | cutoff | y_train | y_test | y_pred | |
---|---|---|---|---|---|---|---|---|
0 | [0.9347828268675943, 1.0191768136028783, 1.447... | 0.030469 | 0.008766 | 8 | 1954 | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP... |
1 | [1.1927369949042805, 0.4852376765517554, 1.645... | 0.005706 | 0.002183 | 10 | 1956 | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP... |
2 | [1.0830045706133566, 0.9930592040396877, 1.371... | 0.004654 | 0.001715 | 12 | 1958 | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP... |
import pandas as pd
import matplotlib.pyplot as plt
def get_evaluation(df, name = 'test_MeanAbsoluteError', columns=None):
    """Unpack per-variable evaluation scores from cross-validation results.

    Parameters
    ----------
    df : pd.DataFrame
        Output of sktime's ``evaluate`` run with a multioutput='raw_values'
        metric, so each cell of ``df[name]`` is an array with one score
        per variable.
    name : str, default 'test_MeanAbsoluteError'
        Metric column of ``df`` to unpack; callers here pass
        'test_MeanAbsoluteScaledError'.
    columns : list of str, optional
        Variable names matching the order of scores in each array.
        Defaults to the columns of the module-level ``y`` for backward
        compatibility with the original signature.

    Returns
    -------
    pd.DataFrame
        One row per CV fold, one column per variable.
    """
    if columns is None:
        columns = list(y.columns)
    # Collect all rows first, then build the frame in one call:
    # DataFrame.append was removed in pandas 2.0, and appending row-by-row
    # was quadratic anyway.
    rows = [
        {feature: df[name].iloc[trial][index]
         for index, feature in enumerate(columns)}
        for trial in range(df.shape[0])
    ]
    return pd.DataFrame(rows, columns=columns)
def get_plots(df):
    """Plot train, test and predicted values for the last CV fold.

    One figure per variable: the training series (blue), the held-out
    test values (green) and the forecasts (yellow) share one time axis.
    """
    last_trial = df.shape[0] - 1
    for feature in list(df['y_train'][0].columns):
        fig, ax = plt.subplots(figsize=(8, 6))
        handles = []
        # The three frames share the same columns; only the line style differs.
        for key, style in (('y_train', 'bo-'), ('y_test', 'go-'), ('y_pred', 'yo-')):
            series = df[key][last_trial][feature]
            handle, = ax.plot(series.index.to_timestamp(), series, style)
            handles.append(handle)
        ax.legend(handles, ('y', 'y_test', 'y_pred'))
        ax.set_ylabel(feature)
# Summarise the expanding-window CV: mean MASE per variable across folds,
# plus the overall average across variables.
evaluation = get_evaluation(df, name = 'test_MeanAbsoluteScaledError')
print(evaluation.mean(), '\nAverage:', evaluation.mean().mean())
GNPDEFL 1.070175 GNP 0.832491 UNEMP 1.488010 ARMED 2.551862 POP 0.371150 dtype: float64 Average: 1.2627375758017618
get_plots(df)
# load data
_, y = load_longley() # 16*5
# create a forecaster
forecaster = VAR()
# define forecast horizon
fh=np.arange(1, 4)
# define cross validation parameters
# Sliding window: a fixed 8-point training window that moves forward by 2
# each fold, unlike the expanding splitter above which keeps all past data.
cv = SlidingWindowSplitter(step_length=2, fh=fh, window_length=8)
# select metric
loss = MeanAbsoluteScaledError(multioutput = 'raw_values')
# implement cross validation
df = evaluate(forecaster=forecaster, y=y, cv=cv, strategy="refit", scoring = loss, return_data=True)
df
test_MeanAbsoluteScaledError | fit_time | pred_time | len_train_window | cutoff | y_train | y_test | y_pred | |
---|---|---|---|---|---|---|---|---|
0 | [0.9347828268675943, 1.0191768136028783, 1.447... | 0.004929 | 0.002444 | 8 | 1954 | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP... |
1 | [8.353245543963347, 5.772319748423183, 8.81809... | 0.004558 | 0.002823 | 8 | 1956 | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP... |
2 | [1.355253337848363, 0.46833390173136336, 0.523... | 0.004158 | 0.001823 | 8 | 1958 | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP ARMED ... | GNPDEFL GNP UNEMP... |
# Summarise the sliding-window CV: mean MASE per variable across folds and
# the overall average. Scores here are worse than the expanding-window run
# (3.88 vs 1.26 average), consistent with each fold training on only 8 points.
evaluation = get_evaluation(df, name = 'test_MeanAbsoluteScaledError')
print(evaluation.mean(), '\nAverage:', evaluation.mean().mean())
GNPDEFL 3.547761 GNP 2.419943 UNEMP 3.596428 ARMED 8.183529 POP 1.645345 dtype: float64 Average: 3.878600975822414
get_plots(df)