Probabilistic Forecasting
¶

In [53]:
import warnings
# Install a blanket "ignore" filter so library warnings do not clutter the cell outputs below.
# NOTE(review): this hides every warning category, including deprecation notices
# from the forecasting library — consider narrowing it when debugging.
warnings.filterwarnings('ignore')

Load Data¶

In [54]:
from sktime.datasets import load_longley
from sktime.utils.plotting import plot_series
# load_longley() returns a pair; the first element is discarded and the second
# (the five-column frame previewed below) is used as the multivariate series y.
_, y = load_longley() # shape 16*5: 16 rows, 5 columns
y.head()
Out[54]:
GNPDEFL GNP UNEMP ARMED POP
Period
1947 83.0 234289.0 2356.0 1590.0 107608.0
1948 88.5 259426.0 2325.0 1456.0 108632.0
1949 88.2 258054.0 3682.0 1616.0 109773.0
1950 89.5 284599.0 3351.0 1650.0 110929.0
1951 96.2 328975.0 2099.0 3099.0 112075.0

Train Forecaster¶

In [55]:
import numpy as np
from sktime.forecasting.var import VAR

# Forecasting horizon: steps 1 through 4 after the end of the training data.
fh = np.arange(1, 5)

# Fit a vector autoregression on all columns of y, then take point forecasts
# over the horizon. fit() returns the fitted forecaster, so the call is chained.
forecaster = VAR().fit(y)
y_pred = forecaster.predict(fh=fh)

predict_interval¶

  • predict_interval(fh=None, X=None, coverage=0.90)
  • produces symmetric forecasting intervals
  • the lower and upper bounds are the quantiles at 0.5 - coverage/2 and 0.5 + coverage/2 (0.05 and 0.95 for coverage=0.9)
In [56]:
# Nominal coverage of the interval; 0.9 corresponds to the 0.05 and 0.95 bounds.
coverage = 0.9
# No fh is passed here: the recorded output covers the same four periods
# (1963-1966) as the horizon given to predict() in the training cell.
y_pred_ints = forecaster.predict_interval(coverage=coverage)
y_pred_ints.head()
Out[56]:
GNPDEFL GNP UNEMP ARMED POP
0.9 0.9 0.9 0.9 0.9
lower upper lower upper lower upper lower upper lower upper
Period
1963 117.492977 122.208376 560486.177419 587020.649184 3917.719752 5862.537044 1788.057405 3282.655684 132006.104828 132980.082567
1964 118.889313 124.047653 579429.082278 614063.738258 4436.940396 6829.680676 1225.810443 3074.629283 134217.732787 135744.466444
1965 120.921592 127.333840 610262.428743 649634.201865 4541.784488 7222.018501 800.607954 2911.299938 136653.038041 138573.129357
1966 124.033250 130.795854 641781.113945 682795.273342 4963.131313 7730.252362 455.670406 2718.751331 139342.410520 141613.299569
In [57]:
def get_plots_prob(y, y_pred, y_prob, name, label=None):
    """Plot one series, its point forecast, and a shaded prediction interval.

    Parameters
    ----------
    y : pandas.DataFrame
        Observed data; column ``name`` is plotted.
    y_pred : pandas.DataFrame
        Point forecasts; column ``name`` is plotted.
    y_prob : pandas.DataFrame
        Interval forecasts. ``y_prob[name]`` must yield a frame whose first
        two columns are the lower and upper bound, in that order.
    name : str
        Column (series) to plot.
    label : str, optional
        Legend text for the shaded band. When omitted it is built as
        ``"<coverage> cov.pred.intervals"``, with the coverage read from the
        first column label of ``y_prob[name]`` instead of a notebook global.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    bounds = y_prob[name]
    lower, upper = bounds.iloc[:, 0], bounds.iloc[:, 1]

    if label is None:
        # With multi-level columns the first level left after selecting `name`
        # is the coverage; fall back to the plain label otherwise.
        first_column = bounds.columns[0]
        shown_coverage = first_column[0] if isinstance(first_column, tuple) else first_column
        label = f"{shown_coverage} cov.pred.intervals"

    fig, ax = plot_series(y[name], y_pred[name], labels=["y", "y_pred"])
    # The last line drawn is the forecast; reuse its x positions and colour
    # so the band sits exactly under the forecast curve.
    forecast_line = ax.get_lines()[-1]
    ax.fill_between(
        forecast_line.get_xdata(),
        lower,
        upper,
        alpha=0.2,
        color=forecast_line.get_c(),
        label=label,
    )
    ax.legend()
    
# Draw GNPDEFL with its point forecast and the interval computed above.
get_plots_prob(y, y_pred, y_pred_ints, 'GNPDEFL')

predict_quantiles¶

  • predict_quantiles(fh=None, X=None, alpha=[0.05, 0.95])
  • returns the forecast quantiles at each probability level listed in alpha
In [67]:
# Request two probability levels for every series.
# NOTE(review): 0.275 and 0.975 are not symmetric around 0.5 — confirm whether
# 0.025 was intended.
# NOTE(review): in the recorded output the values do not line up with the
# printed (series, alpha) header — e.g. the second column, labelled GNPDEFL,
# holds GNP-scale numbers — which is why the next cell re-orders by position.
y_pred_quantiles = forecaster.predict_quantiles(alpha=[0.275, 0.975])
y_pred_quantiles.head()
Out[67]:
GNPDEFL GNP UNEMP ARMED POP
0.275 0.975 0.275 0.975 0.275 0.975 0.275 0.975 0.275 0.975
1963 118.993859 578574.890704 4536.743084 2806.934283 132316.115927 122.660049 557944.526676 6048.824766 1644.894698 133073.376727
1964 120.531180 603039.741171 5198.534849 2486.161665 134703.682681 124.541754 576111.541236 7058.873484 1048.718100 135890.707295
1965 122.962571 637102.409649 5394.886421 2239.479716 137264.191248 127.948048 606491.134659 7478.749399 598.431635 138757.048659
1966 126.185745 669740.719719 5843.888853 1998.426650 140065.220479 131.443622 637852.501021 7995.305887 238.897245 141830.820645
In [68]:
# re-order the columns of prediction
# Re-order the prediction columns by position so that positions 0 and 1 hold
# the two GNPDEFL quantile columns (positions 0 and 5 of the frame returned by
# predict_quantiles in the recorded output). iloc moves each column together
# with its label, so the header is permuted as well: downstream code should
# select these columns by position, not by label.
QUANTILE_COLUMN_ORDER = [0, 5, 6, 7, 2, 1, 8, 3, 4, 9]

# The cell overwrites its own input, so guard against applying the permutation
# a second time when the cell is re-run without recomputing the quantiles.
if not y_pred_quantiles.attrs.get("columns_reordered", False):
    y_pred_quantiles = y_pred_quantiles.iloc[:, QUANTILE_COLUMN_ORDER]
    y_pred_quantiles.attrs["columns_reordered"] = True
y_pred_quantiles
Out[68]:
GNPDEFL UNEMP ARMED GNP GNPDEFL POP GNP UNEMP POP
0.275 0.975 0.275 0.975 0.275 0.975 0.275 0.975 0.275 0.975
1963 118.993859 122.660049 557944.526676 6048.824766 4536.743084 578574.890704 1644.894698 2806.934283 132316.115927 133073.376727
1964 120.531180 124.541754 576111.541236 7058.873484 5198.534849 603039.741171 1048.718100 2486.161665 134703.682681 135890.707295
1965 122.962571 127.948048 606491.134659 7478.749399 5394.886421 637102.409649 598.431635 2239.479716 137264.191248 138757.048659
1966 126.185745 131.443622 637852.501021 7995.305887 5843.888853 669740.719719 238.897245 1998.426650 140065.220479 141830.820645
In [69]:
def _innermost_level(column_label):
    """Return the last level of a column label (the label itself if it is not a tuple)."""
    return column_label[-1] if isinstance(column_label, tuple) else column_label


def get_plots_prob(y, y_pred, y_prob, name, index_low, index_high, label=None):
    """Plot one series, its point forecast, and a shaded band between two columns.

    This definition has the same name as the interval-plotting helper defined
    earlier in the notebook and replaces it once this cell has run.

    Parameters
    ----------
    y : pandas.DataFrame
        Observed data; column ``name`` is plotted.
    y_pred : pandas.DataFrame
        Point forecasts; column ``name`` is plotted.
    y_prob : pandas.DataFrame
        Frame holding the band edges; columns are picked by position.
    name : str
        Column (series) to plot.
    index_low, index_high : int
        Positions in ``y_prob`` of the lower and upper edge of the band.
    label : str, optional
        Legend text for the band. When omitted it is built from the innermost
        level of the two selected column labels, e.g.
        ``"0.275-0.975 quantile band"``, so the legend describes what is
        actually plotted rather than an unrelated notebook-level coverage.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    lower = y_prob.iloc[:, index_low]
    upper = y_prob.iloc[:, index_high]

    if label is None:
        label = f"{_innermost_level(lower.name)}-{_innermost_level(upper.name)} quantile band"

    fig, ax = plot_series(y[name], y_pred[name], labels=["y", "y_pred"])
    # The last line drawn is the forecast; reuse its x positions and colour.
    forecast_line = ax.get_lines()[-1]
    ax.fill_between(
        forecast_line.get_xdata(),
        lower,
        upper,
        alpha=0.2,
        color=forecast_line.get_c(),
        label=label,
    )
    ax.legend()
# Positions 0 and 1 of the re-ordered frame are the two GNPDEFL quantile columns.
get_plots_prob(y, y_pred, y_pred_quantiles, 'GNPDEFL', 0, 1)

predict_var¶

  • predict_var(fh=None, X=None, cov=False)
  • produces variance forecasts
  • not all forecasters support cov=True
In [70]:
y_pred_var = forecaster.predict_var(cov=False) # one variance per series and forecast period
y_pred_var.head()
Out[70]:
GNPDEFL GNP UNEMP ARMED POP
1963 2.054577 6.505885e+07 349496.724319 206411.766443 87656.385238
1964 2.458700 1.108427e+08 529025.512014 315845.148830 215383.313893
1965 3.799322 1.432371e+08 663790.333183 411656.726839 340666.369241
1966 4.225843 1.554365e+08 707525.034070 473244.595983 476515.823957

predict_proba¶

  • predict_proba(fh=None, X=None, marginal=True)
  • returns a predictive distribution object (here a Normal, with a mean mu and a standard deviation sigma for each series and forecast period)
In [71]:
# Called with no arguments, so the defaults in the signature above apply.
y_pred_proba = forecaster.predict_proba()
y_pred_proba # the repr lists the mu and sigma tables of the distribution
Out[71]:
Normal(columns=Index(['GNPDEFL', 'GNP', 'UNEMP', 'ARMED', 'POP'], dtype='object'),
       index=PeriodIndex(['1963', '1964', '1965', '1966'], dtype='period[A-DEC]'),
       mu=         GNPDEFL            GNP        UNEMP        ARMED            POP
1963  119.850676  573753.413302  4890.128398  2535.356545  132493.093697
1964  121.468483  596746.410268  5633.310536  2150.219863  134981.099616
1965  124.127716  629948.315304  5881.901495  1855.953946  137613.083699
1966  127.414552  662288.193643  6346.691838  1587.210869  140477.855045,
       sigma=       GNPDEFL           GNP       UNEMP       ARMED         POP
1963  1.433380   8065.906695  591.182480  454.325617  296.068210
1964  1.568024  10528.187862  727.341400  562.001022  464.094079
1965  1.949185  11968.169227  814.733290  641.604806  583.666317
1966  2.055686  12467.419205  841.145073  687.927755  690.301256)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Normal(columns=Index(['GNPDEFL', 'GNP', 'UNEMP', 'ARMED', 'POP'], dtype='object'),
       index=PeriodIndex(['1963', '1964', '1965', '1966'], dtype='period[A-DEC]'),
       mu=         GNPDEFL            GNP        UNEMP        ARMED            POP
1963  119.850676  573753.413302  4890.128398  2535.356545  132493.093697
1964  121.468483  596746.410268  5633.310536  2150.219863  134981.099616
1965  124.127716  629948.315304  5881.901495  1855.953946  137613.083699
1966  127.414552  662288.193643  6346.691838  1587.210869  140477.855045,
       sigma=       GNPDEFL           GNP       UNEMP       ARMED         POP
1963  1.433380   8065.906695  591.182480  454.325617  296.068210
1964  1.568024  10528.187862  727.341400  562.001022  464.094079
1965  1.949185  11968.169227  814.733290  641.604806  583.666317
1966  2.055686  12467.419205  841.145073  687.927755  690.301256)
In [74]:
# The distribution object exposes summary methods; var() is shown here and its
# recorded result equals the predict_var(cov=False) table above (sigma squared).
# Presumably y_pred_proba.mean() returns the mu table in the same way — verify.
y_pred_proba.var()
Out[74]:
GNPDEFL GNP UNEMP ARMED POP
1963 2.054577 6.505885e+07 349496.724319 206411.766443 87656.385238
1964 2.458700 1.108427e+08 529025.512014 315845.148830 215383.313893
1965 3.799322 1.432371e+08 663790.333183 411656.726839 340666.369241
1966 4.225843 1.554365e+08 707525.034070 473244.595983 476515.823957

Reference¶

  • Probabilistic Forecasting