Evaluation Procedure¶
This walkthrough covers the functime.evaluation module, which contains functions to rank forecasts and time-series features (e.g. tsfresh).
Forecasting¶
Let's score, rank, and plot point forecasts for multiple commodity prices. We compare two forecasting models:
- Seasonal naive (as our benchmark)
- AR (autoregressive) LightGBM model with local scaling
Load data¶
%%capture
import polars as pl
from functime.cross_validation import train_test_split
from functime.evaluation import rank_point_forecasts, rank_residuals
from functime.forecasting import lightgbm, snaive
from functime.plotting import plot_comet, plot_forecasts, plot_fva, plot_residuals
from functime.preprocessing import detrend
fh = 12
y = pl.read_parquet("../../data/commodities.parquet")
entity_col = y.columns[0]
y_train, y_test = train_test_split(test_size=fh, eager=True)(y)
y.tail()
There are 71 unique commodity types and 759 monthly dates between 1960-01-01 and 2023-03-01.
y.select(
pl.all().exclude("price").n_unique(),
pl.col("time").min().dt.date().alias("start"),
pl.col("time").max().dt.date().alias("end"),
)
Benchmark Forecasts¶
y_pred_bench = snaive(freq="1mo", sp=12)(y=y_train, fh=fh)
y_pred_bench.head()
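As a sanity check on what `snaive` produces: a seasonal naive forecast simply repeats the value observed `sp` periods earlier. A minimal plain-Python sketch (not functime's implementation, which operates per entity on Polars frames):

```python
def seasonal_naive(history, sp, fh):
    """Forecast fh steps ahead by cycling the last sp observations."""
    last_season = history[-sp:]
    return [last_season[h % sp] for h in range(fh)]

# Monthly data (sp=12): each forecasted month repeats last year's value
forecast = seasonal_naive(list(range(1, 25)), sp=12, fh=3)
# forecast == [13, 14, 15]
```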
Let's plot the top 4 forecasts by best SMAPE score:
ranks = rank_point_forecasts(y_true=y_test, y_pred=y_pred_bench)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_forecasts(
y_true=y.filter(pl.col(entity_col).is_in(selected_entities)),
y_pred=y_pred_bench.filter(pl.col(entity_col).is_in(selected_entities)),
n_cols=2,
height=1000,
width=1200,
)
figure.show(renderer="svg")
Let's plot the 4 forecasts with the worst SMAPE scores:
ranks = rank_point_forecasts(y_true=y_test, y_pred=y_pred_bench, descending=True)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_forecasts(
y_true=y.filter(pl.col(entity_col).is_in(selected_entities)),
y_pred=y_pred_bench.filter(pl.col(entity_col).is_in(selected_entities)),
n_cols=2,
height=1000,
width=1200,
)
figure.show(renderer="svg")
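`rank_point_forecasts` sorts entities by SMAPE by default. One common SMAPE formulation is sketched below (functime's exact variant may differ, e.g. by scaling to a percentage):

```python
def smape(y_true, y_pred):
    """Symmetric MAPE: 2|y - yhat| / (|y| + |yhat|), averaged over the horizon."""
    return sum(
        2 * abs(t - p) / (abs(t) + abs(p)) for t, p in zip(y_true, y_pred)
    ) / len(y_true)

error = smape([100.0, 200.0], [110.0, 180.0])  # ~0.10, i.e. roughly a 10% error
```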
Global AR Forecasts¶
# Instantiate forecaster and backtest
forecaster = lightgbm(freq="1mo", lags=48, target_transform=detrend())
y_preds, y_resids = forecaster.backtest(y=y, test_size=12, step_size=12, n_splits=5)
y_pred = forecaster(fh=fh, y=y_train)
Let's plot backtests for commodities with the best SMAPE score across expanding window splits:
ranks = rank_point_forecasts(
y_true=y,
y_pred=y_preds,
)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_forecasts(
y_true=y.filter(pl.col(entity_col).is_in(selected_entities)),
y_pred=y_preds.filter(pl.col(entity_col).is_in(selected_entities)),
n_cols=2,
height=1000,
width=1200,
)
figure.show(renderer="svg")
Let's plot backtests for commodities with the worst SMAPE score across expanding window splits:
ranks = rank_point_forecasts(y_true=y, y_pred=y_preds, descending=True)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_forecasts(
y_true=y.filter(pl.col(entity_col).is_in(selected_entities)),
y_pred=y_preds.filter(pl.col(entity_col).is_in(selected_entities)),
n_cols=2,
height=1000,
width=1200,
)
figure.show(renderer="svg")
Let's plot residuals for the 4 forecasts with the highest mean absolute bias:
ranks = rank_residuals(y_resids=y_resids, sort_by="abs_bias", descending=True)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_residuals(
y_resids=y_resids.filter(pl.col(entity_col).is_in(selected_entities)),
n_bins=200,
height=800,
width=1000,
)
figure.show(renderer="svg")
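The `abs_bias` ranking flags systematic over- or under-forecasting. In one common definition it is the absolute value of the mean residual; a sketch (not necessarily functime's exact computation):

```python
def abs_bias(residuals):
    """Absolute mean residual: large when errors all point the same way."""
    return abs(sum(residuals) / len(residuals))

# Every residual negative -> consistent over-forecasting, bias of 2.0
bias = abs_bias([-2.0, -3.0, -1.0])
```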
Let's plot residuals for the 4 forecasts ranked highest by a normality test, where the null hypothesis is that the residuals are normally distributed. A higher test statistic indicates stronger evidence against the null, i.e. residuals that are less likely to be normal.
ranks = rank_residuals(y_resids=y_resids, sort_by="normality", descending=True)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_residuals(
y_resids=y_resids.filter(pl.col(entity_col).is_in(selected_entities)),
n_bins=200,
height=800,
width=1000,
)
figure.show(renderer="svg")
FVA (Forecast Value Add) Plot¶
It is best practice to use a single forecast model as a benchmark. In this walkthrough, we compare the seasonal naive forecast to our more sophisticated autoregressive LightGBM model.
figure = plot_fva(
y_true=y_test, y_pred=y_pred, y_pred_bench=y_pred_bench, height=900, width=900
)
figure.show(renderer="svg")
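FVA measures how much the sophisticated model improves on the benchmark's error. The plot above compares scores per series; as a scalar sketch using MAE (function names here are illustrative, not functime's API):

```python
def mae(y_true, y_pred):
    """Mean absolute error over the forecast horizon."""
    return sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)

def forecast_value_add(y_true, y_pred, y_pred_bench):
    """Positive FVA: the model beats the benchmark; negative: it adds no value."""
    return mae(y_true, y_pred_bench) - mae(y_true, y_pred)

# Benchmark MAE = 25, model MAE = 5 -> the model adds 20 units of accuracy
fva = forecast_value_add([100.0, 200.0], [105.0, 195.0], [120.0, 230.0])
```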
Comet Plot¶
Plot of the coefficient of variation (CV) against forecast accuracy (SMAPE by default). CV is the ratio of a series' standard deviation to its mean, expressing its variability over time: a flat line has CV = 0, while a highly erratic pattern may have a CV of 100% or more. It is an intuitive heuristic for comparing forecastability across many time-series.
figure = plot_comet(
y_train=y_train, y_test=y_test, y_pred=y_pred, height=900, width=900
)
figure.show(renderer="svg")
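The CV on one axis of the comet plot is just the standard deviation divided by the mean, computed per series. A minimal sketch using the population standard deviation (functime may use a different variant):

```python
def coefficient_of_variation(series):
    """CV = std / mean; 0 for a flat line, large for erratic series."""
    n = len(series)
    mean = sum(series) / n
    std = (sum((x - mean) ** 2 for x in series) / n) ** 0.5
    return std / mean

cv_flat = coefficient_of_variation([10.0, 10.0, 10.0])  # flat line -> 0.0
```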