Today, give the Techtonique web app a try: it is a tool designed to help you make informed, data-driven decisions using Mathematics, Statistics, Machine Learning, and Data Visualization. Here is a tutorial with audio, video, code, and slides: https://moudiki2.gumroad.com/l/nrhgb
In this post, I benchmark nnetsauce.MTS’s armada of base models against foundation models (“LLMs”, Amazon’s Chronos, IBM’s TinyTimeMixer) and statistical models. Regarding the LLMs: if I’m not using them properly (I just plugged and played), do not hesitate to reach out.
The armada is now made of Generic Gradient Boosters (see https://www.researchgate.net/publication/386212136_Scalable_Gradient_Boosting_using_Randomized_Neural_Networks).
0 - Install nnetsauce and mlsauce
!pip install git+https://github.com/Techtonique/mlsauce.git --verbose
!pip install nnetsauce
!pip install git+https://github.com/thierrymoudiki/sktime.git --upgrade --no-cache-dir
import numpy as np
import pandas as pd
import statsmodels.api as sm
import nnetsauce as ns
import mlsauce as ms
from sktime.forecasting.ttm import TinyTimeMixerForecaster
from sktime.forecasting.chronos import ChronosForecaster
from sklearn import linear_model
from statsmodels.tsa.base.datetools import dates_from_str
from sktime.forecasting.nnetsaucemts import NnetsauceMTS
1 - Error metrics
import numpy as np
def rmse(predictions, targets):
    """Return the root mean squared error between two pandas objects.

    Both arguments must expose ``.values``; the underlying arrays are
    compared by position, so index and column labels are ignored.
    """
    errors = predictions.values - targets.values
    return np.sqrt(np.mean(np.square(errors)))
def mae(predictions, targets):
    """Return the mean absolute error between predictions and targets.

    Values are compared by position, the same way ``rmse`` compares
    ``.values``: both inputs are converted to NumPy arrays first, so
    differing index or column labels on pandas inputs cannot introduce
    NaNs through label alignment. Plain array-likes are accepted as well.

    Parameters
    ----------
    predictions, targets : array-like
        Objects of matching shape (pandas DataFrame/Series, NumPy array, list).

    Returns
    -------
    float
        Mean of the element-wise absolute differences.
    """
    errors = np.asarray(predictions) - np.asarray(targets)
    return np.mean(np.abs(errors))
2 - Examples on 3 datasets
# CSV files to benchmark on; each name is appended to the dataset base URL
# inside the per-file loop below.
filenames = ["a10.csv", "austa.csv", "nile.csv"]
from joblib import Parallel, delayed
from tqdm import tqdm
from sklearn.base import RegressorMixin
from sklearn.utils import all_estimators
from tqdm import tqdm
# Function to process each estimator
def process_estimator(est, df_train, df_test, lags_candidates=(20, 5)):
    """Benchmark one scikit-learn estimator wrapped as MTS(GenericBoostingRegressor).

    Parameters
    ----------
    est : tuple
        ``(name, estimator_class)`` pair, as yielded by
        ``sklearn.utils.all_estimators()``.
    df_train : pandas.DataFrame
        Training window passed to ``ns.MTS(...).fit``.
    df_test : pandas.DataFrame
        Hold-out window; its number of rows is the forecast horizon and it
        is scored against the forecast with ``rmse`` and ``mae``.
    lags_candidates : iterable of int, optional
        Lag values tried in order; the first one that fits and predicts
        without raising is used. Defaults to ``(20, 5)``.

    Returns
    -------
    list or None
        ``[label, rmse, mae]`` on success. ``None`` when the class is not a
        regressor or when every candidate lag raised.
    """
    name, estimator_class = est[0], est[1]
    horizon = df_test.shape[0]
    for lags in lags_candidates:
        try:
            if not issubclass(estimator_class, RegressorMixin):
                return None
            booster = ms.GenericBoostingRegressor(estimator_class(), verbose=0)
            forecaster = ns.MTS(booster, lags=lags, verbose=0, show_progress=False)
            preds = forecaster.fit(df_train).predict(h=horizon)
            return ["MTS(GenBoost(" + name + "))", rmse(df_test, preds), mae(df_test, preds)]
        except Exception:
            # Deliberate best effort: arbitrary estimators can fail in
            # arbitrary ways (construction, fitting, prediction), so the
            # next lag is tried instead of aborting the whole benchmark.
            continue
    return None
# For each dataset: download it, hold out the last 10% of observations, then
# score foundation models, classical statistical models and every
# MTS(GenericBoostingRegressor(<sklearn regressor>)) on the hold-out window.
for filename in filenames:
    print("filename: ", filename)

    # Load the series and index it by date
    url = "https://raw.githubusercontent.com/Techtonique/"
    url += "datasets/main/time_series/univariate/"
    url += filename
    data = pd.read_csv(url)
    data.index = pd.DatetimeIndex(data.date)  # must have
    data.drop(columns=['date'], inplace=True)
    data.plot()

    # Chronological split: first 90% of rows for training, the rest for
    # testing. The cut point is an int so it can be used directly in slices.
    n = data.shape[0]
    max_idx_train = int(np.floor(n * 0.9))
    df_train = data.iloc[:max_idx_train, :]
    print(df_train.tail())
    df_test = data.iloc[max_idx_train:, :]
    print(df_test.head())

    # Forecast 1..horizon steps ahead, one step per held-out observation
    horizon = df_test.shape[0]
    fh = list(range(1, horizon + 1))

    results1 = []

    # Initialise models
    chronos = ChronosForecaster("amazon/chronos-t5-tiny")
    ttm = TinyTimeMixerForecaster()
    # Fit
    chronos.fit(y=df_train, fh=fh)
    ttm.fit(y=df_train, fh=fh)
    # Predict
    pred_chronos = chronos.predict(fh=fh)
    pred_ttm = ttm.predict(fh=fh)
    # LLMs and sktime
    results1.append(["Chronos", rmse(df_test, pred_chronos), mae(df_test, pred_chronos)])
    results1.append(["TinyTimeMixer", rmse(df_test, pred_ttm), mae(df_test, pred_ttm)])

    # statistical models
    for name in ["ARIMA", "ETS", "Theta", "VAR", "VECM"]:
        try:
            regr = ns.ClassicalMTS(model=name)
            regr.fit(df_train)
            X_pred = regr.predict(h=horizon)
            results1.append([name, rmse(df_test, X_pred.mean), mae(df_test, X_pred.mean)])
        except Exception:
            # Best effort: a model that cannot be fitted on this dataset is
            # left out of the table (presumably VAR/VECM, which never appear
            # in the results for these univariate series -- TODO confirm).
            continue

    # Parallel processing: one job per scikit-learn estimator; jobs that
    # could not produce a forecast return None.
    results2 = Parallel(n_jobs=-1)(
        delayed(process_estimator)(est, df_train, df_test)
        for est in tqdm(all_estimators())
    )

    # Keep only the successful runs, baselines first
    results = [elt for elt in [*results1, *results2] if elt is not None]
    results_df = pd.DataFrame(results, columns=["model", "rmse", "mae"])
    display(results_df.sort_values(by="rmse"))
filename: a10.csv
value
date
2006-05-01 17.78
2006-06-01 16.29
2006-07-01 16.98
2006-08-01 18.61
2006-09-01 16.62
value
date
2006-10-01 21.43
2006-11-01 23.58
2006-12-01 23.33
2007-01-01 28.04
2007-02-01 16.76
model | rmse | mae | |
---|---|---|---|
11 | MTS(GenBoost(ElasticNet)) | 2.74 | 2.32 |
32 | MTS(GenBoost(OrthogonalMatchingPursuitCV)) | 2.86 | 2.58 |
24 | MTS(GenBoost(LassoLars)) | 3.27 | 2.69 |
22 | MTS(GenBoost(Lasso)) | 3.27 | 2.69 |
7 | MTS(GenBoost(BaggingRegressor)) | 3.33 | 2.90 |
5 | MTS(GenBoost(ARDRegression)) | 3.35 | 2.95 |
12 | MTS(GenBoost(ElasticNetCV)) | 3.38 | 3.03 |
23 | MTS(GenBoost(LassoCV)) | 3.38 | 3.04 |
36 | MTS(GenBoost(RANSACRegressor)) | 3.40 | 2.87 |
9 | MTS(GenBoost(DecisionTreeRegressor)) | 3.41 | 2.88 |
25 | MTS(GenBoost(LassoLarsCV)) | 3.41 | 3.07 |
13 | MTS(GenBoost(ExtraTreeRegressor)) | 3.41 | 2.96 |
44 | MTS(GenBoost(TweedieRegressor)) | 3.45 | 3.04 |
37 | MTS(GenBoost(RandomForestRegressor)) | 3.45 | 3.02 |
26 | MTS(GenBoost(LassoLarsIC)) | 3.49 | 3.11 |
8 | MTS(GenBoost(BayesianRidge)) | 3.52 | 3.15 |
42 | MTS(GenBoost(TheilSenRegressor)) | 3.55 | 3.17 |
14 | MTS(GenBoost(ExtraTreesRegressor)) | 3.57 | 3.12 |
31 | MTS(GenBoost(OrthogonalMatchingPursuit)) | 3.72 | 3.27 |
39 | MTS(GenBoost(RidgeCV)) | 4.00 | 3.54 |
6 | MTS(GenBoost(AdaBoostRegressor)) | 4.04 | 3.58 |
33 | MTS(GenBoost(PLSRegression)) | 4.06 | 3.62 |
20 | MTS(GenBoost(KernelRidge)) | 4.08 | 3.61 |
43 | MTS(GenBoost(TransformedTargetRegressor)) | 4.09 | 3.62 |
27 | MTS(GenBoost(LinearRegression)) | 4.09 | 3.62 |
38 | MTS(GenBoost(Ridge)) | 4.10 | 3.64 |
16 | MTS(GenBoost(GradientBoostingRegressor)) | 4.48 | 4.04 |
4 | Theta | 4.57 | 4.24 |
29 | MTS(GenBoost(MLPRegressor)) | 4.70 | 4.29 |
18 | MTS(GenBoost(HuberRegressor)) | 4.77 | 4.35 |
28 | MTS(GenBoost(LinearSVR)) | 4.83 | 4.42 |
35 | MTS(GenBoost(QuantileRegressor)) | 5.02 | 4.22 |
0 | Chronos | 5.10 | 4.81 |
34 | MTS(GenBoost(PassiveAggressiveRegressor)) | 5.19 | 4.66 |
17 | MTS(GenBoost(HistGradientBoostingRegressor)) | 5.40 | 4.65 |
21 | MTS(GenBoost(LarsCV)) | 5.92 | 5.33 |
3 | ETS | 6.19 | 5.37 |
30 | MTS(GenBoost(NuSVR)) | 6.43 | 5.69 |
41 | MTS(GenBoost(SVR)) | 6.53 | 5.78 |
1 | TinyTimeMixer | 6.66 | 5.88 |
19 | MTS(GenBoost(KNeighborsRegressor)) | 9.12 | 6.74 |
15 | MTS(GenBoost(GaussianProcessRegressor)) | 12.71 | 12.29 |
10 | MTS(GenBoost(DummyRegressor)) | 12.72 | 12.30 |
2 | ARIMA | 13.37 | 12.98 |
40 | MTS(GenBoost(SGDRegressor)) | inf | 88755370094251662260627878082695870432152870817... |
filename: austa.csv
value
date
2002-01-01 4.46
2003-01-01 4.38
2004-01-01 4.80
2005-01-01 5.05
2006-01-01 5.10
value
date
2007-01-01 5.20
2008-01-01 5.17
2009-01-01 5.17
2010-01-01 5.44
model | rmse | mae | |
---|---|---|---|
8 | MTS(GenBoost(BayesianRidge)) | 0.09 | 0.08 |
45 | MTS(GenBoost(TweedieRegressor)) | 0.10 | 0.09 |
41 | MTS(GenBoost(SGDRegressor)) | 0.11 | 0.09 |
4 | Theta | 0.12 | 0.09 |
33 | MTS(GenBoost(OrthogonalMatchingPursuitCV)) | 0.14 | 0.12 |
40 | MTS(GenBoost(RidgeCV)) | 0.14 | 0.12 |
35 | MTS(GenBoost(PassiveAggressiveRegressor)) | 0.15 | 0.12 |
5 | MTS(GenBoost(ARDRegression)) | 0.15 | 0.14 |
24 | MTS(GenBoost(LassoCV)) | 0.16 | 0.15 |
34 | MTS(GenBoost(PLSRegression)) | 0.16 | 0.15 |
20 | MTS(GenBoost(KernelRidge)) | 0.17 | 0.15 |
0 | Chronos | 0.17 | 0.15 |
12 | MTS(GenBoost(ElasticNetCV)) | 0.17 | 0.16 |
36 | MTS(GenBoost(QuantileRegressor)) | 0.17 | 0.16 |
3 | ETS | 0.19 | 0.15 |
39 | MTS(GenBoost(Ridge)) | 0.19 | 0.17 |
26 | MTS(GenBoost(LassoLarsCV)) | 0.19 | 0.18 |
27 | MTS(GenBoost(LassoLarsIC)) | 0.21 | 0.18 |
42 | MTS(GenBoost(SVR)) | 0.21 | 0.18 |
25 | MTS(GenBoost(LassoLars)) | 0.22 | 0.20 |
23 | MTS(GenBoost(Lasso)) | 0.22 | 0.20 |
9 | MTS(GenBoost(DecisionTreeRegressor)) | 0.23 | 0.19 |
31 | MTS(GenBoost(NuSVR)) | 0.23 | 0.21 |
13 | MTS(GenBoost(ExtraTreeRegressor)) | 0.23 | 0.20 |
14 | MTS(GenBoost(ExtraTreesRegressor)) | 0.23 | 0.20 |
37 | MTS(GenBoost(RANSACRegressor)) | 0.24 | 0.21 |
7 | MTS(GenBoost(BaggingRegressor)) | 0.24 | 0.21 |
43 | MTS(GenBoost(TheilSenRegressor)) | 0.24 | 0.20 |
29 | MTS(GenBoost(LinearSVR)) | 0.25 | 0.22 |
6 | MTS(GenBoost(AdaBoostRegressor)) | 0.25 | 0.22 |
1 | TinyTimeMixer | 0.25 | 0.19 |
16 | MTS(GenBoost(GradientBoostingRegressor)) | 0.26 | 0.23 |
38 | MTS(GenBoost(RandomForestRegressor)) | 0.26 | 0.23 |
30 | MTS(GenBoost(MLPRegressor)) | 0.27 | 0.23 |
28 | MTS(GenBoost(LinearRegression)) | 0.29 | 0.22 |
44 | MTS(GenBoost(TransformedTargetRegressor)) | 0.29 | 0.22 |
19 | MTS(GenBoost(KNeighborsRegressor)) | 0.30 | 0.27 |
18 | MTS(GenBoost(HuberRegressor)) | 0.39 | 0.29 |
11 | MTS(GenBoost(ElasticNet)) | 0.40 | 0.33 |
32 | MTS(GenBoost(OrthogonalMatchingPursuit)) | 0.90 | 0.81 |
15 | MTS(GenBoost(GaussianProcessRegressor)) | 1.21 | 1.20 |
22 | MTS(GenBoost(LarsCV)) | 1.84 | 1.84 |
10 | MTS(GenBoost(DummyRegressor)) | 1.95 | 1.95 |
17 | MTS(GenBoost(HistGradientBoostingRegressor)) | 1.95 | 1.95 |
2 | ARIMA | 2.40 | 2.40 |
21 | MTS(GenBoost(Lars)) | inf | 69642504841544336879259080339820128177381130744... |
filename: nile.csv
value
date
1960-01-01 815.00
1961-01-01 1020.00
1962-01-01 906.00
1963-01-01 901.00
1964-01-01 1170.00
value
date
1965-01-01 912.00
1966-01-01 746.00
1967-01-01 919.00
1968-01-01 718.00
1969-01-01 714.00
model | rmse | mae | |
---|---|---|---|
36 | MTS(GenBoost(QuantileRegressor)) | 104.93 | 101.50 |
2 | ARIMA | 111.78 | 105.46 |
31 | MTS(GenBoost(NuSVR)) | 114.60 | 107.00 |
27 | MTS(GenBoost(LassoLarsIC)) | 115.42 | 107.39 |
42 | MTS(GenBoost(SVR)) | 117.68 | 108.61 |
17 | MTS(GenBoost(HistGradientBoostingRegressor)) | 117.75 | 108.59 |
10 | MTS(GenBoost(DummyRegressor)) | 117.75 | 108.59 |
15 | MTS(GenBoost(GaussianProcessRegressor)) | 117.75 | 108.59 |
26 | MTS(GenBoost(LassoLarsCV)) | 130.02 | 114.40 |
0 | Chronos | 151.16 | 136.27 |
3 | ETS | 170.52 | 145.80 |
16 | MTS(GenBoost(GradientBoostingRegressor)) | 177.24 | 154.55 |
22 | MTS(GenBoost(LarsCV)) | 178.74 | 152.11 |
6 | MTS(GenBoost(AdaBoostRegressor)) | 179.29 | 156.22 |
9 | MTS(GenBoost(DecisionTreeRegressor)) | 184.44 | 164.52 |
4 | Theta | 195.86 | 173.90 |
38 | MTS(GenBoost(RandomForestRegressor)) | 207.64 | 177.91 |
7 | MTS(GenBoost(BaggingRegressor)) | 209.69 | 182.79 |
12 | MTS(GenBoost(ElasticNetCV)) | 216.37 | 193.03 |
24 | MTS(GenBoost(LassoCV)) | 225.24 | 201.83 |
37 | MTS(GenBoost(RANSACRegressor)) | 296.22 | 260.08 |
8 | MTS(GenBoost(BayesianRidge)) | 310.59 | 287.58 |
14 | MTS(GenBoost(ExtraTreesRegressor)) | 318.41 | 304.40 |
13 | MTS(GenBoost(ExtraTreeRegressor)) | 349.23 | 339.16 |
1 | TinyTimeMixer | 396.16 | 388.58 |
5 | MTS(GenBoost(ARDRegression)) | 407.08 | 377.50 |
19 | MTS(GenBoost(KNeighborsRegressor)) | 409.69 | 398.57 |
33 | MTS(GenBoost(OrthogonalMatchingPursuitCV)) | 560.00 | 518.03 |
30 | MTS(GenBoost(MLPRegressor)) | 622.93 | 573.30 |
32 | MTS(GenBoost(OrthogonalMatchingPursuit)) | 658.25 | 591.60 |
29 | MTS(GenBoost(LinearSVR)) | 744.48 | 693.50 |
45 | MTS(GenBoost(TweedieRegressor)) | 1170.09 | 1020.41 |
20 | MTS(GenBoost(KernelRidge)) | 1313.31 | 1147.54 |
11 | MTS(GenBoost(ElasticNet)) | 1339.16 | 1152.93 |
41 | MTS(GenBoost(SGDRegressor)) | 1358.22 | 1166.58 |
35 | MTS(GenBoost(PassiveAggressiveRegressor)) | 1554.27 | 1319.48 |
40 | MTS(GenBoost(RidgeCV)) | 1708.53 | 1443.61 |
23 | MTS(GenBoost(Lasso)) | 1815.57 | 1534.60 |
25 | MTS(GenBoost(LassoLars)) | 1910.40 | 1612.20 |
43 | MTS(GenBoost(TheilSenRegressor)) | 2050.37 | 1726.27 |
34 | MTS(GenBoost(PLSRegression)) | 2119.73 | 1770.69 |
44 | MTS(GenBoost(TransformedTargetRegressor)) | 2178.49 | 1819.45 |
28 | MTS(GenBoost(LinearRegression)) | 2178.49 | 1819.45 |
18 | MTS(GenBoost(HuberRegressor)) | 2267.51 | 1882.66 |
21 | MTS(GenBoost(Lars)) | 2696.45 | 1941.89 |
39 | MTS(GenBoost(Ridge)) | 2769.55 | 2245.39 |
Comments powered by Talkyard.