Today, give the Techtonique web app a try: it is a tool designed to help you make informed, data-driven decisions using Mathematics, Statistics, Machine Learning, and Data Visualization. Here is a tutorial with audio, video, code, and slides: https://moudiki2.gumroad.com/l/nrhgb
Disclaimer: I have no affiliation with the TabPFN package. I just found its idea interesting and thought it would be worth trying out for forecasting.
More details on nnetsauce forecasting can be found at https://www.researchgate.net/publication/382589729_Probabilistic_Forecasting_with_nnetsauce_using_Density_Estimation_Bayesian_inference_Conformal_prediction_and_Vine_copulas.
PS: Go past the first example ;)
0 - Install packages¶
!pip install nnetsauce --upgrade --no-cache-dir
!pip install tabpfn
import nnetsauce as ns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tabpfn import TabPFNRegressor
from sklearn.linear_model import RidgeCV
from time import time
1 - Univariate¶
1 - 1 - a10 and AirPassengers¶
# Download the a10 series and index it by its `date` column.
url = "https://raw.githubusercontent.com/Techtonique/datasets/main/time_series/univariate/a10.csv"
df = pd.read_csv(url)
# Use `date` as a DatetimeIndex, then remove it from the value columns.
df.index = pd.DatetimeIndex(df["date"])
del df["date"]
# Keep an independent copy, since `df` is reused for other datasets.
df_a10 = df.copy()
# Download the AirPassengers series and index it by its `date` column.
url = "https://raw.githubusercontent.com/Techtonique/datasets/main/time_series/univariate/AirPassengers.csv"
df = pd.read_csv(url)
# Use `date` as a DatetimeIndex, then remove it from the value columns.
df.index = pd.DatetimeIndex(df["date"])
del df["date"]
# Keep an independent copy, since `df` is reused for other datasets.
df_air = df.copy()
# Plot the a10 series.
df_a10.plot()
<Axes: xlabel='date'>
# Plot the AirPassengers series.
df_air.plot()
<Axes: xlabel='date'>
# RidgeCV wrapped in ns.MTS, fit on AirPassengers with 25 lags.
# Candidate regularization strengths: 10**-10, 10**-9, ..., 10**9.
ridge_alphas = [10**power for power in range(-10, 10)]
regr_ridge = ns.MTS(
    obj=RidgeCV(alphas=ridge_alphas),
    lags=25,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 40-step-ahead forecast.
start = time()
regr_ridge.fit(df_air)
regr_ridge.predict(h=40)
elapsed = time() - start
print("time: ", elapsed)
100%|██████████| 1/1 [00:00<00:00, 84.15it/s]
time: 1.274052381515503
# TabPFNRegressor wrapped in ns.MTS, with the same settings as the RidgeCV
# AirPassengers model (25 lags, type_pi="scp2-kde", 250 replications).
regr_tabpfn = ns.MTS(
    obj=TabPFNRegressor(),
    lags=25,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 40-step-ahead forecast.
start = time()
regr_tabpfn.fit(df_air)
regr_tabpfn.predict(h=40)
elapsed = time() - start
print("time: ", elapsed)
100%|██████████| 1/1 [00:15<00:00, 15.72s/it]
time: 287.80411529541016
import seaborn as sns

# Switch to seaborn's "darkgrid" theme, then draw the "pi" plot of each
# model fit on AirPassengers (RidgeCV first, TabPFN second).
sns.set_theme(style="darkgrid")
for fitted_model in (regr_ridge, regr_tabpfn):
    fitted_model.plot(type_plot="pi")
1 - 2 - USAccDeaths¶
import pandas as pd

# Download the USAccDeaths series and index it by its `date` column.
url = "https://raw.githubusercontent.com/Techtonique/datasets/main/time_series/univariate/USAccDeaths.csv"
df2 = pd.read_csv(url)
# Use `date` as a DatetimeIndex, then remove it from the value columns.
df2.index = pd.DatetimeIndex(df2["date"])
del df2["date"]
df2.plot()
# Independent copy used by the models below.
df_usacc = df2.copy()
# RidgeCV wrapped in ns.MTS, fit on USAccDeaths with 15 lags.
# Candidate regularization strengths: 10**-10, 10**-9, ..., 10**9.
ridge_alphas = [10**power for power in range(-10, 10)]
regr2 = ns.MTS(
    obj=RidgeCV(alphas=ridge_alphas),
    lags=15,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 25-step-ahead forecast.
start = time()
regr2.fit(df_usacc)
regr2.predict(h=25)
elapsed = time() - start
print("time: ", elapsed)
100%|██████████| 1/1 [00:00<00:00, 112.52it/s]
time: 1.2207071781158447
# "pi" plot for the RidgeCV model fit on USAccDeaths.
regr2.plot(type_plot="pi")
# TabPFNRegressor wrapped in ns.MTS, fit on USAccDeaths with 15 lags.
# Rebinds `regr2`, replacing the previous model held under that name.
regr2 = ns.MTS(
    obj=TabPFNRegressor(),
    lags=15,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 25-step-ahead forecast.
start = time()
regr2.fit(df_usacc)
regr2.predict(h=25)
elapsed = time() - start
print("time: ", elapsed)
100%|██████████| 1/1 [00:07<00:00, 7.13s/it]
time: 129.99803948402405
# "pi" plot for the TabPFN model fit on USAccDeaths (`regr2` was rebound above).
regr2.plot(type_plot="pi")
1 - 3 - austa¶
# Download the austa series and index it by its `date` column.
url = "https://raw.githubusercontent.com/Techtonique/datasets/main/time_series/univariate/austa.csv"
df = pd.read_csv(url)
# Use `date` as a DatetimeIndex, then remove it from the value columns.
df.index = pd.DatetimeIndex(df["date"])
del df["date"]
df.plot()
# Independent copy used by the models below.
df_austa = df.copy()
# RidgeCV wrapped in ns.MTS, fit on austa with 15 lags.
# Candidate regularization strengths: 10**-10, 10**-9, ..., 10**9.
ridge_alphas = [10**power for power in range(-10, 10)]
regr4 = ns.MTS(
    obj=RidgeCV(alphas=ridge_alphas),
    lags=15,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 10-step-ahead forecast, then plot.
start = time()
regr4.fit(df_austa)
regr4.predict(h=10)
elapsed = time() - start
print("time: ", elapsed)
regr4.plot(type_plot="pi")
100%|██████████| 1/1 [00:00<00:00, 147.06it/s]
time: 1.136568546295166
# TabPFNRegressor wrapped in ns.MTS, fit on austa with 15 lags.
# `type_pi="scp2-kde"` is passed explicitly so this model uses the same
# setting as the RidgeCV austa model and every other ns.MTS call in this
# notebook; without it, ns.MTS falls back to its default `type_pi` and the
# two austa models are not compared on equal terms.
# NOTE: the recorded timing output below was produced without this argument.
regr4 = ns.MTS(
    obj=TabPFNRegressor(),
    replications=250,
    type_pi="scp2-kde",
    kernel="gaussian",
    lags=15,
)

# Time the fit together with the 10-step-ahead forecast, then plot.
start = time()
regr4.fit(df_austa)
regr4.predict(h=10)
print("time: ", time() - start)
regr4.plot(type_plot="pi")
100%|██████████| 1/1 [00:04<00:00, 4.38s/it]
time: 29.955562114715576
1 - 4 - Nile¶
# Download the Nile series and index it by its `date` column.
url = "https://raw.githubusercontent.com/Techtonique/datasets/main/time_series/univariate/nile.csv"
df = pd.read_csv(url)
# Use `date` as a DatetimeIndex, then remove it from the value columns.
df.index = pd.DatetimeIndex(df["date"])
del df["date"]
df.plot()
# Independent copy used by the models below.
df_nile = df.copy()
# RidgeCV wrapped in ns.MTS, fit on Nile with 15 lags.
# Candidate regularization strengths: 10**-10, 10**-9, ..., 10**9.
ridge_alphas = [10**power for power in range(-10, 10)]
regr4 = ns.MTS(
    obj=RidgeCV(alphas=ridge_alphas),
    lags=15,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 10-step-ahead forecast, then plot.
start = time()
regr4.fit(df_nile)
regr4.predict(h=10)
elapsed = time() - start
print("time: ", elapsed)
regr4.plot(type_plot="pi")
100%|██████████| 1/1 [00:00<00:00, 89.15it/s]
time: 1.7347002029418945
# TabPFNRegressor wrapped in ns.MTS, fit on Nile with 15 lags.
regr4_ = ns.MTS(
    obj=TabPFNRegressor(),
    lags=15,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
)

# Time the fit together with the 10-step-ahead forecast, then plot.
start = time()
regr4_.fit(df_nile)
regr4_.predict(h=10)
elapsed = time() - start
print("time: ", elapsed)
regr4_.plot(type_plot="pi")
100%|██████████| 1/1 [00:04<00:00, 4.50s/it]
time: 33.400973081588745
2 - Multivariate¶
# Download the ice cream / heater dataset and index it by its `date` column.
url = "https://raw.githubusercontent.com/Techtonique/datasets/main/time_series/multivariate/ice_cream_vs_heater.csv"
df_temp = pd.read_csv(url)
df_temp.index = pd.DatetimeIndex(df_temp["date"])
# Work on first differences: diff() leaves the first row empty, and
# dropna() then removes every row that contains a missing value.
raw_values = df_temp.drop(columns=["date"])
df = raw_values.diff().dropna()
df.plot()
# Independent copy used by the multivariate models below.
df_heat = df.copy()
# RidgeCV wrapped in ns.MTS, fit on the differenced ice cream / heater
# data with 15 lags and n_hidden_features=5.
# Candidate regularization strengths: 10**-10, 10**-9, ..., 10**9.
ridge_alphas = [10**power for power in range(-10, 10)]
obj_MTS = ns.MTS(
    obj=RidgeCV(alphas=ridge_alphas),
    lags=15,
    n_hidden_features=5,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
    verbose=1,
)

# Time the fit together with the 30-step-ahead forecast.
start = time()
obj_MTS.fit(df_heat)
res = obj_MTS.predict(h=30)
elapsed = time() - start
print("time: ", elapsed)

# One "spaghetti" plot per series, heater first.
for series_name in ("heater", "icecream"):
    obj_MTS.plot(series_name, type_plot="spaghetti")
Adjusting RidgeCV to multivariate time series...
100%|██████████| 2/2 [00:00<00:00, 104.81it/s]
Simulate residuals using gaussian kernel...
Best parameters for gaussian kernel: {'bandwidth': 0.75717214883374}
100%|██████████| 250/250 [00:00<00:00, 2964.61it/s] 100%|██████████| 250/250 [00:00<00:00, 7131.91it/s]
time: 1.3256967067718506
# TabPFNRegressor wrapped in ns.MTS, fit on the differenced ice cream /
# heater data with the same settings as the RidgeCV multivariate model.
obj_MTS_ = ns.MTS(
    obj=TabPFNRegressor(),
    lags=15,
    n_hidden_features=5,
    type_pi="scp2-kde",
    kernel="gaussian",
    replications=250,
    verbose=1,
)

# Time the fit together with the 30-step-ahead forecast.
start = time()
obj_MTS_.fit(df_heat)
res = obj_MTS_.predict(h=30)
elapsed = time() - start
print("time: ", elapsed)

# One "spaghetti" plot per series, heater first.
for series_name in ("heater", "icecream"):
    obj_MTS_.plot(series_name, type_plot="spaghetti")
Adjusting TabPFNRegressor to multivariate time series...
100%|██████████| 2/2 [00:55<00:00, 27.81s/it]
Simulate residuals using gaussian kernel... Best parameters for gaussian kernel: {'bandwidth': 0.6290102436234454}
100%|██████████| 250/250 [00:00<00:00, 1481.61it/s] 100%|██████████| 250/250 [00:00<00:00, 6120.07it/s]
time: 912.1651566028595
Comments powered by Talkyard.