Adaptive (online/streaming) learning with uncertainty quantification using Polyak averaging in learningmachine

Today, give a try to Techtonique web app, a tool designed to help you make informed, data-driven decisions using Mathematics, Statistics, Machine Learning, and Data Visualization. Here is a tutorial with audio, video, code, and slides: https://moudiki2.gumroad.com/l/nrhgb. 100 API requests are now (and forever) offered to every user every month, no matter the pricing tier.

The model presented here is a frequentist – conformalized – version of the Bayesian one presented in #152. It is implemented in learningmachine, both in Python and R, and is updated as new observations arrive, using Polyak averaging. Model explanations are given as sensitivity analyses.

1 - R version

%load_ext rpy2.ipython

%%R

utils::install.packages("bayesianrvfl", repos = c("https://techtonique.r-universe.dev", "https://cloud.r-project.org"))
utils::install.packages("learningmachine", repos = c("https://techtonique.r-universe.dev", "https://cloud.r-project.org"))

%%R

library(learningmachine)

X <- as.matrix(mtcars[,-1])
y <- mtcars$mpg

set.seed(123)
(index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.6*nrow(X)),
                                 replace = FALSE))
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
## [1] 19 10
dim(X_test)

[1] 13 10

%%R

obj <- learningmachine::Regressor$new(method = "bayesianrvfl",
                                      nb_hidden = 5L)
obj$get_type()

[1] "regression"

%%R

obj_GCV <- bayesianrvfl::fit_rvfl(x = X_train, y = y_train)
(best_lambda <- obj_GCV$lambda[which.min(obj_GCV$GCV)])

[1] 12.9155

%%R

t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = best_lambda)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")

Elapsed:  0.01 s

%%R

previous_coefs <- drop(obj$model$coef)

newx <- X_test[1, ]
newy <- y_test[1]

new_X_test <- X_test[-1, ]
new_y_test <- y_test[-1]

t0 <- proc.time()[3]
obj$update(newx, newy, method = "polyak", alpha = 0.6)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")

print(summary(previous_coefs))

print(summary(drop(obj$model$coef) - previous_coefs))


plot(drop(obj$model$coef) - previous_coefs, type='l')
abline(h = 0, lty=2, col="red")

Elapsed:  0.003 s 
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.96778 -0.51401 -0.16335 -0.05234  0.31900  0.98482 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-0.065436 -0.002152  0.027994  0.015974  0.040033  0.058892 

xxx

%%R

print(obj$summary(new_X_test, y=new_y_test, show_progress=FALSE))

$R_squared
[1] 0.6692541

$R_squared_adj
[1] -2.638205

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.5014 -2.2111 -0.5532 -0.3928  1.3495  3.9206 

$Coverage_rate
[1] 100

$citests
         estimate        lower        upper      p-value signif
cyl   -41.4815528  -43.6039915  -39.3591140 1.306085e-13    ***
disp   -0.5937584   -0.7014857   -0.4860311 1.040246e-07    ***
hp     -1.0226867   -1.2175471   -0.8278263 1.719172e-07    ***
drat   84.5859637   73.2987057   95.8732217 4.178658e-09    ***
wt   -169.1047879 -189.5595154 -148.6500603 1.469605e-09    ***
qsec   22.3026258   15.1341951   29.4710566 2.772362e-05    ***
vs    113.3209911   88.3101728  138.3318093 7.599984e-07    ***
am    175.1639102  139.5755741  210.7522464 3.304560e-07    ***
gear   44.3270639   36.1456398   52.5084881 1.240722e-07    ***
carb  -59.6511203  -69.8576126  -49.4446280 5.677270e-08    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             12     
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean     sd       p0      p25      p50      p75     p100
 1 cyl            -41.5    3.34   -43.4    -43.4    -43.3    -41.7    -34.5  
 2 disp            -0.594  0.170   -0.916   -0.635   -0.505   -0.505   -0.356
 3 hp              -1.02   0.307   -1.44    -1.40    -0.877   -0.768   -0.768
 4 drat            84.6   17.8     59.5     76.7     89.5     89.5    128.   
 5 wt            -169.    32.2   -204.    -199.    -166.    -138.    -138.   
 6 qsec            22.3   11.3     13.3     13.3     17.4     29.2     40.1  
 7 vs             113.    39.4     59.6     94.4     94.4    117.     191.   
 8 am             175.    56.0    124.     124.     153.     226.     245.   
 9 gear            44.3   12.9     26.3     38.7     47.9     47.9     76.0  
10 carb           -59.7   16.1    -77.3    -74.6    -58.2    -44.4    -44.4  
   hist 
 1 ▇▁▁▁▂
 2 ▂▁▃▇▁
 3 ▅▁▁▂▇
 4 ▂▃▇▁▁
 5 ▇▁▁▁▇
 6 ▇▂▁▁▃
 7 ▁▇▃▁▂
 8 ▇▁▁▂▃
 9 ▂▃▇▁▁
10 ▇▁▁▁▇

%%R

res <- obj$predict(X = new_X_test)

new_y_train <- c(y_train, newy)

plot(c(new_y_train, res$preds), type='l',
    main="",
    ylab="",
    ylim = c(min(c(res$upper, res$lower, y)),
             max(c(res$upper, res$lower, y))))
lines(c(new_y_train, res$upper), col="gray60")
lines(c(new_y_train, res$lower), col="gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
abline(v = length(y_train), lty=2, col="black")

xxx

%%R

newx <- X_test[2, ]
newy <- y_test[2]

new_X_test <- X_test[-c(1, 2), ]
new_y_test <- y_test[-c(1, 2)]

t0 <- proc.time()[3]
obj$update(newx, newy, method = "polyak", alpha = 0.9)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")

print(obj$summary(new_X_test, y=new_y_test, show_progress=FALSE))


res <- obj$predict(X = new_X_test)

new_y_train <- c(y_train, y_test[c(1, 2)])

plot(c(new_y_train, res$preds), type='l',
    main="",
    ylab="",
    ylim = c(min(c(res$upper, res$lower, y)),
             max(c(res$upper, res$lower, y))))
lines(c(new_y_train, res$upper), col="gray60")
lines(c(new_y_train, res$lower), col="gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
abline(v = length(y_train), lty=2, col="black")

Elapsed:  0.003 s 
$R_squared
[1] 0.6426871

$R_squared_adj
[1] -Inf

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.5686 -2.4084 -1.0397 -0.3897  1.5507  4.0215 

$Coverage_rate
[1] 100

$citests
         estimate        lower        upper      p-value signif
cyl   -42.1261096  -44.5327541  -39.7194651 2.932516e-12    ***
disp   -0.6256505   -0.7347381   -0.5165629 1.613495e-07    ***
hp     -1.0139634   -1.2198651   -0.8080617 6.747693e-07    ***
drat   82.8645391   74.8033348   90.9257434 5.680663e-10    ***
wt   -170.7891742 -193.1932631 -148.3850853 1.053193e-08    ***
qsec   22.2365552   13.9564091   30.5167012 1.350094e-04    ***
vs    119.1784891   94.0163626  144.3406157 9.681321e-07    ***
am    174.2138307  134.1390652  214.2885963 2.127371e-06    ***
gear   42.7943293   36.9622907   48.6263678 1.523695e-08    ***
carb  -59.4034661  -70.5135723  -48.2933599 3.127231e-07    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             11     
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean     sd       p0      p25      p50      p75     p100
 1 cyl            -42.1    3.58   -44.1    -44.1    -44.1    -43.0    -34.9  
 2 disp            -0.626  0.162   -0.933   -0.643   -0.514   -0.514   -0.514
 3 hp              -1.01   0.306   -1.47    -1.24    -0.787   -0.787   -0.787
 4 drat            82.9   12.0     61.2     79.6     91.7     91.7     91.7  
 5 wt            -171.    33.3   -210.    -204.    -142.    -142.    -142.   
 6 qsec            22.2   12.3     13.2     13.2     13.2     30.7     41.2  
 7 vs             119.    37.5     96.0     96.0     96.0    117.     193.   
 8 am             174.    59.7    123.     123.     123.     233.     247.   
 9 gear            42.8    8.68    27.1     40.4     49.2     49.2     49.2  
10 carb           -59.4   16.5    -78.8    -76.0    -45.1    -45.1    -45.1  
   hist 
 1 ▇▁▁▁▂
 2 ▂▁▁▃▇
 3 ▃▁▁▂▇
 4 ▂▁▁▃▇
 5 ▇▁▁▁▇
 6 ▇▂▁▁▃
 7 ▇▃▁▁▂
 8 ▇▁▁▂▃
 9 ▂▁▁▃▇
10 ▇▁▁▁▇

xxx

2 - Python version

!pip install git+https://github.com/Techtonique/learningmachine_python.git --verbose

import pandas as pd
import numpy as np
import warnings
import learningmachine as lm


# Load the mtcars dataset
data = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv")
X = data.drop("mpg", axis=1).values
X = pd.DataFrame(X).iloc[:,1:]
X = X.astype(np.float16)
X.columns = ["cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb"]
y = data["mpg"].values

display(X.describe())
display(X.head())
display(X.dtypes)

	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
count	32.000000	32.000000	32.0000	32.000000	32.000000	32.000000	32.000000	32.000000	32.000000	32.000000
mean	6.187500	230.750000	146.7500	3.595703	3.216797	17.843750	0.437500	0.406250	3.687500	2.812500
std	1.786133	123.937500	68.5625	0.534668	0.978516	1.788086	0.503906	0.499023	0.737793	1.615234
min	4.000000	71.125000	52.0000	2.759766	1.512695	14.500000	0.000000	0.000000	3.000000	1.000000
25%	4.000000	120.828125	96.5000	3.080078	2.580566	16.898438	0.000000	0.000000	3.000000	2.000000
50%	6.000000	196.312500	123.0000	3.694336	3.325195	17.703125	0.000000	0.000000	4.000000	2.000000
75%	8.000000	326.000000	180.0000	3.919922	3.610352	18.906250	1.000000	1.000000	4.000000	4.000000
max	8.000000	472.000000	335.0000	4.929688	5.425781	22.906250	1.000000	1.000000	5.000000	8.000000

	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
0	6.0	160.0	110.0	3.900391	2.619141	16.453125	0.0	1.0	4.0	4.0
1	6.0	160.0	110.0	3.900391	2.875000	17.015625	0.0	1.0	4.0	4.0
2	4.0	108.0	93.0	3.849609	2.320312	18.609375	1.0	1.0	4.0	1.0
3	6.0	258.0	110.0	3.080078	3.214844	19.437500	1.0	0.0	3.0	1.0
4	8.0	360.0	175.0	3.150391	3.439453	17.015625	0.0	0.0	3.0	2.0

	0
cyl	float16
disp	float16
hp	float16
drat	float16
wt	float16
qsec	float16
vs	float16
am	float16
gear	float16
carb	float16

dtype: object

y.dtype

dtype('float64')

	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
0	6.0	160.0000	110.0	3.900391	2.619141	16.453125	0.0	1.0	4.0	4.0
1	6.0	160.0000	110.0	3.900391	2.875000	17.015625	0.0	1.0	4.0	4.0
2	4.0	108.0000	93.0	3.849609	2.320312	18.609375	1.0	1.0	4.0	1.0
3	6.0	258.0000	110.0	3.080078	3.214844	19.437500	1.0	0.0	3.0	1.0
4	8.0	360.0000	175.0	3.150391	3.439453	17.015625	0.0	0.0	3.0	2.0
5	6.0	225.0000	105.0	2.759766	3.460938	20.218750	1.0	0.0	3.0	1.0
6	8.0	360.0000	245.0	3.210938	3.570312	15.843750	0.0	0.0	3.0	4.0
7	4.0	146.7500	62.0	3.689453	3.189453	20.000000	1.0	0.0	4.0	2.0
8	4.0	140.7500	95.0	3.919922	3.150391	22.906250	1.0	0.0	4.0	2.0
9	6.0	167.6250	123.0	3.919922	3.439453	18.296875	1.0	0.0	4.0	4.0
10	6.0	167.6250	123.0	3.919922	3.439453	18.906250	1.0	0.0	4.0	4.0
11	8.0	275.7500	180.0	3.070312	4.070312	17.406250	0.0	0.0	3.0	3.0
12	8.0	275.7500	180.0	3.070312	3.730469	17.593750	0.0	0.0	3.0	3.0
13	8.0	275.7500	180.0	3.070312	3.779297	18.000000	0.0	0.0	3.0	3.0
14	8.0	472.0000	205.0	2.929688	5.250000	17.984375	0.0	0.0	3.0	4.0
15	8.0	460.0000	215.0	3.000000	5.425781	17.812500	0.0	0.0	3.0	4.0
16	8.0	440.0000	230.0	3.230469	5.343750	17.421875	0.0	0.0	3.0	4.0
17	4.0	78.6875	66.0	4.078125	2.199219	19.468750	1.0	1.0	4.0	1.0
18	4.0	75.6875	52.0	4.929688	1.615234	18.515625	1.0	1.0	4.0	2.0
19	4.0	71.1250	65.0	4.218750	1.834961	19.906250	1.0	1.0	4.0	1.0
20	4.0	120.1250	97.0	3.699219	2.464844	20.015625	1.0	0.0	3.0	1.0
21	8.0	318.0000	150.0	2.759766	3.519531	16.875000	0.0	0.0	3.0	2.0
22	8.0	304.0000	150.0	3.150391	3.435547	17.296875	0.0	0.0	3.0	2.0
23	8.0	350.0000	245.0	3.730469	3.839844	15.406250	0.0	0.0	3.0	4.0
24	8.0	400.0000	175.0	3.080078	3.845703	17.046875	0.0	0.0	3.0	2.0
25	4.0	79.0000	66.0	4.078125	1.934570	18.906250	1.0	1.0	4.0	1.0
26	4.0	120.3125	91.0	4.429688	2.140625	16.703125	0.0	1.0	5.0	2.0
27	4.0	95.1250	113.0	3.769531	1.512695	16.906250	1.0	1.0	5.0	2.0
28	8.0	351.0000	264.0	4.218750	3.169922	14.500000	0.0	1.0	5.0	4.0
29	6.0	145.0000	175.0	3.619141	2.769531	15.500000	0.0	1.0	5.0	6.0
30	8.0	301.0000	335.0	3.539062	3.570312	14.601562	0.0	1.0	5.0	8.0
31	4.0	121.0000	109.0	4.109375	2.779297	18.593750	1.0	1.0	4.0	2.0

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Create a Bayesian RVFL regressor object
obj = lm.Regressor(method = "bayesianrvfl", nb_hidden = 5)

# Fit the model using the training data
obj.fit(X_train, y_train, reg_lambda=12.9155)

# Print the summary of the model
print(obj.summary(X_test, y=y_test, show_progress=False))

$R_squared
[1] 0.6416309

$R_squared_adj
[1] 1.537554

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.0724 -2.0122 -0.1018 -0.1941  1.4361  3.9676 

$Coverage_rate
[1] 100

$citests
         estimate       lower        upper      p-value signif
cyl   -24.5943583  -40.407994   -8.7807230 8.909365e-03     **
disp   -0.2419797   -0.370835   -0.1131245 3.711077e-03     **
hp     -1.5734483   -1.722903   -1.4239939 2.255640e-07    ***
drat  142.5646192  124.575179  160.5540599 1.217808e-06    ***
wt   -144.8871352 -158.911143 -130.8631275 2.523441e-07    ***
qsec   46.8290859   27.829411   65.8287611 9.388045e-04    ***
vs     75.0555146   30.645127  119.4659017 6.110043e-03     **
am    207.5935234  133.205572  281.9814744 4.843095e-04    ***
gear   73.6892658   60.186232   87.1922995 1.091470e-05    ***
carb  -71.2974988  -79.480400  -63.1145974 6.944475e-07    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             7      
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean     sd       p0      p25      p50      p75       p100
 1 cyl            -24.6   17.1    -38.5    -38.5    -33.4    -12.4      1.66   
 2 disp            -0.242  0.139   -0.351   -0.351   -0.285   -0.181    0.00546
 3 hp              -1.57   0.162   -1.90    -1.61    -1.48    -1.47    -1.47   
 4 drat           143.    19.5    125.     125.     141.     154.     174.     
 5 wt            -145.    15.2   -167.    -152.    -142.    -142.    -117.     
 6 qsec            46.8   20.5     14.1     35.3     55.7     62.4     62.4    
 7 vs              75.1   48.0     37.2     37.2     58.7     93.9    167.     
 8 am             208.    80.4     64.3    168.     250.     267.     267.     
 9 gear            73.7   14.6     60.6     60.6     72.7     82.1     96.9    
10 carb           -71.3    8.85   -84.4    -75.2    -69.7    -69.7    -55.2    
   hist 
 1 ▇▁▂▁▃
 2 ▇▂▁▂▂
 3 ▂▁▁▃▇
 4 ▇▂▂▂▂
 5 ▂▅▇▁▂
 6 ▃▁▁▂▇
 7 ▇▁▃▁▂
 8 ▂▂▁▂▇
 9 ▇▂▂▂▂
10 ▂▅▇▁▂

# Select the first test sample
newx = X_test.iloc[0,:]
newy = y_test[0]

# Update the model with the new sample
new_X_test = X_test[1:]
new_y_test = y_test[1:]
obj.update(newx, newy, method="polyak", alpha=0.9)

# Print the summary of the model
print(obj.summary(new_X_test, y=new_y_test, show_progress=False))

$R_squared
[1] 0.6051442

$R_squared_adj
[1] 1.394856

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.6214 -2.5055 -1.5003 -0.8308  1.2738  3.2794 

$Coverage_rate
[1] 100

$citests
         estimate        lower       upper      p-value signif
cyl   -30.0502823  -48.4171958  -11.683369 8.442658e-03     **
disp   -0.2958477   -0.4386085   -0.153087 3.121989e-03     **
hp     -1.6053302   -1.6789750   -1.531685 3.424156e-08    ***
drat  153.7968829  131.5239191  176.069847 1.041460e-05    ***
wt   -155.1954135 -174.4144275 -135.976399 4.804729e-06    ***
qsec   49.8967685   26.9993778   72.794159 2.504905e-03     **
vs     87.4170764   32.4599776  142.374175 9.457226e-03     **
am    214.5918910  119.8712855  309.312496 2.108825e-03     **
gear   83.1355825   65.3159018  100.955263 7.110354e-05    ***
carb  -77.1384645  -88.2087477  -66.068181 9.958425e-06    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             6      
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean      sd       p0      p25      p50      p75      p100
 1 cyl            -30.1   17.5     -42.5    -42.5    -39.9    -17.7     -4.35  
 2 disp            -0.296  0.136    -0.377   -0.377   -0.343   -0.308   -0.0269
 3 hp              -1.61   0.0702   -1.70    -1.66    -1.57    -1.57    -1.53  
 4 drat           154.    21.2     137.     137.     144.     169.     185.    
 5 wt            -155.    18.3    -182.    -160.    -154.    -154.    -125.    
 6 qsec            49.9   21.8      18.8     33.3     60.6     66.6     66.6   
 7 vs              87.4   52.4      46.7     46.7     73.7    105.     178.    
 8 am             215.    90.3      65.6    169.     266.     275.     275.    
 9 gear            83.1   17.0      70.0     70.0     75.5     94.1    109.    
10 carb           -77.1   10.5     -92.5    -79.9    -76.6    -76.6    -59.7   
   hist 
 1 ▇▁▁▁▃
 2 ▇▂▁▁▂
 3 ▅▁▁▇▂
 4 ▇▂▁▁▅
 5 ▂▂▇▁▂
 6 ▅▁▁▂▇
 7 ▇▁▅▁▂
 8 ▂▂▁▁▇
 9 ▇▂▁▂▂
10 ▂▂▇▁▂