10 Sep 2024 | RSS | Back to list of posts | Toggle dark mode | Hire me on Malt | Hire me on Fiverr | Hire me on Upwork
Today, give a try to Techtonique web app , a tool designed to help you make informed, data-driven decisions using Mathematics, Statistics, Machine Learning, and Data Visualization
The model presented here is a frequentist – conformalized – version of the Bayesian one presented in #152 . It is implemented in learningmachine
, both in Python and R , and is updated as new observations arrive, using Polyak averaging . Model explanations are given as sensitivity analyses.
1 - R version
%% R
utils :: install.packages ( "bayesianrvfl" , repos = c ( "https://techtonique.r-universe.dev" , "https://cloud.r-project.org" ))
utils :: install.packages ( "learningmachine" , repos = c ( "https://techtonique.r-universe.dev" , "https://cloud.r-project.org" ))
%% R
library ( learningmachine )
X <- as.matrix ( mtcars [, -1 ])
y <- mtcars $ mpg
set.seed ( 123 )
( index_train <- base :: sample.int ( n = nrow ( X ),
size = floor ( 0.6 * nrow ( X )),
replace = FALSE ))
## [1] 31 15 19 14 3 10 18 22 11 5 20 29 23 30 9 28 8 27 7
X_train <- X [ index_train , ]
y_train <- y [ index_train ]
X_test <- X [ - index_train , ]
y_test <- y [ - index_train ]
dim ( X_train )
## [1] 19 10
dim ( X_test )
%% R
obj <- learningmachine :: Regressor $ new ( method = "bayesianrvfl" ,
nb_hidden = 5L )
obj $ get_type ()
%% R
obj_GCV <- bayesianrvfl :: fit_rvfl ( x = X_train , y = y_train )
( best_lambda <- obj_GCV $ lambda [ which.min ( obj_GCV $ GCV )])
%% R
t0 <- proc.time ()[ 3 ]
obj $ fit ( X_train , y_train , reg_lambda = best_lambda )
cat ( "Elapsed: " , proc.time ()[ 3 ] - t0 , "s \n" )
%% R
previous_coefs <- drop ( obj $ model $ coef )
newx <- X_test [ 1 , ]
newy <- y_test [ 1 ]
new_X_test <- X_test [ -1 , ]
new_y_test <- y_test [ -1 ]
t0 <- proc.time ()[ 3 ]
obj $ update ( newx , newy , method = "polyak" , alpha = 0.6 )
cat ( "Elapsed: " , proc.time ()[ 3 ] - t0 , "s \n" )
print ( summary ( previous_coefs ))
print ( summary ( drop ( obj $ model $ coef ) - previous_coefs ))
plot ( drop ( obj $ model $ coef ) - previous_coefs , type = 'l' )
abline ( h = 0 , lty = 2 , col = "red" )
Elapsed: 0.003 s
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.96778 -0.51401 -0.16335 -0.05234 0.31900 0.98482
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.065436 -0.002152 0.027994 0.015974 0.040033 0.058892
%% R
print ( obj $ summary ( new_X_test , y = new_y_test , show_progress = FALSE ))
$R_squared
[1] 0.6692541
$R_squared_adj
[1] -2.638205
$Residuals
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4.5014 -2.2111 -0.5532 -0.3928 1.3495 3.9206
$Coverage_rate
[1] 100
$citests
estimate lower upper p-value signif
cyl -41.4815528 -43.6039915 -39.3591140 1.306085e-13 ***
disp -0.5937584 -0.7014857 -0.4860311 1.040246e-07 ***
hp -1.0226867 -1.2175471 -0.8278263 1.719172e-07 ***
drat 84.5859637 73.2987057 95.8732217 4.178658e-09 ***
wt -169.1047879 -189.5595154 -148.6500603 1.469605e-09 ***
qsec 22.3026258 15.1341951 29.4710566 2.772362e-05 ***
vs 113.3209911 88.3101728 138.3318093 7.599984e-07 ***
am 175.1639102 139.5755741 210.7522464 3.304560e-07 ***
gear 44.3270639 36.1456398 52.5084881 1.240722e-07 ***
carb -59.6511203 -69.8576126 -49.4446280 5.677270e-08 ***
$effects
── Data Summary ────────────────────────
Values
Name effects
Number of rows 12
Number of columns 10
_______________________
Column type frequency:
numeric 10
________________________
Group variables None
── Variable type: numeric ──────────────────────────────────────────────────────
skim_variable mean sd p0 p25 p50 p75 p100
1 cyl -41.5 3.34 -43.4 -43.4 -43.3 -41.7 -34.5
2 disp -0.594 0.170 -0.916 -0.635 -0.505 -0.505 -0.356
3 hp -1.02 0.307 -1.44 -1.40 -0.877 -0.768 -0.768
4 drat 84.6 17.8 59.5 76.7 89.5 89.5 128.
5 wt -169. 32.2 -204. -199. -166. -138. -138.
6 qsec 22.3 11.3 13.3 13.3 17.4 29.2 40.1
7 vs 113. 39.4 59.6 94.4 94.4 117. 191.
8 am 175. 56.0 124. 124. 153. 226. 245.
9 gear 44.3 12.9 26.3 38.7 47.9 47.9 76.0
10 carb -59.7 16.1 -77.3 -74.6 -58.2 -44.4 -44.4
hist
1 ▇▁▁▁▂
2 ▂▁▃▇▁
3 ▅▁▁▂▇
4 ▂▃▇▁▁
5 ▇▁▁▁▇
6 ▇▂▁▁▃
7 ▁▇▃▁▂
8 ▇▁▁▂▃
9 ▂▃▇▁▁
10 ▇▁▁▁▇
%% R
res <- obj $ predict ( X = new_X_test )
new_y_train <- c ( y_train , newy )
plot ( c ( new_y_train , res $ preds ), type = 'l' ,
main = "" ,
ylab = "" ,
ylim = c ( min ( c ( res $ upper , res $ lower , y )),
max ( c ( res $ upper , res $ lower , y ))))
lines ( c ( new_y_train , res $ upper ), col = "gray60" )
lines ( c ( new_y_train , res $ lower ), col = "gray60" )
lines ( c ( new_y_train , res $ preds ), col = "red" )
lines ( c ( new_y_train , new_y_test ), col = "blue" )
abline ( v = length ( y_train ), lty = 2 , col = "black" )
%% R
newx <- X_test [ 2 , ]
newy <- y_test [ 2 ]
new_X_test <- X_test [ - c ( 1 , 2 ), ]
new_y_test <- y_test [ - c ( 1 , 2 )]
t0 <- proc.time ()[ 3 ]
obj $ update ( newx , newy , method = "polyak" , alpha = 0.9 )
cat ( "Elapsed: " , proc.time ()[ 3 ] - t0 , "s \n" )
print ( obj $ summary ( new_X_test , y = new_y_test , show_progress = FALSE ))
res <- obj $ predict ( X = new_X_test )
new_y_train <- c ( y_train , y_test [ c ( 1 , 2 )])
plot ( c ( new_y_train , res $ preds ), type = 'l' ,
main = "" ,
ylab = "" ,
ylim = c ( min ( c ( res $ upper , res $ lower , y )),
max ( c ( res $ upper , res $ lower , y ))))
lines ( c ( new_y_train , res $ upper ), col = "gray60" )
lines ( c ( new_y_train , res $ lower ), col = "gray60" )
lines ( c ( new_y_train , res $ preds ), col = "red" )
lines ( c ( new_y_train , new_y_test ), col = "blue" )
abline ( v = length ( y_train ), lty = 2 , col = "black" )
Elapsed: 0.003 s
$R_squared
[1] 0.6426871
$R_squared_adj
[1] -Inf
$Residuals
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4.5686 -2.4084 -1.0397 -0.3897 1.5507 4.0215
$Coverage_rate
[1] 100
$citests
estimate lower upper p-value signif
cyl -42.1261096 -44.5327541 -39.7194651 2.932516e-12 ***
disp -0.6256505 -0.7347381 -0.5165629 1.613495e-07 ***
hp -1.0139634 -1.2198651 -0.8080617 6.747693e-07 ***
drat 82.8645391 74.8033348 90.9257434 5.680663e-10 ***
wt -170.7891742 -193.1932631 -148.3850853 1.053193e-08 ***
qsec 22.2365552 13.9564091 30.5167012 1.350094e-04 ***
vs 119.1784891 94.0163626 144.3406157 9.681321e-07 ***
am 174.2138307 134.1390652 214.2885963 2.127371e-06 ***
gear 42.7943293 36.9622907 48.6263678 1.523695e-08 ***
carb -59.4034661 -70.5135723 -48.2933599 3.127231e-07 ***
$effects
── Data Summary ────────────────────────
Values
Name effects
Number of rows 11
Number of columns 10
_______________________
Column type frequency:
numeric 10
________________________
Group variables None
── Variable type: numeric ──────────────────────────────────────────────────────
skim_variable mean sd p0 p25 p50 p75 p100
1 cyl -42.1 3.58 -44.1 -44.1 -44.1 -43.0 -34.9
2 disp -0.626 0.162 -0.933 -0.643 -0.514 -0.514 -0.514
3 hp -1.01 0.306 -1.47 -1.24 -0.787 -0.787 -0.787
4 drat 82.9 12.0 61.2 79.6 91.7 91.7 91.7
5 wt -171. 33.3 -210. -204. -142. -142. -142.
6 qsec 22.2 12.3 13.2 13.2 13.2 30.7 41.2
7 vs 119. 37.5 96.0 96.0 96.0 117. 193.
8 am 174. 59.7 123. 123. 123. 233. 247.
9 gear 42.8 8.68 27.1 40.4 49.2 49.2 49.2
10 carb -59.4 16.5 -78.8 -76.0 -45.1 -45.1 -45.1
hist
1 ▇▁▁▁▂
2 ▂▁▁▃▇
3 ▃▁▁▂▇
4 ▂▁▁▃▇
5 ▇▁▁▁▇
6 ▇▂▁▁▃
7 ▇▃▁▁▂
8 ▇▁▁▂▃
9 ▂▁▁▃▇
10 ▇▁▁▁▇
2 - Python version
! pip install git + https : // github . com / Techtonique / learningmachine_python . git -- verbose
import pandas as pd
import numpy as np
import warnings
import learningmachine as lm
# Load the mtcars dataset
data = pd . read_csv ( "https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv" )
X = data . drop ( "mpg" , axis = 1 ). values
X = pd . DataFrame ( X ). iloc [:, 1 :]
X = X . astype ( np . float16 )
X . columns = [ "cyl" , "disp" , "hp" , "drat" , "wt" , "qsec" , "vs" , "am" , "gear" , "carb" ]
y = data [ "mpg" ]. values
display ( X . describe ())
display ( X . head ())
display ( X . dtypes )
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
count
32.000000
32.000000
32.0000
32.000000
32.000000
32.000000
32.000000
32.000000
32.000000
32.000000
mean
6.187500
230.750000
146.7500
3.595703
3.216797
17.843750
0.437500
0.406250
3.687500
2.812500
std
1.786133
123.937500
68.5625
0.534668
0.978516
1.788086
0.503906
0.499023
0.737793
1.615234
min
4.000000
71.125000
52.0000
2.759766
1.512695
14.500000
0.000000
0.000000
3.000000
1.000000
25%
4.000000
120.828125
96.5000
3.080078
2.580566
16.898438
0.000000
0.000000
3.000000
2.000000
50%
6.000000
196.312500
123.0000
3.694336
3.325195
17.703125
0.000000
0.000000
4.000000
2.000000
75%
8.000000
326.000000
180.0000
3.919922
3.610352
18.906250
1.000000
1.000000
4.000000
4.000000
max
8.000000
472.000000
335.0000
4.929688
5.425781
22.906250
1.000000
1.000000
5.000000
8.000000
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
0
6.0
160.0
110.0
3.900391
2.619141
16.453125
0.0
1.0
4.0
4.0
1
6.0
160.0
110.0
3.900391
2.875000
17.015625
0.0
1.0
4.0
4.0
2
4.0
108.0
93.0
3.849609
2.320312
18.609375
1.0
1.0
4.0
1.0
3
6.0
258.0
110.0
3.080078
3.214844
19.437500
1.0
0.0
3.0
1.0
4
8.0
360.0
175.0
3.150391
3.439453
17.015625
0.0
0.0
3.0
2.0
0
cyl
float16
disp
float16
hp
float16
drat
float16
wt
float16
qsec
float16
vs
float16
am
float16
gear
float16
carb
float16
dtype: object
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
0
6.0
160.0000
110.0
3.900391
2.619141
16.453125
0.0
1.0
4.0
4.0
1
6.0
160.0000
110.0
3.900391
2.875000
17.015625
0.0
1.0
4.0
4.0
2
4.0
108.0000
93.0
3.849609
2.320312
18.609375
1.0
1.0
4.0
1.0
3
6.0
258.0000
110.0
3.080078
3.214844
19.437500
1.0
0.0
3.0
1.0
4
8.0
360.0000
175.0
3.150391
3.439453
17.015625
0.0
0.0
3.0
2.0
5
6.0
225.0000
105.0
2.759766
3.460938
20.218750
1.0
0.0
3.0
1.0
6
8.0
360.0000
245.0
3.210938
3.570312
15.843750
0.0
0.0
3.0
4.0
7
4.0
146.7500
62.0
3.689453
3.189453
20.000000
1.0
0.0
4.0
2.0
8
4.0
140.7500
95.0
3.919922
3.150391
22.906250
1.0
0.0
4.0
2.0
9
6.0
167.6250
123.0
3.919922
3.439453
18.296875
1.0
0.0
4.0
4.0
10
6.0
167.6250
123.0
3.919922
3.439453
18.906250
1.0
0.0
4.0
4.0
11
8.0
275.7500
180.0
3.070312
4.070312
17.406250
0.0
0.0
3.0
3.0
12
8.0
275.7500
180.0
3.070312
3.730469
17.593750
0.0
0.0
3.0
3.0
13
8.0
275.7500
180.0
3.070312
3.779297
18.000000
0.0
0.0
3.0
3.0
14
8.0
472.0000
205.0
2.929688
5.250000
17.984375
0.0
0.0
3.0
4.0
15
8.0
460.0000
215.0
3.000000
5.425781
17.812500
0.0
0.0
3.0
4.0
16
8.0
440.0000
230.0
3.230469
5.343750
17.421875
0.0
0.0
3.0
4.0
17
4.0
78.6875
66.0
4.078125
2.199219
19.468750
1.0
1.0
4.0
1.0
18
4.0
75.6875
52.0
4.929688
1.615234
18.515625
1.0
1.0
4.0
2.0
19
4.0
71.1250
65.0
4.218750
1.834961
19.906250
1.0
1.0
4.0
1.0
20
4.0
120.1250
97.0
3.699219
2.464844
20.015625
1.0
0.0
3.0
1.0
21
8.0
318.0000
150.0
2.759766
3.519531
16.875000
0.0
0.0
3.0
2.0
22
8.0
304.0000
150.0
3.150391
3.435547
17.296875
0.0
0.0
3.0
2.0
23
8.0
350.0000
245.0
3.730469
3.839844
15.406250
0.0
0.0
3.0
4.0
24
8.0
400.0000
175.0
3.080078
3.845703
17.046875
0.0
0.0
3.0
2.0
25
4.0
79.0000
66.0
4.078125
1.934570
18.906250
1.0
1.0
4.0
1.0
26
4.0
120.3125
91.0
4.429688
2.140625
16.703125
0.0
1.0
5.0
2.0
27
4.0
95.1250
113.0
3.769531
1.512695
16.906250
1.0
1.0
5.0
2.0
28
8.0
351.0000
264.0
4.218750
3.169922
14.500000
0.0
1.0
5.0
4.0
29
6.0
145.0000
175.0
3.619141
2.769531
15.500000
0.0
1.0
5.0
6.0
30
8.0
301.0000
335.0
3.539062
3.570312
14.601562
0.0
1.0
5.0
8.0
31
4.0
121.0000
109.0
4.109375
2.779297
18.593750
1.0
1.0
4.0
2.0
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.2 , random_state = 123 )
# Create a Bayesian RVFL regressor object
obj = lm . Regressor ( method = "bayesianrvfl" , nb_hidden = 5 )
# Fit the model using the training data
obj . fit ( X_train , y_train , reg_lambda = 12.9155 )
# Print the summary of the model
print ( obj . summary ( X_test , y = y_test , show_progress = False ))
$R_squared
[1] 0.6416309
$R_squared_adj
[1] 1.537554
$Residuals
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4.0724 -2.0122 -0.1018 -0.1941 1.4361 3.9676
$Coverage_rate
[1] 100
$citests
estimate lower upper p-value signif
cyl -24.5943583 -40.407994 -8.7807230 8.909365e-03 **
disp -0.2419797 -0.370835 -0.1131245 3.711077e-03 **
hp -1.5734483 -1.722903 -1.4239939 2.255640e-07 ***
drat 142.5646192 124.575179 160.5540599 1.217808e-06 ***
wt -144.8871352 -158.911143 -130.8631275 2.523441e-07 ***
qsec 46.8290859 27.829411 65.8287611 9.388045e-04 ***
vs 75.0555146 30.645127 119.4659017 6.110043e-03 **
am 207.5935234 133.205572 281.9814744 4.843095e-04 ***
gear 73.6892658 60.186232 87.1922995 1.091470e-05 ***
carb -71.2974988 -79.480400 -63.1145974 6.944475e-07 ***
$effects
── Data Summary ────────────────────────
Values
Name effects
Number of rows 7
Number of columns 10
_______________________
Column type frequency:
numeric 10
________________________
Group variables None
── Variable type: numeric ──────────────────────────────────────────────────────
skim_variable mean sd p0 p25 p50 p75 p100
1 cyl -24.6 17.1 -38.5 -38.5 -33.4 -12.4 1.66
2 disp -0.242 0.139 -0.351 -0.351 -0.285 -0.181 0.00546
3 hp -1.57 0.162 -1.90 -1.61 -1.48 -1.47 -1.47
4 drat 143. 19.5 125. 125. 141. 154. 174.
5 wt -145. 15.2 -167. -152. -142. -142. -117.
6 qsec 46.8 20.5 14.1 35.3 55.7 62.4 62.4
7 vs 75.1 48.0 37.2 37.2 58.7 93.9 167.
8 am 208. 80.4 64.3 168. 250. 267. 267.
9 gear 73.7 14.6 60.6 60.6 72.7 82.1 96.9
10 carb -71.3 8.85 -84.4 -75.2 -69.7 -69.7 -55.2
hist
1 ▇▁▂▁▃
2 ▇▂▁▂▂
3 ▂▁▁▃▇
4 ▇▂▂▂▂
5 ▂▅▇▁▂
6 ▃▁▁▂▇
7 ▇▁▃▁▂
8 ▂▂▁▂▇
9 ▇▂▂▂▂
10 ▂▅▇▁▂
# Select the first test sample
newx = X_test . iloc [ 0 ,:]
newy = y_test [ 0 ]
# Update the model with the new sample
new_X_test = X_test [ 1 :]
new_y_test = y_test [ 1 :]
obj . update ( newx , newy , method = "polyak" , alpha = 0.9 )
# Print the summary of the model
print ( obj . summary ( new_X_test , y = new_y_test , show_progress = False ))
$R_squared
[1] 0.6051442
$R_squared_adj
[1] 1.394856
$Residuals
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4.6214 -2.5055 -1.5003 -0.8308 1.2738 3.2794
$Coverage_rate
[1] 100
$citests
estimate lower upper p-value signif
cyl -30.0502823 -48.4171958 -11.683369 8.442658e-03 **
disp -0.2958477 -0.4386085 -0.153087 3.121989e-03 **
hp -1.6053302 -1.6789750 -1.531685 3.424156e-08 ***
drat 153.7968829 131.5239191 176.069847 1.041460e-05 ***
wt -155.1954135 -174.4144275 -135.976399 4.804729e-06 ***
qsec 49.8967685 26.9993778 72.794159 2.504905e-03 **
vs 87.4170764 32.4599776 142.374175 9.457226e-03 **
am 214.5918910 119.8712855 309.312496 2.108825e-03 **
gear 83.1355825 65.3159018 100.955263 7.110354e-05 ***
carb -77.1384645 -88.2087477 -66.068181 9.958425e-06 ***
$effects
── Data Summary ────────────────────────
Values
Name effects
Number of rows 6
Number of columns 10
_______________________
Column type frequency:
numeric 10
________________________
Group variables None
── Variable type: numeric ──────────────────────────────────────────────────────
skim_variable mean sd p0 p25 p50 p75 p100
1 cyl -30.1 17.5 -42.5 -42.5 -39.9 -17.7 -4.35
2 disp -0.296 0.136 -0.377 -0.377 -0.343 -0.308 -0.0269
3 hp -1.61 0.0702 -1.70 -1.66 -1.57 -1.57 -1.53
4 drat 154. 21.2 137. 137. 144. 169. 185.
5 wt -155. 18.3 -182. -160. -154. -154. -125.
6 qsec 49.9 21.8 18.8 33.3 60.6 66.6 66.6
7 vs 87.4 52.4 46.7 46.7 73.7 105. 178.
8 am 215. 90.3 65.6 169. 266. 275. 275.
9 gear 83.1 17.0 70.0 70.0 75.5 94.1 109.
10 carb -77.1 10.5 -92.5 -79.9 -76.6 -76.6 -59.7
hist
1 ▇▁▁▁▃
2 ▇▂▁▁▂
3 ▅▁▁▇▂
4 ▇▂▁▁▅
5 ▂▂▇▁▂
6 ▅▁▁▂▇
7 ▇▁▅▁▂
8 ▂▂▁▁▇
9 ▇▂▁▂▂
10 ▂▂▇▁▂
Comments powered by Talkyard.