Version 0.18.1 of nnetsauce (Python version) is available on PyPI and for conda. New developments include Bayesian inference and conformal prediction. Bayesian inference is available for scikit-learn models that possess a posterior distribution (BayesianRidge, ARDRegression, and GaussianProcessRegressor). Conformal prediction is available for every regression model that follows the “fit”/“predict” API. Conformal prediction for classification will be available in future versions.

Note: In examples, QRNN = Quasi-Randomized Nnetworks

# 1 - Installation

# Remove any already-installed nnetsauce before reinstalling it.
!pip uninstall nnetsauce --yes

# Install nnetsauce, upgrading to the newest release and bypassing pip's cache.
!pip install nnetsauce --upgrade --no-cache-dir

# Pin matplotlib to version 3.1.3.
!pip install matplotlib==3.1.3

import os
import warnings
from time import time

import matplotlib.pyplot as plt
import nnetsauce as ns
import numpy as np
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV
from sklearn.model_selection import train_test_split


# 2 - Useful plotting functions


# Silence every warning from this point on.
warnings.filterwarnings("ignore")

# Colour names kept as module-level constants.
split_color, split_color2, local_color = "green", "orange", "gray"

def plot_func(x,
              y,
              y_u=None,
              y_l=None,
              pred=None,
              method_name="",
              title=""):
    """Plot observations with an optional prediction interval and forecast.

    Parameters
    ----------
    x : sequence
        Abscissas of the points. Must support reverse slicing
        (``x[::-1]``) when an interval is drawn.
    y : sequence
        Observed values, drawn as semi-transparent black dots.
    y_u, y_l : sequence, optional
        Upper and lower bounds of the prediction interval. The band is
        drawn only when both are provided.
    pred : sequence, optional
        Predicted values, drawn as a dashed black line when provided.
    method_name : str, optional
        Prefix of the interval's legend entry.
    title : str, optional
        Title of the figure.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Start a fresh figure so successive calls do not draw over each other.
    plt.figure()

    plt.plot(x, y, 'k.', alpha=.3, markersize=10,
             fillstyle='full', label='Test set observations')

    if (y_u is not None) and (y_l is not None):
        # Walk along the upper bound left-to-right, then back along the
        # lower bound right-to-left, so the two curves form one closed polygon.
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_u, y_l[::-1]]),
                 label=method_name + ' Prediction interval')

    if pred is not None:
        plt.plot(x, pred, 'k--', lw=2, alpha=0.9,
                 label='Predicted value')

    plt.xlabel('$X$')
    plt.ylabel('$Y$')
    plt.legend(loc='upper right')
    plt.title(title)

    plt.show()


# 3 - Examples of use

## 3 - 1 Conformalized Quasi-Randomized Nnetworks

# Load the California housing data and hold out 20% of it as a test set.
# The fixed random_state makes the split reproducible.
data = fetch_california_housing()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=123
)


# 3 - 1 conformalized QRNN

## RidgeCV

# RidgeCV wrapped in a CustomRegressor (5 hidden nodes, ReLU activation function)
regr1 = ns.CustomRegressor(RidgeCV())
regr1.fit(X_train, y_train)

# Only the split-conformal prediction step (95% level) is timed.
start = time()
preds1 = regr1.predict(
    X_test,
    method="splitconformal",
    return_pi=True,
    level=95,
)
print(f"Elapsed: {time() - start}s")

# preds1[1] / preds1[2] are used as the lower / upper interval bounds;
# coverage is the share of test targets falling between them.
covered1 = (preds1[1] <= y_test) & (preds1[2] >= y_test)
print(f"coverage_rate conformalized QRNN RidgeCV: {np.mean(covered1)}")

# Plot only the first max_idx test points to keep the figure readable.
max_idx = 50
plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds1[2][:max_idx],
    y_l=preds1[1][:max_idx],
    pred=preds1[0][:max_idx],
    title=f"conformalized QRNN RidgeCV ({max_idx} first points in test set)",
)

coverage_rate conformalized QRNN RidgeCV: 0.9578488372093024


## Extra Trees

# Extra Trees wrapped in a CustomRegressor (5 hidden nodes, ReLU activation function)
regr4 = ns.CustomRegressor(ExtraTreesRegressor())
regr4.fit(X_train, y_train)

# Only the split-conformal prediction step (90% level) is timed.
start = time()
preds4 = regr4.predict(
    X_test,
    method="splitconformal",
    return_pi=True,
    level=90,
)
print(f"Elapsed: {time() - start}s")
print(f"preds4: {preds4}")

# preds4[1] / preds4[2] are used as the lower / upper interval bounds;
# coverage is the share of test targets falling between them.
covered4 = (preds4[1] <= y_test) & (preds4[2] >= y_test)
print(f"coverage_rate conformalized QRNN ExtraTreesRegressor: {np.mean(covered4)}")

plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds4[2][:max_idx],
    y_l=preds4[1][:max_idx],
    pred=preds4[0][:max_idx],
    title=f"conformalized QRNN ExtraTreesRegressor ({max_idx} first points in test set)",
)

preds4: (array([2.1156401, 1.11028  , 1.40237  , ..., 0.91221  , 1.94403  ,
3.1501305]), array([1.2909301, 0.28557  , 0.57766  , ..., 0.0875   , 1.11932  ,
2.3254205]), array([2.9403501, 1.93499  , 2.22708  , ..., 1.73692  , 2.76874  ,
3.9748405]))
coverage_rate conformalized QRNN ExtraTreesRegressor: 0.9011627906976745


## Bayesian Ridge

# Switch to the diabetes data for the Bayesian examples and hold out 20% of
# it as a test set. The fixed random_state makes the split reproducible.
data = load_diabetes()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=123
)

print(X.shape)

# BayesianRidge wrapped in a CustomRegressor; here the fit step is timed.
regr = ns.CustomRegressor(BayesianRidge())
start = time()
regr.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")

# With return_std=True, preds[2] / preds[3] are used as the lower / upper
# interval bounds; coverage is the share of test targets between them.
preds = regr.predict(X_test, return_std=True)
covered = (preds[2] <= y_test) & (preds[3] >= y_test)
print(f"coverage_rate Bayesian Ridge: {np.mean(covered)}")


(442, 10)
coverage_rate Bayesian Ridge: 0.9775280898876404

# Show the first max_idx test points with the Bayesian Ridge interval.
plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds[3][:max_idx],
    y_l=preds[2][:max_idx],
    pred=preds[0][:max_idx],
    title=f"Bayesian Ridge QRNN ({max_idx} first points in test set)",
)


## ARD Regression

# ARDRegression wrapped in a CustomRegressor; here the fit step is timed.
regr2 = ns.CustomRegressor(ARDRegression())
start = time()
regr2.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")

# With return_std=True, preds2[2] / preds2[3] are used as the lower / upper
# interval bounds; coverage is the share of test targets between them.
preds2 = regr2.predict(X_test, return_std=True)
covered2 = (preds2[2] <= y_test) & (preds2[3] >= y_test)
print(f"coverage_rate ARD Regressor: {np.mean(covered2)}")


coverage_rate ARD Regressor: 0.9775280898876404

plot_func(x = range(max_idx),
y = y_test[0:max_idx],
y_u = preds2[3][0:max_idx],
y_l = preds2[2][0:max_idx],
pred = preds2[0][0:max_idx],