This post is about Ridge2Classifier, a classifier that I presented 5 years ago in this document. It is now possible to use least squares regression solutions as starting values for the (likelihood) optimization algorithm. This is not always better, but it can be seen as a new hyperparameter. Ridge2Classifier also used to fail miserably on digits data sets, but now, with nnetsauce's maturity, it is doing much better on this type of data, as demonstrated below.
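For a quick illustration of this new choice: the starting-value strategy is selected through the solver argument of Ridge2Classifier. Here is a minimal sketch (the hyperparameter values below are arbitrary placeholders, not the tuned values obtained later in this post):
import nnetsauce as ns

# L-BFGS-B with its default starting values
clf = ns.Ridge2Classifier(lambda1=0.1, lambda2=0.1,
                          solver="L-BFGS-B")

# the same optimizer, started from least squares regression solutions
clf_lstsq = ns.Ridge2Classifier(lambda1=0.1, lambda2=0.1,
                                solver="L-BFGS-B-lstsq")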
0 - Install and load packages
!pip install nnetsauce
!pip install GPopt
import GPopt as gp
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn import metrics
from time import time
1 - Cross-validation and hyperparameter tuning
def ridge2_cv(X_train, y_train,
              lambda1=0.1,
              lambda2=0.1,
              n_hidden_features=5,
              n_clusters=5,
              dropout=0.8,
              solver="L-BFGS-B"):  # 'solver' is the optimization algorithm
    estimator = ns.Ridge2Classifier(lambda1=lambda1,
                                    lambda2=lambda2,
                                    n_hidden_features=n_hidden_features,
                                    n_clusters=n_clusters,
                                    dropout=dropout,
                                    solver=solver)
    # negative mean accuracy, because GPOpt minimizes the objective
    return -cross_val_score(estimator, X_train, y_train,
                            scoring='accuracy',
                            cv=5, n_jobs=None,
                            verbose=0).mean()
def optimize_ridge2(X_train, y_train, solver="L-BFGS-B"):
    # objective function for hyperparams tuning
    def crossval_objective(x):
        return ridge2_cv(X_train=X_train,
                         y_train=y_train,
                         lambda1=10**x[0],  # lambda1, lambda2 searched on a log10 scale
                         lambda2=10**x[1],
                         n_hidden_features=int(x[2]),
                         n_clusters=int(x[3]),
                         dropout=x[4],
                         solver=solver)
    # Bayesian optimization of the cross-validation error
    gp_opt = gp.GPOpt(objective_func=crossval_objective,
                      lower_bound=np.array([-10, -10, 3, 2, 0.6]),
                      upper_bound=np.array([10, 10, 100, 5, 1]),
                      params_names=["lambda1", "lambda2", "n_hidden_features", "n_clusters", "dropout"],
                      n_init=10, n_iter=90, seed=3137)
    return gp_opt.optimize(verbose=2, abs_tol=1e-3)
dataset = load_digits()
X = dataset.data
y = dataset.target
# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=3137)
# hyperparams tuning
res_opt1 = optimize_ridge2(X_train, y_train, solver="L-BFGS-B")
print(res_opt1)
# hyperparams tuning with different starting values for the optimization algorithm
res_opt2 = optimize_ridge2(X_train, y_train, solver="L-BFGS-B-lstsq")
print(res_opt2)
res_opt1.best_params["lambda1"] = 10**(res_opt1.best_params["lambda1"])
res_opt1.best_params["lambda2"] = 10**(res_opt1.best_params["lambda2"])
res_opt1.best_params["n_hidden_features"] = int(res_opt1.best_params["n_hidden_features"])
res_opt1.best_params["n_clusters"] = int(res_opt1.best_params["n_clusters"])
print(res_opt1.best_params)
res_opt2.best_params["lambda1"] = 10**(res_opt2.best_params["lambda1"])
res_opt2.best_params["lambda2"] = 10**(res_opt2.best_params["lambda2"])
res_opt2.best_params["n_hidden_features"] = int(res_opt2.best_params["n_hidden_features"])
res_opt2.best_params["n_clusters"] = int(res_opt2.best_params["n_clusters"])
print(res_opt2.best_params)
{'lambda1': 5.243297406977503e-10, 'lambda2': 1.2433817601870388e-05, 'n_hidden_features': 14, 'n_clusters': 2, 'dropout': 0.94100341796875}
{'lambda1': 1.747558169384434e-08, 'lambda2': 1360.0188315151736, 'n_hidden_features': 14, 'n_clusters': 2, 'dropout': 0.7794189453125}
2 - Out-of-sample scores
# refit on the full training set with the tuned hyperparameters
clf1 = ns.Ridge2Classifier(**res_opt1.best_params,
                           solver="L-BFGS-B")
start = time()
clf1.fit(X_train, y_train)
print(f"Elapsed: {time()-start}")
print(clf1.score(X_test, y_test))
# same, with least squares starting values for the optimizer
clf2 = ns.Ridge2Classifier(**res_opt2.best_params,
                           solver="L-BFGS-B-lstsq")
start = time()
clf2.fit(X_train, y_train)
print(f"Elapsed: {time()-start}")
print(clf2.score(X_test, y_test))
Elapsed: 2.6086528301239014
0.9138888888888889
Elapsed: 1.2307183742523193
0.9416666666666667
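Before plotting the confusion matrix, a per-class view of clf2's out-of-sample errors can be useful. Here is a minimal sketch using scikit-learn's classification_report (standard scikit-learn, not part of nnetsauce):
from sklearn.metrics import classification_report

# per-class precision, recall and F1 score for the tuned model
print(classification_report(y_test, clf2.predict(X_test)))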
# confusion matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
y_pred = clf2.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=np.arange(0, 10), yticklabels=np.arange(0, 10))
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()