from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
# we will use only two features but all the classes
iris = load_iris()
y = iris.target
X = iris.data[:, 0:2]
# look at the data
import numpy as np

_, ax = plt.subplots(figsize=(5, 5))
scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1])
_ = ax.legend(
    scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes"
)
Conformal Prediction
We will use conformalization techniques to characterize the uncertainty so that certain statistical guarantees can be provided for the predictions. Most importantly, we will predict sets (not points). A set is a general mathematical object that subsumes intervals, labels, and categories.
Further, conformal procedures allow us to tune classifiers in a much more principled way. The traditional way of tuning a classifier is to plot ROC curves and pick an operating point; the levers are typically precision, recall, FPR, TPR, etc.
Tuning a binary classifier this way is straightforward. But how does one pick an operating point, or sets of points, when there are more than two classes?
Later we will see that the coverage guarantee and the conformalization procedure turn this multivariate decision problem into a single-variable decision problem.
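To make this concrete: a split conformal procedure outputs, for each input x, a set C(x) of candidate labels such that P(y ∈ C(x)) ≥ 1 − α for a user-chosen error level α (assuming the calibration and test data are exchangeable). The single knob α replaces the many per-class thresholds, which is what makes the multi-class tuning problem one-dimensional.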
from sklearn.model_selection import train_test_split
train_ratio = 0.70
calibration_ratio = 0.15
test_ratio = 0.15

# train is now 70% of the entire data set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 - train_ratio, shuffle=True)

# calibration is now 15% of the initial data set
# test is now 15% of the initial data set
X_cal, X_test, y_cal, y_test = train_test_split(X_test, y_test, test_size=test_ratio/(test_ratio + calibration_ratio), shuffle=True)
print(X_train.shape, X_cal.shape, X_test.shape)
print(y_train.shape, y_cal.shape, y_test.shape)
(104, 2) (23, 2) (23, 2)
(104,) (23,) (23,)
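Note the three-way split: split conformal procedures require a calibration set that is disjoint from the training data, because the nonconformity scores must be computed on points the model has not seen during fitting.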
# Fit a logistic regression model
from sklearn.linear_model import LogisticRegression
# multinomial is the default multi-class strategy in recent scikit-learn,
# so the deprecated multi_class argument is omitted
clf = LogisticRegression(random_state=0).fit(X_train, y_train)
yh_train = clf.predict(X_train)

from sklearn.metrics import classification_report
print('on train set')
print(classification_report(y_train, yh_train, target_names=iris.target_names))

yh_test = clf.predict(X_test)
print('on test set')
print(classification_report(y_test, yh_test, target_names=iris.target_names))
on train set
              precision    recall  f1-score   support

      setosa       1.00      0.97      0.99        34
  versicolor       0.70      0.81      0.75        37
   virginica       0.75      0.64      0.69        33

    accuracy                           0.81       104
   macro avg       0.82      0.81      0.81       104
weighted avg       0.81      0.81      0.81       104

on test set
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00         6
  versicolor       0.75      0.43      0.55         7
   virginica       0.69      0.90      0.78        10

    accuracy                           0.78        23
   macro avg       0.81      0.78      0.78        23
weighted avg       0.79      0.78      0.77        23
# Implement Expected Calibration Error
# Ref: https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
def expected_calibration_error(samples, true_labels, M=5):
    # uniform binning approach with M number of bins
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    # get max probability per sample i
    confidences = np.max(samples, axis=1)
    # get predictions from confidences (positional in this case)
    predicted_label = np.argmax(samples, axis=1)

    # get a boolean list of correct/false predictions
    accuracies = predicted_label == true_labels

    ece = np.zeros(1)
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # determine if sample is in bin m (between bin lower & upper)
        in_bin = np.logical_and(confidences > bin_lower.item(), confidences <= bin_upper.item())
        # calculate the empirical probability of a sample falling into bin m: (|Bm|/n)
        prob_in_bin = in_bin.mean()

        if prob_in_bin.item() > 0:
            # get the accuracy of bin m: acc(Bm)
            accuracy_in_bin = accuracies[in_bin].mean()
            # get the average confidence of bin m: conf(Bm)
            avg_confidence_in_bin = confidences[in_bin].mean()
            # calculate |acc(Bm) - conf(Bm)| * (|Bm|/n) for bin m and add to the total ECE
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prob_in_bin
    return ece
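In symbols, matching the comments in the code above, the quantity computed is ECE = Σ_m (|Bm|/n) · |acc(Bm) − conf(Bm)|, where Bm is the set of samples whose top-class confidence falls in bin m, acc(Bm) is the accuracy within that bin, and conf(Bm) is the average confidence within that bin. Lower is better; a perfectly calibrated model has ECE = 0.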
# ece on train set
pred_proba_train = clf.predict_proba(X_train)
ece_train = expected_calibration_error(pred_proba_train, y_train, M=10)
print('ece on train set', ece_train)

# ece on test set
pred_proba_test = clf.predict_proba(X_test)
ece_test = expected_calibration_error(pred_proba_test, y_test, M=5)
print('ece on test set', ece_test)
ece on train set [0.07482647]
ece on test set [0.11760297]
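As a quick sanity check of the implementation (an illustrative snippet, not part of the original analysis): a model that is always fully confident and always correct should score an ECE of exactly zero.
# sanity check: one-hot "probabilities" that always match the labels
probs = np.eye(3)[[0, 1, 2, 1]]
labels = np.array([0, 1, 2, 1])
print(expected_calibration_error(probs, labels, M=5))  # expect [0.]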
# Wrap the trained classifier into PUNCC
# Create a wrapper of the logistic regression model to redefine its predict method
# into probability predictions. Make sure to subclass BasePredictor.
# Note that we needed to build a new wrapper (over BasePredictor) only because
# the predict(.) method of LogisticRegression returns hard labels, not probabilities.
# Otherwise, it is enough to use BasePredictor (e.g., a neural network with softmax).
from deel.puncc.classification import RAPS
from deel.puncc.api.prediction import BasePredictor
from deel.puncc.metrics import classification_mean_coverage
from deel.puncc.metrics import classification_mean_size
import numpy as np
class LogisticPredictor(BasePredictor):
    def predict(self, X, **kwargs):
        return self.model.predict_proba(X, **kwargs)

# Wrap the model in the newly created LogisticPredictor
clf_predictor = LogisticPredictor(clf)
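A quick check (illustrative; it relies on BasePredictor exposing the wrapped model as self.model, as used in the class above) that the wrapper now returns per-class probabilities instead of hard labels:
# each row should be a probability vector over the three classes
print(clf_predictor.predict(X_cal[:2]))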
# CP method initialization
raps_cp = RAPS(clf_predictor, train=True, k_reg=2, lambd=0)
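For context on the two hyperparameters (a summary of the RAPS method, not something computed here): RAPS, Regularized Adaptive Prediction Sets, ranks classes by predicted probability and scores a candidate label by the cumulative probability mass of all classes ranked at least as high, plus a penalty lambd * max(0, rank − k_reg) that discourages sets larger than k_reg. With lambd=0, as here, no regularization is applied and the method reduces to plain adaptive prediction sets (APS).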
# The call to `fit` trains the model (since train=True) and computes the nonconformity
# scores on the calibration set
raps_cp.fit(X_fit=X_train, y_fit=y_train, X_calib=X_cal, y_calib=y_cal)

# The predict method infers prediction sets with respect to
# the significance level alpha = 10%
y_pred, set_pred = raps_cp.predict(X_test, alpha=.1)
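Before computing aggregate metrics, it helps to eyeball a few predictions (set_pred holds one set of candidate labels per test point):
# peek at the first few prediction sets next to the point predictions
for point, labels_ in zip(y_pred[:5], set_pred[:5]):
    print('point prediction:', point, '| prediction set:', labels_)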
# Compute marginal coverage
coverage = classification_mean_coverage(y_test, set_pred)
size = classification_mean_size(set_pred)
print(f"Marginal coverage: {np.round(coverage, 2)}")
print(f"Average prediction set size: {np.round(size, 2)}")
Marginal coverage: 0.87
Average prediction set size: 1.7
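Equivalently, marginal coverage can be computed by hand as the fraction of test points whose true label lands inside the predicted set (a sketch assuming set_pred is a list of label collections):
manual_coverage = np.mean([y in s for y, s in zip(y_test, set_pred)])
print('manual marginal coverage:', np.round(manual_coverage, 2))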
# Let us subset the data s.t. we decide to predict only when the set size is 1; otherwise we abstain.
# For illustration, we will do it on the train set.
from sklearn.metrics import accuracy_score

y_pred, set_pred = raps_cp.predict(X_train, alpha=.1)
ind = np.array([len(x) == 1 for x in set_pred])

Xsub_train = X_train[ind, :]
ysub_train = y_train[ind]
yh_sub_train = clf.predict(Xsub_train)
acc_sub = accuracy_score(ysub_train, yh_sub_train)
acc = accuracy_score(y_train, clf.predict(X_train))
print('acc before', acc, 'acc after', acc_sub)
acc before 0.8076923076923077 acc after 0.9459459459459459
# we will vary alpha and compute accuracy on the retained (non-abstained) points
alphas = np.linspace(0.05, 0.4, num=10)
acc_sub = []
rej_ratio = []

def get_acc(X, y, alpha):
    y_pred, set_pred = raps_cp.predict(X, alpha=alpha)
    ind = np.array([len(x) == 1 for x in set_pred])
    X_sub = X[ind, :]
    y_sub = y[ind]
    yh_sub = clf.predict(X_sub)
    acc = accuracy_score(y_sub, yh_sub)
    # abstention rate: fraction of points whose prediction set is not a singleton
    rej = 1 - sum(ind)/len(ind)
    return acc, rej

for alpha in alphas:
    acc, rej = get_acc(X_train, y_train, alpha)
    acc_sub.append(acc)
    rej_ratio.append(rej)

plt.plot(alphas, acc_sub, label='Acc')
plt.plot(alphas, rej_ratio, label='Abstention Rate')
plt.legend()
plt.xlabel('alpha')
[Figure: accuracy and abstention rate as functions of alpha]
plt.plot(rej_ratio, acc_sub)
plt.xlabel('abstention rate')
plt.ylabel('accuracy')

[Figure: accuracy vs. abstention rate]