# Echo this module's docstring (`__doc__`) to stdout when the script runs.
# NOTE(review): no module docstring is visible above this line; if the module
# has none, `__doc__` is None and this prints "None" - confirm.
print(__doc__)
# Author: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
#         Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
# License: BSD Style.
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (brier_score_loss, precision_score, recall_score,
                             f1_score)
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.model_selection import train_test_split
# Create a classification dataset with many redundant features and only a
# few informative ones.
X, y = datasets.make_classification(n_samples=100000, n_features=20,
                                    n_informative=2, n_redundant=10,
                                    random_state=42)

# test_size=0.99 holds out 99% of the samples for evaluation, so the models
# are fit on the remaining 1% only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.99,
                                                    random_state=42)
def plot_calibration_curve(est, name, fig_index):
    """Plot calibration curves for ``est`` without and with calibration.

    Four classifiers are fit on the module-level ``X_train``/``y_train`` and
    evaluated on ``X_test``/``y_test``: a logistic-regression baseline,
    ``est`` itself, and ``est`` wrapped in ``CalibratedClassifierCV`` with
    isotonic and with sigmoid calibration (both using ``cv=2``).

    For each classifier the Brier score, precision, recall and F1 score on
    the test set are printed, its reliability curve is drawn on the upper
    axes, and a histogram of its predicted positive-class scores is drawn on
    the lower axes.

    Parameters
    ----------
    est : estimator
        Classifier to evaluate. It must provide ``fit`` and ``predict`` and
        either ``predict_proba`` or ``decision_function``. Note that ``est``
        itself is fit in place by this function.
    name : str
        Display name for ``est``; used in the printed report and in the
        plot legends.
    fig_index : int
        Figure number passed to ``plt.figure``.

    Returns
    -------
    None
        The function only prints metrics and draws on the matplotlib figure;
        it does not call ``plt.show()``.
    """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Logistic regression with no calibration, used as the baseline
    lr = LogisticRegression(C=1.)

    # Select (or create) the figure; the axes below are created on it.
    plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    classifiers = [(lr, 'Logistic'),
                   (est, name),
                   (isotonic, name + ' + Isotonic'),
                   (sigmoid, name + ' + Sigmoid')]
    # A distinct loop variable avoids shadowing the ``name`` parameter.
    for clf, clf_name in classifiers:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:
            # No probability output: fall back to the decision function and
            # min-max scale its values into [0, 1].
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print("%s:" % clf_name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (clf_name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=clf_name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
# Plot calibration curve for Gaussian Naive Bayes
plot_calibration_curve(GaussianNB(), "Naive Bayes", 1)

# Plot calibration curve for the linear support vector classifier (Linear SVC)
plot_calibration_curve(LinearSVC(max_iter=10000), "SVC", 2)

plt.show()