ROC曲線的橫軸是FPRate(False Positive Rate,假陽性率),縱軸是TPRate(True Positive Rate,真陽性率)。分類器給出預測的概率之後,我們需要設定一個閾值來把各個預測值劃分為預測為正/負。即,小於等於這個閾值的所有樣本預測為負,大於這個閾值的樣本預測為正。
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import auc, roc_curve
# Calculate 'True Positive Rate' and 'False Positive Rate' of each threshold,
# tabulate them, and plot the resulting ROC curve.

# Classifier scores, one per sample.
scores = np.array([
    0.95, 0.9, 1, 0.9, 0.9, 0.8, 0.8, 0.7, 0.7, 0.7,
    0.7, 0.6, 0.6, 0.6, 0.6, 0.5, 0.5, 0.5, 0.5, 0.4,
    0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.0, 0.2, 0.1, 0.11,
])
# Ground-truth labels aligned with ``scores``; 1 is passed as ``pos_label``.
true_values = np.array([
    0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
    1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
    1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
])


def build_metric_table(fpr, tpr, thresholds) -> pd.DataFrame:
    """Return a table with one row per threshold.

    Args:
        fpr: False positive rate at each threshold.
        tpr: True positive rate at each threshold.
        thresholds: Threshold values, aligned with ``fpr`` and ``tpr``.

    Returns:
        A DataFrame whose columns are, in order, 'Threshold',
        'True Positive Rate' and 'False Positive Rate'.
    """
    column_order = ['Threshold', 'True Positive Rate', 'False Positive Rate']
    data = {
        'Threshold': thresholds,
        'True Positive Rate': tpr,
        'False Positive Rate': fpr,
    }
    return pd.DataFrame(data, columns=column_order)


def plot_roc_curve(fpr, tpr, roc_auc) -> None:
    """Plot TPR against FPR, add a reference diagonal, and show the figure.

    Args:
        fpr: False positive rates (x axis).
        tpr: True positive rates (y axis).
        roc_auc: Area under the curve, shown in the legend with two decimals.
    """
    plt.figure()
    plt.plot(fpr, tpr, color='red', lw=2, marker='o',
             label=f'ROC curve (area = {roc_auc:.2f})')
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
    plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    # Slight headroom above 1.0 so markers at TPR == 1 are not clipped.
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.grid(alpha=0.2)
    plt.show()


def main() -> None:
    """Run the worked example: print the metric table and plot the curve."""
    fpr, tpr, thresholds = roc_curve(true_values, scores, pos_label=1)

    # A bare ``metric_table`` expression displays nothing in a script,
    # so print the table explicitly.
    metric_table = build_metric_table(fpr, tpr, thresholds)
    print(metric_table)

    # AUC is the area under the ROC curve; for this example it comes out
    # to about 0.63.
    roc_auc = auc(fpr, tpr)
    plot_roc_curve(fpr, tpr, roc_auc)


if __name__ == "__main__":
    main()