RandomTreesEmbedding提供了一種將數據映射到非常高維的稀疏表示的方法,這對分類是有提升的作用。該映射是無監督且非常有效的。
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
from sklearn.decomposition import TruncatedSVD
from sklearn.naive_bayes import BernoulliNB
# 構建一個綜合數據集
X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
# 使用RandomTreesEmbedding轉換後的數據
hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
X_transformed = hasher.fit_transform(X)
# 可視化使用截斷SVD(truncated SVD)降維後的結果
svd = TruncatedSVD(n_components=2)
X_reduced = svd.fit_transform(X_transformed)
# 在轉換後的數據上學習一個樸素貝葉斯分類器
nb = BernoulliNB()
nb.fit(X_transformed, y)
# 學習ExtraTreesClassifier用於比較
trees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)
trees.fit(X, y)
# 原始數據和精簡數據的散點圖
fig = plt.figure(figsize=(9, 8))
ax = plt.subplot(221)
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, edgecolor='k')
ax.set_title("Original Data (2d)")
ax.set_xticks(())
ax.set_yticks(())
ax = plt.subplot(222)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50, edgecolor='k')
ax.set_title("Truncated SVD reduction (2d) of transformed data (%dd)" %
X_transformed.shape[1])
ax.set_xticks(())
ax.set_yticks(())
# 在原始空間中繪製決策。 為此,我們為
# 網格[x_min,x_max] x [y_min,y_max]中的每個點分配一種顏色。
h = .01
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# 使用RandomTreesEmbedding變換網格
transformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])
y_grid_pred = nb.predict_proba(transformed_grid)[:, 1]
ax = plt.subplot(223)
ax.set_title("Naive Bayes on Transformed data")
ax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, edgecolor='k')
ax.set_ylim(-1.4, 1.4)
ax.set_xlim(-1.4, 1.4)
ax.set_xticks(())
ax.set_yticks(())
# 使用ExtraTreesClassifier變換網格
y_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
ax = plt.subplot(224)
ax.set_title("ExtraTrees predictions")
ax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, edgecolor='k')
ax.set_ylim(-1.4, 1.4)
ax.set_xlim(-1.4, 1.4)
ax.set_xticks(())
ax.set_yticks(())
plt.tight_layout()
plt.show()
由Sphinx-Gallery生成的畫廊
☆☆☆為方便大家查閱,小編已將scikit-learn學習路線專欄文章統一整理到公眾號底部菜單欄,同步更新中,關注公眾號,點擊左下方「系列文章」,如圖:歡迎大家和我一起沿著scikit-learn文檔這條路線,一起鞏固機器學習算法基礎。(添加微信:mthler,備註:sklearn學習,一起進【sklearn機器學習進步群】開啟打怪升級的學習之旅。)