ランダムフォレストの特徴量の重要度
Coding
code: Python
import numpy as np
import pandas as pd
# Load the Wine dataset.
# NOTE(review): the load statement is missing from this listing, leaving
# `df_wine` undefined. It is restored here on the assumption that the data is
# the headerless UCI Machine Learning Repository Wine file -- confirm the
# source (or point this at a local copy of wine.data).
df_wine = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
    header=None,
)
# Name the columns: the class label first, then the 13 features.
df_wine.columns = [
    'Class label', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
    'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',
    'Proanthocyanins', 'Color intensity', 'Hue',
    'OD280/OD315 of diluted wines', 'Proline',
]
# Display the first rows, including the class label column.
df_wine.head()
---------------------------------------------------------------------------------------
Class label Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue OD280/OD315 of diluted wines Proline
0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 1065
1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 1050
2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 1185
3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 1480
4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 735
---------------------------------------------------------------------------------------
code: Python
from sklearn.model_selection import train_test_split
# Extract the features (every column after the first) and the class labels
# (the first column) as separate NumPy arrays.
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
# Split into training data and test data (30% held out for testing);
# stratify=y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0, stratify=y)
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
# Names of the Wine dataset features (every column except the class label).
feat_labels = df_wine.columns[1:]
# Create the random forest object (number of decision trees = 500).
forest = RandomForestClassifier(n_estimators=500, random_state=1)
# Fit the model to the training data.
forest.fit(X_train, y_train)
# Extract the feature importances.
importances = forest.feature_importances_
# Feature indices ordered by descending importance
# (argsort is ascending, so the result is reversed).
indices = np.argsort(importances)[::-1]
n_features = X_train.shape[1]
# Print rank, feature name and importance in descending order of importance.
for rank, idx in enumerate(indices[:n_features], start=1):
    print('%2d) %-*s %f' % (rank, 30, feat_labels[idx], importances[idx]))
# Bar chart of the importances in the same descending order.
plt.title('Feature Importance')
plt.bar(range(n_features), importances[indices], align='center')
plt.xticks(range(n_features), feat_labels[indices], rotation=90)
plt.xlim([-1, n_features])
plt.tight_layout()
plt.show()
---------------------------------------------------------------------------------------
1) Proline 0.185453
2) Flavanoids 0.174751
3) Color intensity 0.143920
4) OD280/OD315 of diluted wines 0.136162
5) Alcohol 0.118529
6) Hue 0.058739
7) Total phenols 0.050872
8) Magnesium 0.031357
9) Malic acid 0.025648
10) Proanthocyanins 0.025570
11) Alcalinity of ash 0.022366
12) Nonflavanoid phenols 0.013354
13) Ash 0.013279
---------------------------------------------------------------------------------------
https://gyazo.com/930c290317d3c9c01450ac92ef89b368