パイプラインによるワークフローの効率化

Coding

code: Python

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Split the frame into a feature matrix and a label vector.
# `.loc` is label-based, so this assumes `df` (loaded outside this snippet)
# has integer column labels, with the class label in column 1 and the
# features in columns 2 onward -- TODO confirm against where df is loaded.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Convert the categorical labels in y ("M" / "B") to integers.
le = LabelEncoder()
y = le.fit_transform(y)
print(le.classes_)

# Check the learned mapping. transform() takes a single array-like argument,
# so the two labels must be passed together as one list.
le.transform(['B', 'M'])

-------------------------------------------------------------------------------

array([0, 1], dtype=int64)

-------------------------------------------------------------------------------

code: Python

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples for testing, stratifying the split on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=1,
)

# Chain the processing steps: scaling, principal component analysis,
# and logistic regression.
pipe_lr = make_pipeline(
    StandardScaler(),
    PCA(n_components=2),
    LogisticRegression(random_state=1),
)
pipe_lr.fit(X_train, y_train)

# Predict on the held-out set, then report the pipeline's score on it.
y_pred = pipe_lr.predict(X_test)
test_accuracy = pipe_lr.score(X_test, y_test)
print('Test Accuracy: %.3f' % test_accuracy)

-------------------------------------------------------------------------------

Test Accuracy: 0.956

-------------------------------------------------------------------------------