# Data Science
# 의사결정나무(DecisionTree) 파이썬 예제 코드
# 태지쌤
# 2023. 12. 3. 13:08
# 반응형
import os
import pandas as pd
import numpy as np
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text, export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Load the "penguins" example dataset through seaborn and take a quick look.
penguins = sns.load_dataset('penguins')
print(penguins.shape)
penguins.head()  # only displays in an interactive/notebook session

# Replace missing numeric measurements with the mean of their own column.
# The result is assigned back instead of calling
# `penguins[col].fillna(..., inplace=True)`: that chained in-place form works
# on a temporary Series, is deprecated in pandas 2.x, and leaves the DataFrame
# unchanged when Copy-on-Write is enabled.
numeric_cols = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
penguins[numeric_cols] = penguins[numeric_cols].fillna(penguins[numeric_cols].mean())

# Encode sex as 1 for male and 0 for everything else (female or missing).
# The comparison is case-insensitive because the label is spelled 'MALE' or
# 'Male' depending on the version of the dataset; an exact match on 'MALE'
# would turn the whole column into 0 for the latter.
penguins['sex'] = penguins['sex'].str.upper().eq('MALE').astype(int)

# One-hot encode the island with two indicator columns; rows where both are 0
# belong to any remaining island value.
penguins['Biscoe'] = penguins['island'].eq('Biscoe').astype(int)
penguins['Dream'] = penguins['island'].eq('Dream').astype(int)
# Feature columns fed to the classifier: four numeric measurements plus the
# binary sex / island indicator columns.
colnames = [
    'bill_length_mm',
    'bill_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
    'sex',
    'Biscoe',
    'Dream',
]
X = penguins[colnames]

# Target: the penguin species. Selected by label rather than by position
# (`iloc[:, 0]`) so the code keeps working if the column order ever changes.
y = penguins['species']

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)
# Build a depth-limited decision tree using Gini impurity, then fit it on the
# training split (fit() returns the estimator itself).
pen_tree = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    random_state=1,
)
pen_tree.fit(X_train, y_train)

# Draw the fitted tree on a single high-resolution axes.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 5), dpi=300)
plotResult = sklearn.tree.plot_tree(
    pen_tree,
    feature_names=colnames,
    filled=True,
    ax=axes,  # the axes just created, which is also the current axes
)

# Report the mean accuracy on the held-out split.
print(pen_tree.score(X_test, y_test))

# Confusion matrix of true vs. predicted species on the held-out split.
pred_y = pen_tree.predict(X_test)
print(confusion_matrix(y_test, pred_y))
# 반응형