본문 바로가기

파이썬

13. 모델평가 (모델성능비교)

회귀 모델 평가

# 📌 Wine 데이터 - 일반 Train/Test Split 후 다양한 회귀 모델 성능 평가

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1) Load the wine-quality dataset (semicolon-separated CSV).
wine_data = pd.read_csv("datasets/winequality-white.csv", sep=";")

# 2) Separate the features (X) from the regression target (y).
X_wine = wine_data.drop(columns=["quality"])
y_wine = wine_data["quality"]

# 3) Train/test split (80:20) on the RAW features.
#    Splitting before scaling keeps test-set statistics out of the scaler;
#    fitting the scaler on the full dataset leaks information into training.
X_train_raw_w, X_test_raw_w, y_train_w, y_test_w = train_test_split(
    X_wine, y_wine, test_size=0.2, random_state=42
)

# 4) Standardize: learn mean/std from the training rows only, then apply the
#    same transformation to the held-out rows.
scaler = StandardScaler()
X_train_w = scaler.fit_transform(X_train_raw_w)
X_test_w = scaler.transform(X_test_raw_w)

# Kept so that any code referring to the fully scaled matrix still works;
# it is transformed with the train-fitted scaler.
X_wine_scaled = scaler.transform(X_wine)

# 5) Train and evaluate several regression models.

# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X_train_w, y_train_w)
y_pred_lin = lin_reg.predict(X_test_w)

# Compute MSE once and derive RMSE as its square root.  The `squared=False`
# keyword of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6, so it is avoided here.
mse_lin = mean_squared_error(y_test_w, y_pred_lin)

print("Linear Regression:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_lin))
print("Mean Squared Error (MSE):", mse_lin)
print("Root Mean Squared Error (RMSE):", mse_lin ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_lin))
print("\n")

# Random Forest Regressor (100 trees, fixed seed for reproducibility)
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
rf_reg.fit(X_train_w, y_train_w)
y_pred_rf = rf_reg.predict(X_test_w)

# Compute MSE once and derive RMSE as its square root.  The `squared=False`
# keyword of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6, so it is avoided here.
mse_rf = mean_squared_error(y_test_w, y_pred_rf)

print("Random Forest Regressor:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_rf))
print("Mean Squared Error (MSE):", mse_rf)
print("Root Mean Squared Error (RMSE):", mse_rf ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_rf))
print("\n")

# XGBoost Regressor (50 shallow trees, fixed seed for reproducibility)
xgb_reg = XGBRegressor(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42)
xgb_reg.fit(X_train_w, y_train_w)
y_pred_xgb = xgb_reg.predict(X_test_w)

# Compute MSE once and derive RMSE as its square root.  The `squared=False`
# keyword of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6, so it is avoided here.
mse_xgb = mean_squared_error(y_test_w, y_pred_xgb)

print("XGBoost Regressor:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_xgb))
print("Mean Squared Error (MSE):", mse_xgb)
print("Root Mean Squared Error (RMSE):", mse_xgb ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_xgb))
print("\n")

# LightGBM Regressor (50 trees, fixed seed for reproducibility)
lgbm_reg = LGBMRegressor(n_estimators=50, learning_rate=0.1, max_depth=-1, random_state=42)
lgbm_reg.fit(X_train_w, y_train_w)
y_pred_lgbm = lgbm_reg.predict(X_test_w)

# Compute MSE once and derive RMSE as its square root.  The `squared=False`
# keyword of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6, so it is avoided here.
mse_lgbm = mean_squared_error(y_test_w, y_pred_lgbm)

print("LightGBM Regressor:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_lgbm))
print("Mean Squared Error (MSE):", mse_lgbm)
print("Root Mean Squared Error (RMSE):", mse_lgbm ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_lgbm))

 

분류 모델 평가

# 📌 Heart 데이터 - 일반 Train/Test Split 후 다양한 분류 모델 성능 평가

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# 1) Load the heart dataset.
heart_data = pd.read_csv("datasets/heart.csv")

# 2) Separate the features (X) from the classification target (y).
X_heart = heart_data.drop(columns=["output"])
y_heart = heart_data["output"]

# 3) Train/test split (80:20) on the RAW features.
#    Splitting before scaling keeps test-set statistics out of the scaler;
#    fitting the scaler on the full dataset leaks information into training.
X_train_raw_h, X_test_raw_h, y_train_h, y_test_h = train_test_split(
    X_heart, y_heart, test_size=0.2, random_state=42
)

# 4) Standardize: learn mean/std from the training rows only, then apply the
#    same transformation to the held-out rows.
scaler = StandardScaler()
X_train_h = scaler.fit_transform(X_train_raw_h)
X_test_h = scaler.transform(X_test_raw_h)

# Kept so that any code referring to the fully scaled matrix still works;
# it is transformed with the train-fitted scaler.
X_heart_scaled = scaler.transform(X_heart)

# 5) Train and evaluate several classification models.

# Logistic Regression
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_h, y_train_h)
y_pred_log = log_reg.predict(X_test_h)
# ROC AUC ranks samples by a continuous score; feeding it hard 0/1 labels
# collapses the curve to a single operating point.  Use the predicted
# probability of the positive class (second column) instead.
y_score_log = log_reg.predict_proba(X_test_h)[:, 1]

print("Logistic Regression:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_log))
print("Precision:", precision_score(y_test_h, y_pred_log))
print("Recall:", recall_score(y_test_h, y_pred_log))
print("F1 Score:", f1_score(y_test_h, y_pred_log))
print("ROC AUC Score:", roc_auc_score(y_test_h, y_score_log))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_log))
print("\n")

# Decision Tree (depth limited to 3, fixed seed for reproducibility)
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train_h, y_train_h)
y_pred_dt = dt.predict(X_test_h)
# ROC AUC ranks samples by a continuous score; feeding it hard 0/1 labels
# collapses the curve to a single operating point.  Use the predicted
# probability of the positive class (second column) instead.
y_score_dt = dt.predict_proba(X_test_h)[:, 1]

print("Decision Tree:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_dt))
print("Precision:", precision_score(y_test_h, y_pred_dt))
print("Recall:", recall_score(y_test_h, y_pred_dt))
print("F1 Score:", f1_score(y_test_h, y_pred_dt))
print("ROC AUC Score:", roc_auc_score(y_test_h, y_score_dt))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_dt))
print("\n")

# SVM (linear kernel).  probability=True enables predict_proba, which is
# what the ROC AUC computation below relies on.
svm = SVC(kernel="linear", probability=True, random_state=42)
svm.fit(X_train_h, y_train_h)
y_pred_svm = svm.predict(X_test_h)
# ROC AUC ranks samples by a continuous score; feeding it hard 0/1 labels
# collapses the curve to a single operating point.  Use the predicted
# probability of the positive class (second column) instead.
y_score_svm = svm.predict_proba(X_test_h)[:, 1]

print("SVM:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_svm))
print("Precision:", precision_score(y_test_h, y_pred_svm))
print("Recall:", recall_score(y_test_h, y_pred_svm))
print("F1 Score:", f1_score(y_test_h, y_pred_svm))
print("ROC AUC Score:", roc_auc_score(y_test_h, y_score_svm))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_svm))
print("\n")

# Random Forest (100 trees, fixed seed for reproducibility)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_h, y_train_h)
y_pred_rf = rf.predict(X_test_h)
# ROC AUC ranks samples by a continuous score; feeding it hard 0/1 labels
# collapses the curve to a single operating point.  Use the predicted
# probability of the positive class (second column) instead.
y_score_rf = rf.predict_proba(X_test_h)[:, 1]

print("Random Forest:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_rf))
print("Precision:", precision_score(y_test_h, y_pred_rf))
print("Recall:", recall_score(y_test_h, y_pred_rf))
print("F1 Score:", f1_score(y_test_h, y_pred_rf))
print("ROC AUC Score:", roc_auc_score(y_test_h, y_score_rf))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_rf))
print("\n")

# XGBoost (50 shallow trees, log-loss evaluation metric)
# NOTE(review): `use_label_encoder` appears to be deprecated/ignored by
# recent XGBoost releases and may only trigger a warning there — confirm
# against the installed version before removing it.
xgb = XGBClassifier(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42, 
                    use_label_encoder=False, eval_metric="logloss")
xgb.fit(X_train_h, y_train_h)
y_pred_xgb = xgb.predict(X_test_h)
# ROC AUC ranks samples by a continuous score; feeding it hard 0/1 labels
# collapses the curve to a single operating point.  Use the predicted
# probability of the positive class (second column) instead.
y_score_xgb = xgb.predict_proba(X_test_h)[:, 1]

print("XGBoost:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_xgb))
print("Precision:", precision_score(y_test_h, y_pred_xgb))
print("Recall:", recall_score(y_test_h, y_pred_xgb))
print("F1 Score:", f1_score(y_test_h, y_pred_xgb))
print("ROC AUC Score:", roc_auc_score(y_test_h, y_score_xgb))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_xgb))

 

분류 모델 평가기준 비교 (수작업)

# Store each model's metrics in its own list, then assemble the table by hand.
# Each list is ordered as: Accuracy, Precision, Recall, F1 Score, ROC AUC.
# ROC AUC is computed from the positive-class probability (second column of
# predict_proba) rather than from hard 0/1 predictions, because the metric
# needs a continuous ranking score.
log_reg_results = [
    accuracy_score(y_test_h, y_pred_log),
    precision_score(y_test_h, y_pred_log),
    recall_score(y_test_h, y_pred_log),
    f1_score(y_test_h, y_pred_log),
    roc_auc_score(y_test_h, log_reg.predict_proba(X_test_h)[:, 1]),
]

dt_results = [
    accuracy_score(y_test_h, y_pred_dt),
    precision_score(y_test_h, y_pred_dt),
    recall_score(y_test_h, y_pred_dt),
    f1_score(y_test_h, y_pred_dt),
    roc_auc_score(y_test_h, dt.predict_proba(X_test_h)[:, 1]),
]

svm_results = [
    accuracy_score(y_test_h, y_pred_svm),
    precision_score(y_test_h, y_pred_svm),
    recall_score(y_test_h, y_pred_svm),
    f1_score(y_test_h, y_pred_svm),
    roc_auc_score(y_test_h, svm.predict_proba(X_test_h)[:, 1]),
]

rf_results = [
    accuracy_score(y_test_h, y_pred_rf),
    precision_score(y_test_h, y_pred_rf),
    recall_score(y_test_h, y_pred_rf),
    f1_score(y_test_h, y_pred_rf),
    roc_auc_score(y_test_h, rf.predict_proba(X_test_h)[:, 1]),
]

xgb_results = [
    accuracy_score(y_test_h, y_pred_xgb),
    precision_score(y_test_h, y_pred_xgb),
    recall_score(y_test_h, y_pred_xgb),
    f1_score(y_test_h, y_pred_xgb),
    roc_auc_score(y_test_h, xgb.predict_proba(X_test_h)[:, 1]),
]

# Build a DataFrame for side-by-side comparison (one row per metric).
import pandas as pd

performance_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC"],
    "Logistic Regression": log_reg_results,
    "Decision Tree": dt_results,
    "SVM": svm_results,
    "Random Forest": rf_results,
    "XGBoost": xgb_results
})

# Show the comparison table.  `ace_tools` is not a regular installable
# dependency of this script, so fall back to printing the table when it is
# missing.  The fallback is bound to the same `tools` name so later calls to
# tools.display_dataframe_to_user(...) keep working.
try:
    import ace_tools as tools
except ImportError:
    from types import SimpleNamespace

    def _print_dataframe(name, dataframe):
        """Print *dataframe* under the heading *name* as plain text."""
        print(name)
        print(dataframe.to_string())

    tools = SimpleNamespace(display_dataframe_to_user=_print_dataframe)

tools.display_dataframe_to_user(name="Model Performance Comparison", dataframe=performance_df)

 

분류 모델 평가기준 비교 (사용자정의함수)

# Helper that computes the standard classification metrics for one model.
def evaluate_model(model, X_test, y_test):
    """Return binary-classification metrics for a fitted model.

    Args:
        model: Fitted estimator exposing ``predict``; ``predict_proba`` or
            ``decision_function`` is used for ROC AUC when available.
        X_test: Feature matrix to evaluate on.
        y_test: True binary labels matching ``X_test``.

    Returns:
        dict mapping "Accuracy", "Precision", "Recall", "F1 Score" and
        "ROC AUC" to their scores.
    """
    y_pred = model.predict(X_test)

    # ROC AUC needs a continuous ranking score.  Hard 0/1 predictions reduce
    # the curve to a single point, so prefer probabilities, then decision
    # values, and only fall back to the labels when neither exists.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]  # positive-class column
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        y_score = y_pred

    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred),
        "ROC AUC": roc_auc_score(y_test, y_score)
    }

# Evaluate every fitted classifier and gather the results in one DataFrame
# (one column per model, one row per metric).
models = dict([
    ("Logistic Regression", log_reg),
    ("Decision Tree", dt),
    ("SVM", svm),
    ("Random Forest", rf),
    ("XGBoost", xgb),
])

performance_results = {
    label: evaluate_model(estimator, X_test_h, y_test_h)
    for label, estimator in models.items()
}
performance_df = pd.DataFrame(data=performance_results)

# Show the comparison table.
tools.display_dataframe_to_user(
    name="Model Performance Comparison",
    dataframe=performance_df,
)

 

  • evaluate_model() 함수는 학습된 모델과 테스트 데이터(X_test, y_test)를 넣으면 자동으로 정확도, 정밀도, 재현율, F1 점수, ROC AUC 점수를 계산해 딕셔너리로 반환합니다.
  • performance_results 딕셔너리를 활용해 모든 모델의 성능을 한 번에 비교할 수 있습니다.
  • for 문을 사용하지 않고 dict comprehension을 사용해 더 깔끔하게 정리했습니다.

 

분류 모델 평가기준 비교 ( for 문을 사용한 성능 비교 )

# Build the comparison table with an explicit for loop.
model_names = ["Logistic Regression", "Decision Tree", "SVM", "Random Forest", "XGBoost"]
models = [log_reg, dt, svm, rf, xgb]

performance_results = []

for name, model in zip(model_names, models):
    y_pred = model.predict(X_test_h)
    # ROC AUC needs a continuous ranking score, so use the positive-class
    # probability (second column) rather than the hard 0/1 predictions.
    # The SVC above is created with probability=True, so predict_proba is
    # called on every model in the list.
    y_score = model.predict_proba(X_test_h)[:, 1]
    performance_results.append([
        name,
        accuracy_score(y_test_h, y_pred),
        precision_score(y_test_h, y_pred),
        recall_score(y_test_h, y_pred),
        f1_score(y_test_h, y_pred),
        roc_auc_score(y_test_h, y_score)
    ])

# Create the DataFrame (one row per model, one column per metric).
performance_df = pd.DataFrame(performance_results, columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC"])

# Show the comparison table.
tools.display_dataframe_to_user(name="Model Performance Comparison", dataframe=performance_df)