회귀 모델 평가
# 📌 Wine 데이터 - 일반 Train/Test Split 후 다양한 회귀 모델 성능 평가
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# 1️⃣ Load the wine-quality data (semicolon-separated CSV)
wine_data = pd.read_csv("datasets/winequality-white.csv", sep=";")
# 2️⃣ Separate the features (X) from the target (y)
X_wine = wine_data.drop(columns=["quality"])
y_wine = wine_data["quality"]
# 3️⃣ Train/test split (80:20).  The split happens BEFORE scaling so that the
#    scaler statistics are never computed from held-out rows (no test leakage).
X_train_w, X_test_w, y_train_w, y_test_w = train_test_split(
    X_wine, y_wine, test_size=0.2, random_state=42
)
# 4️⃣ Standardize: fit on the training rows only, then apply the same
#    transform to the test rows.
scaler = StandardScaler()
X_train_w = scaler.fit_transform(X_train_w)
X_test_w = scaler.transform(X_test_w)
# Kept so that any later code referring to the fully scaled matrix still works;
# it is produced with the train-fitted scaler.
X_wine_scaled = scaler.transform(X_wine)
# 5️⃣ Train and evaluate several regression models
# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X_train_w, y_train_w)
y_pred_lin = lin_reg.predict(X_test_w)
print("Linear Regression:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_lin))
print("Mean Squared Error (MSE):", mean_squared_error(y_test_w, y_pred_lin))
# RMSE is computed as sqrt(MSE): the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so it raises TypeError on current versions.
print("Root Mean Squared Error (RMSE):", mean_squared_error(y_test_w, y_pred_lin) ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_lin))
print("\n")
# Random Forest Regressor (100 trees, fixed seed for reproducibility)
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
rf_reg.fit(X_train_w, y_train_w)
y_pred_rf = rf_reg.predict(X_test_w)
print("Random Forest Regressor:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_rf))
print("Mean Squared Error (MSE):", mean_squared_error(y_test_w, y_pred_rf))
# RMSE is computed as sqrt(MSE): the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so it raises TypeError on current versions.
print("Root Mean Squared Error (RMSE):", mean_squared_error(y_test_w, y_pred_rf) ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_rf))
print("\n")
# XGBoost Regressor (50 shallow trees, learning rate 0.1, fixed seed)
xgb_reg = XGBRegressor(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42)
xgb_reg.fit(X_train_w, y_train_w)
y_pred_xgb = xgb_reg.predict(X_test_w)
print("XGBoost Regressor:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_xgb))
print("Mean Squared Error (MSE):", mean_squared_error(y_test_w, y_pred_xgb))
# RMSE is computed as sqrt(MSE): the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so it raises TypeError on current versions.
print("Root Mean Squared Error (RMSE):", mean_squared_error(y_test_w, y_pred_xgb) ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_xgb))
print("\n")
# LightGBM Regressor (50 trees, learning rate 0.1, max_depth=-1, fixed seed)
lgbm_reg = LGBMRegressor(n_estimators=50, learning_rate=0.1, max_depth=-1, random_state=42)
lgbm_reg.fit(X_train_w, y_train_w)
y_pred_lgbm = lgbm_reg.predict(X_test_w)
print("LightGBM Regressor:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_w, y_pred_lgbm))
print("Mean Squared Error (MSE):", mean_squared_error(y_test_w, y_pred_lgbm))
# RMSE is computed as sqrt(MSE): the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so it raises TypeError on current versions.
print("Root Mean Squared Error (RMSE):", mean_squared_error(y_test_w, y_pred_lgbm) ** 0.5)
print("R² Score:", r2_score(y_test_w, y_pred_lgbm))
분류 모델 평가
# 📌 Heart 데이터 - 일반 Train/Test Split 후 다양한 분류 모델 성능 평가
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
# 1️⃣ Load the heart dataset
heart_data = pd.read_csv("datasets/heart.csv")
# 2️⃣ Separate the features (X) from the target (y, the "output" column)
X_heart = heart_data.drop(columns=["output"])
y_heart = heart_data["output"]
# 3️⃣ Train/test split (80:20).  The split happens BEFORE scaling so that the
#    scaler statistics are never computed from held-out rows (no test leakage).
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(
    X_heart, y_heart, test_size=0.2, random_state=42
)
# 4️⃣ Standardize: fit on the training rows only, then apply the same
#    transform to the test rows.
scaler = StandardScaler()
X_train_h = scaler.fit_transform(X_train_h)
X_test_h = scaler.transform(X_test_h)
# Kept so that any later code referring to the fully scaled matrix still works;
# it is produced with the train-fitted scaler.
X_heart_scaled = scaler.transform(X_heart)
# 5️⃣ Train and evaluate several classification models
# Logistic Regression
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_h, y_train_h)
y_pred_log = log_reg.predict(X_test_h)
print("Logistic Regression:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_log))
print("Precision:", precision_score(y_test_h, y_pred_log))
print("Recall:", recall_score(y_test_h, y_pred_log))
print("F1 Score:", f1_score(y_test_h, y_pred_log))
# ROC AUC measures ranking quality, so it is scored with the predicted
# probability of the positive class (column 1), not with thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test_h, log_reg.predict_proba(X_test_h)[:, 1]))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_log))
print("\n")
# Decision Tree (depth limited to 3, fixed seed)
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train_h, y_train_h)
y_pred_dt = dt.predict(X_test_h)
print("Decision Tree:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_dt))
print("Precision:", precision_score(y_test_h, y_pred_dt))
print("Recall:", recall_score(y_test_h, y_pred_dt))
print("F1 Score:", f1_score(y_test_h, y_pred_dt))
# ROC AUC measures ranking quality, so it is scored with the predicted
# probability of the positive class (column 1), not with thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test_h, dt.predict_proba(X_test_h)[:, 1]))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_dt))
print("\n")
# SVM (linear kernel; probability=True enables predict_proba, used for ROC AUC)
svm = SVC(kernel="linear", probability=True, random_state=42)
svm.fit(X_train_h, y_train_h)
y_pred_svm = svm.predict(X_test_h)
print("SVM:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_svm))
print("Precision:", precision_score(y_test_h, y_pred_svm))
print("Recall:", recall_score(y_test_h, y_pred_svm))
print("F1 Score:", f1_score(y_test_h, y_pred_svm))
# ROC AUC measures ranking quality, so it is scored with the predicted
# probability of the positive class (column 1), not with thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test_h, svm.predict_proba(X_test_h)[:, 1]))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_svm))
print("\n")
# Random Forest (100 trees, fixed seed)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_h, y_train_h)
y_pred_rf = rf.predict(X_test_h)
print("Random Forest:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_rf))
print("Precision:", precision_score(y_test_h, y_pred_rf))
print("Recall:", recall_score(y_test_h, y_pred_rf))
print("F1 Score:", f1_score(y_test_h, y_pred_rf))
# ROC AUC measures ranking quality, so it is scored with the predicted
# probability of the positive class (column 1), not with thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test_h, rf.predict_proba(X_test_h)[:, 1]))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_rf))
print("\n")
# XGBoost (50 shallow trees, learning rate 0.1, fixed seed)
# `use_label_encoder` is no longer passed: the argument was deprecated and then
# dropped by XGBoost, where it only triggers an "unused parameter" warning.
xgb = XGBClassifier(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
    eval_metric="logloss",
)
xgb.fit(X_train_h, y_train_h)
y_pred_xgb = xgb.predict(X_test_h)
print("XGBoost:")
print("Accuracy:", accuracy_score(y_test_h, y_pred_xgb))
print("Precision:", precision_score(y_test_h, y_pred_xgb))
print("Recall:", recall_score(y_test_h, y_pred_xgb))
print("F1 Score:", f1_score(y_test_h, y_pred_xgb))
# ROC AUC measures ranking quality, so it is scored with the predicted
# probability of the positive class (column 1), not with thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test_h, xgb.predict_proba(X_test_h)[:, 1]))
print("Confusion Matrix:\n", confusion_matrix(y_test_h, y_pred_xgb))
분류 모델 평가기준 비교 (수작업)
# Store each model's metrics in its own list, in the fixed order
# [accuracy, precision, recall, F1, ROC AUC], to be tabulated by hand.
# ROC AUC is scored with the positive-class probability (column 1 of
# predict_proba) rather than hard labels, which would discard ranking information.
log_reg_results = [
    accuracy_score(y_test_h, y_pred_log),
    precision_score(y_test_h, y_pred_log),
    recall_score(y_test_h, y_pred_log),
    f1_score(y_test_h, y_pred_log),
    roc_auc_score(y_test_h, log_reg.predict_proba(X_test_h)[:, 1]),
]
dt_results = [
    accuracy_score(y_test_h, y_pred_dt),
    precision_score(y_test_h, y_pred_dt),
    recall_score(y_test_h, y_pred_dt),
    f1_score(y_test_h, y_pred_dt),
    roc_auc_score(y_test_h, dt.predict_proba(X_test_h)[:, 1]),
]
svm_results = [
    accuracy_score(y_test_h, y_pred_svm),
    precision_score(y_test_h, y_pred_svm),
    recall_score(y_test_h, y_pred_svm),
    f1_score(y_test_h, y_pred_svm),
    roc_auc_score(y_test_h, svm.predict_proba(X_test_h)[:, 1]),
]
rf_results = [
    accuracy_score(y_test_h, y_pred_rf),
    precision_score(y_test_h, y_pred_rf),
    recall_score(y_test_h, y_pred_rf),
    f1_score(y_test_h, y_pred_rf),
    roc_auc_score(y_test_h, rf.predict_proba(X_test_h)[:, 1]),
]
xgb_results = [
    accuracy_score(y_test_h, y_pred_xgb),
    precision_score(y_test_h, y_pred_xgb),
    recall_score(y_test_h, y_pred_xgb),
    f1_score(y_test_h, y_pred_xgb),
    roc_auc_score(y_test_h, xgb.predict_proba(X_test_h)[:, 1]),
]
# Build the comparison table: one row per metric, one column per model.
import pandas as pd

# Start with the per-model columns ...
performance_df = pd.DataFrame(
    {
        "Logistic Regression": log_reg_results,
        "Decision Tree": dt_results,
        "SVM": svm_results,
        "Random Forest": rf_results,
        "XGBoost": xgb_results,
    }
)
# ... then put the metric labels in front as the first column.
performance_df.insert(
    0, "Metric", ["Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC"]
)
# Show the comparison table.
# `ace_tools` is only available inside the ChatGPT code sandbox; anywhere else
# the import fails, so a console-based stand-in with the same call signature
# is provided under the same `tools` name.
try:
    import ace_tools as tools
except ImportError:
    from types import SimpleNamespace

    def _display_dataframe_to_user(name, dataframe):
        """Print *dataframe* to stdout under the title *name*."""
        print(f"\n{name}")
        print(dataframe.to_string())

    tools = SimpleNamespace(display_dataframe_to_user=_display_dataframe_to_user)

tools.display_dataframe_to_user(name="Model Performance Comparison", dataframe=performance_df)
분류 모델 평가기준 비교 (사용자정의함수)
# Helper that scores one fitted binary classifier
def evaluate_model(model, X_test, y_test):
    """Return the standard binary-classification metrics for a fitted model.

    Args:
        model: Fitted classifier exposing ``predict``; ``predict_proba`` or
            ``decision_function`` is used for ROC AUC when available.
        X_test: Feature matrix to predict on.
        y_test: True binary labels for ``X_test``.

    Returns:
        dict mapping "Accuracy", "Precision", "Recall", "F1 Score" and
        "ROC AUC" to their values.
    """
    y_pred = model.predict(X_test)

    # ROC AUC measures ranking quality, so prefer continuous scores over
    # thresholded labels; fall back to labels only if nothing else is available.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]  # positive-class probability
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        y_score = y_pred

    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred),
        "ROC AUC": roc_auc_score(y_test, y_score),
    }
# Score every fitted model and gather the results into one DataFrame
# (metrics as rows, models as columns).
models = dict(
    zip(
        ("Logistic Regression", "Decision Tree", "SVM", "Random Forest", "XGBoost"),
        (log_reg, dt, svm, rf, xgb),
    )
)
performance_results = {
    label: evaluate_model(estimator, X_test_h, y_test_h)
    for label, estimator in models.items()
}
performance_df = pd.DataFrame.from_dict(performance_results, orient="columns")
# Show the comparison table
tools.display_dataframe_to_user(
    name="Model Performance Comparison",
    dataframe=performance_df,
)
- evaluate_model() 함수는 학습된 모델과 테스트 데이터(X_test, y_test)를 넣으면 정확도, 정밀도, 재현율, F1 점수, ROC AUC 점수를 한 번에 계산해 딕셔너리로 반환합니다.
- performance_results 딕셔너리를 활용해 모든 모델의 성능을 한 번에 비교할 수 있습니다.
- 별도의 for 루프 블록을 작성하는 대신 dict comprehension을 사용해 더 깔끔하게 정리했습니다.
분류 모델 평가기준 비교 ( for 문을 사용한 성능 비교 )
# Build the comparison table with an explicit for loop
model_names = ["Logistic Regression", "Decision Tree", "SVM", "Random Forest", "XGBoost"]
models = [log_reg, dt, svm, rf, xgb]
performance_results = []
for name, model in zip(model_names, models):
    y_pred = model.predict(X_test_h)
    # ROC AUC measures ranking quality, so it is scored with the predicted
    # probability of the positive class (column 1), not with thresholded labels.
    y_score = model.predict_proba(X_test_h)[:, 1]
    performance_results.append([
        name,
        accuracy_score(y_test_h, y_pred),
        precision_score(y_test_h, y_pred),
        recall_score(y_test_h, y_pred),
        f1_score(y_test_h, y_pred),
        roc_auc_score(y_test_h, y_score),
    ])
# One row per model, one column per metric
performance_df = pd.DataFrame(
    performance_results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC"],
)
# Show the comparison table
tools.display_dataframe_to_user(name="Model Performance Comparison", dataframe=performance_df)
'파이썬' 카테고리의 다른 글
12-4. 머신러닝 회귀모델 총정리 (이패스 신성진 김용재 와 함께하는 AI모델링) (0) | 2025.03.18 |
---|---|
12-3. 머신러닝 분류모델 총정리(이패스 신성진 김용재 와 함께하는 AI모델링) (0) | 2025.03.18 |
12-1 데이터분할(분할, k-fold) (0) | 2025.03.18 |
12-2. 머신러닝 지도학습 (0) | 2025.03.18 |
11. 시각화 (0) | 2025.03.18 |