파이썬 이것저것/파이썬 머신러닝

[파이썬] XGB 활용하여 성적예측

agingcurve 2022. 4. 23. 18:27
반응형

 

import pandas as pd
import numpy as np
# Read the train/test feature tables and the train/test target tables from
# CSV files in the current working directory, in the order listed.
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ("X_train.csv", "X_test.csv", "y_train.csv", "y_test.csv")
)

 

# Original note: the train rows end at index 678 in the combined table.
# Stack the train and test features row-wise into one combined table, then
# remove the StudentID column from the features and from the train targets.
alldata = pd.concat((X_train, X_test))
alldata2 = alldata.drop(columns="StudentID")
y_train_dr = y_train.drop(columns="StudentID")

from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column on a copy of the combined table.
# One encoder instance is refit per column, so once the loop finishes `le`
# only remembers the classes of the last column it processed.
le = LabelEncoder()
alldata3 = alldata2.copy()
is_object_col = alldata3.dtypes == object
a = alldata3.columns[is_object_col]
for col in a:
    encoded = le.fit_transform(alldata3[col])
    alldata3[col] = encoded
# Summary statistics; the return value is not stored, so this only shows
# something when evaluated interactively (e.g. last line of a notebook cell).
alldata3.describe()
# Split the encoded table back into its train and test parts.
# The boundary comes from the length of the training frame instead of a
# hard-coded row count, so the split stays aligned with the data if the CSV
# files change. `.iloc` makes the positional slicing explicit.
n_train_rows = len(X_train)
X_train_df = alldata3.iloc[:n_train_rows]
X_test_df = alldata3.iloc[n_train_rows:]
# Train the models.
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

xgb = XGBRegressor(learning_rate=0.1)
rf = RandomForestRegressor(random_state =42)

# Hold out 33% of the training rows as a validation set.
# NOTE: this rebinds `y_train`, which until now referred to the frame read
# from y_train.csv; from here on it is the training part of the split.
X_train_model, X_test_valid, y_train, y_test_vaild = train_test_split(
    X_train_df, y_train_dr, test_size=0.33, random_state=42
)

# Collapse a single-column target frame to a 1-D Series before fitting so
# scikit-learn does not emit a DataConversionWarning for a column-vector y.
# `squeeze(axis=1)` leaves a multi-column frame untouched.
y_fit = y_train.squeeze(axis=1)
rf.fit(X_train_model, y_fit)
xgb.fit(X_train_model, y_fit)

# Score the random forest on the held-out rows. r2_score expects
# (y_true, y_pred); R^2 is not symmetric, so the order matters.
y_valid_pred = rf.predict(X_test_valid)
valid_r2 = r2_score(y_test_vaild, y_valid_pred)
# Printed so the score is visible when this runs as a plain script.
print("validation R2 (random forest):", valid_r2)

# Random-forest predictions for the test feature table.
y_pred = rf.predict(X_test_df)

 

# Submission: copy the test target frame and overwrite its G3 column with
# the predictions currently held in `y_pred`.
sub = y_test.copy()
sub["G3"] = y_pred
# r2_score expects (y_true, y_pred): the true grades come from y_test and the
# predictions from the submission frame. R^2 is not symmetric in its inputs.
test_r2 = r2_score(y_test["G3"], sub["G3"])
# Printed so the score is visible when this runs as a plain script.
print("test R2:", test_r2)

# Rebind `y_pred` to the XGBoost predictions for the test features; `sub`
# built above keeps the earlier predictions.
y_pred = xgb.predict(X_test_df)