# Take a working copy of the `Default` data set (presumably provided by the
# ISLR package -- confirm it is attached earlier in the document) and show
# its first five rows.
data <- Default
head(data, n = 5)
## default student balance income
## 1 No No 729.5265 44361.63
## 2 No Yes 817.1804 12106.13
## 3 No No 1073.5492 31767.14
## 4 No No 529.2506 35704.49
## 5 No No 785.6559 38463.50
Based on the results, income and balance are both statistically significant predictors of default, and both have positive coefficients. That is, holding the other predictor fixed, a higher income (or a higher balance) is associated with a higher estimated probability of default.
# Logistic regression of default status on income and balance, fitted to the
# complete data set; the summary reports coefficient estimates and z-tests.
set.seed(123)
model_a <- glm(
  formula = default ~ income + balance,
  family = "binomial",
  data = data
)
summary(model_a)
##
## Call:
## glm(formula = default ~ income + balance, family = "binomial",
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4725 -0.1444 -0.0574 -0.0211 3.7245
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.154e+01 4.348e-01 -26.545 < 2e-16 ***
## income 2.081e-05 4.985e-06 4.174 2.99e-05 ***
## balance 5.647e-03 2.274e-04 24.836 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1579.0 on 9997 degrees of freedom
## AIC: 1585
##
## Number of Fisher Scoring iterations: 8
# i. Split the observations into a training set (80%) and a validation set
#    (20%), stratified on `default` so both parts keep a similar class mix.
set.seed(123)
data_split <- initial_split(data, prop = 0.8, strata = default)
data_training <- training(data_split)
data_testing <- testing(data_split)
# ii. Fit the logistic regression on the training observations only.
model_b2 <- glm(
  default ~ income + balance,
  data = data_training,
  family = "binomial"
)
# The argument of predict.glm() is `newdata`. A misspelled `new_data` is
# silently absorbed by `...`, and predict() then returns fitted values for
# the *training* rows instead of predictions for the validation rows.
predict_b2 <- predict(model_b2, newdata = data_testing, type = "response")
# iii. Classify as a defaulter when the predicted probability exceeds 0.5.
predict_binary_b2 <- ifelse(predict_b2 > 0.5, "Yes", "No")
# iv. Validation set error: share of validation observations whose predicted
#     label differs from the observed `default` label.
val_error_b2 <- mean(predict_binary_b2 != data_testing$default)
##head(data_testing,5)
# Repeat the validation-set procedure on a fresh random 80/20 split
# (no seed is reset here, so this split differs from the previous one).
data_split <- initial_split(data, prop = 0.8, strata = default)
data_training <- training(data_split)
data_testing <- testing(data_split)
# ii. Fit the logistic regression on the training observations only.
model_b2 <- glm(
  default ~ income + balance,
  data = data_training,
  family = "binomial"
)
# The argument of predict.glm() is `newdata`. A misspelled `new_data` is
# silently absorbed by `...`, and predict() then returns fitted values for
# the *training* rows instead of predictions for the validation rows.
predict_b2 <- predict(model_b2, newdata = data_testing, type = "response")
# iii. Classify as a defaulter when the predicted probability exceeds 0.5.
predict_binary_b2 <- ifelse(predict_b2 > 0.5, "Yes", "No")
# iv. Validation set error: share of validation observations whose predicted
#     label differs from the observed `default` label.
val_error_b2 <- mean(predict_binary_b2 != data_testing$default)
##head(data_testing,5)
# Repeat the validation-set procedure on a fresh random 80/20 split
# (no seed is reset here, so this split differs from the previous one).
data_split <- initial_split(data, prop = 0.8, strata = default)
data_training <- training(data_split)
data_testing <- testing(data_split)
# ii. Fit the logistic regression on the training observations only.
model_b2 <- glm(
  default ~ income + balance,
  data = data_training,
  family = "binomial"
)
# The argument of predict.glm() is `newdata`. A misspelled `new_data` is
# silently absorbed by `...`, and predict() then returns fitted values for
# the *training* rows instead of predictions for the validation rows.
predict_b2 <- predict(model_b2, newdata = data_testing, type = "response")
# iii. Classify as a defaulter when the predicted probability exceeds 0.5.
predict_binary_b2 <- ifelse(predict_b2 > 0.5, "Yes", "No")
# iv. Validation set error: share of validation observations whose predicted
#     label differs from the observed `default` label.
val_error_b2 <- mean(predict_binary_b2 != data_testing$default)
##head(data_testing,5)
# Repeat the validation-set procedure on a fresh random 80/20 split
# (no seed is reset here, so this split differs from the previous one).
data_split <- initial_split(data, prop = 0.8, strata = default)
data_training <- training(data_split)
data_testing <- testing(data_split)
# ii. Fit the logistic regression on the training observations only.
model_b2 <- glm(
  default ~ income + balance,
  data = data_training,
  family = "binomial"
)
# The argument of predict.glm() is `newdata`. A misspelled `new_data` is
# silently absorbed by `...`, and predict() then returns fitted values for
# the *training* rows instead of predictions for the validation rows.
predict_b2 <- predict(model_b2, newdata = data_testing, type = "response")
# iii. Classify as a defaulter when the predicted probability exceeds 0.5.
predict_binary_b2 <- ifelse(predict_b2 > 0.5, "Yes", "No")
# iv. Validation set error: share of validation observations whose predicted
#     label differs from the observed `default` label.
val_error_b2 <- mean(predict_binary_b2 != data_testing$default)
##head(data_testing,5)
# Logistic regression that adds the `student` indicator to income and
# balance, fitted on the current training split.
model_d <- glm(
  default ~ income + balance + student,
  data = data_training,
  family = "binomial"
)
# The argument of predict.glm() is `newdata`. A misspelled `new_data` is
# silently absorbed by `...`, and predict() then returns fitted values for
# the *training* rows instead of predictions for the validation rows.
predict_d <- predict(model_d, newdata = data_testing, type = "response")
# Classify as a defaulter when the predicted probability exceeds 0.5.
predict_binary_d <- ifelse(predict_d > 0.5, "Yes", "No")
# Validation set error for this model: share of validation observations whose
# predicted label differs from the observed `default` label. The comparison
# uses this model's predictions (`predict_binary_d`), not those of model_b2.
val_error_d <- mean(predict_binary_d != data_testing$default)
##head(data_testing,5)