Deepak Krishnan
library(ISLR)
library(data.table)
# Convert the Default data set (attached via ISLR) into a data.table
default.dt <- as.data.table(Default)
# Show the number of rows and columns of the table
dim(default.dt)
[1] 10000 4
# Fit a logistic regression of default on the single predictor balance
Model1 <- glm(
  formula = default ~ balance,
  family = binomial(),
  data = default.dt
)
# Print coefficient estimates, deviances and AIC for the fit
summary(Model1)
Call:
glm(formula = default ~ balance, family = binomial(), data = default.dt)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2697 -0.1465 -0.0589 -0.0221 3.7589
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.065e+01 3.612e-01 -29.49 <2e-16 ***
balance 5.499e-03 2.204e-04 24.95 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2920.6 on 9999 degrees of freedom
Residual deviance: 1596.5 on 9998 degrees of freedom
AIC: 1600.5
Number of Fisher Scoring iterations: 8
# Build a one-row data frame holding the balance value to predict for
newdata1 <- data.frame(balance = 1500)
print(newdata1)
balance
1 1500
# Predicted probability of default from Model1 for the row in newdata1
# (type = "response" returns the probability rather than the log-odds)
predict(Model1, newdata = newdata1, type = "response")
1
0.08294762
# Fit a logistic regression of default on the single predictor student
Model2 <- glm(
  formula = default ~ student,
  family = binomial(),
  data = default.dt
)
# Print coefficient estimates, deviances and AIC for the fit
summary(Model2)
Call:
glm(formula = default ~ student, family = binomial(), data = default.dt)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.2970 -0.2970 -0.2434 -0.2434 2.6585
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.50413 0.07071 -49.55 < 2e-16 ***
studentYes 0.40489 0.11502 3.52 0.000431 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2920.6 on 9999 degrees of freedom
Residual deviance: 2908.7 on 9998 degrees of freedom
AIC: 2912.7
Number of Fisher Scoring iterations: 6
# Build a one-row data frame describing a non-student
newdata2 <- data.frame(student = "No")
print(newdata2)
student
1 No
# Predicted probability of default from Model2 for the row in newdata2
# (type = "response" returns the probability rather than the log-odds)
predict(Model2, newdata = newdata2, type = "response")
1
0.02919501
# Fit a multiple logistic regression of default on every other column
# of default.dt (the `.` on the right-hand side of the formula)
Model3 <- glm(
  formula = default ~ .,
  family = binomial(),
  data = default.dt
)
# Print coefficient estimates, deviances and AIC for the fit
summary(Model3)
Call:
glm(formula = default ~ ., family = binomial(), data = default.dt)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4691 -0.1418 -0.0557 -0.0203 3.7383
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.087e+01 4.923e-01 -22.080 < 2e-16 ***
studentYes -6.468e-01 2.363e-01 -2.738 0.00619 **
balance 5.737e-03 2.319e-04 24.738 < 2e-16 ***
income 3.033e-06 8.203e-06 0.370 0.71152
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2920.6 on 9999 degrees of freedom
Residual deviance: 1571.5 on 9996 degrees of freedom
AIC: 1579.5
Number of Fisher Scoring iterations: 8
# Build a one-row data frame for a student whose balance and income are
# set to the sample means of default.dt
newdata3 <- data.frame(
  balance = mean(default.dt[["balance"]]),
  income = mean(default.dt[["income"]]),
  student = "Yes"
)
print(newdata3)
balance income student
1 835.3749 33516.98 Yes
# Predicted probability of default from Model3 for the student row
# currently held in newdata3
predict(Model3, newdata = newdata3, type = "response")
1
0.001328976
# Rebuild newdata3 (overwriting the previous value) for a non-student
# with the same sample-mean balance and income
newdata3 <- data.frame(
  balance = mean(default.dt[["balance"]]),
  income = mean(default.dt[["income"]]),
  student = "No"
)
print(newdata3)
balance income student
1 835.3749 33516.98 No
# Predicted probability of default from Model3 for the non-student row
# currently held in newdata3
predict(Model3, newdata = newdata3, type = "response")
1
0.002534451