Sameer Mathur
Using Default Data from ISLR Package
---
library(ISLR)
library(data.table)
# Convert the built-in `Default` data set (from the ISLR package) to a data.table
default.dt <- data.table(Default)
# Check the dimensions (rows, columns) of the data table
dim(default.dt)
[1] 10000 4
# Fit a simple logistic regression: default explained by a single predictor, balance
Model1 <- glm(default ~ balance, data = default.dt, family = binomial())
# Print the model summary (coefficients, deviances, AIC)
summary(Model1)
Call:
glm(formula = default ~ balance, family = binomial(), data = default.dt)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2697 -0.1465 -0.0589 -0.0221 3.7589
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.065e+01 3.612e-01 -29.49 <2e-16 ***
balance 5.499e-03 2.204e-04 24.95 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2920.6 on 9999 degrees of freedom
Residual deviance: 1596.5 on 9998 degrees of freedom
AIC: 1600.5
Number of Fisher Scoring iterations: 8
# Build a one-row data frame holding the balance value to predict for
newdata1 <- data.frame(balance = 2000)
newdata1
balance
1 2000
# Predict for the new observation; type = "response" returns the fitted
# probability rather than the log-odds
predict(Model1, newdata1, type = "response")
1
0.5857694
# Fit a simple logistic regression: default explained by a single predictor, student
# (student is categorical, so the summary reports a `studentYes` coefficient)
Model2 <- glm(default ~ student,
data = default.dt, family = binomial())
# Print the model summary (coefficients, deviances, AIC)
summary(Model2)
Call:
glm(formula = default ~ student, family = binomial(), data = default.dt)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.2970 -0.2970 -0.2434 -0.2434 2.6585
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.50413 0.07071 -49.55 < 2e-16 ***
studentYes 0.40489 0.11502 3.52 0.000431 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2920.6 on 9999 degrees of freedom
Residual deviance: 2908.7 on 9998 degrees of freedom
AIC: 2912.7
Number of Fisher Scoring iterations: 6
# Build a one-row data frame for an observation with student = "Yes"
newdata2 <- data.frame(student = "Yes")
newdata2
student
1 Yes
# Predict for the new observation; type = "response" returns the fitted
# probability rather than the log-odds
predict(Model2, newdata2, type = "response")
1
0.04313859
# Fit a multiple logistic regression: `.` uses every other column of
# default.dt (student, balance, income) as a predictor of default
Model3 <- glm(default ~ .,
data = default.dt, family = binomial())
# Print the model summary (coefficients, deviances, AIC)
summary(Model3)
Call:
glm(formula = default ~ ., family = binomial(), data = default.dt)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4691 -0.1418 -0.0557 -0.0203 3.7383
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.087e+01 4.923e-01 -22.080 < 2e-16 ***
studentYes -6.468e-01 2.363e-01 -2.738 0.00619 **
balance 5.737e-03 2.319e-04 24.738 < 2e-16 ***
income 3.033e-06 8.203e-06 0.370 0.71152
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2920.6 on 9999 degrees of freedom
Residual deviance: 1571.5 on 9996 degrees of freedom
AIC: 1579.5
Number of Fisher Scoring iterations: 8
# Build a one-row data frame supplying a value for every predictor in Model3
newdata3 <- data.frame(balance = 2000,income = 30000,student = "Yes")
newdata3
balance income student
1 2000 30000 Yes
# Predict for the new observation; type = "response" returns the fitted
# probability rather than the log-odds
predict(Model3, newdata3, type = "response")
1
0.5120459