library(car)
## Loading required package: carData
library(ResourceSelection)
## ResourceSelection 0.3-6 2023-06-27
library(MASS)
mat <- read.csv("~/Documents/Regression Analysis/final/student-mat.csv",sep=";")
# Define a binary y-variable (failing:0 vs. passing:1)
mat$y_pass <- ifelse(mat$G3 >= 0 & mat$G3 <= 11, 0, 1)
# Exclude these data fields because we want to exclusively look at behavior and environment
mat <- mat[, !names(mat) %in% c("G1", "G2", "G3","failures","school","age","sex","health")]
names(mat) #response (y_pass) & predictor variables
## [1] "address" "famsize" "Pstatus" "Medu" "Fedu"
## [6] "Mjob" "Fjob" "reason" "guardian" "traveltime"
## [11] "studytime" "schoolsup" "famsup" "paid" "activities"
## [16] "nursery" "higher" "internet" "romantic" "famrel"
## [21] "freetime" "goout" "Dalc" "Walc" "absences"
## [26] "y_pass"
# Full logistic regression model
model_full <- glm(y_pass ~ as.factor(address) + as.factor(famsize) +
as.factor(Pstatus) + Medu + Fedu + as.factor(Mjob) + as.factor(Fjob) + as.factor(reason) + as.factor(guardian) + traveltime + studytime + as.factor(schoolsup) + as.factor(famsup) + as.factor(paid) + as.factor(activities) + as.factor(nursery) + as.factor(higher) + as.factor(internet) + as.factor(romantic) + famrel + freetime + goout + Dalc + Walc + absences, data = mat, family=binomial)
summary(model_full) # AIC: 528.19
##
## Call:
## glm(formula = y_pass ~ as.factor(address) + as.factor(famsize) +
## as.factor(Pstatus) + Medu + Fedu + as.factor(Mjob) + as.factor(Fjob) +
## as.factor(reason) + as.factor(guardian) + traveltime + studytime +
## as.factor(schoolsup) + as.factor(famsup) + as.factor(paid) +
## as.factor(activities) + as.factor(nursery) + as.factor(higher) +
## as.factor(internet) + as.factor(romantic) + famrel + freetime +
## goout + Dalc + Walc + absences, family = binomial, data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.62824 1.29867 -1.254 0.20992
## as.factor(address)U 0.41813 0.31935 1.309 0.19042
## as.factor(famsize)LE3 0.51155 0.26480 1.932 0.05338 .
## as.factor(Pstatus)T 0.12638 0.39124 0.323 0.74667
## Medu 0.35127 0.17570 1.999 0.04558 *
## Fedu 0.15232 0.14745 1.033 0.30159
## as.factor(Mjob)health 0.31019 0.60235 0.515 0.60658
## as.factor(Mjob)other -0.15904 0.39877 -0.399 0.69002
## as.factor(Mjob)services 0.01389 0.43019 0.032 0.97424
## as.factor(Mjob)teacher -1.00198 0.56576 -1.771 0.07656 .
## as.factor(Fjob)health -0.27181 0.77631 -0.350 0.72624
## as.factor(Fjob)other -0.22016 0.54541 -0.404 0.68646
## as.factor(Fjob)services -0.26782 0.56135 -0.477 0.63329
## as.factor(Fjob)teacher 0.56540 0.69821 0.810 0.41806
## as.factor(reason)home -0.03841 0.30212 -0.127 0.89885
## as.factor(reason)other 0.22625 0.44095 0.513 0.60788
## as.factor(reason)reputation 0.42878 0.31192 1.375 0.16924
## as.factor(guardian)mother 0.04406 0.29542 0.149 0.88144
## as.factor(guardian)other -0.47516 0.51284 -0.927 0.35417
## traveltime -0.16422 0.19281 -0.852 0.39439
## studytime 0.17517 0.15272 1.147 0.25138
## as.factor(schoolsup)yes -1.60860 0.42428 -3.791 0.00015 ***
## as.factor(famsup)yes -0.47704 0.25702 -1.856 0.06345 .
## as.factor(paid)yes -0.09770 0.25828 -0.378 0.70522
## as.factor(activities)yes -0.06395 0.24070 -0.266 0.79049
## as.factor(nursery)yes -0.19930 0.29953 -0.665 0.50580
## as.factor(higher)yes 0.71212 0.63570 1.120 0.26263
## as.factor(internet)yes 0.28561 0.33903 0.842 0.39954
## as.factor(romantic)yes -0.17692 0.25512 -0.693 0.48801
## famrel 0.01100 0.13038 0.084 0.93273
## freetime 0.16716 0.12967 1.289 0.19738
## goout -0.35497 0.12712 -2.792 0.00523 **
## Dalc 0.06428 0.18688 0.344 0.73088
## Walc -0.07395 0.13805 -0.536 0.59216
## absences -0.03270 0.01851 -1.767 0.07724 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 458.19 on 360 degrees of freedom
## AIC: 528.19
##
## Number of Fisher Scoring iterations: 4
# Null model
model_null <- glm(y_pass ~ 1, data = mat, family=binomial)
# Automated Stepwise Regression AIC 499.91
step_model1 <- stepAIC(model_null,
scope = list(lower = model_null, upper = model_full),
direction = "both", family=binomial)
## Start: AIC=536.75
## y_pass ~ 1
##
## Df Deviance AIC
## + as.factor(schoolsup) 1 520.21 524.21
## + Medu 1 521.47 525.47
## + Fedu 1 526.45 530.45
## + goout 1 528.24 532.24
## + absences 1 529.27 533.27
## + traveltime 1 530.01 534.01
## + as.factor(Mjob) 4 524.13 534.13
## + Walc 1 530.37 534.37
## + as.factor(higher) 1 530.56 534.56
## + as.factor(address) 1 530.71 534.71
## <none> 534.75 536.75
## + as.factor(famsize) 1 532.78 536.78
## + as.factor(internet) 1 532.79 536.79
## + studytime 1 532.87 536.87
## + Dalc 1 533.14 537.14
## + as.factor(famsup) 1 533.54 537.54
## + as.factor(guardian) 2 531.69 537.69
## + as.factor(romantic) 1 534.54 538.54
## + freetime 1 534.60 538.60
## + as.factor(activities) 1 534.65 538.65
## + as.factor(nursery) 1 534.66 538.66
## + famrel 1 534.70 538.70
## + as.factor(reason) 3 530.71 538.71
## + as.factor(Pstatus) 1 534.75 538.75
## + as.factor(paid) 1 534.75 538.75
## + as.factor(Fjob) 4 530.34 540.34
##
## Step: AIC=524.21
## y_pass ~ as.factor(schoolsup)
##
## Df Deviance AIC
## + Medu 1 507.52 513.52
## + Fedu 1 510.77 516.77
## + goout 1 512.64 518.64
## + Walc 1 514.12 520.12
## + absences 1 514.92 520.92
## + traveltime 1 515.10 521.10
## + as.factor(higher) 1 515.13 521.13
## + as.factor(address) 1 515.66 521.66
## + as.factor(Mjob) 4 510.55 522.55
## + studytime 1 517.85 523.85
## + as.factor(guardian) 2 516.14 524.14
## <none> 520.21 524.21
## + as.factor(internet) 1 518.27 524.27
## + Dalc 1 518.31 524.31
## + as.factor(famsize) 1 518.46 524.46
## + as.factor(romantic) 1 519.62 525.62
## + as.factor(famsup) 1 519.67 525.67
## + as.factor(reason) 3 515.74 525.74
## + as.factor(Fjob) 4 513.81 525.81
## + as.factor(activities) 1 519.96 525.96
## + as.factor(nursery) 1 519.98 525.98
## + freetime 1 520.16 526.16
## + famrel 1 520.16 526.16
## + as.factor(Pstatus) 1 520.16 526.16
## + as.factor(paid) 1 520.20 526.20
## - as.factor(schoolsup) 1 534.75 536.75
##
## Step: AIC=513.52
## y_pass ~ as.factor(schoolsup) + Medu
##
## Df Deviance AIC
## + goout 1 498.63 506.63
## + absences 1 500.04 508.04
## + Walc 1 502.06 510.06
## + as.factor(higher) 1 504.62 512.62
## + traveltime 1 504.62 512.62
## + as.factor(address) 1 504.69 512.69
## + Dalc 1 505.30 513.30
## + as.factor(famsize) 1 505.37 513.37
## + as.factor(famsup) 1 505.40 513.40
## <none> 507.52 513.52
## + studytime 1 505.77 513.77
## + as.factor(guardian) 2 504.18 514.18
## + Fedu 1 506.29 514.29
## + as.factor(romantic) 1 506.70 514.70
## + as.factor(paid) 1 506.95 514.95
## + as.factor(internet) 1 507.11 515.11
## + as.factor(Mjob) 4 501.24 515.24
## + famrel 1 507.46 515.46
## + as.factor(nursery) 1 507.47 515.47
## + as.factor(Pstatus) 1 507.48 515.48
## + freetime 1 507.50 515.50
## + as.factor(activities) 1 507.51 515.51
## + as.factor(reason) 3 504.41 516.41
## + as.factor(Fjob) 4 504.54 518.54
## - Medu 1 520.21 524.21
## - as.factor(schoolsup) 1 521.47 525.47
##
## Step: AIC=506.63
## y_pass ~ as.factor(schoolsup) + Medu + goout
##
## Df Deviance AIC
## + absences 1 491.95 501.95
## + as.factor(address) 1 494.97 504.97
## + traveltime 1 495.85 505.85
## + as.factor(famsize) 1 496.10 506.10
## + as.factor(higher) 1 496.11 506.11
## + as.factor(famsup) 1 496.23 506.23
## <none> 498.63 506.63
## + as.factor(Mjob) 4 491.15 507.15
## + Walc 1 497.20 507.20
## + Fedu 1 497.28 507.28
## + studytime 1 497.30 507.30
## + freetime 1 497.63 507.63
## + as.factor(guardian) 2 495.69 507.69
## + as.factor(romantic) 1 497.73 507.73
## + as.factor(internet) 1 497.90 507.90
## + Dalc 1 498.06 508.06
## + as.factor(paid) 1 498.10 508.10
## + famrel 1 498.45 508.45
## + as.factor(Pstatus) 1 498.57 508.57
## + as.factor(activities) 1 498.58 508.58
## + as.factor(nursery) 1 498.58 508.58
## + as.factor(reason) 3 495.91 509.91
## + as.factor(Fjob) 4 496.13 512.13
## - goout 1 507.52 513.52
## - Medu 1 512.64 518.64
## - as.factor(schoolsup) 1 513.42 519.42
##
## Step: AIC=501.95
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences
##
## Df Deviance AIC
## + as.factor(address) 1 488.78 500.78
## + as.factor(famsize) 1 488.96 500.96
## + traveltime 1 489.13 501.13
## + as.factor(famsup) 1 489.47 501.47
## <none> 491.95 501.95
## + as.factor(higher) 1 490.04 502.04
## + as.factor(Mjob) 4 484.18 502.18
## + as.factor(internet) 1 490.85 502.85
## + Fedu 1 490.89 502.89
## + studytime 1 491.03 503.03
## + Walc 1 491.23 503.23
## + freetime 1 491.30 503.30
## + as.factor(paid) 1 491.34 503.34
## + as.factor(romantic) 1 491.68 503.68
## + Dalc 1 491.71 503.71
## + famrel 1 491.87 503.87
## + as.factor(nursery) 1 491.90 503.90
## + as.factor(Pstatus) 1 491.93 503.93
## + as.factor(activities) 1 491.94 503.94
## + as.factor(guardian) 2 490.34 504.34
## + as.factor(reason) 3 488.70 504.70
## - absences 1 498.63 506.63
## + as.factor(Fjob) 4 489.85 507.85
## - goout 1 500.04 508.04
## - as.factor(schoolsup) 1 506.73 514.73
## - Medu 1 507.92 515.92
##
## Step: AIC=500.78
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address)
##
## Df Deviance AIC
## + as.factor(famsize) 1 486.14 500.14
## + as.factor(famsup) 1 486.23 500.23
## <none> 488.78 500.78
## + as.factor(higher) 1 486.90 500.90
## + traveltime 1 487.43 501.43
## + as.factor(Mjob) 4 481.61 501.61
## + Fedu 1 487.65 501.65
## + studytime 1 487.74 501.74
## - as.factor(address) 1 491.95 501.95
## + freetime 1 488.07 502.07
## + as.factor(paid) 1 488.08 502.08
## + as.factor(internet) 1 488.28 502.28
## + Walc 1 488.41 502.41
## + as.factor(romantic) 1 488.50 502.50
## + as.factor(reason) 3 484.62 502.62
## + as.factor(nursery) 1 488.71 502.71
## + famrel 1 488.71 502.71
## + Dalc 1 488.73 502.73
## + as.factor(activities) 1 488.73 502.73
## + as.factor(Pstatus) 1 488.77 502.77
## + as.factor(guardian) 2 487.12 503.12
## - absences 1 494.97 504.97
## + as.factor(Fjob) 4 486.58 506.58
## - goout 1 497.58 507.58
## - Medu 1 502.84 512.84
## - as.factor(schoolsup) 1 503.93 513.93
##
## Step: AIC=500.14
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize)
##
## Df Deviance AIC
## + as.factor(famsup) 1 484.07 500.07
## <none> 486.14 500.14
## + as.factor(Mjob) 4 478.25 500.25
## + as.factor(higher) 1 484.31 500.31
## + traveltime 1 484.42 500.42
## - as.factor(famsize) 1 488.78 500.78
## + studytime 1 484.88 500.88
## + Fedu 1 484.88 500.88
## - as.factor(address) 1 488.96 500.96
## + freetime 1 485.44 501.44
## + as.factor(paid) 1 485.45 501.45
## + Walc 1 485.58 501.58
## + as.factor(internet) 1 485.61 501.61
## + as.factor(romantic) 1 485.82 501.82
## + as.factor(reason) 3 481.88 501.88
## + as.factor(nursery) 1 485.94 501.94
## + Dalc 1 485.97 501.97
## + famrel 1 486.05 502.05
## + as.factor(activities) 1 486.09 502.09
## + as.factor(Pstatus) 1 486.10 502.10
## + as.factor(guardian) 2 484.57 502.57
## - absences 1 492.80 504.80
## + as.factor(Fjob) 4 483.73 505.73
## - goout 1 495.26 507.26
## - Medu 1 500.86 512.86
## - as.factor(schoolsup) 1 500.98 512.98
##
## Step: AIC=500.07
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize) + as.factor(famsup)
##
## Df Deviance AIC
## + as.factor(Mjob) 4 475.91 499.91
## + as.factor(higher) 1 482.05 500.05
## <none> 484.07 500.07
## - as.factor(famsup) 1 486.14 500.14
## - as.factor(famsize) 1 486.23 500.23
## + studytime 1 482.26 500.26
## + traveltime 1 482.45 500.45
## + Fedu 1 482.46 500.46
## - as.factor(address) 1 486.98 500.98
## + freetime 1 483.31 501.31
## + Walc 1 483.31 501.31
## + as.factor(internet) 1 483.39 501.39
## + as.factor(reason) 3 479.49 501.49
## + as.factor(romantic) 1 483.77 501.77
## + Dalc 1 483.87 501.87
## + as.factor(paid) 1 483.88 501.88
## + as.factor(nursery) 1 483.93 501.93
## + famrel 1 484.00 502.00
## + as.factor(Pstatus) 1 484.03 502.03
## + as.factor(activities) 1 484.05 502.05
## + as.factor(guardian) 2 482.57 502.57
## - absences 1 490.70 504.70
## + as.factor(Fjob) 4 481.42 505.42
## - goout 1 493.59 507.59
## - as.factor(schoolsup) 1 497.81 511.81
## - Medu 1 500.41 514.41
##
## Step: AIC=499.91
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize) + as.factor(famsup) + as.factor(Mjob)
##
## Df Deviance AIC
## <none> 475.91 499.91
## + Fedu 1 474.00 500.00
## + as.factor(higher) 1 474.03 500.03
## - as.factor(Mjob) 4 484.07 500.07
## + studytime 1 474.23 500.23
## - as.factor(address) 1 478.24 500.24
## - as.factor(famsup) 1 478.25 500.25
## + traveltime 1 474.47 500.47
## + freetime 1 474.66 500.66
## - as.factor(famsize) 1 478.67 500.67
## + as.factor(internet) 1 475.00 501.00
## + Walc 1 475.21 501.21
## + as.factor(romantic) 1 475.26 501.26
## + as.factor(Pstatus) 1 475.73 501.73
## + as.factor(nursery) 1 475.76 501.76
## + as.factor(activities) 1 475.83 501.83
## + famrel 1 475.84 501.84
## + Dalc 1 475.84 501.84
## + as.factor(paid) 1 475.84 501.84
## + as.factor(guardian) 2 474.51 502.51
## + as.factor(reason) 3 472.72 502.72
## + as.factor(Fjob) 4 472.10 504.10
## - absences 1 482.99 504.99
## - goout 1 486.79 508.79
## - as.factor(schoolsup) 1 490.92 512.92
## - Medu 1 491.10 513.10
# Automated Stepwise Regression AIC 499.91
step_model2 <- stepAIC(model_null,
scope = list(lower = model_null, upper = model_full),
direction = "forward", family=binomial)
## Start: AIC=536.75
## y_pass ~ 1
##
## Df Deviance AIC
## + as.factor(schoolsup) 1 520.21 524.21
## + Medu 1 521.47 525.47
## + Fedu 1 526.45 530.45
## + goout 1 528.24 532.24
## + absences 1 529.27 533.27
## + traveltime 1 530.01 534.01
## + as.factor(Mjob) 4 524.13 534.13
## + Walc 1 530.37 534.37
## + as.factor(higher) 1 530.56 534.56
## + as.factor(address) 1 530.71 534.71
## <none> 534.75 536.75
## + as.factor(famsize) 1 532.78 536.78
## + as.factor(internet) 1 532.79 536.79
## + studytime 1 532.87 536.87
## + Dalc 1 533.14 537.14
## + as.factor(famsup) 1 533.54 537.54
## + as.factor(guardian) 2 531.69 537.69
## + as.factor(romantic) 1 534.54 538.54
## + freetime 1 534.60 538.60
## + as.factor(activities) 1 534.65 538.65
## + as.factor(nursery) 1 534.66 538.66
## + famrel 1 534.70 538.70
## + as.factor(reason) 3 530.71 538.71
## + as.factor(Pstatus) 1 534.75 538.75
## + as.factor(paid) 1 534.75 538.75
## + as.factor(Fjob) 4 530.34 540.34
##
## Step: AIC=524.21
## y_pass ~ as.factor(schoolsup)
##
## Df Deviance AIC
## + Medu 1 507.52 513.52
## + Fedu 1 510.77 516.77
## + goout 1 512.64 518.64
## + Walc 1 514.12 520.12
## + absences 1 514.92 520.92
## + traveltime 1 515.10 521.10
## + as.factor(higher) 1 515.13 521.13
## + as.factor(address) 1 515.66 521.66
## + as.factor(Mjob) 4 510.55 522.55
## + studytime 1 517.85 523.85
## + as.factor(guardian) 2 516.14 524.14
## <none> 520.21 524.21
## + as.factor(internet) 1 518.27 524.27
## + Dalc 1 518.31 524.31
## + as.factor(famsize) 1 518.46 524.46
## + as.factor(romantic) 1 519.62 525.62
## + as.factor(famsup) 1 519.67 525.67
## + as.factor(reason) 3 515.74 525.74
## + as.factor(Fjob) 4 513.81 525.81
## + as.factor(activities) 1 519.96 525.96
## + as.factor(nursery) 1 519.98 525.98
## + freetime 1 520.16 526.16
## + famrel 1 520.16 526.16
## + as.factor(Pstatus) 1 520.16 526.16
## + as.factor(paid) 1 520.20 526.20
##
## Step: AIC=513.52
## y_pass ~ as.factor(schoolsup) + Medu
##
## Df Deviance AIC
## + goout 1 498.63 506.63
## + absences 1 500.04 508.04
## + Walc 1 502.06 510.06
## + as.factor(higher) 1 504.62 512.62
## + traveltime 1 504.62 512.62
## + as.factor(address) 1 504.69 512.69
## + Dalc 1 505.30 513.30
## + as.factor(famsize) 1 505.37 513.37
## + as.factor(famsup) 1 505.40 513.40
## <none> 507.52 513.52
## + studytime 1 505.77 513.77
## + as.factor(guardian) 2 504.18 514.18
## + Fedu 1 506.29 514.29
## + as.factor(romantic) 1 506.70 514.70
## + as.factor(paid) 1 506.95 514.95
## + as.factor(internet) 1 507.11 515.11
## + as.factor(Mjob) 4 501.24 515.24
## + famrel 1 507.46 515.46
## + as.factor(nursery) 1 507.47 515.47
## + as.factor(Pstatus) 1 507.48 515.48
## + freetime 1 507.50 515.50
## + as.factor(activities) 1 507.51 515.51
## + as.factor(reason) 3 504.41 516.41
## + as.factor(Fjob) 4 504.54 518.54
##
## Step: AIC=506.63
## y_pass ~ as.factor(schoolsup) + Medu + goout
##
## Df Deviance AIC
## + absences 1 491.95 501.95
## + as.factor(address) 1 494.97 504.97
## + traveltime 1 495.85 505.85
## + as.factor(famsize) 1 496.10 506.10
## + as.factor(higher) 1 496.11 506.11
## + as.factor(famsup) 1 496.23 506.23
## <none> 498.63 506.63
## + as.factor(Mjob) 4 491.15 507.15
## + Walc 1 497.20 507.20
## + Fedu 1 497.28 507.28
## + studytime 1 497.30 507.30
## + freetime 1 497.63 507.63
## + as.factor(guardian) 2 495.69 507.69
## + as.factor(romantic) 1 497.73 507.73
## + as.factor(internet) 1 497.90 507.90
## + Dalc 1 498.06 508.06
## + as.factor(paid) 1 498.10 508.10
## + famrel 1 498.45 508.45
## + as.factor(Pstatus) 1 498.57 508.57
## + as.factor(activities) 1 498.58 508.58
## + as.factor(nursery) 1 498.58 508.58
## + as.factor(reason) 3 495.91 509.91
## + as.factor(Fjob) 4 496.13 512.13
##
## Step: AIC=501.95
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences
##
## Df Deviance AIC
## + as.factor(address) 1 488.78 500.78
## + as.factor(famsize) 1 488.96 500.96
## + traveltime 1 489.13 501.13
## + as.factor(famsup) 1 489.47 501.47
## <none> 491.95 501.95
## + as.factor(higher) 1 490.04 502.04
## + as.factor(Mjob) 4 484.18 502.18
## + as.factor(internet) 1 490.85 502.85
## + Fedu 1 490.89 502.89
## + studytime 1 491.03 503.03
## + Walc 1 491.23 503.23
## + freetime 1 491.30 503.30
## + as.factor(paid) 1 491.34 503.34
## + as.factor(romantic) 1 491.68 503.68
## + Dalc 1 491.71 503.71
## + famrel 1 491.87 503.87
## + as.factor(nursery) 1 491.90 503.90
## + as.factor(Pstatus) 1 491.93 503.93
## + as.factor(activities) 1 491.94 503.94
## + as.factor(guardian) 2 490.34 504.34
## + as.factor(reason) 3 488.70 504.70
## + as.factor(Fjob) 4 489.85 507.85
##
## Step: AIC=500.78
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address)
##
## Df Deviance AIC
## + as.factor(famsize) 1 486.14 500.14
## + as.factor(famsup) 1 486.23 500.23
## <none> 488.78 500.78
## + as.factor(higher) 1 486.90 500.90
## + traveltime 1 487.43 501.43
## + as.factor(Mjob) 4 481.61 501.61
## + Fedu 1 487.65 501.65
## + studytime 1 487.74 501.74
## + freetime 1 488.07 502.07
## + as.factor(paid) 1 488.08 502.08
## + as.factor(internet) 1 488.28 502.28
## + Walc 1 488.41 502.41
## + as.factor(romantic) 1 488.50 502.50
## + as.factor(reason) 3 484.62 502.62
## + as.factor(nursery) 1 488.71 502.71
## + famrel 1 488.71 502.71
## + Dalc 1 488.73 502.73
## + as.factor(activities) 1 488.73 502.73
## + as.factor(Pstatus) 1 488.77 502.77
## + as.factor(guardian) 2 487.12 503.12
## + as.factor(Fjob) 4 486.58 506.58
##
## Step: AIC=500.14
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize)
##
## Df Deviance AIC
## + as.factor(famsup) 1 484.07 500.07
## <none> 486.14 500.14
## + as.factor(Mjob) 4 478.25 500.25
## + as.factor(higher) 1 484.31 500.31
## + traveltime 1 484.42 500.42
## + studytime 1 484.88 500.88
## + Fedu 1 484.88 500.88
## + freetime 1 485.44 501.44
## + as.factor(paid) 1 485.45 501.45
## + Walc 1 485.58 501.58
## + as.factor(internet) 1 485.61 501.61
## + as.factor(romantic) 1 485.82 501.82
## + as.factor(reason) 3 481.88 501.88
## + as.factor(nursery) 1 485.94 501.94
## + Dalc 1 485.97 501.97
## + famrel 1 486.05 502.05
## + as.factor(activities) 1 486.09 502.09
## + as.factor(Pstatus) 1 486.10 502.10
## + as.factor(guardian) 2 484.57 502.57
## + as.factor(Fjob) 4 483.73 505.73
##
## Step: AIC=500.07
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize) + as.factor(famsup)
##
## Df Deviance AIC
## + as.factor(Mjob) 4 475.91 499.91
## + as.factor(higher) 1 482.05 500.05
## <none> 484.07 500.07
## + studytime 1 482.26 500.26
## + traveltime 1 482.45 500.45
## + Fedu 1 482.46 500.46
## + freetime 1 483.31 501.31
## + Walc 1 483.31 501.31
## + as.factor(internet) 1 483.39 501.39
## + as.factor(reason) 3 479.49 501.49
## + as.factor(romantic) 1 483.77 501.77
## + Dalc 1 483.87 501.87
## + as.factor(paid) 1 483.88 501.88
## + as.factor(nursery) 1 483.93 501.93
## + famrel 1 484.00 502.00
## + as.factor(Pstatus) 1 484.03 502.03
## + as.factor(activities) 1 484.05 502.05
## + as.factor(guardian) 2 482.57 502.57
## + as.factor(Fjob) 4 481.42 505.42
##
## Step: AIC=499.91
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize) + as.factor(famsup) + as.factor(Mjob)
##
## Df Deviance AIC
## <none> 475.91 499.91
## + Fedu 1 474.00 500.00
## + as.factor(higher) 1 474.03 500.03
## + studytime 1 474.23 500.23
## + traveltime 1 474.47 500.47
## + freetime 1 474.66 500.66
## + as.factor(internet) 1 475.00 501.00
## + Walc 1 475.21 501.21
## + as.factor(romantic) 1 475.26 501.26
## + as.factor(Pstatus) 1 475.73 501.73
## + as.factor(nursery) 1 475.76 501.76
## + as.factor(activities) 1 475.83 501.83
## + famrel 1 475.84 501.84
## + Dalc 1 475.84 501.84
## + as.factor(paid) 1 475.84 501.84
## + as.factor(guardian) 2 474.51 502.51
## + as.factor(reason) 3 472.72 502.72
## + as.factor(Fjob) 4 472.10 504.10
# Automated Stepwise Regression AIC 536.75
step_model3 <- stepAIC(model_null,
scope = list(lower = model_null, upper = model_full),
direction = "back", family=binomial)
## Start: AIC=536.75
## y_pass ~ 1
# Automated Stepwise Regression Using BIC Criterion: AIC 521.84
n <- nrow(mat)
step_model4 <- stepAIC(model_null,
scope = list(lower = model_null, upper = model_full),
direction = "both", family=binomial,
k = log(n))
## Start: AIC=540.73
## y_pass ~ 1
##
## Df Deviance AIC
## + as.factor(schoolsup) 1 520.21 532.17
## + Medu 1 521.47 533.43
## + Fedu 1 526.45 538.40
## + goout 1 528.24 540.19
## <none> 534.75 540.73
## + absences 1 529.27 541.23
## + traveltime 1 530.01 541.97
## + Walc 1 530.37 542.33
## + as.factor(higher) 1 530.56 542.51
## + as.factor(address) 1 530.71 542.67
## + as.factor(famsize) 1 532.78 544.74
## + as.factor(internet) 1 532.79 544.74
## + studytime 1 532.87 544.83
## + Dalc 1 533.14 545.10
## + as.factor(famsup) 1 533.54 545.50
## + as.factor(romantic) 1 534.54 546.50
## + freetime 1 534.60 546.56
## + as.factor(activities) 1 534.65 546.61
## + as.factor(nursery) 1 534.66 546.62
## + famrel 1 534.70 546.66
## + as.factor(Pstatus) 1 534.75 546.71
## + as.factor(paid) 1 534.75 546.71
## + as.factor(guardian) 2 531.69 549.63
## + as.factor(Mjob) 4 524.13 554.03
## + as.factor(reason) 3 530.71 554.63
## + as.factor(Fjob) 4 530.34 560.23
##
## Step: AIC=532.17
## y_pass ~ as.factor(schoolsup)
##
## Df Deviance AIC
## + Medu 1 507.52 525.45
## + Fedu 1 510.77 528.71
## + goout 1 512.64 530.58
## + Walc 1 514.12 532.05
## <none> 520.21 532.17
## + absences 1 514.92 532.86
## + traveltime 1 515.10 533.04
## + as.factor(higher) 1 515.13 533.06
## + as.factor(address) 1 515.66 533.60
## + studytime 1 517.85 535.79
## + as.factor(internet) 1 518.27 536.21
## + Dalc 1 518.31 536.25
## + as.factor(famsize) 1 518.46 536.40
## + as.factor(romantic) 1 519.62 537.56
## + as.factor(famsup) 1 519.67 537.61
## + as.factor(activities) 1 519.96 537.90
## + as.factor(nursery) 1 519.98 537.91
## + freetime 1 520.16 538.09
## + famrel 1 520.16 538.10
## + as.factor(Pstatus) 1 520.16 538.10
## + as.factor(paid) 1 520.20 538.13
## + as.factor(guardian) 2 516.14 540.05
## - as.factor(schoolsup) 1 534.75 540.73
## + as.factor(reason) 3 515.74 545.64
## + as.factor(Mjob) 4 510.55 546.43
## + as.factor(Fjob) 4 513.81 549.68
##
## Step: AIC=525.45
## y_pass ~ as.factor(schoolsup) + Medu
##
## Df Deviance AIC
## + goout 1 498.63 522.54
## + absences 1 500.04 523.95
## <none> 507.52 525.45
## + Walc 1 502.06 525.97
## + as.factor(higher) 1 504.62 528.54
## + traveltime 1 504.62 528.54
## + as.factor(address) 1 504.69 528.61
## + Dalc 1 505.30 529.22
## + as.factor(famsize) 1 505.37 529.28
## + as.factor(famsup) 1 505.40 529.32
## + studytime 1 505.77 529.69
## + Fedu 1 506.29 530.20
## + as.factor(romantic) 1 506.70 530.62
## + as.factor(paid) 1 506.95 530.86
## + as.factor(internet) 1 507.11 531.03
## + famrel 1 507.46 531.37
## + as.factor(nursery) 1 507.47 531.39
## + as.factor(Pstatus) 1 507.48 531.40
## + freetime 1 507.50 531.42
## + as.factor(activities) 1 507.51 531.43
## - Medu 1 520.21 532.17
## - as.factor(schoolsup) 1 521.47 533.43
## + as.factor(guardian) 2 504.18 534.07
## + as.factor(reason) 3 504.41 540.29
## + as.factor(Mjob) 4 501.24 543.09
## + as.factor(Fjob) 4 504.54 546.39
##
## Step: AIC=522.54
## y_pass ~ as.factor(schoolsup) + Medu + goout
##
## Df Deviance AIC
## + absences 1 491.95 521.84
## <none> 498.63 522.54
## + as.factor(address) 1 494.97 524.87
## - goout 1 507.52 525.45
## + traveltime 1 495.85 525.74
## + as.factor(famsize) 1 496.10 526.00
## + as.factor(higher) 1 496.11 526.01
## + as.factor(famsup) 1 496.23 526.12
## + Walc 1 497.20 527.10
## + Fedu 1 497.28 527.18
## + studytime 1 497.30 527.20
## + freetime 1 497.63 527.52
## + as.factor(romantic) 1 497.73 527.62
## + as.factor(internet) 1 497.90 527.79
## + Dalc 1 498.06 527.96
## + as.factor(paid) 1 498.10 528.00
## + famrel 1 498.45 528.34
## + as.factor(Pstatus) 1 498.57 528.47
## + as.factor(activities) 1 498.58 528.48
## + as.factor(nursery) 1 498.58 528.48
## - Medu 1 512.64 530.58
## - as.factor(schoolsup) 1 513.42 531.35
## + as.factor(guardian) 2 495.69 531.56
## + as.factor(reason) 3 495.91 537.77
## + as.factor(Mjob) 4 491.15 538.99
## + as.factor(Fjob) 4 496.13 543.97
##
## Step: AIC=521.84
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences
##
## Df Deviance AIC
## <none> 491.95 521.84
## - absences 1 498.63 522.54
## - goout 1 500.04 523.95
## + as.factor(address) 1 488.78 524.65
## + as.factor(famsize) 1 488.96 524.83
## + traveltime 1 489.13 525.00
## + as.factor(famsup) 1 489.47 525.34
## + as.factor(higher) 1 490.04 525.91
## + as.factor(internet) 1 490.85 526.72
## + Fedu 1 490.89 526.77
## + studytime 1 491.03 526.90
## + Walc 1 491.23 527.10
## + freetime 1 491.30 527.17
## + as.factor(paid) 1 491.34 527.21
## + as.factor(romantic) 1 491.68 527.55
## + Dalc 1 491.71 527.58
## + famrel 1 491.87 527.75
## + as.factor(nursery) 1 491.90 527.78
## + as.factor(Pstatus) 1 491.93 527.80
## + as.factor(activities) 1 491.94 527.81
## - as.factor(schoolsup) 1 506.73 530.64
## - Medu 1 507.92 531.84
## + as.factor(guardian) 2 490.34 532.19
## + as.factor(reason) 3 488.70 536.53
## + as.factor(Mjob) 4 484.18 537.99
## + as.factor(Fjob) 4 489.85 543.66
# Using the BIC Criterion creates a model with a higher AIC
## From stepwise regression, step_model1 seems to be the best fit.
# Significant predictors (p-val<0.05): extra_edu_supp, Mother_edu, goout_with_friends, absences
summary(step_model1)
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(address) + as.factor(famsize) + as.factor(famsup) +
## as.factor(Mjob), family = binomial, data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.55606 0.51017 -1.090 0.275730
## as.factor(schoolsup)yes -1.42606 0.40451 -3.525 0.000423 ***
## Medu 0.52897 0.13909 3.803 0.000143 ***
## goout -0.33774 0.10466 -3.227 0.001250 **
## absences -0.04096 0.01695 -2.416 0.015675 *
## as.factor(address)U 0.42107 0.27870 1.511 0.130824
## as.factor(famsize)LE3 0.41056 0.24752 1.659 0.097172 .
## as.factor(famsup)yes -0.35670 0.23398 -1.524 0.127391
## as.factor(Mjob)health 0.29872 0.54631 0.547 0.584522
## as.factor(Mjob)other -0.07399 0.36371 -0.203 0.838797
## as.factor(Mjob)services 0.10627 0.39554 0.269 0.788182
## as.factor(Mjob)teacher -0.84124 0.51252 -1.641 0.100716
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 475.91 on 383 degrees of freedom
## AIC: 499.91
##
## Number of Fisher Scoring iterations: 4
#### Significant Predictors:
### Full model likelihood
model1.logit <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout +
absences, data = mat, family = binomial)
summary(model1.logit)
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences, family = binomial, data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.24899 0.41479 -0.600 0.548324
## as.factor(schoolsup)yes -1.37040 0.39287 -3.488 0.000486 ***
## Medu 0.39679 0.10160 3.906 9.4e-05 ***
## goout -0.27963 0.09961 -2.807 0.004996 **
## absences -0.03828 0.01622 -2.360 0.018282 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 491.95 on 390 degrees of freedom
## AIC: 501.95
##
## Number of Fisher Scoring iterations: 4
linear_pred_full <- predict.glm(model1.logit, data=mat)
full_log_lik <- sum(mat$y_pass*linear_pred_full - log(1 + exp(linear_pred_full)))
### LR: schoolsup (should we drop schoolsup from the model?)
model_with_schoolsup <- glm(y_pass ~ Medu + goout + absences,
family = binomial, data = mat)
pred_with_schoolsup <- predict.glm(model_with_schoolsup, mat)
ll_with_schoolsup <- sum(mat$y_pass * pred_with_schoolsup - log(1 + exp(pred_with_schoolsup)))
lrt_schoolsup <- -2*(ll_with_schoolsup - full_log_lik)
crit_val_schoolsup <- qchisq(0.95, 1) # df = 1 for 1 added parameter
p_val_schoolsup <- pchisq(lrt_schoolsup, 1, lower.tail = FALSE)
lrt_schoolsup
## [1] 14.78016
crit_val_schoolsup
## [1] 3.841459
p_val_schoolsup ## p-val:0.00012<0.05 --> schoolsup is significant
## [1] 0.0001207997
### LR: Medu (should we drop Medu from the model?)
model_with_Medu <- glm(y_pass ~ as.factor(schoolsup) + goout + absences,
family = binomial, data = mat)
pred_with_Medu <- predict.glm(model_with_Medu, mat)
ll_with_Medu <- sum(mat$y_pass * pred_with_Medu - log(1 + exp(pred_with_Medu)))
lrt_Medu <- -2*(ll_with_Medu - full_log_lik)
crit_val_Medu <- qchisq(0.95, 1) # df = 1 for 1 added parameter
p_val_Medu <- pchisq(lrt_Medu, 1, lower.tail = FALSE)
lrt_Medu
## [1] 15.9776
crit_val_Medu
## [1] 3.841459
p_val_Medu ## p-val: 6.4e-05<0.05 --> Medu is significant
## [1] 6.409626e-05
### LR: goout (should we drop goout from the model?)
model_with_goout <- glm(y_pass ~ as.factor(schoolsup) + Medu + absences,
family = binomial, data = mat)
pred_with_goout <- predict.glm(model_with_goout, mat)
ll_with_goout <- sum(mat$y_pass * pred_with_goout - log(1 + exp(pred_with_goout)))
lrt_goout <- -2 * (ll_with_goout - full_log_lik)
crit_val_goout <- qchisq(0.95, 1) # df = 1 for 1 added parameter
p_val_goout <- pchisq(lrt_goout, 1, lower.tail = FALSE)
lrt_goout
## [1] 8.0913
crit_val_goout
## [1] 3.841459
p_val_goout ## p-val: 0.004<0.05 --> goout is significant
## [1] 0.004447824
### LR: absences (should we drop absences from the model?)
model_with_absences <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout,
family = binomial, data = mat)
pred_with_absences <- predict.glm(model_with_absences, mat)
ll_with_absences <- sum(mat$y_pass * pred_with_absences - log(1 + exp(pred_with_absences)))
lrt_absences <- -2 * (ll_with_absences - full_log_lik)
crit_val_absences <- qchisq(0.95, 1) # df = 1 for 1 added parameter
p_val_absences <- pchisq(lrt_absences, 1, lower.tail = FALSE)
lrt_absences
## [1] 6.681073
crit_val_absences
## [1] 3.841459
p_val_absences ## p-val:0.0097<0.05 --> absences is significant
## [1] 0.009744195
############# Pearson GOF test for model1.logit
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
# Pearson GOF test for model1.logit
pearson_chi1 <- sum(residuals(model1.logit, type = "pearson")^2)
df1 <- model1.logit$df.residual
p_value1 <- pchisq(pearson_chi1, df1, lower.tail = FALSE)
crit_val1 <- qchisq(.95,df=df1-2)
cat("Model 1 - Pearson Chi-square:", pearson_chi1, "critical value:", crit_val1, "df:", df1, "p-value:", p_value1, "\n") #p-val=0.2325391>0.05 --> Can't reject null that model is fitting
## Model 1 - Pearson Chi-square: 392.5875 critical value: 434.9289 df: 390 p-value: 0.4537058
pi_hats1 <- predict.glm(model1.logit)
############## Hosmer-Lemeshow Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
hos_test1 <- hoslem.test(model1.logit$y,predict.glm(model1.logit, mat,type="response"),5)
obs5 <- hos_test1$observed
exp5 <- hos_test1$expected
crit_val1 <- qchisq(.95,4-2)
crit_val1
## [1] 5.991465
p_val1 <- pchisq(hos_test1$statistic, 4-2,lower.tail=F)
p_val1 ##p_val = 0.30>0.05 --> fail to reject the null hypothesis that model is fitting
## X-squared
## 0.2996385
###### Insignificant Predictors from Stepwise Model:
## Likelihood Ratio Test
#H0: smaller number of parameters are sufficient
### LRT: address
model_with_address <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address), family = binomial, data = mat)
pred_with_address <- predict(model_with_address)
ll_with_address <- sum(mat$y_pass * pred_with_address - log(1 + exp(pred_with_address)))
model_bic <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences,
family = binomial, data = mat)
pred_bic <- predict(model_bic)
ll_bic <- sum(mat$y_pass * pred_bic - log(1 + exp(pred_bic)))
lrt_address <- -2 * (ll_bic - ll_with_address)
crit_val_address <- qchisq(0.95, 1)
p_val_address <- pchisq(lrt_address, 1, lower.tail = FALSE)
p_val_address #p-val: 0.075>0.05 --> address remains insignificant
## [1] 0.07522347
### LRT: famsize
model_with_famsize <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(famsize),
family = binomial, data = mat)
pred_with_famsize <- predict(model_with_famsize)
ll_with_famsize <- sum(mat$y_pass * pred_with_famsize - log(1 + exp(pred_with_famsize)))
model_bic <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences,
family = binomial, data = mat)
pred_bic <- predict(model_bic)
ll_bic <- sum(mat$y_pass * pred_bic - log(1 + exp(pred_bic)))
lrt_famsize <- -2 * (ll_bic - ll_with_famsize)
crit_val_famsize <- qchisq(0.95, 1)
p_val_famsize <- pchisq(lrt_famsize, 1, lower.tail = FALSE)
p_val_famsize #p-val: 0.084>0.05 --> famsize remains insignificant
## [1] 0.08400694
### LRT: famsup
model_with_famsup <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(famsup),
family = binomial, data = mat)
pred_with_famsup <- predict(model_with_famsup)
ll_with_famsup <- sum(mat$y_pass * pred_with_famsup - log(1 + exp(pred_with_famsup)))
model_bic <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences,
family = binomial, data = mat)
pred_bic <- predict(model_bic)
ll_bic <- sum(mat$y_pass * pred_bic - log(1 + exp(pred_bic)))
lrt_famsup <- -2 * (ll_bic - ll_with_famsup)
crit_val_famsup <- qchisq(0.95, 1)
p_val_famsup <- pchisq(lrt_famsup, 1, lower.tail = FALSE)
p_val_famsup #p-val: 0.12>0.05 --> famsup remains insignificant
## [1] 0.1153993
### LRT: MJob
model_with_Mjob <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(Mjob),
family = binomial, data = mat)
pred_with_Mjob <- predict(model_with_Mjob)
ll_with_Mjob <- sum(mat$y_pass * pred_with_Mjob - log(1 + exp(pred_with_Mjob)))
model_bic <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences,
family = binomial, data = mat)
pred_bic <- predict(model_bic)
ll_bic <- sum(mat$y_pass * pred_bic - log(1 + exp(pred_bic)))
lrt_Mjob <- -2 * (ll_bic - ll_with_Mjob)
crit_val_Mjob <- qchisq(0.95, 1)
p_val_Mjob <- pchisq(lrt_Mjob, 1, lower.tail = FALSE)
p_val_Mjob #p-val:0.00532<0.05 --> MJob significantly contributes to the model
## [1] 0.005324737
###### After adding Mjob from LRT: model2.logit
model2.logit <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout +
absences + as.factor(Mjob), data = mat, family = binomial)
############# Pearson GOF test for model2.logit
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
pearson_chi1 <- sum(residuals(model2.logit, type = "pearson")^2)
df2 <- model2.logit$df.residual
p_value2 <- pchisq(pearson_chi1, df2, lower.tail = FALSE)
crit_val2 <- qchisq(.95,df=df2-2)
cat("Model 2 - Pearson Chi-square:", pearson_chi1, "critical value:", crit_val2, "df:", df2, "p-value:", p_value2, "\n") #p-val=0.416>0.05 --> Can't reject null that model is fitting
## Model 2 - Pearson Chi-square: 391.2732 critical value: 430.6919 df: 386 p-value: 0.4157033
pi_hats1 <- predict.glm(model2.logit)
############## Hosmer-Lemeshow Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
hos_test2 <- hoslem.test(model2.logit$y,predict.glm(model2.logit, mat,type="response"),5)
obs2 <- hos_test2$observed
exp2 <- hos_test2$expected
crit.val2 <- qchisq(.95,4-2)
crit.val2
## [1] 5.991465
p_val2 <- pchisq(hos_test2$statistic, 4-2,lower.tail=F)
p_val2 ##p_val = 0.739>0.05 --> fail to reject the null hypothesis that model is fitting
## X-squared
## 0.7390676
## Checking model fits
BIC(model2.logit) #BIC 537.99
## [1] 537.9896
summary(model2.logit) #502.18
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(Mjob), family = binomial, data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.39219 0.46708 -0.840 0.401097
## as.factor(schoolsup)yes -1.45179 0.39989 -3.630 0.000283 ***
## Medu 0.47128 0.13458 3.502 0.000462 ***
## goout -0.30321 0.10124 -2.995 0.002745 **
## absences -0.03993 0.01663 -2.401 0.016360 *
## as.factor(Mjob)health 0.45347 0.53529 0.847 0.396912
## as.factor(Mjob)other 0.03785 0.35816 0.106 0.915828
## as.factor(Mjob)services 0.25473 0.38720 0.658 0.510622
## as.factor(Mjob)teacher -0.63241 0.49895 -1.267 0.204984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 484.18 on 386 degrees of freedom
## AIC: 502.18
##
## Number of Fisher Scoring iterations: 4
model_full_probit <- glm(y_pass ~ as.factor(address) + as.factor(famsize) +
as.factor(Pstatus) + Medu + Fedu + as.factor(Mjob) + as.factor(Fjob) + as.factor(reason) + as.factor(guardian) + traveltime + studytime + as.factor(schoolsup) + as.factor(famsup) + as.factor(paid) + as.factor(activities) + as.factor(nursery) + as.factor(higher) + as.factor(internet) + as.factor(romantic) + famrel + freetime + goout + Dalc + Walc + absences, data = mat, family=binomial(link=probit))
summary(model_full_probit) # AIC: 528.74
##
## Call:
## glm(formula = y_pass ~ as.factor(address) + as.factor(famsize) +
## as.factor(Pstatus) + Medu + Fedu + as.factor(Mjob) + as.factor(Fjob) +
## as.factor(reason) + as.factor(guardian) + traveltime + studytime +
## as.factor(schoolsup) + as.factor(famsup) + as.factor(paid) +
## as.factor(activities) + as.factor(nursery) + as.factor(higher) +
## as.factor(internet) + as.factor(romantic) + famrel + freetime +
## goout + Dalc + Walc + absences, family = binomial(link = probit),
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.940794 0.772905 -1.217 0.223521
## as.factor(address)U 0.244100 0.189707 1.287 0.198193
## as.factor(famsize)LE3 0.301113 0.158926 1.895 0.058135 .
## as.factor(Pstatus)T 0.061230 0.235129 0.260 0.794549
## Medu 0.209102 0.105272 1.986 0.046999 *
## Fedu 0.096796 0.088866 1.089 0.276047
## as.factor(Mjob)health 0.178180 0.361834 0.492 0.622412
## as.factor(Mjob)other -0.118577 0.236950 -0.500 0.616772
## as.factor(Mjob)services -0.012823 0.257203 -0.050 0.960236
## as.factor(Mjob)teacher -0.623989 0.338174 -1.845 0.065013 .
## as.factor(Fjob)health -0.136863 0.465759 -0.294 0.768873
## as.factor(Fjob)other -0.080812 0.329126 -0.246 0.806042
## as.factor(Fjob)services -0.118509 0.339401 -0.349 0.726961
## as.factor(Fjob)teacher 0.335185 0.419208 0.800 0.423961
## as.factor(reason)home -0.025078 0.181334 -0.138 0.890006
## as.factor(reason)other 0.156775 0.264164 0.593 0.552864
## as.factor(reason)reputation 0.266692 0.186776 1.428 0.153329
## as.factor(guardian)mother 0.013118 0.177637 0.074 0.941131
## as.factor(guardian)other -0.299083 0.306020 -0.977 0.328405
## traveltime -0.096902 0.114283 -0.848 0.396485
## studytime 0.099212 0.091436 1.085 0.277904
## as.factor(schoolsup)yes -0.923133 0.238118 -3.877 0.000106 ***
## as.factor(famsup)yes -0.280173 0.154300 -1.816 0.069407 .
## as.factor(paid)yes -0.050471 0.154958 -0.326 0.744644
## as.factor(activities)yes -0.025373 0.144195 -0.176 0.860321
## as.factor(nursery)yes -0.126675 0.180160 -0.703 0.481976
## as.factor(higher)yes 0.403901 0.365616 1.105 0.269283
## as.factor(internet)yes 0.179272 0.202669 0.885 0.376397
## as.factor(romantic)yes -0.084178 0.153128 -0.550 0.582508
## famrel -0.003099 0.078593 -0.039 0.968542
## freetime 0.103452 0.077444 1.336 0.181606
## goout -0.210082 0.074734 -2.811 0.004938 **
## Dalc 0.042901 0.111406 0.385 0.700177
## Walc -0.054071 0.082638 -0.654 0.512916
## absences -0.020114 0.010838 -1.856 0.063463 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 458.74 on 360 degrees of freedom
## AIC: 528.74
##
## Number of Fisher Scoring iterations: 5
# Null model
model_null_probit <- glm(y_pass ~ 1, data = mat, family=binomial(link=probit))
# Automated Stepwise Regression AIC 500.05
step_model1_probit <- stepAIC(model_null_probit,
scope = list(lower = model_null_probit, upper = model_full_probit),
direction = "both", family=binomial(link=probit))
## Start: AIC=536.75
## y_pass ~ 1
##
## Df Deviance AIC
## + as.factor(schoolsup) 1 520.21 524.21
## + Medu 1 521.53 525.53
## + Fedu 1 526.47 530.47
## + goout 1 528.20 532.20
## + absences 1 529.14 533.14
## + traveltime 1 529.99 533.99
## + as.factor(Mjob) 4 524.13 534.13
## + Walc 1 530.38 534.38
## + as.factor(higher) 1 530.56 534.56
## + as.factor(address) 1 530.71 534.71
## <none> 534.75 536.75
## + as.factor(famsize) 1 532.78 536.78
## + as.factor(internet) 1 532.79 536.79
## + studytime 1 532.88 536.88
## + Dalc 1 533.16 537.16
## + as.factor(famsup) 1 533.54 537.54
## + as.factor(guardian) 2 531.69 537.69
## + as.factor(romantic) 1 534.54 538.54
## + freetime 1 534.60 538.60
## + as.factor(activities) 1 534.65 538.65
## + as.factor(nursery) 1 534.66 538.66
## + famrel 1 534.70 538.70
## + as.factor(reason) 3 530.71 538.71
## + as.factor(Pstatus) 1 534.75 538.75
## + as.factor(paid) 1 534.75 538.75
## + as.factor(Fjob) 4 530.34 540.34
##
## Step: AIC=524.21
## y_pass ~ as.factor(schoolsup)
##
## Df Deviance AIC
## + Medu 1 507.80 513.80
## + Fedu 1 510.82 516.82
## + goout 1 512.62 518.62
## + Walc 1 514.03 520.03
## + absences 1 514.67 520.67
## + traveltime 1 515.02 521.02
## + as.factor(higher) 1 515.10 521.10
## + as.factor(address) 1 515.79 521.79
## + as.factor(Mjob) 4 510.68 522.68
## + as.factor(guardian) 2 516.14 524.14
## + studytime 1 518.15 524.15
## <none> 520.21 524.21
## + Dalc 1 518.33 524.33
## + as.factor(internet) 1 518.36 524.36
## + as.factor(famsize) 1 518.57 524.57
## + as.factor(famsup) 1 519.66 525.66
## + as.factor(romantic) 1 519.66 525.66
## + as.factor(reason) 3 515.77 525.77
## + as.factor(Fjob) 4 513.95 525.95
## + as.factor(activities) 1 519.99 525.99
## + as.factor(nursery) 1 520.00 526.00
## + as.factor(Pstatus) 1 520.17 526.17
## + freetime 1 520.17 526.17
## + famrel 1 520.18 526.18
## + as.factor(paid) 1 520.20 526.20
## - as.factor(schoolsup) 1 534.75 536.75
##
## Step: AIC=513.8
## y_pass ~ as.factor(schoolsup) + Medu
##
## Df Deviance AIC
## + goout 1 498.76 506.76
## + absences 1 500.09 508.09
## + Walc 1 502.29 510.29
## + traveltime 1 504.80 512.80
## + as.factor(higher) 1 504.92 512.92
## + as.factor(address) 1 505.04 513.04
## + as.factor(famsup) 1 505.64 513.64
## + Dalc 1 505.69 513.69
## + as.factor(famsize) 1 505.69 513.69
## <none> 507.80 513.80
## + studytime 1 506.25 514.25
## + as.factor(guardian) 2 504.45 514.45
## + Fedu 1 506.51 514.51
## + as.factor(romantic) 1 507.06 515.06
## + as.factor(paid) 1 507.31 515.31
## + as.factor(internet) 1 507.39 515.39
## + as.factor(Mjob) 4 501.53 515.53
## + as.factor(nursery) 1 507.76 515.76
## + as.factor(Pstatus) 1 507.76 515.76
## + famrel 1 507.77 515.77
## + freetime 1 507.79 515.79
## + as.factor(activities) 1 507.79 515.79
## + as.factor(reason) 3 504.63 516.63
## + as.factor(Fjob) 4 504.82 518.82
## - Medu 1 520.21 524.21
## - as.factor(schoolsup) 1 521.53 525.53
##
## Step: AIC=506.76
## y_pass ~ as.factor(schoolsup) + Medu + goout
##
## Df Deviance AIC
## + absences 1 491.81 501.81
## + as.factor(address) 1 495.12 505.12
## + traveltime 1 495.94 505.94
## + as.factor(higher) 1 496.17 506.17
## + as.factor(famsize) 1 496.26 506.26
## + as.factor(famsup) 1 496.32 506.32
## <none> 498.76 506.76
## + as.factor(Mjob) 4 491.25 507.25
## + Walc 1 497.34 507.34
## + Fedu 1 497.35 507.35
## + studytime 1 497.56 507.56
## + as.factor(guardian) 2 495.71 507.71
## + freetime 1 497.73 507.73
## + as.factor(internet) 1 497.97 507.97
## + as.factor(romantic) 1 497.98 507.98
## + Dalc 1 498.28 508.28
## + as.factor(paid) 1 498.32 508.32
## + famrel 1 498.64 508.64
## + as.factor(Pstatus) 1 498.72 508.72
## + as.factor(nursery) 1 498.73 508.73
## + as.factor(activities) 1 498.73 508.73
## + as.factor(reason) 3 496.03 510.03
## + as.factor(Fjob) 4 496.46 512.46
## - goout 1 507.80 513.80
## - Medu 1 512.62 518.62
## - as.factor(schoolsup) 1 513.36 519.36
##
## Step: AIC=501.81
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences
##
## Df Deviance AIC
## + as.factor(address) 1 488.66 500.66
## + as.factor(famsize) 1 488.91 500.91
## + traveltime 1 488.94 500.94
## + as.factor(famsup) 1 489.33 501.33
## <none> 491.81 501.81
## + as.factor(higher) 1 489.85 501.85
## + as.factor(Mjob) 4 483.97 501.97
## + as.factor(internet) 1 490.64 502.64
## + Fedu 1 490.74 502.74
## + studytime 1 490.98 502.98
## + Walc 1 491.08 503.08
## + freetime 1 491.15 503.15
## + as.factor(paid) 1 491.27 503.27
## + as.factor(romantic) 1 491.60 503.60
## + Dalc 1 491.60 503.60
## + famrel 1 491.77 503.77
## + as.factor(Pstatus) 1 491.79 503.79
## + as.factor(nursery) 1 491.79 503.79
## + as.factor(activities) 1 491.81 503.81
## + as.factor(guardian) 2 490.19 504.19
## + as.factor(reason) 3 488.59 504.59
## - absences 1 498.76 506.76
## + as.factor(Fjob) 4 489.92 507.92
## - goout 1 500.09 508.09
## - as.factor(schoolsup) 1 506.47 514.47
## - Medu 1 507.69 515.69
##
## Step: AIC=500.66
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address)
##
## Df Deviance AIC
## + as.factor(famsize) 1 486.05 500.05
## + as.factor(famsup) 1 486.19 500.19
## <none> 488.66 500.66
## + as.factor(higher) 1 486.82 500.82
## + traveltime 1 487.28 501.28
## + as.factor(Mjob) 4 481.40 501.40
## + Fedu 1 487.48 501.48
## + studytime 1 487.76 501.76
## - as.factor(address) 1 491.81 501.81
## + freetime 1 487.94 501.94
## + as.factor(paid) 1 488.06 502.06
## + as.factor(internet) 1 488.16 502.16
## + Walc 1 488.29 502.29
## + as.factor(romantic) 1 488.43 502.43
## + as.factor(reason) 3 484.48 502.48
## + as.factor(nursery) 1 488.60 502.60
## + as.factor(activities) 1 488.62 502.62
## + Dalc 1 488.63 502.63
## + famrel 1 488.63 502.63
## + as.factor(Pstatus) 1 488.65 502.65
## + as.factor(guardian) 2 486.98 502.98
## - absences 1 495.12 505.12
## + as.factor(Fjob) 4 486.72 506.72
## - goout 1 497.68 507.68
## - Medu 1 502.72 512.72
## - as.factor(schoolsup) 1 503.72 513.72
##
## Step: AIC=500.05
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize)
##
## Df Deviance AIC
## <none> 486.05 500.05
## + as.factor(famsup) 1 484.06 500.06
## + as.factor(higher) 1 484.19 500.19
## + as.factor(Mjob) 4 478.22 500.22
## + traveltime 1 484.25 500.25
## - as.factor(famsize) 1 488.66 500.66
## + Fedu 1 484.75 500.75
## - as.factor(address) 1 488.91 500.91
## + studytime 1 484.93 500.93
## + freetime 1 485.34 501.34
## + as.factor(paid) 1 485.44 501.44
## + Walc 1 485.44 501.44
## + as.factor(internet) 1 485.49 501.49
## + as.factor(romantic) 1 485.78 501.78
## + as.factor(nursery) 1 485.86 501.86
## + as.factor(reason) 3 481.90 501.90
## + Dalc 1 485.90 501.90
## + as.factor(activities) 1 486.00 502.00
## + famrel 1 486.01 502.01
## + as.factor(Pstatus) 1 486.03 502.03
## + as.factor(guardian) 2 484.51 502.51
## - absences 1 492.94 504.94
## + as.factor(Fjob) 4 484.00 506.00
## - goout 1 495.42 507.42
## - as.factor(schoolsup) 1 500.67 512.67
## - Medu 1 500.82 512.82
# Automated Stepwise Regression AIC 500.05
step_model2_probit <- stepAIC(model_null_probit,
scope = list(lower = model_null_probit, upper = model_full_probit),
direction = "forward", family=binomial(link=probit))
## Start: AIC=536.75
## y_pass ~ 1
##
## Df Deviance AIC
## + as.factor(schoolsup) 1 520.21 524.21
## + Medu 1 521.53 525.53
## + Fedu 1 526.47 530.47
## + goout 1 528.20 532.20
## + absences 1 529.14 533.14
## + traveltime 1 529.99 533.99
## + as.factor(Mjob) 4 524.13 534.13
## + Walc 1 530.38 534.38
## + as.factor(higher) 1 530.56 534.56
## + as.factor(address) 1 530.71 534.71
## <none> 534.75 536.75
## + as.factor(famsize) 1 532.78 536.78
## + as.factor(internet) 1 532.79 536.79
## + studytime 1 532.88 536.88
## + Dalc 1 533.16 537.16
## + as.factor(famsup) 1 533.54 537.54
## + as.factor(guardian) 2 531.69 537.69
## + as.factor(romantic) 1 534.54 538.54
## + freetime 1 534.60 538.60
## + as.factor(activities) 1 534.65 538.65
## + as.factor(nursery) 1 534.66 538.66
## + famrel 1 534.70 538.70
## + as.factor(reason) 3 530.71 538.71
## + as.factor(Pstatus) 1 534.75 538.75
## + as.factor(paid) 1 534.75 538.75
## + as.factor(Fjob) 4 530.34 540.34
##
## Step: AIC=524.21
## y_pass ~ as.factor(schoolsup)
##
## Df Deviance AIC
## + Medu 1 507.80 513.80
## + Fedu 1 510.82 516.82
## + goout 1 512.62 518.62
## + Walc 1 514.03 520.03
## + absences 1 514.67 520.67
## + traveltime 1 515.02 521.02
## + as.factor(higher) 1 515.10 521.10
## + as.factor(address) 1 515.79 521.79
## + as.factor(Mjob) 4 510.68 522.68
## + as.factor(guardian) 2 516.14 524.14
## + studytime 1 518.15 524.15
## <none> 520.21 524.21
## + Dalc 1 518.33 524.33
## + as.factor(internet) 1 518.36 524.36
## + as.factor(famsize) 1 518.57 524.57
## + as.factor(famsup) 1 519.66 525.66
## + as.factor(romantic) 1 519.66 525.66
## + as.factor(reason) 3 515.77 525.77
## + as.factor(Fjob) 4 513.95 525.95
## + as.factor(activities) 1 519.99 525.99
## + as.factor(nursery) 1 520.00 526.00
## + as.factor(Pstatus) 1 520.17 526.17
## + freetime 1 520.17 526.17
## + famrel 1 520.18 526.18
## + as.factor(paid) 1 520.20 526.20
##
## Step: AIC=513.8
## y_pass ~ as.factor(schoolsup) + Medu
##
## Df Deviance AIC
## + goout 1 498.76 506.76
## + absences 1 500.09 508.09
## + Walc 1 502.29 510.29
## + traveltime 1 504.80 512.80
## + as.factor(higher) 1 504.92 512.92
## + as.factor(address) 1 505.04 513.04
## + as.factor(famsup) 1 505.64 513.64
## + Dalc 1 505.69 513.69
## + as.factor(famsize) 1 505.69 513.69
## <none> 507.80 513.80
## + studytime 1 506.25 514.25
## + as.factor(guardian) 2 504.45 514.45
## + Fedu 1 506.51 514.51
## + as.factor(romantic) 1 507.06 515.06
## + as.factor(paid) 1 507.31 515.31
## + as.factor(internet) 1 507.39 515.39
## + as.factor(Mjob) 4 501.53 515.53
## + as.factor(nursery) 1 507.76 515.76
## + as.factor(Pstatus) 1 507.76 515.76
## + famrel 1 507.77 515.77
## + freetime 1 507.79 515.79
## + as.factor(activities) 1 507.79 515.79
## + as.factor(reason) 3 504.63 516.63
## + as.factor(Fjob) 4 504.82 518.82
##
## Step: AIC=506.76
## y_pass ~ as.factor(schoolsup) + Medu + goout
##
## Df Deviance AIC
## + absences 1 491.81 501.81
## + as.factor(address) 1 495.12 505.12
## + traveltime 1 495.94 505.94
## + as.factor(higher) 1 496.17 506.17
## + as.factor(famsize) 1 496.26 506.26
## + as.factor(famsup) 1 496.32 506.32
## <none> 498.76 506.76
## + as.factor(Mjob) 4 491.25 507.25
## + Walc 1 497.34 507.34
## + Fedu 1 497.35 507.35
## + studytime 1 497.56 507.56
## + as.factor(guardian) 2 495.71 507.71
## + freetime 1 497.73 507.73
## + as.factor(internet) 1 497.97 507.97
## + as.factor(romantic) 1 497.98 507.98
## + Dalc 1 498.28 508.28
## + as.factor(paid) 1 498.32 508.32
## + famrel 1 498.64 508.64
## + as.factor(Pstatus) 1 498.72 508.72
## + as.factor(nursery) 1 498.73 508.73
## + as.factor(activities) 1 498.73 508.73
## + as.factor(reason) 3 496.03 510.03
## + as.factor(Fjob) 4 496.46 512.46
##
## Step: AIC=501.81
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences
##
## Df Deviance AIC
## + as.factor(address) 1 488.66 500.66
## + as.factor(famsize) 1 488.91 500.91
## + traveltime 1 488.94 500.94
## + as.factor(famsup) 1 489.33 501.33
## <none> 491.81 501.81
## + as.factor(higher) 1 489.85 501.85
## + as.factor(Mjob) 4 483.97 501.97
## + as.factor(internet) 1 490.64 502.64
## + Fedu 1 490.74 502.74
## + studytime 1 490.98 502.98
## + Walc 1 491.08 503.08
## + freetime 1 491.15 503.15
## + as.factor(paid) 1 491.27 503.27
## + as.factor(romantic) 1 491.60 503.60
## + Dalc 1 491.60 503.60
## + famrel 1 491.77 503.77
## + as.factor(Pstatus) 1 491.79 503.79
## + as.factor(nursery) 1 491.79 503.79
## + as.factor(activities) 1 491.81 503.81
## + as.factor(guardian) 2 490.19 504.19
## + as.factor(reason) 3 488.59 504.59
## + as.factor(Fjob) 4 489.92 507.92
##
## Step: AIC=500.66
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address)
##
## Df Deviance AIC
## + as.factor(famsize) 1 486.05 500.05
## + as.factor(famsup) 1 486.19 500.19
## <none> 488.66 500.66
## + as.factor(higher) 1 486.82 500.82
## + traveltime 1 487.28 501.28
## + as.factor(Mjob) 4 481.40 501.40
## + Fedu 1 487.48 501.48
## + studytime 1 487.76 501.76
## + freetime 1 487.94 501.94
## + as.factor(paid) 1 488.06 502.06
## + as.factor(internet) 1 488.16 502.16
## + Walc 1 488.29 502.29
## + as.factor(romantic) 1 488.43 502.43
## + as.factor(reason) 3 484.48 502.48
## + as.factor(nursery) 1 488.60 502.60
## + as.factor(activities) 1 488.62 502.62
## + Dalc 1 488.63 502.63
## + famrel 1 488.63 502.63
## + as.factor(Pstatus) 1 488.65 502.65
## + as.factor(guardian) 2 486.98 502.98
## + as.factor(Fjob) 4 486.72 506.72
##
## Step: AIC=500.05
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(address) +
## as.factor(famsize)
##
## Df Deviance AIC
## <none> 486.05 500.05
## + as.factor(famsup) 1 484.06 500.06
## + as.factor(higher) 1 484.19 500.19
## + as.factor(Mjob) 4 478.22 500.22
## + traveltime 1 484.25 500.25
## + Fedu 1 484.75 500.75
## + studytime 1 484.93 500.93
## + freetime 1 485.34 501.34
## + as.factor(paid) 1 485.44 501.44
## + Walc 1 485.44 501.44
## + as.factor(internet) 1 485.49 501.49
## + as.factor(romantic) 1 485.78 501.78
## + as.factor(nursery) 1 485.86 501.86
## + as.factor(reason) 3 481.90 501.90
## + Dalc 1 485.90 501.90
## + as.factor(activities) 1 486.00 502.00
## + famrel 1 486.01 502.01
## + as.factor(Pstatus) 1 486.03 502.03
## + as.factor(guardian) 2 484.51 502.51
## + as.factor(Fjob) 4 484.00 506.00
# Automated Stepwise Regression AIC 536.75
step_model3_probit <- stepAIC(model_null_probit,
scope = list(lower = model_null_probit, upper = model_full_probit),
direction = "back", family=binomial(link=probit))
## Start: AIC=536.75
## y_pass ~ 1
# Automated Stepwise Regression Using BIC Criterion: AIC 521.71
step_model4_probit <- stepAIC(model_null_probit,
scope = list(lower = model_null_probit, upper = model_full_probit),
direction = "both", family=binomial(link=probit),
k = log(n))
## Start: AIC=540.73
## y_pass ~ 1
##
## Df Deviance AIC
## + as.factor(schoolsup) 1 520.21 532.17
## + Medu 1 521.53 533.49
## + Fedu 1 526.47 538.43
## + goout 1 528.20 540.16
## <none> 534.75 540.73
## + absences 1 529.14 541.10
## + traveltime 1 529.99 541.95
## + Walc 1 530.38 542.34
## + as.factor(higher) 1 530.56 542.51
## + as.factor(address) 1 530.71 542.67
## + as.factor(famsize) 1 532.78 544.74
## + as.factor(internet) 1 532.79 544.74
## + studytime 1 532.88 544.84
## + Dalc 1 533.16 545.12
## + as.factor(famsup) 1 533.54 545.50
## + as.factor(romantic) 1 534.54 546.50
## + freetime 1 534.60 546.56
## + as.factor(activities) 1 534.65 546.61
## + as.factor(nursery) 1 534.66 546.62
## + famrel 1 534.70 546.66
## + as.factor(Pstatus) 1 534.75 546.71
## + as.factor(paid) 1 534.75 546.71
## + as.factor(guardian) 2 531.69 549.63
## + as.factor(Mjob) 4 524.13 554.03
## + as.factor(reason) 3 530.71 554.63
## + as.factor(Fjob) 4 530.34 560.23
##
## Step: AIC=532.17
## y_pass ~ as.factor(schoolsup)
##
## Df Deviance AIC
## + Medu 1 507.80 525.73
## + Fedu 1 510.82 528.75
## + goout 1 512.62 530.56
## + Walc 1 514.03 531.97
## <none> 520.21 532.17
## + absences 1 514.67 532.60
## + traveltime 1 515.02 532.96
## + as.factor(higher) 1 515.10 533.04
## + as.factor(address) 1 515.79 533.72
## + studytime 1 518.15 536.09
## + Dalc 1 518.33 536.27
## + as.factor(internet) 1 518.36 536.29
## + as.factor(famsize) 1 518.57 536.51
## + as.factor(famsup) 1 519.66 537.59
## + as.factor(romantic) 1 519.66 537.60
## + as.factor(activities) 1 519.99 537.93
## + as.factor(nursery) 1 520.00 537.93
## + as.factor(Pstatus) 1 520.17 538.10
## + freetime 1 520.17 538.10
## + famrel 1 520.18 538.12
## + as.factor(paid) 1 520.20 538.13
## + as.factor(guardian) 2 516.14 540.06
## - as.factor(schoolsup) 1 534.75 540.73
## + as.factor(reason) 3 515.77 545.67
## + as.factor(Mjob) 4 510.68 546.56
## + as.factor(Fjob) 4 513.95 549.82
##
## Step: AIC=525.73
## y_pass ~ as.factor(schoolsup) + Medu
##
## Df Deviance AIC
## + goout 1 498.76 522.68
## + absences 1 500.09 524.01
## <none> 507.80 525.73
## + Walc 1 502.29 526.21
## + traveltime 1 504.80 528.71
## + as.factor(higher) 1 504.92 528.84
## + as.factor(address) 1 505.04 528.95
## + as.factor(famsup) 1 505.64 529.56
## + Dalc 1 505.69 529.60
## + as.factor(famsize) 1 505.69 529.61
## + studytime 1 506.25 530.16
## + Fedu 1 506.51 530.42
## + as.factor(romantic) 1 507.06 530.98
## + as.factor(paid) 1 507.31 531.23
## + as.factor(internet) 1 507.39 531.30
## + as.factor(nursery) 1 507.76 531.67
## + as.factor(Pstatus) 1 507.76 531.68
## + famrel 1 507.77 531.69
## + freetime 1 507.79 531.70
## + as.factor(activities) 1 507.79 531.71
## - Medu 1 520.21 532.17
## - as.factor(schoolsup) 1 521.53 533.49
## + as.factor(guardian) 2 504.45 534.34
## + as.factor(reason) 3 504.63 540.51
## + as.factor(Mjob) 4 501.53 543.38
## + as.factor(Fjob) 4 504.82 546.68
##
## Step: AIC=522.68
## y_pass ~ as.factor(schoolsup) + Medu + goout
##
## Df Deviance AIC
## + absences 1 491.81 521.71
## <none> 498.76 522.68
## + as.factor(address) 1 495.12 525.01
## - goout 1 507.80 525.73
## + traveltime 1 495.94 525.83
## + as.factor(higher) 1 496.17 526.06
## + as.factor(famsize) 1 496.26 526.16
## + as.factor(famsup) 1 496.32 526.22
## + Walc 1 497.34 527.23
## + Fedu 1 497.35 527.25
## + studytime 1 497.56 527.46
## + freetime 1 497.73 527.63
## + as.factor(internet) 1 497.97 527.86
## + as.factor(romantic) 1 497.98 527.87
## + Dalc 1 498.28 528.17
## + as.factor(paid) 1 498.32 528.21
## + famrel 1 498.64 528.53
## + as.factor(Pstatus) 1 498.72 528.62
## + as.factor(nursery) 1 498.73 528.63
## + as.factor(activities) 1 498.73 528.63
## - Medu 1 512.62 530.56
## - as.factor(schoolsup) 1 513.36 531.30
## + as.factor(guardian) 2 495.71 531.58
## + as.factor(reason) 3 496.03 537.88
## + as.factor(Mjob) 4 491.25 539.08
## + as.factor(Fjob) 4 496.46 544.29
##
## Step: AIC=521.71
## y_pass ~ as.factor(schoolsup) + Medu + goout + absences
##
## Df Deviance AIC
## <none> 491.81 521.71
## - absences 1 498.76 522.68
## - goout 1 500.09 524.01
## + as.factor(address) 1 488.66 524.54
## + as.factor(famsize) 1 488.91 524.79
## + traveltime 1 488.94 524.82
## + as.factor(famsup) 1 489.33 525.20
## + as.factor(higher) 1 489.85 525.72
## + as.factor(internet) 1 490.64 526.51
## + Fedu 1 490.74 526.61
## + studytime 1 490.98 526.85
## + Walc 1 491.08 526.95
## + freetime 1 491.15 527.03
## + as.factor(paid) 1 491.27 527.15
## + as.factor(romantic) 1 491.60 527.48
## + Dalc 1 491.60 527.48
## + famrel 1 491.77 527.64
## + as.factor(Pstatus) 1 491.79 527.66
## + as.factor(nursery) 1 491.79 527.66
## + as.factor(activities) 1 491.81 527.68
## - as.factor(schoolsup) 1 506.47 530.39
## - Medu 1 507.69 531.61
## + as.factor(guardian) 2 490.19 532.05
## + as.factor(reason) 3 488.59 536.42
## + as.factor(Mjob) 4 483.97 537.78
## + as.factor(Fjob) 4 489.92 543.73
# Using the BIC Criterion creates a model with a higher AIC, predictors: schoolsup, Medu, goout, absences
## From stepwise regression, step_model1_probit seems to be the best fit of probit models
# Significant predictors (p-val<0.05): school_supp, Medu, goout, absences
summary(step_model1_probit)
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(address) + as.factor(famsize), family = binomial(link = probit),
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.376979 0.274384 -1.374 0.169470
## as.factor(schoolsup)yes -0.811741 0.223711 -3.629 0.000285 ***
## Medu 0.237768 0.062342 3.814 0.000137 ***
## goout -0.185231 0.061324 -3.021 0.002523 **
## absences -0.024455 0.009761 -2.505 0.012232 *
## as.factor(address)U 0.276286 0.164708 1.677 0.093459 .
## as.factor(famsize)LE3 0.235701 0.146219 1.612 0.106967
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 486.05 on 388 degrees of freedom
## AIC: 500.05
##
## Number of Fisher Scoring iterations: 4
##### Supports significant predictors found in logit model 1: schoolsup, Medu, goout, absences
model3.probit <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences, data = mat, family = binomial(link=probit))
######## Pearson Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
pearson_chi3 <- sum(residuals(model3.probit, type = "pearson")^2)
df3 <- model3.probit$df.residual
p_value3 <- pchisq(pearson_chi3, df3, lower.tail = FALSE)
crit_val3 <- qchisq(.95,df=df3-2)
cat("Model 3 - Pearson Chi-square:", pearson_chi3, "critical value:", crit_val3, "df:", df3, "p-value:", p_value3, "\n") #p-val: 0.453 --> Can't reject null that model is fitting
## Model 3 - Pearson Chi-square: 392.6587 critical value: 434.9289 df: 390 p-value: 0.4527007
pi_hats3 <- predict.glm(model3.probit)
######## Hosmer-Lemeshow Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
hos_test3 <- hoslem.test(model3.probit$y,predict.glm(model3.probit, mat,type="response"),5)
obs3 <- hos_test3$observed
exp3 <- hos_test3$expected
crit_val3 <- qchisq(.95,4-2)
crit_val3
## [1] 5.991465
p_val3 <- pchisq(hos_test3$statistic, 4-2,lower.tail=F)
p_val3 ##p_val = 0.207 >0.05 --> fail to reject the null hypothesis that model is fitting
## X-squared
## 0.2070925
# Probit model on Model 2
model4.probit <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences +
as.factor(Mjob) + Medu * as.factor(Mjob),
data = mat, family = binomial(link = "probit"))
######## Pearson Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
pearson_chi4 <- sum(residuals(model4.probit, type = "pearson")^2)
df4 <- model4.probit$df.residual
p_value4 <- pchisq(pearson_chi4, df4, lower.tail = FALSE)
crit_val4 <- qchisq(.95,df=df4-2)
cat("Model 4 - Pearson Chi-square:", pearson_chi4, "critical value:", crit_val4, "df:", df4, "p-value:", p_value4, "\n") #p-val: 0.453 --> Can't reject null that model is fitting
## Model 4 - Pearson Chi-square: 385.0345 critical value: 426.4537 df: 382 p-value: 0.4468596
pi_hats4 <- predict.glm(model4.probit)
#Hosmer-Lemeshow Goodness-of-Fit to compare probit and logit
# Hosmer-Lemeshow GOF for logit model (with 5 groups)
hoslem_test_logit <- hoslem.test(mat$y_pass,
predict(model2.logit, type = "response"),
g = 5)
obs_logit <- hoslem_test_logit$observed
exp_logit <- hoslem_test_logit$expected
HL_stat_logit <- sum((obs_logit - exp_logit)^2 / exp_logit)
crit_val_logit <- qchisq(0.95, 5 - 2)
p_val_logit <- pchisq(HL_stat_logit, 5 - 2, lower.tail = FALSE)
cat("Logit HL stat =", HL_stat_logit, "\nCritical value =", crit_val_logit,
"\nP-value =", p_val_logit, "\n")
## Logit HL stat = 0.6047318
## Critical value = 7.814728
## P-value = 0.8953483
# Hosmer-Lemeshow GOF for probit model (with 5 groups)
hoslem_test_probit <- hoslem.test(mat$y_pass,
predict(model4.probit, type = "response"),
g = 5)
obs_probit <- hoslem_test_probit$observed
exp_probit <- hoslem_test_probit$expected
HL_stat_probit <- sum((obs_probit - exp_probit)^2 / exp_probit)
crit_val_probit <- qchisq(0.95, 5 - 2)
p_val_probit <- pchisq(HL_stat_probit, 5 - 2, lower.tail = FALSE)
cat("Probit HL stat =", HL_stat_probit, "\nCritical value =", crit_val_probit,
"\nP-value =", p_val_probit, "\n")
## Probit HL stat = 1.681302
## Critical value = 7.814728
## P-value = 0.6410988
### Both models are good, AIC and Hosmer-Lemeshow are very close factors. Thus, for simplicity,
#choose the logistic model. With the model, school supplies, Mother education, going out,
# absences, and Mother job explained final performance the best.
Logit and probit models are comparable, but logit models have a consistently lower AIC and thus likely a better fit to the data. From this point on, only the logit models will be considered.
######################### Manually Checking Interaction Terms:##################
############################ For Model 2 Logit: #########################
# Checking goout * absences from Model 2
model2_gooutxabsences <- glm(y_pass ~ as.factor(schoolsup) * as.factor(Mjob) + Medu +
goout + absences + goout*absences, family = binomial, data = mat)
summary(model2_gooutxabsences) #AIC 509.3
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) * as.factor(Mjob) +
## Medu + goout + absences + goout * absences, family = binomial,
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -3.931e-01 5.341e-01 -0.736
## as.factor(schoolsup)yes -6.775e-01 8.762e-01 -0.773
## as.factor(Mjob)health 6.045e-01 5.558e-01 1.088
## as.factor(Mjob)other 1.411e-01 3.772e-01 0.374
## as.factor(Mjob)services 3.194e-01 4.085e-01 0.782
## as.factor(Mjob)teacher -4.998e-01 5.121e-01 -0.976
## Medu 4.594e-01 1.348e-01 3.408
## goout -3.224e-01 1.277e-01 -2.525
## absences -5.196e-02 4.493e-02 -1.157
## as.factor(schoolsup)yes:as.factor(Mjob)health -1.504e+01 1.027e+03 -0.015
## as.factor(schoolsup)yes:as.factor(Mjob)other -9.879e-01 1.101e+00 -0.897
## as.factor(schoolsup)yes:as.factor(Mjob)services -5.753e-01 1.084e+00 -0.531
## as.factor(schoolsup)yes:as.factor(Mjob)teacher -1.454e+01 7.992e+02 -0.018
## goout:absences 4.119e-03 1.406e-02 0.293
## Pr(>|z|)
## (Intercept) 0.461685
## as.factor(schoolsup)yes 0.439380
## as.factor(Mjob)health 0.276751
## as.factor(Mjob)other 0.708334
## as.factor(Mjob)services 0.434370
## as.factor(Mjob)teacher 0.329115
## Medu 0.000654 ***
## goout 0.011572 *
## absences 0.247476
## as.factor(schoolsup)yes:as.factor(Mjob)health 0.988318
## as.factor(schoolsup)yes:as.factor(Mjob)other 0.369724
## as.factor(schoolsup)yes:as.factor(Mjob)services 0.595730
## as.factor(schoolsup)yes:as.factor(Mjob)teacher 0.985485
## goout:absences 0.769652
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 481.30 on 381 degrees of freedom
## AIC: 509.3
##
## Number of Fisher Scoring iterations: 14
# Checking Medu * go out from Model 2
model2_Meduxgoout <- glm(y_pass ~ as.factor(schoolsup) * as.factor(Mjob) + Medu +
goout + absences + Medu*goout, family = binomial, data = mat)
summary(model2_Meduxgoout) #AIC 509.35
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) * as.factor(Mjob) +
## Medu + goout + absences + Medu * goout, family = binomial,
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -0.30007 0.95106 -0.316
## as.factor(schoolsup)yes -0.69487 0.87842 -0.791
## as.factor(Mjob)health 0.60595 0.55596 1.090
## as.factor(Mjob)other 0.14514 0.37725 0.385
## as.factor(Mjob)services 0.32772 0.40971 0.800
## as.factor(Mjob)teacher -0.48836 0.51271 -0.953
## Medu 0.40081 0.32806 1.222
## goout -0.35477 0.29541 -1.201
## absences -0.04027 0.01683 -2.393
## as.factor(schoolsup)yes:as.factor(Mjob)health -15.01217 1027.95470 -0.015
## as.factor(schoolsup)yes:as.factor(Mjob)other -0.96499 1.10291 -0.875
## as.factor(schoolsup)yes:as.factor(Mjob)services -0.56313 1.08356 -0.520
## as.factor(schoolsup)yes:as.factor(Mjob)teacher -14.50877 797.80575 -0.018
## Medu:goout 0.01974 0.09923 0.199
## Pr(>|z|)
## (Intercept) 0.7524
## as.factor(schoolsup)yes 0.4289
## as.factor(Mjob)health 0.2758
## as.factor(Mjob)other 0.7004
## as.factor(Mjob)services 0.4238
## as.factor(Mjob)teacher 0.3408
## Medu 0.2218
## goout 0.2298
## absences 0.0167 *
## as.factor(schoolsup)yes:as.factor(Mjob)health 0.9883
## as.factor(schoolsup)yes:as.factor(Mjob)other 0.3816
## as.factor(schoolsup)yes:as.factor(Mjob)services 0.6033
## as.factor(schoolsup)yes:as.factor(Mjob)teacher 0.9855
## Medu:goout 0.8423
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 481.35 on 381 degrees of freedom
## AIC: 509.35
##
## Number of Fisher Scoring iterations: 14
# Checking as.factor(schoolsup) * Medu from Model 2
model2_schoolsupxMedu <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout +
absences + as.factor(Mjob) + as.factor(schoolsup) * Medu, data = mat, family = binomial)
summary(model2_schoolsupxMedu) #AIC 502.33
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(Mjob) + as.factor(schoolsup) * Medu,
## family = binomial, data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.543044 0.482244 -1.126 0.260133
## as.factor(schoolsup)yes 0.001982 1.083571 0.002 0.998541
## Medu 0.526274 0.141776 3.712 0.000206 ***
## goout -0.293673 0.101701 -2.888 0.003882 **
## absences -0.041513 0.016898 -2.457 0.014022 *
## as.factor(Mjob)health 0.383367 0.539408 0.711 0.477259
## as.factor(Mjob)other 0.026291 0.357392 0.074 0.941358
## as.factor(Mjob)services 0.245506 0.386966 0.634 0.525795
## as.factor(Mjob)teacher -0.704687 0.503694 -1.399 0.161801
## as.factor(schoolsup)yes:Medu -0.535402 0.391995 -1.366 0.171989
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 482.33 on 385 degrees of freedom
## AIC: 502.33
##
## Number of Fisher Scoring iterations: 4
# Checking Medu * goout from Model 2
model2_Meduxgoout <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout +
absences + as.factor(Mjob) + Medu * goout, data = mat, family = binomial)
summary(model2_Meduxgoout) #AIC 504.1
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(Mjob) + Medu * goout, family = binomial,
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.16312 0.94387 -0.173 0.862796
## as.factor(schoolsup)yes -1.46104 0.40121 -3.642 0.000271 ***
## Medu 0.38825 0.32601 1.191 0.233694
## goout -0.38045 0.29534 -1.288 0.197692
## absences -0.04032 0.01672 -2.411 0.015919 *
## as.factor(Mjob)health 0.44638 0.53599 0.833 0.404955
## as.factor(Mjob)other 0.04017 0.35876 0.112 0.910857
## as.factor(Mjob)services 0.26198 0.38857 0.674 0.500180
## as.factor(Mjob)teacher -0.62334 0.49991 -1.247 0.212432
## Medu:goout 0.02752 0.09864 0.279 0.780272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 484.10 on 385 degrees of freedom
## AIC: 504.1
##
## Number of Fisher Scoring iterations: 4
# Checking as.factor(schoolsup) * goout
model2_schoolsupxgoout <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout +
absences + as.factor(Mjob) + as.factor(schoolsup) * goout, data = mat, family = binomial)
summary(model2_schoolsupxgoout) #AIC: 503.8
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(Mjob) + as.factor(schoolsup) * goout,
## family = binomial, data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.46518 0.48208 -0.965 0.334575
## as.factor(schoolsup)yes -0.86552 1.02396 -0.845 0.397959
## Medu 0.47590 0.13518 3.521 0.000431 ***
## goout -0.28342 0.10601 -2.674 0.007505 **
## absences -0.03976 0.01668 -2.383 0.017166 *
## as.factor(Mjob)health 0.44775 0.53624 0.835 0.403726
## as.factor(Mjob)other 0.03327 0.35814 0.093 0.925980
## as.factor(Mjob)services 0.25633 0.38710 0.662 0.507857
## as.factor(Mjob)teacher -0.63558 0.49949 -1.272 0.203207
## as.factor(schoolsup)yes:goout -0.21166 0.34800 -0.608 0.543048
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 483.80 on 385 degrees of freedom
## AIC: 503.8
##
## Number of Fisher Scoring iterations: 4
# Checking as.factor(Mjob) * Medu
model2_MjobxMedu <- glm(y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(Mjob) + as.factor(Mjob) * Medu, data = mat, family = binomial)
summary(model2_MjobxMedu) #AIC:498.84 --> LOWERS AIC
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(Mjob) + as.factor(Mjob) * Medu, family = binomial,
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.89124 0.71316 1.250 0.211408
## as.factor(schoolsup)yes -1.41320 0.40377 -3.500 0.000465 ***
## Medu -0.29139 0.37251 -0.782 0.434073
## goout -0.30763 0.10289 -2.990 0.002790 **
## absences -0.03884 0.01671 -2.324 0.020100 *
## as.factor(Mjob)health -6.59602 3.50604 -1.881 0.059927 .
## as.factor(Mjob)other -1.55322 0.83184 -1.867 0.061874 .
## as.factor(Mjob)services -0.75082 0.98677 -0.761 0.446724
## as.factor(Mjob)teacher -59.68389 3319.80671 -0.018 0.985656
## Medu:as.factor(Mjob)health 2.31714 0.97873 2.367 0.017909 *
## Medu:as.factor(Mjob)other 0.88843 0.41794 2.126 0.033527 *
## Medu:as.factor(Mjob)services 0.66606 0.44724 1.489 0.136422
## Medu:as.factor(Mjob)teacher 15.22040 829.95177 0.018 0.985368
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 472.84 on 382 degrees of freedom
## AIC: 498.84
##
## Number of Fisher Scoring iterations: 14
######## Determining whether the significant interaction term adds to Model 2 #######
## Likelihood Ratio Test: Does adding Mjob * Medu significantly improve the model fit?
# H0: The simpler model without the interaction term is sufficient.
# Ha: The simpler model is not sufficient without the interaction term.
anova(model2.logit, model2_MjobxMedu, test = "LRT") #p-val: 0.02297<0.05
## Analysis of Deviance Table
##
## Model 1: y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(Mjob)
## Model 2: y_pass ~ as.factor(schoolsup) + Medu + goout + absences + as.factor(Mjob) +
## as.factor(Mjob) * Medu
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 386 484.18
## 2 382 472.84 4 11.343 0.02297 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Defining model2_MjobxMedu as model5.interaction
model5.interaction <- model2_MjobxMedu
summary(model5.interaction)
##
## Call:
## glm(formula = y_pass ~ as.factor(schoolsup) + Medu + goout +
## absences + as.factor(Mjob) + as.factor(Mjob) * Medu, family = binomial,
## data = mat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.89124 0.71316 1.250 0.211408
## as.factor(schoolsup)yes -1.41320 0.40377 -3.500 0.000465 ***
## Medu -0.29139 0.37251 -0.782 0.434073
## goout -0.30763 0.10289 -2.990 0.002790 **
## absences -0.03884 0.01671 -2.324 0.020100 *
## as.factor(Mjob)health -6.59602 3.50604 -1.881 0.059927 .
## as.factor(Mjob)other -1.55322 0.83184 -1.867 0.061874 .
## as.factor(Mjob)services -0.75082 0.98677 -0.761 0.446724
## as.factor(Mjob)teacher -59.68389 3319.80671 -0.018 0.985656
## Medu:as.factor(Mjob)health 2.31714 0.97873 2.367 0.017909 *
## Medu:as.factor(Mjob)other 0.88843 0.41794 2.126 0.033527 *
## Medu:as.factor(Mjob)services 0.66606 0.44724 1.489 0.136422
## Medu:as.factor(Mjob)teacher 15.22040 829.95177 0.018 0.985368
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 534.75 on 394 degrees of freedom
## Residual deviance: 472.84 on 382 degrees of freedom
## AIC: 498.84
##
## Number of Fisher Scoring iterations: 14
######## Pearson Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
pearson_chi5 <- sum(residuals(model5.interaction, type = "pearson")^2)
df5 <- model5.interaction$df.residual
p_value5 <- pchisq(pearson_chi5, df5, lower.tail = FALSE)
crit_val5 <- qchisq(.95,df=df5-2)
cat("Model 5 - Pearson Chi-square:", pearson_chi5, "critical value:", crit_val5, "df:", df5, "p-value:", p_value5, "\n") #p-val: 0.435 --> Can't reject null that model is fitting
## Model 5 - Pearson Chi-square: 385.8941 critical value: 426.4537 df: 382 p-value: 0.4346656
pi_hats5 <- predict.glm(model5.interaction)
######## Hosmer-Lemeshow Goodness of Fit Test
# H0: Model accurately fits observed data; Ha: Model does not accurately fit observed data.
hos_test5 <- hoslem.test(model5.interaction$y,predict.glm(model5.interaction, mat,type="response"),5)
obs5 <- hos_test5$observed
exp5 <- hos_test5$expected
crit_val5 <- qchisq(.95,5-2)
crit_val5
## [1] 7.814728
p_val5 <- pchisq(hos_test5$statistic, 5-2,lower.tail=F)
p_val5 ##p_val = 0.656 >0.05 --> fail to reject the null hypothesis that model is fitting
## X-squared
## 0.655625
##### Logistic Model Assumptions:
#1. Dependent variable is binary (pass(1) vs. fail(0)).
#2. Observations are collected independently of each other.
#3. No to minimal multicollinearity among independent variables.
vif(model1.logit) # all are around 1, which is less than 5 --> minimal multicollinearity
## as.factor(schoolsup) Medu goout
## 1.006362 1.024805 1.017622
## absences
## 1.020360
vif(model2.logit) # all are around 1, which is less than 5 --> minimal multicollinearity
## GVIF Df GVIF^(1/(2*Df))
## as.factor(schoolsup) 1.029432 1 1.014609
## Medu 1.736719 1 1.317846
## goout 1.032023 1 1.015885
## absences 1.033095 1 1.016413
## as.factor(Mjob) 1.746609 4 1.072197
vif(model3.probit) # all are around 1, which is less than 5 --> minimal multicollinearity
## as.factor(schoolsup) Medu goout
## 1.004869 1.020436 1.014400
## absences
## 1.017817
#4. A linear relationship exists between the independent variables and the log-odds of the dependent variable
###Box-Tidwell Test: determines whether transformations are needed on continuous variables
#Ho: There is a linear relationship btwn predictor and log(odds of response)
#Ha: There is no linear relationship btwn predictor and log(odds of response)
mat$Medu_adj <- mat$Medu + 1
mat$absences_adj <- mat$absences + 1
boxTidwell(y_pass ~ Medu_adj + goout + absences_adj, data = mat,max.iter=50)
## Warning in boxTidwell.default(y, X1, X2, max.iter = max.iter, tol = tol, :
## maximum iterations exceeded
## MLE of lambda Score Statistic (t) Pr(>|t|)
## Medu_adj 2.1059 0.7864 0.4321
## goout 2.6089 -1.5654 0.1183
## absences_adj 1.3278 -0.6437 0.5201
##
## iterations = 51
##
## Score test for null hypothesis that all lambdas = 1:
## F = 1.2868, df = 3 and 388, Pr(>F) = 0.2785
#p-val:0.2785 --> can't reject that there is a linear relationship btwn predictor and log(odds of response)
#5. Sample size is 395, which is sufficiently large.
### Converting log odds to normal odds
odds_ratios <- exp(coef(model5.interaction))
odds_ratios
## (Intercept) as.factor(schoolsup)yes
## 2.438142e+00 2.433626e-01
## Medu goout
## 7.472218e-01 7.351837e-01
## absences as.factor(Mjob)health
## 9.619015e-01 1.365791e-03
## as.factor(Mjob)other as.factor(Mjob)services
## 2.115662e-01 4.719771e-01
## as.factor(Mjob)teacher Medu:as.factor(Mjob)health
## 1.201203e-26 1.014660e+01
## Medu:as.factor(Mjob)other Medu:as.factor(Mjob)services
## 2.431309e+00 1.946545e+00
## Medu:as.factor(Mjob)teacher
## 4.075092e+06
n <- nrow(mat)
# 1. Leverage points
length(coef(model5.interaction))
## [1] 13
leverage <- hatvalues(model5.interaction)
high_leverage_obs <- which(leverage > 0.0962) #0.0962 = 2*9/395
mat[high_leverage_obs, ]# shows the details of the 277th observation
## address famsize Pstatus Medu Fedu Mjob Fjob reason guardian
## 115 R GT3 T 2 1 health services reputation mother
## 128 U GT3 T 0 1 at_home other course other
## 147 U GT3 T 3 2 health services home father
## 189 U GT3 A 3 3 health other reputation mother
## 198 R LE3 T 3 3 teacher other home father
## 277 R GT3 A 3 2 other services home mother
## 282 U LE3 A 3 2 teacher services home mother
## 296 U GT3 T 3 3 health other home mother
## 304 U GT3 T 3 2 health health reputation father
## 325 U LE3 T 0 2 at_home at_home home father
## 352 U GT3 T 3 3 health other course mother
## 364 U LE3 T 4 4 at_home at_home course mother
## 389 U LE3 T 3 1 teacher services course mother
## traveltime studytime schoolsup famsup paid activities nursery higher
## 115 1 2 no no no yes yes yes
## 128 1 2 no yes no no no no
## 147 1 2 no yes no no yes yes
## 189 1 2 no yes no no no yes
## 198 3 1 no yes yes yes yes yes
## 277 2 2 no no no no no no
## 282 1 1 no no no no yes yes
## 296 1 1 no yes yes no yes yes
## 304 1 4 no yes yes yes no yes
## 325 2 3 no no no no yes yes
## 352 2 2 no yes yes no yes yes
## 364 1 2 no yes yes yes yes yes
## 389 1 2 no yes yes no yes yes
## internet romantic famrel freetime goout Dalc Walc absences y_pass Medu_adj
## 115 yes yes 5 4 2 1 1 8 0 3
## 128 no no 3 4 2 1 1 2 0 1
## 147 yes no 3 3 2 1 1 0 0 4
## 189 yes yes 3 3 3 1 3 6 0 4
## 198 yes no 3 3 4 3 5 8 0 4
## 277 yes yes 4 1 1 1 1 75 0 4
## 282 yes no 4 4 4 3 4 19 0 4
## 296 yes no 4 4 3 1 3 4 0 4
## 304 yes no 5 2 2 1 2 0 1 4
## 325 yes no 3 3 3 2 3 0 1 1
## 352 yes no 4 5 4 2 3 2 1 4
## 364 yes yes 2 3 4 1 1 0 1 5
## 389 yes no 4 3 4 1 1 0 0 4
## absences_adj
## 115 9
## 128 3
## 147 1
## 189 7
## 198 9
## 277 76
## 282 20
## 296 5
## 304 1
## 325 1
## 352 3
## 364 1
## 389 1
leverage #the 277th student is an outlier
## 1 2 3 4 5 6 7
## 0.04058564 0.03146899 0.03557356 0.03153174 0.01508999 0.03274560 0.01390466
## 8 9 10 11 12 13 14
## 0.04467583 0.01595798 0.02231262 0.02147394 0.02550955 0.03464832 0.02030136
## 15 16 17 18 19 20 21
## 0.01523081 0.04137353 0.03083871 0.03428446 0.02218808 0.03578240 0.02822080
## 22 23 24 25 26 27 28
## 0.03045839 0.02793080 0.01390466 0.04034667 0.03147361 0.01418677 0.04137353
## 29 30 31 32 33 34 35
## 0.02997921 0.02674738 0.03045839 0.03172352 0.02303019 0.01640835 0.01388250
## 36 37 38 39 40 41 42
## 0.02270297 0.02030136 0.02872982 0.03770903 0.03656291 0.02578816 0.01977585
## 43 44 45 46 47 48 49
## 0.03149156 0.05460326 0.01675191 0.05687633 0.01858116 0.03288500 0.02030136
## 50 51 52 53 54 55 56
## 0.05009569 0.02265836 0.03464832 0.05174569 0.05332741 0.01292071 0.01132162
## 57 58 59 60 61 62 63
## 0.03088153 0.02188963 0.02429605 0.03149156 0.04263602 0.02731213 0.01775091
## 64 65 66 67 68 69 70
## 0.03082777 0.05332741 0.02030136 0.02824696 0.02997921 0.04834435 0.01840030
## 71 72 73 74 75 76 77
## 0.01640835 0.02901431 0.01573455 0.01551868 0.02901731 0.01945813 0.01977585
## 78 79 80 81 82 83 84
## 0.01234008 0.03590729 0.06125915 0.02630992 0.02491016 0.01422332 0.02206417
## 85 86 87 88 89 90 91
## 0.02693931 0.03446910 0.02320960 0.02824696 0.02937081 0.02776942 0.01388250
## 92 93 94 95 96 97 98
## 0.04301042 0.02780567 0.02147394 0.02233481 0.04129094 0.05380555 0.01707140
## 99 100 101 102 103 104 105
## 0.03082136 0.05015605 0.03580046 0.03238523 0.03096424 0.01985795 0.01682303
## 106 107 108 109 110 111 112
## 0.02508992 0.02303421 0.01292442 0.03714906 0.05077757 0.01945813 0.03979163
## 113 114 115 116 117 118 119
## 0.02766625 0.02382242 0.18065701 0.02258948 0.02844396 0.01640835 0.02010766
## 120 121 122 123 124 125 126
## 0.01171907 0.03209062 0.02233481 0.01418677 0.05945944 0.01390466 0.03043952
## 127 128 129 130 131 132 133
## 0.03789789 0.10778684 0.02374719 0.02542531 0.01595798 0.03317372 0.01247278
## 134 135 136 137 138 139 140
## 0.03041949 0.07728853 0.03238523 0.06089592 0.01640835 0.05835832 0.02303019
## 141 142 143 144 145 146 147
## 0.04455135 0.02233494 0.02222378 0.03251891 0.01867795 0.02830424 0.16542873
## 148 149 150 151 152 153 154
## 0.03523749 0.02303019 0.03001262 0.02421348 0.02236204 0.02185017 0.01682303
## 155 156 157 158 159 160 161
## 0.07202144 0.02007680 0.02544349 0.03286600 0.03003641 0.01896572 0.03130619
## 162 163 164 165 166 167 168
## 0.02335466 0.02308935 0.03209062 0.02421348 0.02225267 0.01592756 0.03381810
## 169 170 171 172 173 174 175
## 0.01867795 0.03045839 0.02200656 0.02693931 0.02408725 0.03739936 0.01896572
## 176 177 178 179 180 181 182
## 0.02159540 0.02370980 0.01335668 0.02056374 0.02824696 0.02056374 0.01292442
## 183 184 185 186 187 188 189
## 0.02642953 0.08081027 0.02051010 0.01519888 0.05994138 0.01234008 0.14289925
## 190 191 192 193 194 195 196
## 0.03146899 0.02316543 0.03317372 0.03114580 0.01507436 0.01234008 0.02642953
## 197 198 199 200 201 202 203
## 0.03096424 0.33148519 0.04861653 0.02303019 0.05017386 0.01862283 0.02031865
## 204 205 206 207 208 209 210
## 0.02287444 0.02600320 0.05361743 0.01193356 0.02382242 0.03530327 0.02204244
## 211 212 213 214 215 216 217
## 0.01286689 0.04143996 0.01958877 0.02537555 0.03827509 0.01431408 0.04674601
## 218 219 220 221 222 223 224
## 0.01585573 0.02475875 0.02429883 0.02616543 0.03400816 0.05242466 0.01234008
## 225 226 227 228 229 230 231
## 0.02303019 0.01824394 0.01354668 0.02265836 0.02430560 0.01129968 0.04738735
## 232 233 234 235 236 237 238
## 0.01366350 0.02083724 0.04047347 0.02919148 0.06308034 0.01366350 0.02557650
## 239 240 241 242 243 244 245
## 0.03906454 0.01390466 0.05969428 0.02030136 0.03019470 0.03088153 0.01390466
## 246 247 248 249 250 251 252
## 0.01028043 0.01366350 0.02218808 0.01203543 0.04559726 0.02388165 0.03328578
## 253 254 255 256 257 258 259
## 0.02688068 0.01523081 0.01523081 0.04403309 0.01945813 0.03827509 0.01132162
## 260 261 262 263 264 265 266
## 0.03256298 0.04787231 0.02222378 0.01590788 0.01192479 0.02684646 0.01898362
## 267 268 269 270 271 272 273
## 0.01527299 0.02104306 0.05487436 0.01867795 0.02002982 0.01592756 0.02250498
## 274 275 276 277 278 279 280
## 0.03523749 0.02533425 0.02523864 0.14274079 0.03043901 0.08264064 0.01977585
## 281 282 283 284 285 286 287
## 0.05898731 0.21622243 0.03550255 0.01468082 0.01592756 0.02250498 0.02395040
## 288 289 290 291 292 293 294
## 0.05789746 0.02233481 0.02011255 0.02456944 0.03045839 0.02442295 0.01457937
## 295 296 297 298 299 300 301
## 0.01507436 0.14649063 0.03989813 0.03718365 0.02901431 0.02190460 0.05194822
## 302 303 304 305 306 307 308
## 0.03031780 0.02901431 0.16542873 0.02229233 0.02233494 0.01419883 0.04775829
## 309 310 311 312 313 314 315
## 0.01388250 0.02562496 0.05835832 0.04920252 0.02643007 0.03283363 0.04038634
## 316 317 318 319 320 321 322
## 0.05552421 0.02374719 0.03139330 0.06796615 0.02258948 0.05220875 0.01648860
## 323 324 325 326 327 328 329
## 0.02556350 0.01350234 0.10798765 0.02829640 0.01953287 0.02553888 0.02100967
## 330 331 332 333 334 335 336
## 0.01962983 0.01707140 0.02357010 0.01682303 0.01234008 0.02619318 0.02218808
## 337 338 339 340 341 342 343
## 0.01414171 0.01640835 0.01191394 0.01192479 0.02278338 0.02147394 0.01343905
## 344 345 346 347 348 349 350
## 0.03992941 0.02429883 0.01181220 0.02319461 0.03019470 0.03381810 0.01804595
## 351 352 353 354 355 356 357
## 0.01896895 0.13693823 0.03135592 0.02031865 0.04301042 0.01682303 0.01962983
## 358 359 360 361 362 363 364
## 0.01292442 0.02603095 0.03588096 0.03400816 0.03209062 0.01595798 0.13657549
## 365 366 367 368 369 370 371
## 0.02830424 0.03148012 0.02303019 0.03709216 0.02684646 0.03211269 0.01458060
## 372 373 374 375 376 377 378
## 0.03172320 0.01057403 0.01815535 0.03266719 0.02693931 0.03578240 0.01962983
## 379 380 381 382 383 384 385
## 0.01388250 0.04724616 0.02159540 0.01030776 0.01113530 0.02830424 0.03392743
## 386 387 388 389 390 391 392
## 0.02533425 0.01955682 0.02642953 0.45229240 0.03709216 0.02311323 0.02127747
## 393 394 395
## 0.02193733 0.02091195 0.02110802
par(mfrow = c(2, 2))
plot(leverage, main = "Leverage", ylab = "Hat Values")
abline(h = 2 * length(coef(model5.interaction)) / n, col = "red", lty = 2)
# 2. Cook's Distance
cooks_d <- cooks.distance(model5.interaction)
n <- nrow(mat)
threshold <- 4 / n
influential_obs <- which(cooks_d > threshold)
mat[influential_obs, ] #number 40,81,113,126,213,223,276,325 are influential observations
## address famsize Pstatus Medu Fedu Mjob Fjob reason guardian
## 20 U LE3 T 4 3 health other home father
## 40 R GT3 T 2 2 at_home other reputation mother
## 81 U GT3 T 2 3 other services course father
## 113 U GT3 T 2 2 at_home other home mother
## 126 U GT3 T 3 4 services services home father
## 128 U GT3 T 0 1 at_home other course other
## 139 U LE3 T 1 1 services other course mother
## 147 U GT3 T 3 2 health services home father
## 213 U GT3 A 2 2 other other reputation mother
## 223 U GT3 T 2 3 services teacher other mother
## 250 U GT3 T 0 2 other other other mother
## 276 U LE3 T 2 2 services other course mother
## 297 U GT3 T 4 4 health other reputation other
## 302 U LE3 T 4 4 other teacher home father
## 304 U GT3 T 3 2 health health reputation father
## 325 U LE3 T 0 2 at_home at_home home father
## 352 U GT3 T 3 3 health other course mother
## 364 U LE3 T 4 4 at_home at_home course mother
## traveltime studytime schoolsup famsup paid activities nursery higher
## 20 1 1 no no yes yes yes yes
## 40 1 1 yes yes yes yes yes yes
## 81 1 1 yes yes yes yes no yes
## 113 1 2 yes no no yes yes yes
## 126 1 1 yes no no no yes yes
## 128 1 2 no yes no no no no
## 139 1 2 no no no no yes yes
## 147 1 2 no yes no no yes yes
## 213 1 2 yes yes yes no yes yes
## 223 1 2 yes no no no yes yes
## 250 1 1 no no yes no no yes
## 276 2 2 yes yes yes no yes yes
## 297 2 2 no yes yes yes yes yes
## 302 2 1 no no yes no yes yes
## 304 1 4 no yes yes yes no yes
## 325 2 3 no no no no yes yes
## 352 2 2 no yes yes no yes yes
## 364 1 2 no yes yes yes yes yes
## internet romantic famrel freetime goout Dalc Walc absences y_pass Medu_adj
## 20 yes no 3 1 3 1 3 4 0 5
## 40 no no 4 3 1 1 1 8 1 3
## 81 yes yes 3 2 2 1 3 2 1 3
## 113 yes no 3 1 2 1 1 6 1 3
## 126 yes no 5 5 5 3 2 0 1 4
## 128 no no 3 4 2 1 1 2 0 1
## 139 no yes 4 4 4 1 3 0 1 2
## 147 yes no 3 3 2 1 1 0 0 4
## 213 yes no 3 3 4 1 1 0 1 3
## 223 yes no 2 3 1 1 1 2 1 3
## 250 yes no 4 3 2 2 4 0 1 1
## 276 yes yes 4 4 4 2 3 6 1 3
## 297 yes no 2 3 4 2 3 0 0 5
## 302 yes no 4 1 1 2 2 0 0 5
## 304 yes no 5 2 2 1 2 0 1 4
## 325 yes no 3 3 3 2 3 0 1 1
## 352 yes no 4 5 4 2 3 2 1 4
## 364 yes yes 2 3 4 1 1 0 1 5
## absences_adj
## 20 5
## 40 9
## 81 3
## 113 7
## 126 1
## 128 3
## 139 1
## 147 1
## 213 1
## 223 3
## 250 1
## 276 7
## 297 1
## 302 1
## 304 1
## 325 1
## 352 3
## 364 1
plot(cooks_d, main = "Cook's Distance", ylab = "Cook's D")
abline(h = 4 / n, col = "red", lty = 2)
# 3. Standardized (Pearson) residuals
std_resid <- rstandard(model5.interaction)
which(abs(std_resid) > 2) #potential outlier
## 113 213 276
## 113 213 276
mat[which(abs(std_resid) > 2), ] #213, 276 are potential outliers
## address famsize Pstatus Medu Fedu Mjob Fjob reason guardian
## 113 U GT3 T 2 2 at_home other home mother
## 213 U GT3 A 2 2 other other reputation mother
## 276 U LE3 T 2 2 services other course mother
## traveltime studytime schoolsup famsup paid activities nursery higher
## 113 1 2 yes no no yes yes yes
## 213 1 2 yes yes yes no yes yes
## 276 2 2 yes yes yes no yes yes
## internet romantic famrel freetime goout Dalc Walc absences y_pass Medu_adj
## 113 yes no 3 1 2 1 1 6 1 3
## 213 yes no 3 3 4 1 1 0 1 3
## 276 yes yes 4 4 4 2 3 6 1 3
## absences_adj
## 113 7
## 213 1
## 276 7
which(abs(std_resid) > 3) #likely outlier
## named integer(0)
mat[which(abs(std_resid) > 3), ] #no likely outlier
## [1] address famsize Pstatus Medu Fedu
## [6] Mjob Fjob reason guardian traveltime
## [11] studytime schoolsup famsup paid activities
## [16] nursery higher internet romantic famrel
## [21] freetime goout Dalc Walc absences
## [26] y_pass Medu_adj absences_adj
## <0 rows> (or 0-length row.names)
plot(std_resid, main = "Standardized Residuals")
abline(h = c(-2, 2), col = "red", lty = 2)
# 4. Deviance residuals
dev_resid <- residuals(model5.interaction, type = "deviance")
which(abs(dev_resid) > 2) #potential outlier
## 113 213 276
## 113 213 276
mat[which(abs(dev_resid) > 2), ] #213, 276 are potential outliers
## address famsize Pstatus Medu Fedu Mjob Fjob reason guardian
## 113 U GT3 T 2 2 at_home other home mother
## 213 U GT3 A 2 2 other other reputation mother
## 276 U LE3 T 2 2 services other course mother
## traveltime studytime schoolsup famsup paid activities nursery higher
## 113 1 2 yes no no yes yes yes
## 213 1 2 yes yes yes no yes yes
## 276 2 2 yes yes yes no yes yes
## internet romantic famrel freetime goout Dalc Walc absences y_pass Medu_adj
## 113 yes no 3 1 2 1 1 6 1 3
## 213 yes no 3 3 4 1 1 0 1 3
## 276 yes yes 4 4 4 2 3 6 1 3
## absences_adj
## 113 7
## 213 1
## 276 7
which(abs(dev_resid) > 3) #likely outlier
## named integer(0)
mat[which(abs(dev_resid) > 3), ] #no likely outlier
## [1] address famsize Pstatus Medu Fedu
## [6] Mjob Fjob reason guardian traveltime
## [11] studytime schoolsup famsup paid activities
## [16] nursery higher internet romantic famrel
## [21] freetime goout Dalc Walc absences
## [26] y_pass Medu_adj absences_adj
## <0 rows> (or 0-length row.names)
plot(dev_resid, main = "Deviance Residuals")
high_leverage <- which(leverage > (2 * length(coef(model5.interaction)) / n))
high_cooks <- which(cooks_d > (4 / n))
high_resid <- which(abs(std_resid) > 2)
#option 1: plot y_pass against each predictor using the model1.logit while controlling for all other predictor. Flaw: great deviation because we're making so many assumptions on the baseline value
#1.0.1 Medu
plot(mat$Medu, mat$y_pass,
ylim=c(-.4,1.4),
xlab="Mother Education",
ylab="G3 Pass/Fail")
abline(h=1,lty=2,col="darkred")
text(2, 1.15, cex = 0.8, "G3 Passed")
abline(h=0,lty=2,col="darkblue")
text(1,-0.15,cex=0.8,"G3 Failed")
x <- seq(0,4,.01)
y <- predict.glm(model1.logit, data.frame(Medu=x, schoolsup = "yes",goout = 2,absences = mean(mat$absences, na.rm = TRUE)), type="response")
lines(x,y,lwd=1.5,col="purple")
agg <- aggregate(y_pass~Medu,mean,data=mat)
points(c(0,1,2,3,4),agg$y_pass)
#1.0.1.1 Medu different controls
plot(mat$Medu, mat$y_pass,
ylim=c(-.4,1.4),
xlab="Mother Education",
ylab="G3 Pass/Fail")
abline(h=1,lty=2,col="darkred")
text(2, 1.15, cex = 0.8, "G3 Passed")
abline(h=0,lty=2,col="darkblue")
text(1,-0.15,cex=0.8,"G3 Failed")
x <- seq(0,4,.01)
y <- predict.glm(model1.logit, data.frame(Medu=x, schoolsup = "no",goout = 2,absences = mean(mat$absences, na.rm = TRUE)), type="response")
lines(x,y,lwd=1.5,col="purple")
agg <- aggregate(y_pass~Medu,mean,data=mat)
points(c(0,1,2,3,4),agg$y_pass)
# option 1.1: plot every predictor against y using a simple logistic regression using only that predictor as the regressor. Flaw: doesn't make the most sense statistically and what we're trying to achieve.
#1.1.1 Medu
plot(mat$Medu, mat$y_pass,
ylim=c(-.4,1.4),
xlab="Mother Education",
ylab="G3 Pass/Fail")
abline(h=1,lty=2,col="darkred")
text(1,1.15,cex=0.8,"G3 Passed")
abline(h=0,lty=2,col="darkblue")
text(1,-0.15,cex=0.8,"G3 Failed")
x <- seq(0,4,.01)
model_Medu.logit <- glm(y_pass~ Medu, data = mat, family = binomial)
y <- predict.glm(model_Medu.logit, data.frame(Medu=x), type="response")
lines(x,y,lwd=1.5,col="purple")
agg <- aggregate(y_pass~Medu,mean,data=mat)
points(agg$Medu,agg$y_pass)
# 1.1.2 goout
plot(mat$goout, mat$y_pass,
ylim = c(-.4, 1.4),
xlab = "Going Out Frequency",
ylab = "G3 Pass/Fail")
abline(h = 1, lty = 2, col = "darkred")
text(2, 1.15, cex = 0.8, "G3 Passed")
abline(h = 0, lty = 2, col = "darkblue")
text(2, -0.15, cex = 0.8, "G3 Failed")
x <- seq(1, 5, .01)
model_goout.logit <- glm(y_pass ~ goout, data = mat, family = binomial)
y <- predict.glm(model_goout.logit, data.frame(goout = x), type = "response")
lines(x, y, lwd = 1.5, col = "purple")
agg <- aggregate(y_pass ~ goout, mean, data = mat)
points(agg$goout, agg$y_pass)
# 1.1.3 absences (continuous)
plot(mat$absences, mat$y_pass,
ylim = c(-.4, 1.4),
xlab = "Absences",
ylab = "G3 Pass/Fail")
abline(h = 1, lty = 2, col = "darkred")
text(10, 1.15, cex = 0.8, "G3 Passed")
abline(h = 0, lty = 2, col = "darkblue")
text(10, -0.15, cex = 0.8, "G3 Failed")
x <- seq(0, 80, .01)
model_absences.logit <- glm(y_pass ~ absences, data = mat, family = binomial)
y <- predict.glm(model_absences.logit, data.frame(absences = x), type = "response")
lines(x, y, lwd = 1.5, col = "purple")
agg <- aggregate(y_pass ~ absences, mean, data = mat)
points(agg$absences, agg$y_pass)