library("ggplot2")
library("gplots")
library("glmnet")
library("MASS")
library("tidyverse")
library("dplyr")
library("reshape")
library("ggpubr")
library("ggplot2")
library("glmnet")
library("reshape2")
library("heatmaply")
library("dummies")
library("dplyr")
library("tidyr")
library("caTools")
library("caret")
library("ROCR")
library("ggpubr")
library("glmnetUtils")
library("GGally")
library("glmnet")
library("dplyr")
library("ggplot2")
library("tidyr")
library("lars")
library("leaps")
library("gbm")
library("rpart")
library("corrplot")
library("Metrics")
library("rpart.plot")
library("randomForest")
pacman::p_load(tidyverse)
pacman::p_load(caret) 
pacman::p_load(rpart)
pacman::p_load(rpart.plot)
pacman::p_load(corrplot)
pacman::p_load(Metrics)

Reading Data

# Remove every object currently defined in the global environment.
# NOTE(review): this only clears objects; the packages attached above stay loaded.
rm(list=ls())
# Make the project folder the working directory so the relative CSV paths
# used by the read.csv() calls below resolve.
# NOTE(review): the absolute path is specific to one machine — confirm before
# running elsewhere (the notebook message below suggests knitr's root.dir).
setwd("/Users/kayhanbabakan/Dropbox/15071 Analytics Edge Team/Team Project")
The working directory was changed to /Users/kayhanbabakan/Dropbox/15071 Analytics Edge Team/Team Project inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
# Load the three student tables from the working directory. student-mat and
# student-por are semicolon-delimited; studentsinboth is comma-delimited.
students_math <- read.csv(file = "student-mat.csv", sep = ";")
students_por <- read.csv(file = "student-por.csv", sep = ";")
students_both <- read.csv(file = "studentsinboth.csv", sep = ",")

Pre-Processing

# Convert the categorical columns of students_math to factors in a single
# pass. Each name is listed exactly once, and assigning through
# `students_math[categorical_cols]` keeps every column in its original
# position in the data frame.
categorical_cols <- c(
  "school", "sex", "address", "famsize", "Pstatus",
  "Mjob", "Fjob", "reason", "guardian",
  "schoolsup", "famsup", "paid", "activities",
  "nursery", "higher", "internet", "romantic"
)
students_math[categorical_cols] <- lapply(
  students_math[categorical_cols],
  as.factor
)

Collinearity testing

# Drop G1 and G2, then discard every row that still contains a missing value.
students_mathonly <- na.omit(select(students_math, -c(G1, G2)))

# Expand the data frame into a numeric design matrix (factors become dummy
# columns) so that it can be passed to ggcorr().
students_mathonly2 <- model.matrix(~ ., data = students_mathonly)

# Correlation plot with the x-axis text rotated by 90 degrees.
corplot <- ggcorr(students_mathonly2, size = 2) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Bar chart with Walc on the x-axis and the Dalc values used as bar heights.
WalcxDalc <- ggplot(data = students_mathonly) +
  geom_bar(mapping = aes(x = Walc, y = Dalc), stat = "identity")

# Row counts per Medu level, with the bars filled by Fedu level.
MeduxFedu <- ggplot(data = students_mathonly) +
  geom_bar(
    mapping = aes(x = as.factor(Medu), fill = as.factor(Fedu)),
    stat = "count"
  )

Initial indications

# Share of students with G3 at or below the cut-off, per predictor value.
# Every proportion plot must use the same cut-off (G3 <= 15): ggarrange() is
# called with common.legend = TRUE, so a single legend is shown for the whole
# grid and a panel built with a different threshold would be mislabelled.
FailuresxG3 <- ggplot(students_mathonly, aes(x = failures, fill = G3 <= 15)) +
  geom_histogram(binwidth = 1, position = "fill")

FamrelxG3 <- ggplot(students_mathonly, aes(x = famrel, fill = G3 <= 15)) +
  geom_histogram(binwidth = 1, position = "fill")

GooutxG3 <- ggplot(students_mathonly, aes(x = goout, fill = G3 <= 15)) +
  geom_histogram(binwidth = 1, position = "fill")

StudytimexG3 <- ggplot(students_mathonly, aes(x = studytime, fill = G3 <= 15)) +
  geom_histogram(binwidth = 1, position = "fill")

# romantic is a factor, so a bar chart is used instead of a histogram.
RomanticxG3 <- ggplot(students_mathonly, aes(x = romantic, fill = G3 <= 15)) +
  geom_bar(position = "fill")

# Jittered scatter of G3 against absences; the layer inherits the plot data.
AbsencesxG3 <- ggplot(students_mathonly, aes(absences, G3)) +
  geom_jitter(color = "blue") +
  xlab("absences") +
  ylab("3Q Grades") +
  theme(axis.line = element_line(color = "black")) +
  border(color = "black", size = 1, linetype = 1)

# Arrange the six plots in one grid with one shared legend on top.
ggarrange(
  FailuresxG3, FamrelxG3, GooutxG3, StudytimexG3, RomanticxG3, AbsencesxG3,
  legend = "top",
  common.legend = TRUE
)

Linear Modeling

# Seed the RNG so the partition below is reproducible.
set.seed(1)

# Put 65% of the rows in the training set; the remaining rows form the test set.
split <- createDataPartition(students_mathonly$G3, p = 0.65, list = FALSE)
math.train <- students_mathonly[split, ]
math.test <- students_mathonly[-split, ]

# Gaussian GLM of G3 on all remaining columns, followed by step() selection.
math.lm <- glm(G3 ~ ., data = math.train, family = "gaussian")
step.math.lm <- step(math.lm)
Start:  AIC=1518.53
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + Fjob + reason + guardian + traveltime + studytime + 
    failures + schoolsup + famsup + paid + activities + nursery + 
    higher + internet + romantic + famrel + freetime + goout + 
    Dalc + Walc + health + absences

             Df Deviance    AIC
- Fjob        4   3925.4 1513.1
- reason      3   3910.0 1514.1
- guardian    2   3895.9 1515.1
- nursery     1   3887.0 1516.5
- internet    1   3887.0 1516.5
- Walc        1   3887.6 1516.6
- Dalc        1   3887.9 1516.6
- school      1   3892.1 1516.9
- address     1   3892.7 1516.9
- traveltime  1   3893.0 1516.9
- Fedu        1   3893.8 1517.0
- activities  1   3894.1 1517.0
- freetime    1   3897.1 1517.2
- famrel      1   3897.5 1517.2
- Pstatus     1   3897.9 1517.2
- paid        1   3904.8 1517.7
- goout       1   3911.7 1518.2
<none>            3886.9 1518.5
- famsup      1   3918.9 1518.7
- age         1   3924.3 1519.0
- Medu        1   3925.0 1519.0
- higher      1   3925.3 1519.1
- health      1   3932.5 1519.5
- studytime   1   3944.5 1520.3
- romantic    1   3949.1 1520.6
- schoolsup   1   3952.6 1520.9
- famsize     1   3957.0 1521.2
- absences    1   3964.7 1521.7
- sex         1   3992.6 1523.5
- Mjob        4   4101.6 1524.5
- failures    1   4078.2 1529.0

Step:  AIC=1513.08
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + reason + guardian + traveltime + studytime + 
    failures + schoolsup + famsup + paid + activities + nursery + 
    higher + internet + romantic + famrel + freetime + goout + 
    Dalc + Walc + health + absences

             Df Deviance    AIC
- reason      3   3956.5 1509.1
- guardian    2   3935.2 1509.7
- Dalc        1   3925.4 1511.1
- nursery     1   3925.4 1511.1
- Walc        1   3925.8 1511.1
- internet    1   3926.3 1511.1
- school      1   3929.0 1511.3
- traveltime  1   3929.7 1511.4
- famrel      1   3930.8 1511.4
- address     1   3931.3 1511.5
- Pstatus     1   3935.4 1511.7
- freetime    1   3936.3 1511.8
- activities  1   3938.0 1511.9
- goout       1   3944.3 1512.3
- paid        1   3944.9 1512.4
- Fedu        1   3946.6 1512.5
<none>            3925.4 1513.1
- age         1   3958.2 1513.2
- famsup      1   3959.7 1513.3
- Medu        1   3964.3 1513.6
- health      1   3965.2 1513.7
- higher      1   3966.8 1513.8
- studytime   1   3979.6 1514.6
- romantic    1   3983.9 1514.9
- famsize     1   3990.7 1515.3
- schoolsup   1   3993.0 1515.5
- absences    1   4005.4 1516.3
- Mjob        4   4132.8 1518.4
- sex         1   4039.9 1518.5
- failures    1   4114.0 1523.2

Step:  AIC=1509.12
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + guardian + traveltime + studytime + failures + 
    schoolsup + famsup + paid + activities + nursery + higher + 
    internet + romantic + famrel + freetime + goout + Dalc + 
    Walc + health + absences

             Df Deviance    AIC
- guardian    2   3966.7 1505.8
- Dalc        1   3956.7 1507.1
- nursery     1   3956.8 1507.1
- Walc        1   3956.9 1507.2
- internet    1   3958.0 1507.2
- address     1   3959.5 1507.3
- school      1   3960.0 1507.3
- traveltime  1   3961.6 1507.5
- famrel      1   3962.7 1507.5
- freetime    1   3966.2 1507.8
- Pstatus     1   3966.3 1507.8
- activities  1   3966.6 1507.8
- Fedu        1   3974.7 1508.3
- paid        1   3976.3 1508.4
- goout       1   3980.5 1508.7
<none>            3956.5 1509.1
- famsup      1   3990.8 1509.3
- age         1   3993.3 1509.5
- higher      1   3993.9 1509.5
- Medu        1   4001.6 1510.0
- health      1   4007.3 1510.4
- romantic    1   4015.6 1511.0
- studytime   1   4021.2 1511.3
- famsize     1   4023.7 1511.5
- schoolsup   1   4026.8 1511.7
- absences    1   4041.4 1512.6
- sex         1   4063.7 1514.0
- Mjob        4   4185.2 1515.7
- failures    1   4161.0 1520.2

Step:  AIC=1505.79
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + traveltime + studytime + failures + schoolsup + 
    famsup + paid + activities + nursery + higher + internet + 
    romantic + famrel + freetime + goout + Dalc + Walc + health + 
    absences

             Df Deviance    AIC
- nursery     1   3966.8 1503.8
- Dalc        1   3967.1 1503.8
- Walc        1   3967.2 1503.8
- internet    1   3967.8 1503.9
- traveltime  1   3969.8 1504.0
- school      1   3970.0 1504.0
- address     1   3972.4 1504.2
- famrel      1   3973.0 1504.2
- Pstatus     1   3977.3 1504.5
- activities  1   3977.4 1504.5
- freetime    1   3978.0 1504.5
- paid        1   3986.6 1505.1
- Fedu        1   3993.7 1505.5
<none>            3966.7 1505.8
- goout       1   3997.7 1505.8
- famsup      1   3999.7 1505.9
- age         1   4002.1 1506.1
- Medu        1   4005.9 1506.3
- higher      1   4007.1 1506.4
- health      1   4016.3 1507.0
- romantic    1   4026.1 1507.6
- famsize     1   4034.9 1508.2
- schoolsup   1   4036.6 1508.3
- studytime   1   4037.6 1508.4
- absences    1   4052.4 1509.3
- sex         1   4079.9 1511.1
- Mjob        4   4199.9 1512.6
- failures    1   4176.3 1517.1

Step:  AIC=1503.79
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + traveltime + studytime + failures + schoolsup + 
    famsup + paid + activities + higher + internet + romantic + 
    famrel + freetime + goout + Dalc + Walc + health + absences

             Df Deviance    AIC
- Dalc        1   3967.1 1501.8
- Walc        1   3967.2 1501.8
- internet    1   3967.9 1501.9
- traveltime  1   3969.9 1502.0
- school      1   3970.0 1502.0
- address     1   3972.4 1502.2
- famrel      1   3973.0 1502.2
- Pstatus     1   3977.3 1502.5
- activities  1   3977.5 1502.5
- freetime    1   3978.1 1502.5
- paid        1   3986.8 1503.1
- Fedu        1   3993.9 1503.5
<none>            3966.8 1503.8
- goout       1   3997.7 1503.8
- famsup      1   3999.7 1503.9
- age         1   4002.2 1504.1
- Medu        1   4005.9 1504.3
- higher      1   4007.1 1504.4
- health      1   4016.3 1505.0
- romantic    1   4026.1 1505.6
- schoolsup   1   4036.6 1506.3
- famsize     1   4036.8 1506.3
- studytime   1   4038.3 1506.4
- absences    1   4052.4 1507.3
- sex         1   4079.9 1509.1
- Mjob        4   4200.0 1510.6
- failures    1   4177.8 1515.2

Step:  AIC=1501.81
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + traveltime + studytime + failures + schoolsup + 
    famsup + paid + activities + higher + internet + romantic + 
    famrel + freetime + goout + Walc + health + absences

             Df Deviance    AIC
- Walc        1   3967.3 1499.8
- internet    1   3968.2 1499.9
- traveltime  1   3970.1 1500.0
- school      1   3970.3 1500.0
- address     1   3972.8 1500.2
- famrel      1   3973.2 1500.2
- Pstatus     1   3977.5 1500.5
- activities  1   3978.0 1500.5
- freetime    1   3979.5 1500.6
- paid        1   3987.5 1501.1
- Fedu        1   3994.2 1501.6
<none>            3967.1 1501.8
- goout       1   3998.2 1501.8
- famsup      1   3999.8 1501.9
- age         1   4002.2 1502.1
- Medu        1   4006.4 1502.4
- higher      1   4007.9 1502.5
- health      1   4016.4 1503.0
- romantic    1   4026.1 1503.6
- schoolsup   1   4036.9 1504.3
- famsize     1   4037.7 1504.4
- studytime   1   4039.0 1504.5
- absences    1   4053.3 1505.4
- sex         1   4083.9 1507.3
- Mjob        4   4200.1 1508.6
- failures    1   4177.8 1513.2

Step:  AIC=1499.82
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + traveltime + studytime + failures + schoolsup + 
    famsup + paid + activities + higher + internet + romantic + 
    famrel + freetime + goout + health + absences

             Df Deviance    AIC
- internet    1   3968.4 1497.9
- traveltime  1   3970.5 1498.0
- school      1   3970.5 1498.0
- address     1   3973.2 1498.2
- famrel      1   3973.7 1498.2
- Pstatus     1   3977.7 1498.5
- activities  1   3978.1 1498.5
- freetime    1   3979.8 1498.6
- paid        1   3987.5 1499.1
- Fedu        1   3994.2 1499.6
<none>            3967.3 1499.8
- famsup      1   3999.8 1499.9
- age         1   4002.2 1500.1
- Medu        1   4007.2 1500.4
- goout       1   4008.0 1500.5
- higher      1   4008.0 1500.5
- health      1   4016.8 1501.0
- romantic    1   4026.2 1501.6
- schoolsup   1   4036.9 1502.3
- famsize     1   4038.0 1502.4
- studytime   1   4042.1 1502.7
- absences    1   4055.1 1503.5
- sex         1   4090.5 1505.8
- Mjob        4   4200.6 1506.6
- failures    1   4179.5 1511.3

Step:  AIC=1497.89
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + traveltime + studytime + failures + schoolsup + 
    famsup + paid + activities + higher + romantic + famrel + 
    freetime + goout + health + absences

             Df Deviance    AIC
- traveltime  1   3971.9 1496.1
- school      1   3972.0 1496.1
- address     1   3973.6 1496.2
- famrel      1   3974.6 1496.3
- Pstatus     1   3978.1 1496.5
- activities  1   3978.9 1496.6
- freetime    1   3980.9 1496.7
- paid        1   3987.9 1497.2
- Fedu        1   3994.4 1497.6
<none>            3968.4 1497.9
- famsup      1   4001.8 1498.1
- age         1   4003.1 1498.2
- Medu        1   4008.0 1498.5
- goout       1   4009.6 1498.6
- higher      1   4009.9 1498.6
- health      1   4016.8 1499.0
- romantic    1   4029.8 1499.9
- schoolsup   1   4038.4 1500.4
- famsize     1   4039.2 1500.5
- studytime   1   4042.6 1500.7
- absences    1   4055.1 1501.5
- sex         1   4090.6 1503.8
- Mjob        4   4202.4 1504.7
- failures    1   4179.6 1509.3

Step:  AIC=1496.12
G3 ~ school + sex + age + address + famsize + Pstatus + Medu + 
    Fedu + Mjob + studytime + failures + schoolsup + famsup + 
    paid + activities + higher + romantic + famrel + freetime + 
    goout + health + absences

             Df Deviance    AIC
- school      1   3974.1 1494.3
- famrel      1   3977.9 1494.5
- address     1   3980.6 1494.7
- Pstatus     1   3981.3 1494.7
- activities  1   3984.0 1494.9
- freetime    1   3984.8 1495.0
- paid        1   3992.9 1495.5
- Fedu        1   4002.2 1496.1
<none>            3971.9 1496.1
- age         1   4005.1 1496.3
- Medu        1   4010.6 1496.6
- famsup      1   4011.4 1496.7
- higher      1   4014.6 1496.9
- goout       1   4015.0 1496.9
- health      1   4018.7 1497.2
- romantic    1   4036.0 1498.3
- famsize     1   4040.1 1498.5
- schoolsup   1   4044.4 1498.8
- studytime   1   4049.5 1499.1
- absences    1   4058.2 1499.7
- sex         1   4091.3 1501.8
- Mjob        4   4204.6 1502.9
- failures    1   4182.0 1507.5

Step:  AIC=1494.27
G3 ~ sex + age + address + famsize + Pstatus + Medu + Fedu + 
    Mjob + studytime + failures + schoolsup + famsup + paid + 
    activities + higher + romantic + famrel + freetime + goout + 
    health + absences

             Df Deviance    AIC
- famrel      1   3979.7 1492.6
- address     1   3981.3 1492.7
- Pstatus     1   3983.7 1492.9
- freetime    1   3987.6 1493.1
- activities  1   3987.8 1493.2
- paid        1   3996.2 1493.7
- Fedu        1   4004.4 1494.2
<none>            3974.1 1494.3
- age         1   4006.0 1494.3
- Medu        1   4012.7 1494.8
- famsup      1   4016.8 1495.0
- higher      1   4020.2 1495.2
- goout       1   4020.2 1495.3
- health      1   4020.4 1495.3
- romantic    1   4037.3 1496.3
- famsize     1   4043.3 1496.7
- schoolsup   1   4046.3 1496.9
- studytime   1   4050.1 1497.2
- absences    1   4058.6 1497.7
- sex         1   4092.9 1499.9
- Mjob        4   4206.7 1501.0
- failures    1   4187.0 1505.8

Step:  AIC=1492.63
G3 ~ sex + age + address + famsize + Pstatus + Medu + Fedu + 
    Mjob + studytime + failures + schoolsup + famsup + paid + 
    activities + higher + romantic + freetime + goout + health + 
    absences

             Df Deviance    AIC
- address     1   3987.4 1491.1
- Pstatus     1   3991.1 1491.4
- activities  1   3993.1 1491.5
- freetime    1   3996.2 1491.7
- paid        1   4001.8 1492.1
- age         1   4008.5 1492.5
- Fedu        1   4009.8 1492.6
<none>            3979.7 1492.6
- Medu        1   4018.0 1493.1
- famsup      1   4022.4 1493.4
- health      1   4024.3 1493.5
- goout       1   4025.1 1493.6
- higher      1   4026.6 1493.7
- famsize     1   4046.8 1495.0
- romantic    1   4047.4 1495.0
- schoolsup   1   4050.8 1495.2
- studytime   1   4057.9 1495.7
- absences    1   4062.9 1496.0
- sex         1   4098.2 1498.2
- Mjob        4   4214.3 1499.5
- failures    1   4201.7 1504.7

Step:  AIC=1491.14
G3 ~ sex + age + famsize + Pstatus + Medu + Fedu + Mjob + studytime + 
    failures + schoolsup + famsup + paid + activities + higher + 
    romantic + freetime + goout + health + absences

             Df Deviance    AIC
- Pstatus     1   3998.7 1489.9
- activities  1   4003.1 1490.2
- freetime    1   4005.7 1490.3
- paid        1   4009.1 1490.5
- Fedu        1   4017.4 1491.1
<none>            3987.4 1491.1
- age         1   4020.1 1491.2
- Medu        1   4027.1 1491.7
- goout       1   4030.9 1491.9
- health      1   4031.9 1492.0
- famsup      1   4033.4 1492.1
- higher      1   4036.6 1492.3
- romantic    1   4051.9 1493.3
- famsize     1   4052.6 1493.3
- schoolsup   1   4057.5 1493.7
- studytime   1   4065.2 1494.1
- absences    1   4067.4 1494.3
- sex         1   4101.8 1496.5
- Mjob        4   4223.6 1498.0
- failures    1   4209.7 1503.2

Step:  AIC=1489.87
G3 ~ sex + age + famsize + Medu + Fedu + Mjob + studytime + failures + 
    schoolsup + famsup + paid + activities + higher + romantic + 
    freetime + goout + health + absences

             Df Deviance    AIC
- activities  1   4011.4 1488.7
- freetime    1   4016.8 1489.0
- paid        1   4023.3 1489.5
- Fedu        1   4027.3 1489.7
- age         1   4029.5 1489.8
<none>            3998.7 1489.9
- Medu        1   4033.4 1490.1
- goout       1   4042.8 1490.7
- famsup      1   4043.7 1490.8
- health      1   4045.1 1490.9
- higher      1   4045.5 1490.9
- famsize     1   4059.8 1491.8
- romantic    1   4063.0 1492.0
- schoolsup   1   4070.6 1492.5
- absences    1   4071.9 1492.6
- studytime   1   4076.6 1492.9
- sex         1   4115.4 1495.3
- Mjob        4   4231.4 1496.5
- failures    1   4230.2 1502.4

Step:  AIC=1488.69
G3 ~ sex + age + famsize + Medu + Fedu + Mjob + studytime + failures + 
    schoolsup + famsup + paid + higher + romantic + freetime + 
    goout + health + absences

            Df Deviance    AIC
- freetime   1   4027.6 1487.7
- Fedu       1   4036.4 1488.3
- age        1   4037.5 1488.4
- paid       1   4037.9 1488.4
<none>           4011.4 1488.7
- Medu       1   4046.8 1489.0
- higher     1   4053.2 1489.4
- famsup     1   4055.5 1489.5
- goout      1   4058.3 1489.7
- health     1   4058.9 1489.7
- famsize    1   4073.6 1490.7
- romantic   1   4081.0 1491.1
- absences   1   4082.5 1491.2
- schoolsup  1   4082.9 1491.3
- studytime  1   4083.2 1491.3
- sex        1   4118.0 1493.5
- Mjob       4   4237.3 1494.9
- failures   1   4245.2 1501.4

Step:  AIC=1487.73
G3 ~ sex + age + famsize + Medu + Fedu + Mjob + studytime + failures + 
    schoolsup + famsup + paid + higher + romantic + goout + health + 
    absences

            Df Deviance    AIC
- Fedu       1   4050.0 1487.2
- paid       1   4052.0 1487.3
- age        1   4054.0 1487.4
<none>           4027.6 1487.7
- goout      1   4063.2 1488.0
- Medu       1   4065.1 1488.1
- famsup     1   4068.0 1488.3
- higher     1   4068.5 1488.3
- health     1   4074.9 1488.8
- famsize    1   4089.7 1489.7
- absences   1   4092.3 1489.9
- studytime  1   4093.6 1489.9
- romantic   1   4097.1 1490.2
- schoolsup  1   4099.7 1490.3
- sex        1   4150.5 1493.5
- Mjob       4   4257.5 1494.1
- failures   1   4262.5 1500.4

Step:  AIC=1487.17
G3 ~ sex + age + famsize + Medu + Mjob + studytime + failures + 
    schoolsup + famsup + paid + higher + romantic + goout + health + 
    absences

            Df Deviance    AIC
- paid       1   4071.2 1486.5
- age        1   4076.2 1486.8
<none>           4050.0 1487.2
- famsup     1   4085.3 1487.4
- goout      1   4087.2 1487.5
- health     1   4093.1 1487.9
- higher     1   4097.1 1488.2
- famsize    1   4109.0 1488.9
- studytime  1   4109.2 1488.9
- absences   1   4112.6 1489.1
- schoolsup  1   4118.8 1489.5
- romantic   1   4119.3 1489.6
- Medu       1   4160.0 1492.1
- sex        1   4173.6 1493.0
- Mjob       4   4285.8 1493.8
- failures   1   4311.9 1501.4

Step:  AIC=1486.52
G3 ~ sex + age + famsize + Medu + Mjob + studytime + failures + 
    schoolsup + famsup + higher + romantic + goout + health + 
    absences

            Df Deviance    AIC
- famsup     1   4094.6 1486.0
- age        1   4096.7 1486.1
<none>           4071.2 1486.5
- goout      1   4107.2 1486.8
- health     1   4118.8 1487.5
- higher     1   4126.2 1488.0
- famsize    1   4129.2 1488.2
- absences   1   4130.3 1488.2
- studytime  1   4136.1 1488.6
- schoolsup  1   4136.3 1488.6
- romantic   1   4138.3 1488.8
- Medu       1   4182.0 1491.5
- sex        1   4189.9 1492.0
- Mjob       4   4299.9 1492.7
- failures   1   4347.4 1501.5

Step:  AIC=1486
G3 ~ sex + age + famsize + Medu + Mjob + studytime + failures + 
    schoolsup + higher + romantic + goout + health + absences

            Df Deviance    AIC
- age        1   4114.5 1485.3
<none>           4094.6 1486.0
- goout      1   4135.3 1486.6
- higher     1   4145.4 1487.2
- health     1   4147.2 1487.3
- absences   1   4151.1 1487.5
- studytime  1   4156.5 1487.9
- schoolsup  1   4158.9 1488.0
- romantic   1   4166.2 1488.5
- famsize    1   4170.8 1488.8
- Medu       1   4199.6 1490.6
- Mjob       4   4322.9 1492.1
- sex        1   4241.6 1493.1
- failures   1   4378.1 1501.3

Step:  AIC=1485.26
G3 ~ sex + famsize + Medu + Mjob + studytime + failures + schoolsup + 
    higher + romantic + goout + health + absences

            Df Deviance    AIC
<none>           4114.5 1485.3
- absences   1   4162.9 1486.3
- health     1   4164.0 1486.4
- schoolsup  1   4164.2 1486.4
- goout      1   4165.8 1486.5
- higher     1   4172.9 1486.9
- studytime  1   4173.0 1486.9
- famsize    1   4187.0 1487.8
- romantic   1   4203.3 1488.8
- Medu       1   4229.5 1490.4
- Mjob       4   4343.3 1491.3
- sex        1   4268.9 1492.8
- failures   1   4447.4 1503.4

Linear model output after stepwise variable reduction (standard threshold)

# Show the coefficient table and fit statistics of the model returned by step().
summary(step.math.lm)

Call:
glm(formula = G3 ~ sex + famsize + Medu + Mjob + studytime + 
    failures + schoolsup + higher + romantic + goout + health + 
    absences, family = "gaussian", data = math.train)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-11.7901   -1.8104    0.3114    2.9874    7.7140  

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   7.59026    1.77241   4.282 2.66e-05 ***
sexM          1.73983    0.57600   3.021  0.00279 ** 
famsizeLE3    1.19372    0.57669   2.070  0.03952 *  
Medu          0.84212    0.32314   2.606  0.00973 ** 
Mjobhealth    0.37399    1.22879   0.304  0.76112    
Mjobother    -1.26355    0.80875  -1.562  0.11951    
Mjobservices  0.50424    0.91135   0.553  0.58057    
Mjobteacher  -2.27170    1.23298  -1.842  0.06663 .  
studytime     0.60745    0.32659   1.860  0.06410 .  
failures     -1.67838    0.37847  -4.435 1.40e-05 ***
schoolsupyes -1.40571    0.82011  -1.714  0.08780 .  
higheryes     2.07726    1.11806   1.858  0.06439 .  
romanticyes  -1.26152    0.55074  -2.291  0.02284 *  
goout        -0.39622    0.22746  -1.742  0.08279 .  
health       -0.31522    0.18421  -1.711  0.08831 .  
absences      0.05206    0.03077   1.692  0.09201 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian family taken to be 16.93193)

    Null deviance: 5724.9  on 258  degrees of freedom
Residual deviance: 4114.5  on 243  degrees of freedom
AIC: 1485.3

Number of Fisher Scoring iterations: 2

# Linear regression predictions on the training and test partitions

PredictTrain <- predict(step.math.lm, newdata = math.train)
PredictTest <- predict(step.math.lm, newdata = math.test)

# Linear regression KPIs
# The training-set mean is the baseline for both the in-sample R2 and the
# out-of-sample R2 (OSR2).

mean_train <- mean(math.train$G3)

SSTTrain <- sum((math.train$G3 - mean_train)^2)
SSETrain <- sum((PredictTrain - math.train$G3)^2)
R2 <- 1 - SSETrain / SSTTrain

SSTTest <- sum((math.test$G3 - mean_train)^2)
SSETest <- sum((PredictTest - math.test$G3)^2)
OSR2 <- 1 - SSETest / SSTTest

# Test-set error metrics.
RMSE <- RMSE(PredictTest, math.test$G3)
MAE <- MAE(PredictTest, math.test$G3)

# One-row KPI table for the linear model.
KPILM <- data.frame(
  "Model" = "LM",
  "R2" = R2,
  "OSR2" = OSR2,
  "RMSE" = RMSE,
  "MAE" = MAE
)
KPILM

# Lasso

# Design matrices (built without an intercept column) and response vectors
# for the training and test partitions.
x.math.train <- model.matrix(G3 ~ . - 1, data = math.train)
y.math.train <- math.train[, "G3"]
x.math.test <- model.matrix(G3 ~ . - 1, data = math.test)
y.math.test <- math.test[, "G3"]

# Candidate penalties from exp(5) down to exp(-5).
lasso.lambdas <- exp(seq(from = 5, to = -5, by = -0.1))

# 10-fold cross-validation over the grid, then a refit at the lambda with the
# lowest cross-validated error.
set.seed(1)
cv.lasso <- cv.glmnet(
  x.math.train,
  y.math.train,
  alpha = 1,
  lambda = lasso.lambdas,
  nfolds = 10
)
bestlambda <- cv.lasso$lambda.min
lasso <- glmnet(x.math.train, y.math.train, alpha = 1, lambda = bestlambda)

# Lasso predictions

pred.train.lasso <- predict(lasso, x.math.train)
pred.test.lasso <- predict(lasso, x.math.test)

# Lasso KPIs
# SSTTrain and SSTTest are the totals computed in the linear regression KPIs.

SSETrain.lasso <- sum((pred.train.lasso - y.math.train)^2)
SSETest.lasso <- sum((pred.test.lasso - y.math.test)^2)
R2.Lasso <- 1 - SSETrain.lasso / SSTTrain
OSR2.lasso <- 1 - SSETest.lasso / SSTTest
RMSE.lasso <- RMSE(pred.test.lasso, math.test$G3)
MAE.lasso <- MAE(pred.test.lasso, math.test$G3)

KPILasso <- data.frame(
  "Model" = "Lasso",
  "R2" = R2.Lasso,
  "OSR2" = OSR2.lasso,
  "RMSE" = RMSE.lasso,
  "MAE" = MAE.lasso
)
KPILasso

# CART

# Regression tree for G3 on the thirty listed predictors, fitted on the
# training set with rpart's default control settings.
default_tree <- rpart(
  G3 ~ school + sex + age + address + famsize + Pstatus + Medu + Fedu +
    Mjob + Fjob + reason + guardian + traveltime + studytime + failures +
    schoolsup + famsup + paid + activities + nursery + higher + internet +
    romantic + famrel + freetime + goout + Dalc + Walc + health + absences,
  data = math.train
)

# Set all four plot margins to one line before drawing the tree.
par(mar = c(1, 1, 1, 1))
prp(default_tree)

# Text listing of the tree's nodes.
print(default_tree, digits = 3)
n= 259 

node), split, n, deviance, yval
      * denotes terminal node

  1) root 259 5720.0 10.40  
    2) failures>=0.5 54 1410.0  7.15  
      4) absences< 1 19  219.0  1.42 *
      5) absences>=1 35  233.0 10.30 *
    3) failures< 0.5 205 3600.0 11.20  
      6) Medu< 2.5 79 1400.0 10.10  
       12) absences< 0.5 22  875.0  7.64  
         24) guardian=mother,other 13  373.0  4.08 *
         25) guardian=father 9   99.6 12.80 *
       13) absences>=0.5 57  342.0 11.00 *
      7) Medu>=2.5 126 2030.0 11.90  
       14) schoolsup=yes 12  169.0  8.67 *
       15) schoolsup=no 114 1720.0 12.30  
         30) age>=15.5 90 1420.0 11.80  
           60) studytime< 2.5 68 1030.0 11.20  
            120) famsup=yes 41  560.0 10.40  
              240) Mjob=at_home,other,teacher 26  408.0  9.46  
                480) health>=1.5 19  247.0  8.42 *
                481) health< 1.5 7   85.4 12.30 *
              241) Mjob=health,services 15   92.9 11.90 *
            121) famsup=no 27  393.0 12.50 *
           61) studytime>=2.5 22  302.0 13.50  
            122) Walc>=1.5 9   35.6 10.20 *
            123) Walc< 1.5 13  102.0 15.80 *
         31) age< 15.5 24  194.0 14.20 *

#Cart Cross Validation

# Summary function passed to trainControl(): returns the squared correlation
# between the `obs` and `pred` columns of `data` as a named vector ("RSq").
# `lev`, `model` and `...` are part of the signature but are not used here.
RSquared <- function(data, lev = NULL, model = NULL, ...) {
  squared_cor <- cor(data$obs, data$pred)^2
  c(RSq = squared_cor)
}

# 10-fold cross-validation of rpart over a grid of cp values; the cp with the
# highest RSq (as computed by RSquared) is selected.
cart_cv_control <- trainControl(
  method = "cv",
  number = 10,
  summaryFunction = RSquared
)
cart_cp_grid <- data.frame(.cp = seq(0, .0004, .00001))

cv.trees <- train(
  G3 ~ school + sex + age + address + famsize + Pstatus + Medu + Fedu +
    Mjob + Fjob + reason + guardian + traveltime + studytime + failures +
    schoolsup + famsup + paid + activities + nursery + higher + internet +
    romantic + famrel + freetime + goout + Dalc + Walc + health + absences,
  data = math.train,
  method = "rpart",
  trControl = cart_cv_control,
  metric = "RSq",
  maximize = TRUE,
  tuneGrid = cart_cp_grid
)

# CART best tree: refit rpart on the training set using the cp value that the
# cross-validation selected, then print the resulting tree.

best_cp <- cv.trees$bestTune$cp
best_tree <- rpart(
  G3 ~ school + sex + age + address + famsize + Pstatus + Medu + Fedu +
    Mjob + Fjob + reason + guardian + traveltime + studytime + failures +
    schoolsup + famsup + paid + activities + nursery + higher + internet +
    romantic + famrel + freetime + goout + Dalc + Walc + health + absences,
  data = math.train,
  cp = best_cp
)
best_tree
n= 259 

node), split, n, deviance, yval
      * denotes terminal node

  1) root 259 5724.91900 10.378380  
    2) failures>=0.5 54 1412.81500  7.148148  
      4) absences< 1 19  218.63160  1.421053 *
      5) absences>=1 35  232.68570 10.257140  
       10) Walc>=1.5 22  155.31820  9.590909  
         20) Mjob=health,other 8   29.87500  8.375000 *
         21) Mjob=at_home,services,teacher 14  106.85710 10.285710 *
       11) Walc< 1.5 13   51.07692 11.384620 *
    3) failures< 0.5 205 3600.22400 11.229270  
      6) Medu< 2.5 79 1400.38000 10.088610  
       12) absences< 0.5 22  875.09090  7.636364  
         24) guardian=mother,other 13  372.92310  4.076923 *
         25) guardian=father 9   99.55556 12.777780 *
       13) absences>=0.5 57  341.92980 11.035090  
         26) health>=2.5 44  222.72730 10.727270  
           52) romantic=no 28  151.71430 10.285710  
            104) sex=F 16   67.93750  9.562500 *
            105) sex=M 12   64.25000 11.250000 *
           53) romantic=yes 16   56.00000 11.500000 *
         27) health< 2.5 13  100.92310 12.076920 *
      7) Medu>=2.5 126 2032.61100 11.944440  
       14) schoolsup=yes 12  168.66670  8.666667 *
       15) schoolsup=no 114 1721.44700 12.289470  
         30) age>=15.5 90 1415.55600 11.777780  
           60) studytime< 2.5 68 1027.69100 11.220590  
            120) famsup=yes 41  559.51220 10.365850  
              240) Mjob=at_home,other,teacher 26  408.46150  9.461538  
                480) health>=1.5 19  246.63160  8.421053 *
                481) health< 1.5 7   85.42857 12.285710 *
              241) Mjob=health,services 15   92.93333 11.933330 *
            121) famsup=no 27  392.74070 12.518520  
              242) health>=3.5 18  334.27780 11.611110 *
              243) health< 3.5 9   14.00000 14.333330 *
           61) studytime>=2.5 22  301.50000 13.500000  
            122) Walc>=1.5 9   35.55556 10.222220 *
            123) Walc< 1.5 13  102.30770 15.769230 *
         31) age< 15.5 24  193.95830 14.208330  
           62) studytime>=2.5 7   28.00000 13.000000 *
           63) studytime< 2.5 17  151.52940 14.705880 *
# Draw the tree that was refit with the cross-validated cp.
prp(best_tree)

# Print the node listing of the same tree with digits = 3.
print(best_tree, digits=3)
n= 259 

node), split, n, deviance, yval
      * denotes terminal node

  1) root 259 5720.0 10.40  
    2) failures>=0.5 54 1410.0  7.15  
      4) absences< 1 19  219.0  1.42 *
      5) absences>=1 35  233.0 10.30  
       10) Walc>=1.5 22  155.0  9.59  
         20) Mjob=health,other 8   29.9  8.38 *
         21) Mjob=at_home,services,teacher 14  107.0 10.30 *
       11) Walc< 1.5 13   51.1 11.40 *
    3) failures< 0.5 205 3600.0 11.20  
      6) Medu< 2.5 79 1400.0 10.10  
       12) absences< 0.5 22  875.0  7.64  
         24) guardian=mother,other 13  373.0  4.08 *
         25) guardian=father 9   99.6 12.80 *
       13) absences>=0.5 57  342.0 11.00  
         26) health>=2.5 44  223.0 10.70  
           52) romantic=no 28  152.0 10.30  
            104) sex=F 16   67.9  9.56 *
            105) sex=M 12   64.2 11.20 *
           53) romantic=yes 16   56.0 11.50 *
         27) health< 2.5 13  101.0 12.10 *
      7) Medu>=2.5 126 2030.0 11.90  
       14) schoolsup=yes 12  169.0  8.67 *
       15) schoolsup=no 114 1720.0 12.30  
         30) age>=15.5 90 1420.0 11.80  
           60) studytime< 2.5 68 1030.0 11.20  
            120) famsup=yes 41  560.0 10.40  
              240) Mjob=at_home,other,teacher 26  408.0  9.46  
                480) health>=1.5 19  247.0  8.42 *
                481) health< 1.5 7   85.4 12.30 *
              241) Mjob=health,services 15   92.9 11.90 *
            121) famsup=no 27  393.0 12.50  
              242) health>=3.5 18  334.0 11.60 *
              243) health< 3.5 9   14.0 14.30 *
           61) studytime>=2.5 22  302.0 13.50  
            122) Walc>=1.5 9   35.6 10.20 *
            123) Walc< 1.5 13  102.0 15.80 *
         31) age< 15.5 24  194.0 14.20  
           62) studytime>=2.5 7   28.0 13.00 *
           63) studytime< 2.5 17  152.0 14.70 *

# Cart KPIs
#
# In-sample and out-of-sample metrics for the default and the tuned CART
# models. R2 and OSR2 are computed as 1 - SSE/SST against the training-mean
# baseline, the same form used for the random-forest KPIs further down, so the
# rows of the final comparison table measure the same thing. (A squared
# correlation is not an out-of-sample R2: it can never be negative and ignores
# bias/scale errors in the predictions.)

# Predictions on the training and the test split
default_pred_train <- predict(default_tree, newdata = math.train)
best_pred_train <- predict(best_tree, newdata = math.train)
default_pred <- predict(default_tree, newdata = math.test)
best_pred <- predict(best_tree, newdata = math.test)
actualtrain <- math.train$G3
actual <- math.test$G3

# Baseline sums of squares; both use the mean of the training response so the
# test-set baseline does not peek at the test data.
sst_train_cart <- sum((actualtrain - mean(actualtrain))^2)
sst_test_cart <- sum((actual - mean(actualtrain))^2)

# R2 (training split)
R2cart_default <- 1 - sum((actualtrain - default_pred_train)^2) / sst_train_cart
R2cart <- 1 - sum((actualtrain - best_pred_train)^2) / sst_train_cart

# OSR2 (test split)
OSR2cart_default <- 1 - sum((actual - default_pred)^2) / sst_test_cart
OSR2cart <- 1 - sum((actual - best_pred)^2) / sst_test_cart
# MAE (test split)
MAEcart_default <- Metrics::mae(actual, default_pred)
MAEcart <- Metrics::mae(actual, best_pred)
# RMSE (test split)
RMSEcart_default <- Metrics::rmse(actual, default_pred)
RMSEcart <- Metrics::rmse(actual, best_pred)

# One-row summary for the tuned tree; the *_default values are kept for
# ad-hoc comparison but are not part of the summary row.
KPIcart <- data.frame(
  "Model" = "Cart",
  "R2" = R2cart,
  "OSR2" = OSR2cart,
  "RMSE" = RMSEcart,
  "MAE" = MAEcart
)
KPIcart
# Tune mtry for the random forest with 10-fold cross-validation over 10..30.
# The seed fixes the fold assignment and the forests grown in each fold.
set.seed(1)
rf.cv.math <- train(
  y = math.train$G3,
  x = subset(math.train, select = -c(G3)),
  method = "rf",
  # Spelled "nodsize" before: that is not a randomForest argument, so it was
  # swallowed by `...` and the CV ran with the default node size instead of
  # the nodesize = 25 used for the final model.
  nodesize = 25,
  ntree = 80,
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(mtry = seq(10, 30, 1))
)
# NOTE(review): any previously printed CV table was produced with the
# misspelled argument; re-run this chunk to refresh it.
rf.cv.math
Random Forest 

259 samples
 30 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 234, 234, 232, 232, 233, 233, ... 
Resampling results across tuning parameters:

  mtry  RMSE      Rsquared   MAE     
  10    3.963764  0.3172931  3.077470
  11    3.851030  0.3541703  2.958513
  12    3.900203  0.3373594  3.028728
  13    3.816625  0.3667195  2.960741
  14    3.856354  0.3491972  2.953694
  15    3.834740  0.3633247  2.957087
  16    3.905774  0.3233089  3.004829
  17    3.834463  0.3545586  2.982463
  18    3.859076  0.3409640  3.010669
  19    3.842274  0.3496722  2.990125
  20    3.834634  0.3460991  2.980646
  21    3.787923  0.3648602  2.916338
  22    3.826180  0.3503290  2.933181
  23    3.877063  0.3278590  3.007338
  24    3.861419  0.3372604  2.981451
  25    3.829523  0.3545664  2.980368
  26    3.810090  0.3565369  2.954413
  27    3.775078  0.3698736  2.906899
  28    3.833981  0.3512126  2.960694
  29    3.921614  0.3164576  3.026684
  30    3.834718  0.3441478  2.943957

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was mtry = 27.
#4 RF with CV
# Refit the forest on the full training split using the mtry selected by the
# cross-validation in rf.cv.math, instead of a value copied by hand from its
# printout (which goes stale whenever the CV is re-run).
set.seed(1)  # make the fitted forest, and the importances below, reproducible
mod.rf.math <- randomForest(
  G3 ~ .,
  data = math.train,
  mtry = rf.cv.math$bestTune$mtry,
  nodesize = 25,
  ntree = 80
)
# Per-variable importance as reported by randomForest::importance()
important_vars_ames <- importance(mod.rf.math)
important_vars_ames
           IncNodePurity
school         13.598025
sex            74.051025
age           117.017024
address        17.549492
famsize        22.460835
Pstatus         9.767272
Medu          177.078430
Fedu          151.125383
Mjob          175.574637
Fjob           90.998446
reason         96.759936
guardian      124.566849
traveltime     36.180535
studytime      61.785055
failures      811.508862
schoolsup      43.680819
famsup         39.000993
paid           12.276863
activities     17.551460
nursery        17.105788
higher         62.736530
internet        3.906562
romantic       33.805567
famrel         63.653592
freetime       86.209069
goout          85.048995
Dalc           31.216909
Walc           97.885504
health        164.735303
absences     1016.886700
# Random-forest predictions on both splits
pred.train.rf.math <- predict(mod.rf.math, newdata = math.train)
pred.test.rf.math <- predict(mod.rf.math, newdata = math.test)

# Prediction errors, reused by every metric below
err.train.rf.math <- pred.train.rf.math - math.train$G3
err.test.rf.math <- pred.test.rf.math - math.test$G3

# Training-split performance (SSTTrain is computed earlier in the notebook)
R2.rf.math <- 1 - sum(err.train.rf.math^2) / SSTTrain
MAE.rf.math <- mean(abs(err.train.rf.math))
RMSE.rf.math <- sqrt(mean(err.train.rf.math^2))

# Test-split performance (SSTTest is computed earlier in the notebook)
OSR2.rf.math.test <- 1 - sum(err.test.rf.math^2) / SSTTest
MAE.rf.math.test <- mean(abs(err.test.rf.math))
RMSE.rf.math.test <- sqrt(mean(err.test.rf.math^2))

# One-row summary: training R2 alongside the test-split OSR2 / MAE / RMSE
KPIrf <- data.frame(
  "Model" = "RF",
  "R2" = R2.rf.math,
  "OSR2" = OSR2.rf.math.test,
  "MAE" = MAE.rf.math.test,
  "RMSE" = RMSE.rf.math.test
)
KPIrf

# All model KPIs

# Stack the per-model summary rows (LM, Lasso, CART, RF) into one table.
do.call(rbind, list(KPILM, KPILasso, KPIcart, KPIrf))
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3IgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSgiZ2dwbG90MiIpCmxpYnJhcnkoImdwbG90cyIpCmxpYnJhcnkoImdsbW5ldCIpCmxpYnJhcnkoIk1BU1MiKQpsaWJyYXJ5KCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KCJkcGx5ciIpCmxpYnJhcnkoInJlc2hhcGUiKQpsaWJyYXJ5KCJnZ3B1YnIiKQpsaWJyYXJ5KCJnZ3Bsb3QyIikKbGlicmFyeSgiZ2xtbmV0IikKbGlicmFyeSgicmVzaGFwZTIiKQpsaWJyYXJ5KCJoZWF0bWFwbHkiKQpsaWJyYXJ5KCJkdW1taWVzIikKbGlicmFyeSgiZHBseXIiKQpsaWJyYXJ5KCJ0aWR5ciIpCmxpYnJhcnkoImNhVG9vbHMiKQpsaWJyYXJ5KCJjYXJldCIpCmxpYnJhcnkoIlJPQ1IiKQpsaWJyYXJ5KCJnZ3B1YnIiKQpsaWJyYXJ5KCJnbG1uZXRVdGlscyIpCmxpYnJhcnkoIkdHYWxseSIpCmxpYnJhcnkoImdsbW5ldCIpCmxpYnJhcnkoImRwbHlyIikKbGlicmFyeSgiZ2dwbG90MiIpCmxpYnJhcnkoInRpZHlyIikKbGlicmFyeSgibGFycyIpCmxpYnJhcnkoImxlYXBzIikKbGlicmFyeSgiZ2JtIikKbGlicmFyeSgicnBhcnQiKQpsaWJyYXJ5KCJjb3JycGxvdCIpCmxpYnJhcnkoIk1ldHJpY3MiKQpsaWJyYXJ5KCJycGFydC5wbG90IikKbGlicmFyeSgicmFuZG9tRm9yZXN0IikKcGFjbWFuOjpwX2xvYWQodGlkeXZlcnNlKQpwYWNtYW46OnBfbG9hZChjYXJldCkgCnBhY21hbjo6cF9sb2FkKHJwYXJ0KQpwYWNtYW46OnBfbG9hZChycGFydC5wbG90KQpwYWNtYW46OnBfbG9hZChjb3JycGxvdCkKcGFjbWFuOjpwX2xvYWQoTWV0cmljcykKYGBgCgpSZWFkaW5nIERhdGEKYGBge3IgbWVzc2FnZT1GQUxTRX0Kcm0obGlzdD1scygpKQpzZXR3ZCgiL1VzZXJzL2theWhhbmJhYmFrYW4vRHJvcGJveC8xNTA3MSBBbmFseXRpY3MgRWRnZSBUZWFtL1RlYW0gUHJvamVjdCIpCnN0dWRlbnRzX21hdGg8LXJlYWQuY3N2KCJzdHVkZW50LW1hdC5jc3YiLHNlcCA9ICI7IikKc3R1ZGVudHNfcG9yPC1yZWFkLmNzdigic3R1ZGVudC1wb3IuY3N2Iiwgc2VwID0gIjsiKQpzdHVkZW50c19ib3RoPC1yZWFkLmNzdigic3R1ZGVudHNpbmJvdGguY3N2Iiwgc2VwID0gIiwiKQpgYGAKClByZS1Qcm9jZXNzaW5nCmBgYHtyIHdhcm5pbmc9RkFMU0V9CnN0dWRlbnRzX21hdGgkc2Nob29sID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkc2Nob29sKQpzdHVkZW50c19tYXRoJHNleCA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJHNleCkKc3R1ZGVudHNfbWF0aCRhZGRyZXNzID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkYWRkcmVzcykKc3R1ZGVudHNfbWF0aCRmYW1zaXplID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkZmFtc2l6ZSkKc3R1ZGVudHNfbWF0aCRGam9iID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkRmpvYikKc3R1ZGVudHNfbWF0aCRNam9iID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkTWpvYikKc3R1ZGVudHNfbWF0aCRyZWFz
b24gPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRyZWFzb24pCnN0dWRlbnRzX21hdGgkZ3VhcmRpYW4gPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRndWFyZGlhbikKc3R1ZGVudHNfbWF0aCRzY2hvb2xzdXAgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRzY2hvb2xzdXApCnN0dWRlbnRzX21hdGgkZmFtc3VwID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkZmFtc3VwKQpzdHVkZW50c19tYXRoJHBhaWQgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRwYWlkKQpzdHVkZW50c19tYXRoJGFjdGl2aXRpZXMgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRhY3Rpdml0aWVzKQpzdHVkZW50c19tYXRoJG51cnNlcnkgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRudXJzZXJ5KQpzdHVkZW50c19tYXRoJGhpZ2hlciA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJGhpZ2hlcikKc3R1ZGVudHNfbWF0aCRpbnRlcm5ldCA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJGludGVybmV0KQpzdHVkZW50c19tYXRoJGhpZ2hlciA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJGhpZ2hlcikKc3R1ZGVudHNfbWF0aCRyb21hbnRpYyA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJHJvbWFudGljKQpzdHVkZW50c19tYXRoJFBzdGF0dXMgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRQc3RhdHVzKQpgYGAKCkNvbGluaWVhcml0eSB0ZXN0aW5nCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CnN0dWRlbnRzX21hdGhvbmx5ID0gc2VsZWN0KHN0dWRlbnRzX21hdGgsLWMoRzEsRzIpKQpzdHVkZW50c19tYXRob25seSA9IG5hLm9taXQoc3R1ZGVudHNfbWF0aG9ubHkpICNyZW1vdmluZyBuYXMgZnJvbSB0aGUgZW50aXJlIGRhdGEgc2V0CnN0dWRlbnRzX21hdGhvbmx5MiA9IG1vZGVsLm1hdHJpeCh+LixkYXRhPXN0dWRlbnRzX21hdGhvbmx5KQpjb3JwbG90PWdnY29ycihzdHVkZW50c19tYXRob25seTIsc2l6ZT0yKSsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCBoanVzdCA9IDEpKQoKV2FsY3hEYWxjID0gZ2dwbG90KHN0dWRlbnRzX21hdGhvbmx5KSsKICBnZW9tX2JhcihhZXMoV2FsYyxEYWxjKSxzdGF0PSJpZGVudGl0eSIpCgpNZWR1eEZlZHU9Z2dwbG90KHN0dWRlbnRzX21hdGhvbmx5KSsKICBnZW9tX2JhcihhZXMoYXMuZmFjdG9yKE1lZHUpLGZpbGw9YXMuZmFjdG9yKEZlZHUpKSxzdGF0PSJjb3VudCIpCmBgYApJbml0aWFsIGluZGljYXRpb25zCmBgYHtyfQojZ3JhZGUgYWJvdmUgcGVyY2V0bmFnZQpGYWlsdXJlc3hHMz1nZ3Bsb3Qoc3R1ZGVudHNfbWF0aG9ubHksYWVzKHg9ZmFpbHVyZXMsZmlsbD1HMzw9MTUpKSsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD0xLHBvc2l0aW9uPSdmaWxsJykKCkZhbXJlbHhHMz1nZ3Bsb3Qoc3R1ZGVudHNfbWF0aG9ubHksYWVzKHg9ZmFtcmVsLGZpbGw9RzM8PTE1KSkrCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MSxwb3NpdGlvbj0nZmlsbCcpCgpHb291
dHhHMz0gZ2dwbG90KHN0dWRlbnRzX21hdGhvbmx5LGFlcyh4PWdvb3V0LGZpbGw9RzM8PTE1KSkrCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MSxwb3NpdGlvbj0nZmlsbCcpCgpTdHVkeXRpbWV4RzM9Z2dwbG90KHN0dWRlbnRzX21hdGhvbmx5LGFlcyh4PXN0dWR5dGltZSxmaWxsPUczPD0xNSkpKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoPTEscG9zaXRpb249J2ZpbGwnKQoKUm9tYW50aWN4RzM9Z2dwbG90KHN0dWRlbnRzX21hdGhvbmx5LGFlcyh4PXJvbWFudGljLGZpbGw9RzM8PTE0KSkrCiAgZ2VvbV9iYXIocG9zaXRpb249J2ZpbGwnKQoKQWJzZW5jZXN4RzM9IGdncGxvdChzdHVkZW50c19tYXRob25seSwgYWVzKGFic2VuY2VzLCBHMykpKwogIGdlb21faml0dGVyKGRhdGE9c3R1ZGVudHNfbWF0aG9ubHksIGNvbG9yPSJibHVlIikrCiAgeGxhYigiYWJzZW5jZXMiKSsKICB5bGFiKCIzUSBHcmFkZXMiKSsKICB0aGVtZShheGlzLmxpbmU9ZWxlbWVudF9saW5lKGNvbG9yPSJibGFjayIpKSsKICBib3JkZXIoY29sb3I9ImJsYWNrIiwgc2l6ZT0xLCBsaW5ldHlwZT0xKQpnZ2FycmFuZ2UoRmFpbHVyZXN4RzMsRmFtcmVseEczLEdvb3V0eEczLFN0dWR5dGltZXhHMyxSb21hbnRpY3hHMyxBYnNlbmNlc3hHMyxsZWdlbmQgPSAidG9wIixjb21tb24ubGVnZW5kID0gVFJVRSkKYGBgCkxpbmVhciBNb2RlbGluZwpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpzZXQuc2VlZCgxKQpzcGxpdCA9IGNyZWF0ZURhdGFQYXJ0aXRpb24oc3R1ZGVudHNfbWF0aG9ubHkkRzMsIHAgPSAwLjY1LCBsaXN0ID0gRkFMU0UpIAptYXRoLnRyYWluID0gc3R1ZGVudHNfbWF0aG9ubHlbc3BsaXQsXQptYXRoLnRlc3QgPSBzdHVkZW50c19tYXRob25seVstc3BsaXQsXQptYXRoLmxtID0gZ2xtKEczfi4sIGRhdGEgPSBtYXRoLnRyYWluLCBmYW1pbHk9ImdhdXNzaWFuIikKc3RlcC5tYXRoLmxtID1zdGVwKG1hdGgubG0pCmBgYApTdGVwd2lzZSB2YXJpYWJsZSByZWR1Y3Rpb24gbGluZWFyIG91dHB1dCBiYXNpcyBzdGFuZGFyZCB0aHJlc2hvbGQKYGBge3J9CnN1bW1hcnkoc3RlcC5tYXRoLmxtKQpgYGAKI0xpbmVhciBSZWdyZXNzaW9uIFByZWRpY3Rpb25zCmBgYHtyfQpQcmVkaWN0VHJhaW4gPSBwcmVkaWN0KHN0ZXAubWF0aC5sbSwgbmV3ZGF0YSA9IG1hdGgudHJhaW4pClByZWRpY3RUZXN0ID0gcHJlZGljdChzdGVwLm1hdGgubG0sIG5ld2RhdGEgPSBtYXRoLnRlc3QpCmBgYAojTGluZWFyIFJlZ3Jlc3Npb24gS1BJcwpgYGB7cn0KbWVhbl90cmFpbiA9IG1lYW4obWF0aC50cmFpbiRHMykKU1NFVHJhaW4gPSBzdW0oKFByZWRpY3RUcmFpbiAtIG1hdGgudHJhaW4kRzMpXjIpClNTVFRyYWluID0gc3VtKChtYXRoLnRyYWluJEczIC0gbWVhbl90cmFpbileMikKClIyID0gMSAtIFNTRVRyYWluL1NTVFRyYWluClNTRVRlc3QgPSBzdW0oKFByZWRpY3RUZXN0IC0gbWF0aC50ZXN0JEczKV4yKQpTU1RUZXN0ID0gc3VtKChtYXRoLnRl
c3QkRzMgLSBtZWFuX3RyYWluKV4yKQpPU1IyID0gMSAtIFNTRVRlc3QvU1NUVGVzdApSTVNFID0gUk1TRShQcmVkaWN0VGVzdCxtYXRoLnRlc3QkRzMpCk1BRSA9IE1BRShQcmVkaWN0VGVzdCxtYXRoLnRlc3QkRzMpCktQSUxNPWRhdGEuZnJhbWUoIk1vZGVsIj0iTE0iLCJSMiI9UjIsIk9TUjIiPU9TUjIsIlJNU0UiPVJNU0UsIk1BRSI9TUFFKQpLUElMTQpgYGAKI0xhc3NvCmBgYHtyfQojc2V0IHRyYWluaW5nIGFuZCB0ZXN0IGRhdGFzZXQKeC5tYXRoLnRyYWluPW1vZGVsLm1hdHJpeChHM34uLTEsZGF0YT1tYXRoLnRyYWluKSAKeS5tYXRoLnRyYWluPC1tYXRoLnRyYWluWyxjKCJHMyIpXSAjc2V0IFkgZm9yIGdsbW5ldCBmaXR0aW5nCngubWF0aC50ZXN0PW1vZGVsLm1hdHJpeChHM34uLTEsZGF0YT1tYXRoLnRlc3QpIAp5Lm1hdGgudGVzdD1tYXRoLnRlc3RbLCJHMyJdCmxhc3NvLmxhbWJkYXMgPSBjKGV4cChzZXEoNSwtNSwtLjEpKSkKc2V0LnNlZWQoMSkKY3YubGFzc28gPSBjdi5nbG1uZXQoeC5tYXRoLnRyYWluLHkubWF0aC50cmFpbixhbHBoYT0xLGxhbWJkYT1sYXNzby5sYW1iZGFzLG5mb2xkcz0xMCkKYmVzdGxhbWJkYSA9IGN2Lmxhc3NvJGxhbWJkYS5taW4KbGFzc28gPSBnbG1uZXQoeC5tYXRoLnRyYWluLHkubWF0aC50cmFpbixhbHBoYT0xLGxhbWJkYSA9IGJlc3RsYW1iZGEpCmBgYAojTGFzc28gUHJlZGljdGlvbnMKYGBge3J9CnByZWQudHJhaW4ubGFzc28gPC0gcHJlZGljdChsYXNzbyx4Lm1hdGgudHJhaW4pCnByZWQudGVzdC5sYXNzbyA8LSBwcmVkaWN0KGxhc3NvLHgubWF0aC50ZXN0KQpgYGAKI0xhc3NvIEtQSXMKYGBge3J9ClIyLkxhc3NvID0gMS1zdW0oKHByZWQudHJhaW4ubGFzc28gLSB5Lm1hdGgudHJhaW4pXjIpL1NTVFRyYWluCk9TUjIubGFzc28gPC0gMS1zdW0oKHByZWQudGVzdC5sYXNzby15Lm1hdGgudGVzdCleMikvU1NUVGVzdApSTVNFLmxhc3NvID0gUk1TRShwcmVkLnRlc3QubGFzc28sbWF0aC50ZXN0JEczKQpNQUUubGFzc28gPSBNQUUocHJlZC50ZXN0Lmxhc3NvLG1hdGgudGVzdCRHMykKS1BJTGFzc289ZGF0YS5mcmFtZSgiTW9kZWwiPSJMYXNzbyIsIlIyIj1SMi5MYXNzbywiT1NSMiI9T1NSMi5sYXNzbywiUk1TRSI9IFJNU0UubGFzc28sIk1BRSI9TUFFLmxhc3NvKQpLUElMYXNzbwpgYGAKI2NhcnQKYGBge3J9CmRlZmF1bHRfdHJlZSA8LSBycGFydChHMyB+IHNjaG9vbCtzZXgrYWdlK2FkZHJlc3MrZmFtc2l6ZStQc3RhdHVzK01lZHUrRmVkdStNam9iK0Zqb2IrcmVhc29uK2d1YXJkaWFuK3RyYXZlbHRpbWUrc3R1ZHl0aW1lK2ZhaWx1cmVzK3NjaG9vbHN1cCtmYW1zdXArcGFpZCthY3Rpdml0aWVzK251cnNlcnkraGlnaGVyK2ludGVybmV0K3JvbWFudGljK2ZhbXJlbCtmcmVldGltZStnb291dCtEYWxjK1dhbGMraGVhbHRoK2Fic2VuY2VzLCBkYXRhPW1hdGgudHJhaW4pCnBhcihtYXI9YygxLDEsMSwxKSkKcHJwKGRlZmF1bHRfdHJlZSkKcHJpbnQoZGVmYXVsdF90cmVlLCBk
aWdpdHM9MykKYGBgCgojQ2FydCBDcm9zcyBWYWxpZGF0aW9uCmBgYHtyfQpSU3F1YXJlZCA8LSBmdW5jdGlvbihkYXRhLCBsZXYgPSBOVUxMLCBtb2RlbCA9IE5VTEwsIC4uLikgewogIGMoUlNxID0gY29yKGRhdGEkb2JzLCBkYXRhJHByZWQpICoqIDIpCn0KCmN2LnRyZWVzID0gdHJhaW4oRzN+c2Nob29sK3NleCthZ2UrYWRkcmVzcytmYW1zaXplK1BzdGF0dXMrTWVkdStGZWR1K01qb2IrRmpvYityZWFzb24rZ3VhcmRpYW4rdHJhdmVsdGltZStzdHVkeXRpbWUrZmFpbHVyZXMrc2Nob29sc3VwK2ZhbXN1cCtwYWlkK2FjdGl2aXRpZXMrbnVyc2VyeStoaWdoZXIraW50ZXJuZXQrcm9tYW50aWMrZmFtcmVsK2ZyZWV0aW1lK2dvb3V0K0RhbGMrV2FsYytoZWFsdGgrYWJzZW5jZXMsCiAgICAgICAgICAgICAgICAgZGF0YSA9IG1hdGgudHJhaW4sCiAgICAgICAgICAgICAgICAgbWV0aG9kID0gInJwYXJ0IiwKICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSB0cmFpbkNvbnRyb2wobWV0aG9kID0gImN2IiwgbnVtYmVyID0gMTAsIHN1bW1hcnlGdW5jdGlvbj1SU3F1YXJlZCksICMgMTAtZm9sZCBjdgogICAgICAgICAgICAgICAgIG1ldHJpYz0iUlNxIiwgbWF4aW1pemU9VFJVRSwgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgdHVuZUdyaWQgPSBkYXRhLmZyYW1lKC5jcCA9IHNlcSgwLC4wMDA0LC4wMDAwMSkpKSAgCmBgYAoKI2NhcnQgYmVzdCB0cmVlCmBgYHtyfQpiZXN0X2NwIDwtIGN2LnRyZWVzJGJlc3RUdW5lJGNwCmJlc3RfdHJlZSA8LSBycGFydChHMyB+IHNjaG9vbCtzZXgrYWdlK2FkZHJlc3MrZmFtc2l6ZStQc3RhdHVzK01lZHUrRmVkdStNam9iK0Zqb2IrcmVhc29uK2d1YXJkaWFuK3RyYXZlbHRpbWUrc3R1ZHl0aW1lK2ZhaWx1cmVzK3NjaG9vbHN1cCtmYW1zdXArcGFpZCthY3Rpdml0aWVzK251cnNlcnkraGlnaGVyK2ludGVybmV0K3JvbWFudGljK2ZhbXJlbCtmcmVldGltZStnb291dCtEYWxjK1dhbGMraGVhbHRoK2Fic2VuY2VzLCBkYXRhPW1hdGgudHJhaW4sIGNwPWJlc3RfY3ApCmJlc3RfdHJlZQpwcnAoYmVzdF90cmVlKQpwcmludChiZXN0X3RyZWUsIGRpZ2l0cz0zKQpgYGAKI0NhcnQgS1BJcwpgYGB7ciBlY2hvPVRSVUV9CmRlZmF1bHRfcHJlZF90cmFpbiA9IHByZWRpY3QoZGVmYXVsdF90cmVlLCBuZXdkYXRhID0gbWF0aC50cmFpbikKYmVzdF9wcmVkX3RyYWluID0gcHJlZGljdChiZXN0X3RyZWUsIG5ld2RhdGE9bWF0aC50cmFpbikKZGVmYXVsdF9wcmVkIDwtIHByZWRpY3QoZGVmYXVsdF90cmVlLCBuZXdkYXRhID0gbWF0aC50ZXN0KQpiZXN0X3ByZWQgICAgPC0gcHJlZGljdChiZXN0X3RyZWUsIG5ld2RhdGE9bWF0aC50ZXN0KQphY3R1YWx0cmFpbiA8LSBtYXRoLnRyYWluJEczCmFjdHVhbCA8LSBtYXRoLnRlc3QkRzMKClIyY2FydF9kZWZhdWx0PWNvcihhY3R1YWx0cmFpbiwgZGVmYXVsdF9wcmVkX3RyYWluKSBeIDIKUjJjYXJ0PWNvcihhY3R1YWx0cmFpbiwgYmVzdF9wcmVkX3RyYWlu
KSBeIDIKCiNPU1IyCk9TUjJjYXJ0X2RlZmF1bHQ9Y29yKGFjdHVhbCwgZGVmYXVsdF9wcmVkKSBeIDIKT1NSMmNhcnQ9Y29yKGFjdHVhbCwgYmVzdF9wcmVkKSBeIDIKI01BRQpNQUVjYXJ0X2RlZmF1bHQ9TWV0cmljczo6bWFlKGFjdHVhbCwgZGVmYXVsdF9wcmVkKQpNQUVjYXJ0PU1ldHJpY3M6Om1hZShhY3R1YWwsIGJlc3RfcHJlZCkKI1JNU0UKUk1TRWNhcnRfZGVmYXVsdD1NZXRyaWNzOjpybXNlKGFjdHVhbCwgZGVmYXVsdF9wcmVkKQpSTVNFY2FydD1NZXRyaWNzOjpybXNlKGFjdHVhbCwgYmVzdF9wcmVkKQoKS1BJY2FydCA9IGRhdGEuZnJhbWUoIk1vZGVsIj0iQ2FydCIsIlIyIj1SMmNhcnQsIk9TUjIiPU9TUjJjYXJ0LCJSTVNFIj0gUk1TRWNhcnQsIk1BRSI9TUFFY2FydCkgCktQSWNhcnQKYGBgCgpgYGB7cn0Kc2V0LnNlZWQoMSkKcmYuY3YubWF0aD10cmFpbih5PW1hdGgudHJhaW4kRzMsIHg9c3Vic2V0KG1hdGgudHJhaW4sIHNlbGVjdD0tYyhHMykpLCBtZXRob2Q9InJmIiwgbm9kc2l6ZT0yNSwgbnRyZWU9ODAsIHRyQ29udHJvbD10cmFpbkNvbnRyb2wobWV0aG9kPSJjdiIsIG51bWJlcj0xMCksIHR1bmVHcmlkPWRhdGEuZnJhbWUobXRyeT1zZXEoMTAsMzAsMSkpKQpyZi5jdi5tYXRoCgojNCBSRiB3aXRoIENWCm1vZC5yZi5tYXRoPXJhbmRvbUZvcmVzdChHM34uLCBkYXRhPW1hdGgudHJhaW4sbXRyeT0yNyxub2Rlc2l6ZT0yNSxudHJlZT04MCkKaW1wb3J0YW50X3ZhcnNfYW1lcz1pbXBvcnRhbmNlKG1vZC5yZi5tYXRoKQppbXBvcnRhbnRfdmFyc19hbWVzCiNwcmVkaWN0CnByZWQudHJhaW4ucmYubWF0aD1wcmVkaWN0KG1vZC5yZi5tYXRoLG5ld2RhdGE9bWF0aC50cmFpbikKcHJlZC50ZXN0LnJmLm1hdGg9cHJlZGljdChtb2QucmYubWF0aCxuZXdkYXRhPW1hdGgudGVzdCkKI3BlcmZvcm1hbmNlIG9mIHJmClIyLnJmLm1hdGg9MS1zdW0oKHByZWQudHJhaW4ucmYubWF0aC1tYXRoLnRyYWluJEczKV4yKS9TU1RUcmFpbgpNQUUucmYubWF0aD1tZWFuKGFicyhwcmVkLnRyYWluLnJmLm1hdGgtbWF0aC50cmFpbiRHMykpClJNU0UucmYubWF0aD1zcXJ0KG1lYW4oKHByZWQudHJhaW4ucmYubWF0aC1tYXRoLnRyYWluJEczKV4yKSkKT1NSMi5yZi5tYXRoLnRlc3Q9MS1zdW0oKHByZWQudGVzdC5yZi5tYXRoLW1hdGgudGVzdCRHMyleMikvU1NUVGVzdApNQUUucmYubWF0aC50ZXN0PW1lYW4oYWJzKHByZWQudGVzdC5yZi5tYXRoLW1hdGgudGVzdCRHMykpClJNU0UucmYubWF0aC50ZXN0PXNxcnQobWVhbigocHJlZC50ZXN0LnJmLm1hdGgtbWF0aC50ZXN0JEczKV4yKSkKCktQSXJmPWRhdGEuZnJhbWUoIk1vZGVsIj0iUkYiLCJSMiI9UjIucmYubWF0aCwiT1NSMiI9T1NSMi5yZi5tYXRoLnRlc3QsIk1BRSI9TUFFLnJmLm1hdGgudGVzdCwiUk1TRSI9Uk1TRS5yZi5tYXRoLnRlc3QpCktQSXJmCmBgYAojYWxsIE1vZGVsIEtQSXMKYGBge3J9CnJiaW5kKEtQSUxNLEtQSUxhc3NvLEtQSWNhcnQsS1BJcmYpCmBgYAoK