library("ggplot2")
library("gplots")
library("glmnet")
library("MASS")
library("tidyverse")
library("dplyr")
library("reshape")
library("ggpubr")
library("glmnet")
library("reshape2")
library("heatmaply")
library("dummies")
library("dplyr")
library("tidyr")
library("ggplot2")
library("caTools")
library("caret")
library("ROCR")
library("ggpubr")
Reading Data
# Load the three CSV extracts used by this analysis.
# The workspace is intentionally NOT cleared here: wiping the global
# environment from inside a script destroys the caller's objects and does not
# give a clean session anyway (attached packages and options survive).
# The shared directory is named once so the location only has to be edited in
# one place; file.path() joins with "/" and yields the same paths as before.
data_dir <- "/Users/kayhanbabakan/OneDrive/Analytics Edge Project"

# student-mat.csv and student-por.csv are semicolon-separated;
# studentsinboth.csv is comma-separated.
students_math <- read.csv(file.path(data_dir, "student-mat.csv"), sep = ";")
students_por <- read.csv(file.path(data_dir, "student-por.csv"), sep = ";")
students_both <- read.csv(file.path(data_dir, "studentsinboth.csv"), sep = ",")
Pre-Processing
# Convert every categorical column of the maths data set to a factor in a
# single pass. Listing the columns once replaces eighteen copy-pasted
# assignments (in which `higher` was converted twice) and makes it obvious
# which fields are treated as categorical.
factor_cols <- c(
  "school", "sex", "address", "famsize", "Fjob", "Mjob", "reason",
  "guardian", "schoolsup", "famsup", "paid", "activities", "nursery",
  "higher", "internet", "romantic", "Pstatus"
)

# Assigning back into `students_math[factor_cols]` keeps the original column
# order and fails loudly if any listed column is absent from the data.
students_math[factor_cols] <- lapply(students_math[factor_cols], as.factor)
Collinearity testing
# Collinearity screen on the maths data with the G1 and G2 columns removed.
# `select` is namespaced explicitly: MASS and dplyr are both attached by this
# script and both export `select`, so the bare name resolves to whichever
# package happened to be attached last.
students_mathonly <- dplyr::select(students_math, -c(G1, G2))

# Expand the data frame into a numeric design matrix (factors become
# indicator columns) so that pairwise correlations can be computed.
students_mathonly2 <- model.matrix(~ ., data = students_mathonly)

# Correlation plot with the x-axis labels rotated to vertical.
# NOTE(review): ggcorr() does not appear to come from any package attached at
# the top of this file (presumably GGally) - confirm it is attached before
# running this chunk.
ggcorr(students_mathonly2, size = 2) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Bars of Dalc values stacked per Walc level; geom_col() is the idiomatic
# spelling of geom_bar(stat = "identity").
WalcxDalc <- ggplot(students_mathonly) +
  geom_col(aes(x = Walc, y = Dalc))

# Row counts per Medu level, split by Fedu level ("count" is the default
# stat of geom_bar()).
MeduxFedu <- ggplot(students_mathonly) +
  geom_bar(aes(x = as.factor(Medu), fill = as.factor(Fedu)))

# Show both panels together with the legend placed on top.
ggarrange(WalcxDalc, MeduxFedu, legend = "top")

Initial indications
# Share of rows with G3 <= 10 versus G3 > 10 at each level of a predictor.
# position = "fill" rescales every bar to height 1, so the panels show
# proportions rather than raw counts.
FailuresxG3 <- ggplot(
  data = students_mathonly,
  mapping = aes(x = failures, fill = G3 <= 10)
) +
  geom_histogram(binwidth = 1, position = "fill")

FamrelxG3 <- ggplot(
  data = students_mathonly,
  mapping = aes(x = famrel, fill = G3 <= 10)
) +
  geom_histogram(binwidth = 1, position = "fill")

GooutxG3 <- ggplot(
  data = students_mathonly,
  mapping = aes(x = goout, fill = G3 <= 10)
) +
  geom_histogram(binwidth = 1, position = "fill")

StudytimexG3 <- ggplot(
  data = students_mathonly,
  mapping = aes(x = studytime, fill = G3 <= 10)
) +
  geom_histogram(binwidth = 1, position = "fill")

# `romantic` is a factor, so a bar chart is used instead of a histogram.
RomanticxG3 <- ggplot(
  data = students_mathonly,
  mapping = aes(x = romantic, fill = G3 <= 10)
) +
  geom_bar(position = "fill")

# Scatter of G3 against absences, coloured by sex.
AbsencesxG3 <- ggplot(
  data = students_mathonly,
  mapping = aes(x = absences, y = G3, colour = sex)
) +
  geom_point() +
  labs(x = "absences", y = "3Q Grades")

# Arrange the six panels in one figure with the legends on top.
ggarrange(
  FailuresxG3, FamrelxG3, GooutxG3,
  StudytimexG3, RomanticxG3, AbsencesxG3,
  legend = "top"
)

Linear Modeling
# Fix the RNG seed so the train/test partition is reproducible.
set.seed(1)

# Partition on G3 with p = 0.65 for training; with list = FALSE the result
# is a set of row indices rather than a list.
split <- createDataPartition(
  y = students_mathonly$G3,
  p = 0.65,
  list = FALSE
)
math.train <- students_mathonly[split, ]
math.test <- students_mathonly[-split, ]

# Linear model of G3 on a hand-picked subset of predictors. The leading
# unary `+` in the formula does not add a term; it is kept so the printed
# Call matches the output recorded with this notebook.
math.lm <- lm(
  formula = G3 ~ +sex + famsize + Medu + failures + romantic,
  data = math.train
)
summary(math.lm)
Call:
lm(formula = G3 ~ +sex + famsize + Medu + failures + romantic,
data = math.train)
Residuals:
Min 1Q Median 3Q Max
-12.6879 -2.0789 0.3121 2.8798 8.8798
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.2935 0.8126 10.207 < 2e-16 ***
sexM 0.7410 0.5389 1.375 0.170348
famsizeLE3 1.1845 0.5864 2.020 0.044453 *
Medu 0.9133 0.2537 3.600 0.000383 ***
failures -1.8348 0.3681 -4.984 1.16e-06 ***
romanticyes -1.2107 0.5581 -2.169 0.030981 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.265 on 253 degrees of freedom
Multiple R-squared: 0.196, Adjusted R-squared: 0.1802
F-statistic: 12.34 on 5 and 253 DF, p-value: 1.017e-10
Stepwise variable reduction based on the linear model output, using the standard threshold
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAogIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCmBgYHtyIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkoImdncGxvdDIiKQpsaWJyYXJ5KCJncGxvdHMiKQpsaWJyYXJ5KCJnbG1uZXQiKQpsaWJyYXJ5KCJNQVNTIikKbGlicmFyeSgidGlkeXZlcnNlIikKbGlicmFyeSgiZHBseXIiKQpsaWJyYXJ5KCJyZXNoYXBlIikKbGlicmFyeSgiZ2dwdWJyIikKbGlicmFyeSgiZ2xtbmV0IikKbGlicmFyeSgicmVzaGFwZTIiKQpsaWJyYXJ5KCJoZWF0bWFwbHkiKQpsaWJyYXJ5KCJkdW1taWVzIikKbGlicmFyeSgiZHBseXIiKQpsaWJyYXJ5KCJ0aWR5ciIpCmxpYnJhcnkoImdncGxvdDIiKQpsaWJyYXJ5KCJjYVRvb2xzIikKbGlicmFyeSgiY2FyZXQiKQpsaWJyYXJ5KCJST0NSIikKbGlicmFyeSgiZ2dwdWJyIikKYGBgCgpSZWFkaW5nIERhdGEKYGBge3IgbWVzc2FnZT1GQUxTRX0Kcm0obGlzdD1scygpKQpzdHVkZW50c19tYXRoPC1yZWFkLmNzdigiL1VzZXJzL2theWhhbmJhYmFrYW4vT25lRHJpdmUvQW5hbHl0aWNzIEVkZ2UgUHJvamVjdC9zdHVkZW50LW1hdC5jc3YiLHNlcCA9ICI7IikKc3R1ZGVudHNfcG9yPC1yZWFkLmNzdigiL1VzZXJzL2theWhhbmJhYmFrYW4vT25lRHJpdmUvQW5hbHl0aWNzIEVkZ2UgUHJvamVjdC9zdHVkZW50LXBvci5jc3YiLCBzZXAgPSAiOyIpCnN0dWRlbnRzX2JvdGg8LXJlYWQuY3N2KCIvVXNlcnMva2F5aGFuYmFiYWthbi9PbmVEcml2ZS9BbmFseXRpY3MgRWRnZSBQcm9qZWN0L3N0dWRlbnRzaW5ib3RoLmNzdiIsIHNlcCA9ICIsIikKYGBgCgpQcmUtUHJvY2Vzc2luZwpgYGB7ciB3YXJuaW5nPUZBTFNFfQoKc3R1ZGVudHNfbWF0aCRzY2hvb2wgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRzY2hvb2wpCnN0dWRlbnRzX21hdGgkc2V4ID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkc2V4KQpzdHVkZW50c19tYXRoJGFkZHJlc3MgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRhZGRyZXNzKQpzdHVkZW50c19tYXRoJGZhbXNpemUgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRmYW1zaXplKQpzdHVkZW50c19tYXRoJEZqb2IgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRGam9iKQpzdHVkZW50c19tYXRoJE1qb2IgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRNam9iKQpzdHVkZW50c19tYXRoJHJlYXNvbiA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJHJlYXNvbikKc3R1ZGVudHNfbWF0aCRndWFyZGlhbiA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJGd1YXJkaWFuKQpzdHVkZW50c19tYXRoJHNjaG9vbHN1cCA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJHNjaG9vbHN1cCkKc3R1ZGVudHNfbWF0aCRmYW1zdXAgPSBhcy5mYWN0b3Ioc3R1ZGVudHNfbWF0aCRmYW1zdXApCnN0dWRlbnRzX21hdGgkcGFpZCA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJHBhaWQp
CnN0dWRlbnRzX21hdGgkYWN0aXZpdGllcyA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJGFjdGl2aXRpZXMpCnN0dWRlbnRzX21hdGgkbnVyc2VyeSA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJG51cnNlcnkpCnN0dWRlbnRzX21hdGgkaGlnaGVyID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkaGlnaGVyKQpzdHVkZW50c19tYXRoJGludGVybmV0ID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkaW50ZXJuZXQpCnN0dWRlbnRzX21hdGgkaGlnaGVyID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkaGlnaGVyKQpzdHVkZW50c19tYXRoJHJvbWFudGljID0gYXMuZmFjdG9yKHN0dWRlbnRzX21hdGgkcm9tYW50aWMpCnN0dWRlbnRzX21hdGgkUHN0YXR1cyA9IGFzLmZhY3RvcihzdHVkZW50c19tYXRoJFBzdGF0dXMpCmBgYAoKQ29saW5pZWFyaXR5IHRlc3RpbmcKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0Kc3R1ZGVudHNfbWF0aG9ubHkgPSBzZWxlY3Qoc3R1ZGVudHNfbWF0aCwtYyhHMSxHMikpCnN0dWRlbnRzX21hdGhvbmx5MiA9IG1vZGVsLm1hdHJpeCh+LixkYXRhPXN0dWRlbnRzX21hdGhvbmx5KQpnZ2NvcnIoc3R1ZGVudHNfbWF0aG9ubHkyLHNpemU9MikrCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCwgaGp1c3QgPSAxKSkKCldhbGN4RGFsYyA9IGdncGxvdChzdHVkZW50c19tYXRob25seSkrCiAgZ2VvbV9iYXIoYWVzKFdhbGMsRGFsYyksc3RhdD0iaWRlbnRpdHkiKQoKTWVkdXhGZWR1PWdncGxvdChzdHVkZW50c19tYXRob25seSkrCiAgZ2VvbV9iYXIoYWVzKGFzLmZhY3RvcihNZWR1KSxmaWxsPWFzLmZhY3RvcihGZWR1KSksc3RhdD0iY291bnQiKQoKZ2dhcnJhbmdlKFdhbGN4RGFsYyxNZWR1eEZlZHUsbGVnZW5kID0gInRvcCIpCmBgYApJbml0aWFsIGluZGljYXRpb25zCmBgYHtyfQojZ3JhZGUgYWJvdmUgcGVyY2V0bmFnZQpGYWlsdXJlc3hHMz1nZ3Bsb3Qoc3R1ZGVudHNfbWF0aG9ubHksYWVzKHg9ZmFpbHVyZXMsZmlsbD1HMzw9MTApKSsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD0xLHBvc2l0aW9uPSdmaWxsJykKCkZhbXJlbHhHMz1nZ3Bsb3Qoc3R1ZGVudHNfbWF0aG9ubHksYWVzKHg9ZmFtcmVsLGZpbGw9RzM8PTEwKSkrCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MSxwb3NpdGlvbj0nZmlsbCcpCgpHb291dHhHMz0gZ2dwbG90KHN0dWRlbnRzX21hdGhvbmx5LGFlcyh4PWdvb3V0LGZpbGw9RzM8PTEwKSkrCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MSxwb3NpdGlvbj0nZmlsbCcpCgpTdHVkeXRpbWV4RzM9Z2dwbG90KHN0dWRlbnRzX21hdGhvbmx5LGFlcyh4PXN0dWR5dGltZSxmaWxsPUczPD0xMCkpKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoPTEscG9zaXRpb249J2ZpbGwnKQoKUm9tYW50aWN4RzM9Z2dwbG90KHN0dWRlbnRzX21hdGhvbmx5LGFlcyh4PXJvbWFudGljLGZpbGw9RzM8PTEwKSkrCiAgZ2VvbV9iYXIocG9zaXRpb249J2Zp
bGwnKQoKQWJzZW5jZXN4RzM9IGdncGxvdChzdHVkZW50c19tYXRob25seSwgYWVzKGFic2VuY2VzLCBHMyxjb2xvdXIgPSBzZXgpKSsKICBnZW9tX3BvaW50KCkrCiAgeGxhYigiYWJzZW5jZXMiKSsKICB5bGFiKCIzUSBHcmFkZXMiKQoKZ2dhcnJhbmdlKEZhaWx1cmVzeEczLEZhbXJlbHhHMyxHb291dHhHMyxTdHVkeXRpbWV4RzMsUm9tYW50aWN4RzMsQWJzZW5jZXN4RzMsbGVnZW5kID0gInRvcCIpCmBgYApMaW5lYXIgTW9kZWxpbmcKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpzZXQuc2VlZCgxKQpzcGxpdCA9IGNyZWF0ZURhdGFQYXJ0aXRpb24oc3R1ZGVudHNfbWF0aG9ubHkkRzMsIHAgPSAwLjY1LCBsaXN0ID0gRkFMU0UpIAptYXRoLnRyYWluID0gc3R1ZGVudHNfbWF0aG9ubHlbc3BsaXQsXQptYXRoLnRlc3QgPSBzdHVkZW50c19tYXRob25seVstc3BsaXQsXQoKbWF0aC5sbSA9IGxtKEczfitzZXgrZmFtc2l6ZStNZWR1K2ZhaWx1cmVzK3JvbWFudGljLCBkYXRhID0gbWF0aC50cmFpbikKc3VtbWFyeShtYXRoLmxtKQpgYGAKU3RlcHdpc2UgdmFyaWFibGUgcmVkdWN0aW9uIGxpbmVhciBvdXRwdXQgYmFzaXMgc3RhbmRhcmQgdGhyZXNob2xkCg==