What we need to do: correlation plot dummy variables off the top VIF Explain hours more
library(faraway)
student_data <- read.csv("student_lifestyle_dataset.csv")
student_data$Study_Hours_Per_Day + student_data$Extracurricular_Hours_Per_Day + student_data$Sleep_Hours_Per_Day + student_data$Social_Hours_Per_Day + student_data$Physical_Activity_Hours_Per_Day
## [1] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [25] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [49] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [73] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [97] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [121] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [145] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [169] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [193] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [217] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [241] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [265] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [289] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [313] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [337] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [361] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [385] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [409] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [433] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [457] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [481] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [505] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [529] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [553] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [577] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [601] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [625] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [649] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [673] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [697] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [721] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [745] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [769] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [793] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [817] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [841] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [865] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [889] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [913] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [937] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [961] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [985] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1009] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1033] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1057] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1081] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1105] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1129] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1153] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1177] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1201] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1225] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1249] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1273] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1297] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1321] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1345] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1369] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1393] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1417] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1441] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1465] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1489] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1513] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1537] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1561] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1585] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1609] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1633] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1657] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1681] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1705] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1729] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1753] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1777] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1801] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1825] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1849] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1873] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1897] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1921] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1945] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1969] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## [1993] 24 24 24 24 24 24 24 24
model <- lm(GPA ~.-Student_ID,data=student_data)
summary(model)
##
## Call:
## lm(formula = GPA ~ . - Student_ID, data = student_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59944 -0.13439 -0.00251 0.13369 0.77543
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.007257 0.044611 44.995 <2e-16 ***
## Study_Hours_Per_Day 0.154016 0.005128 30.033 <2e-16 ***
## Extracurricular_Hours_Per_Day -0.007418 0.003959 -1.874 0.0611 .
## Sleep_Hours_Per_Day -0.003658 0.003609 -1.014 0.3109
## Social_Hours_Per_Day 0.001427 0.002789 0.512 0.6088
## Physical_Activity_Hours_Per_Day NA NA NA NA
## Stress_LevelLow 0.006591 0.021317 0.309 0.7572
## Stress_LevelModerate -0.015801 0.013469 -1.173 0.2409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2025 on 1993 degrees of freedom
## Multiple R-squared: 0.5418, Adjusted R-squared: 0.5404
## F-statistic: 392.7 on 6 and 1993 DF, p-value: < 2.2e-16
plot(model)
Dummy Variable
d1 <- ifelse(student_data$Stress_Level == "Low",1,0)
d2 <- ifelse(student_data$Stress_Level == "Moderate",1,0)
new_data <- cbind.data.frame(student_data,d1,d2)
Cor
library(corrplot)
## corrplot 0.95 loaded
cor_matrix <- cor(new_data[,c("Study_Hours_Per_Day","Extracurricular_Hours_Per_Day","Sleep_Hours_Per_Day", "Social_Hours_Per_Day","Physical_Activity_Hours_Per_Day","GPA",'d1','d2')])
corrplot(cor_matrix, method = "circle", type = "lower", tl.cex = 0.8, addCoef.col = "black")
VIF
model1 <- lm(GPA ~. -Student_ID - Stress_Level - Physical_Activity_Hours_Per_Day ,data=new_data)
model2 <- lm(GPA ~. -Student_ID - Stress_Level - Social_Hours_Per_Day ,data=new_data)
model3 <- lm(GPA ~. -Student_ID - Stress_Level - Sleep_Hours_Per_Day ,data=new_data)
model4 <- lm(GPA ~. -Student_ID - Stress_Level - Extracurricular_Hours_Per_Day ,data=new_data)
vif(model1)
## Study_Hours_Per_Day Extracurricular_Hours_Per_Day
## 2.599471 1.020729
## Sleep_Hours_Per_Day Social_Hours_Per_Day
## 1.355431 1.081113
## d1 d2
## 2.802699 1.977307
vif(model2)
## Study_Hours_Per_Day Extracurricular_Hours_Per_Day
## 3.180111 1.323115
## Sleep_Hours_Per_Day Physical_Activity_Hours_Per_Day
## 1.782787 2.396792
## d1 d2
## 2.802699 1.977307
vif(model3) #higher vif with phy act
## Study_Hours_Per_Day Extracurricular_Hours_Per_Day
## 5.227288 1.831459
## Social_Hours_Per_Day Physical_Activity_Hours_Per_Day
## 2.381436 4.013988
## d1 d2
## 2.802699 1.977307
vif(model4) #higher vif with phy act
## Study_Hours_Per_Day Sleep_Hours_Per_Day
## 4.111329 2.925908
## Social_Hours_Per_Day Physical_Activity_Hours_Per_Day
## 2.823582 4.829172
## d1 d2
## 2.802699 1.977307
summary(model1) #same r2
##
## Call:
## lm(formula = GPA ~ . - Student_ID - Stress_Level - Physical_Activity_Hours_Per_Day,
## data = new_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59944 -0.13439 -0.00251 0.13369 0.77543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.007257 0.044611 44.995 <2e-16 ***
## Study_Hours_Per_Day 0.154016 0.005128 30.033 <2e-16 ***
## Extracurricular_Hours_Per_Day -0.007418 0.003959 -1.874 0.0611 .
## Sleep_Hours_Per_Day -0.003658 0.003609 -1.014 0.3109
## Social_Hours_Per_Day 0.001427 0.002789 0.512 0.6088
## d1 0.006591 0.021317 0.309 0.7572
## d2 -0.015801 0.013469 -1.173 0.2409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2025 on 1993 degrees of freedom
## Multiple R-squared: 0.5418, Adjusted R-squared: 0.5404
## F-statistic: 392.7 on 6 and 1993 DF, p-value: < 2.2e-16
summary(model2)
##
## Call:
## lm(formula = GPA ~ . - Student_ID - Stress_Level - Social_Hours_Per_Day,
## data = new_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59944 -0.13439 -0.00251 0.13369 0.77543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.041517 0.065850 31.003 <2e-16 ***
## Study_Hours_Per_Day 0.152588 0.005672 26.902 <2e-16 ***
## Extracurricular_Hours_Per_Day -0.008846 0.004507 -1.963 0.0498 *
## Sleep_Hours_Per_Day -0.005085 0.004139 -1.229 0.2194
## Physical_Activity_Hours_Per_Day -0.001427 0.002789 -0.512 0.6088
## d1 0.006591 0.021317 0.309 0.7572
## d2 -0.015801 0.013469 -1.173 0.2409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2025 on 1993 degrees of freedom
## Multiple R-squared: 0.5418, Adjusted R-squared: 0.5404
## F-statistic: 392.7 on 6 and 1993 DF, p-value: < 2.2e-16
#choosing first model
Base model
model <- lm(GPA ~.,data=student_data)
res_sat <- residuals(model)
fit_sat <- fitted(model)
plot(fit_sat,res_sat,main = "Residuals against Y Hat")
After VIF model
res_sat <- residuals(model1)
fit_sat <- fitted(model1)
plot(fit_sat,res_sat,main = "Residuals against Y Hat")
Some outliers
boxplot(residuals(model))
lev <- hatvalues(model)
halfnorm(lev,labs = row.names(student_data))
each predictor ploted
res <- residuals(model1)
fit <- fitted(model1)
plot(fit,res,main = "Residuals against Y Hat")
plot(student_data$Student_ID,res, main = "Residuals against Student ID")
plot(student_data$Study_Hours_Per_Day,res, main = "Residuals against Study Hours", col = 'blue')
plot(student_data$Extracurricular_Hours_Per_Day,res, main = "Residuals against Extracurricular",col = 'red')
plot(student_data$Sleep_Hours_Per_Day,res, main = "Residuals against Sleep",col = 'orange')
plot(student_data$Social_Hours_Per_Day,res, main = "Residuals against Social Hours", col = 'darkgreen')
qqnorm(residuals(model1))
qqline(residuals(model1))
summary(model)
##
## Call:
## lm(formula = GPA ~ ., data = student_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59820 -0.13423 -0.00278 0.13389 0.77506
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.009e+00 4.525e-02 44.389 <2e-16 ***
## Student_ID -1.546e-06 7.878e-06 -0.196 0.8444
## Study_Hours_Per_Day 1.540e-01 5.131e-03 30.024 <2e-16 ***
## Extracurricular_Hours_Per_Day -7.434e-03 3.960e-03 -1.877 0.0607 .
## Sleep_Hours_Per_Day -3.672e-03 3.611e-03 -1.017 0.3093
## Social_Hours_Per_Day 1.423e-03 2.790e-03 0.510 0.6100
## Physical_Activity_Hours_Per_Day NA NA NA NA
## Stress_LevelLow 6.579e-03 2.132e-02 0.309 0.7577
## Stress_LevelModerate -1.563e-02 1.350e-02 -1.157 0.2473
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2025 on 1992 degrees of freedom
## Multiple R-squared: 0.5418, Adjusted R-squared: 0.5401
## F-statistic: 336.4 on 7 and 1992 DF, p-value: < 2.2e-16
#cook distance
cook <- cooks.distance(model)
halfnorm(cook,labs = row.names(model))
predictor selection
#do this for different citerion
library(leaps)
library(faraway)
step(model1,direction="backward",criterion="AIC")
## Start: AIC=-6381.3
## GPA ~ (Student_ID + Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## Sleep_Hours_Per_Day + Social_Hours_Per_Day + Physical_Activity_Hours_Per_Day +
## Stress_Level + d1 + d2) - Student_ID - Stress_Level - Physical_Activity_Hours_Per_Day
##
## Df Sum of Sq RSS AIC
## - d1 1 0.004 81.720 -6383.2
## - Social_Hours_Per_Day 1 0.011 81.727 -6383.0
## - Sleep_Hours_Per_Day 1 0.042 81.758 -6382.3
## - d2 1 0.056 81.773 -6381.9
## <none> 81.716 -6381.3
## - Extracurricular_Hours_Per_Day 1 0.144 81.860 -6379.8
## - Study_Hours_Per_Day 1 36.984 118.700 -5636.6
##
## Step: AIC=-6383.2
## GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + Sleep_Hours_Per_Day +
## Social_Hours_Per_Day + d2
##
## Df Sum of Sq RSS AIC
## - Social_Hours_Per_Day 1 0.011 81.731 -6384.9
## - Sleep_Hours_Per_Day 1 0.039 81.759 -6384.2
## <none> 81.720 -6383.2
## - d2 1 0.136 81.856 -6381.9
## - Extracurricular_Hours_Per_Day 1 0.144 81.864 -6381.7
## - Study_Hours_Per_Day 1 86.504 168.224 -4941.2
##
## Step: AIC=-6384.93
## GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + Sleep_Hours_Per_Day +
## d2
##
## Df Sum of Sq RSS AIC
## - Sleep_Hours_Per_Day 1 0.049 81.780 -6385.7
## <none> 81.731 -6384.9
## - d2 1 0.134 81.865 -6383.7
## - Extracurricular_Hours_Per_Day 1 0.159 81.890 -6383.1
## - Study_Hours_Per_Day 1 87.638 169.370 -4929.6
##
## Step: AIC=-6385.73
## GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + d2
##
## Df Sum of Sq RSS AIC
## <none> 81.780 -6385.7
## - Extracurricular_Hours_Per_Day 1 0.160 81.940 -6383.8
## - d2 1 0.184 81.965 -6383.2
## - Study_Hours_Per_Day 1 87.940 169.721 -4927.5
##
## Call:
## lm(formula = GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## d2, data = new_data)
##
## Coefficients:
## (Intercept) Study_Hours_Per_Day
## 2.000020 0.152278
## Extracurricular_Hours_Per_Day d2
## -0.007734 -0.020983
step(model1,direction="both")
## Start: AIC=-6381.3
## GPA ~ (Student_ID + Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## Sleep_Hours_Per_Day + Social_Hours_Per_Day + Physical_Activity_Hours_Per_Day +
## Stress_Level + d1 + d2) - Student_ID - Stress_Level - Physical_Activity_Hours_Per_Day
##
## Df Sum of Sq RSS AIC
## - d1 1 0.004 81.720 -6383.2
## - Social_Hours_Per_Day 1 0.011 81.727 -6383.0
## - Sleep_Hours_Per_Day 1 0.042 81.758 -6382.3
## - d2 1 0.056 81.773 -6381.9
## <none> 81.716 -6381.3
## - Extracurricular_Hours_Per_Day 1 0.144 81.860 -6379.8
## - Study_Hours_Per_Day 1 36.984 118.700 -5636.6
##
## Step: AIC=-6383.2
## GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + Sleep_Hours_Per_Day +
## Social_Hours_Per_Day + d2
##
## Df Sum of Sq RSS AIC
## - Social_Hours_Per_Day 1 0.011 81.731 -6384.9
## - Sleep_Hours_Per_Day 1 0.039 81.759 -6384.2
## <none> 81.720 -6383.2
## - d2 1 0.136 81.856 -6381.9
## - Extracurricular_Hours_Per_Day 1 0.144 81.864 -6381.7
## + d1 1 0.004 81.716 -6381.3
## - Study_Hours_Per_Day 1 86.504 168.224 -4941.2
##
## Step: AIC=-6384.93
## GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + Sleep_Hours_Per_Day +
## d2
##
## Df Sum of Sq RSS AIC
## - Sleep_Hours_Per_Day 1 0.049 81.780 -6385.7
## <none> 81.731 -6384.9
## - d2 1 0.134 81.865 -6383.7
## + Social_Hours_Per_Day 1 0.011 81.720 -6383.2
## - Extracurricular_Hours_Per_Day 1 0.159 81.890 -6383.1
## + d1 1 0.004 81.727 -6383.0
## - Study_Hours_Per_Day 1 87.638 169.370 -4929.6
##
## Step: AIC=-6385.73
## GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + d2
##
## Df Sum of Sq RSS AIC
## <none> 81.780 -6385.7
## + Sleep_Hours_Per_Day 1 0.049 81.731 -6384.9
## + Social_Hours_Per_Day 1 0.021 81.759 -6384.2
## - Extracurricular_Hours_Per_Day 1 0.160 81.940 -6383.8
## + d1 1 0.002 81.779 -6383.8
## - d2 1 0.184 81.965 -6383.2
## - Study_Hours_Per_Day 1 87.940 169.721 -4927.5
##
## Call:
## lm(formula = GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## d2, data = new_data)
##
## Coefficients:
## (Intercept) Study_Hours_Per_Day
## 2.000020 0.152278
## Extracurricular_Hours_Per_Day d2
## -0.007734 -0.020983
step(model1,direction="forward",criterion="BIC")
## Start: AIC=-6381.3
## GPA ~ (Student_ID + Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## Sleep_Hours_Per_Day + Social_Hours_Per_Day + Physical_Activity_Hours_Per_Day +
## Stress_Level + d1 + d2) - Student_ID - Stress_Level - Physical_Activity_Hours_Per_Day
##
## Call:
## lm(formula = GPA ~ (Student_ID + Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## Sleep_Hours_Per_Day + Social_Hours_Per_Day + Physical_Activity_Hours_Per_Day +
## Stress_Level + d1 + d2) - Student_ID - Stress_Level - Physical_Activity_Hours_Per_Day,
## data = new_data)
##
## Coefficients:
## (Intercept) Study_Hours_Per_Day
## 2.007257 0.154016
## Extracurricular_Hours_Per_Day Sleep_Hours_Per_Day
## -0.007418 -0.003658
## Social_Hours_Per_Day d1
## 0.001427 0.006591
## d2
## -0.015801
sum3 = summary(regsubsets(GPA ~.-Stress_Level-Physical_Activity_Hours_Per_Day, data=new_data,nbest=1))
sum3$which
## (Intercept) Student_ID Study_Hours_Per_Day Extracurricular_Hours_Per_Day
## 1 TRUE FALSE TRUE FALSE
## 2 TRUE FALSE TRUE FALSE
## 3 TRUE FALSE TRUE TRUE
## 4 TRUE FALSE TRUE TRUE
## 5 TRUE FALSE TRUE TRUE
## 6 TRUE FALSE TRUE TRUE
## 7 TRUE TRUE TRUE TRUE
## Sleep_Hours_Per_Day Social_Hours_Per_Day d1 d2
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE TRUE
## 3 FALSE FALSE FALSE TRUE
## 4 TRUE FALSE FALSE TRUE
## 5 TRUE TRUE FALSE TRUE
## 6 TRUE TRUE TRUE TRUE
## 7 TRUE TRUE TRUE TRUE
sum3$rsq
## [1] 0.5394432 0.5404943 0.5413901 0.5416668 0.5417285 0.5417505 0.5417593
outputMat = cbind(sum3$which, sum3$rsq,sum3$adjr2, sum3$cp, sum3$bic, sum3$rsq)
colnames(outputMat) <- c(colnames(sum3$which), "rsq","adjr2", "cp", "bic", "rss")
View(outputMat)
termplot(model1, partial.resid = T)
plot(regsubsets(GPA ~., data=student_data,
nbest=1,), scale="adjr2")
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 1 linear dependencies found
## Reordering variables and trying again:
#plotting AIC
AIC <- 50*log(sum3$rss/50) + (1:7)*2
plot(AIC~I(1:7),ylab="AIC",xlab="Number of Predictors",type="l",lwd=2)
#Plotting Cp
plot(1:7,sum3$cp,xlab="No.of Predictors",
ylab="Cp Statistic",type="l",lwd=2)
new_model <- lm(GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day + d2, data = new_data)
summary(new_model)
##
## Call:
## lm(formula = GPA ~ Study_Hours_Per_Day + Extracurricular_Hours_Per_Day +
## d2, data = new_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60982 -0.13364 -0.00205 0.13395 0.77368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.000020 0.027153 73.658 <2e-16 ***
## Study_Hours_Per_Day 0.152278 0.003287 46.329 <2e-16 ***
## Extracurricular_Hours_Per_Day -0.007734 0.003917 -1.975 0.0485 *
## d2 -0.020983 0.009899 -2.120 0.0342 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2024 on 1996 degrees of freedom
## Multiple R-squared: 0.5414, Adjusted R-squared: 0.5407
## F-statistic: 785.4 on 3 and 1996 DF, p-value: < 2.2e-16
model2 <- lm(GPA ~. - Student_ID - Stress_Level,data =student_data)
termplot(model2, partial.resid = T,terms=5,col.res='purple')
#partial residual