library(readxl)
# Load the student lifestyle data set from the Excel workbook.
student_lifestyle <- read_excel(
  path = "~/Library/CloudStorage/OneDrive-DrexelUniversity/CJS 310/Final Project/student_lifestyle.xls"
)

Research Question: How do lifestyle factors—study time, sleep, socializing, and stress—interact to predict students’ GPA, and which factor has the strongest influence on academic performance?

Step 1 — Start with correlation analysis

Create a correlation matrix between:

o GPA

o Study hours

o Sleep hours

o Social hours

o Stress level

This helps answer:

Which variables are most strongly associated with GPA?

# Pairwise plot matrix for GPA and the four lifestyle variables.
gpa_and_lifestyle <- c(
  "GPA",
  "Study_Hours_Per_Day",
  "Sleep_Hours_Per_Day",
  "Social_Hours_Per_Day",
  "Stress_Level"
)
ggpairs(student_lifestyle[, gpa_and_lifestyle])
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Step 2 — Scatterplots for each factor vs GPA

# Pair each student's sleep hours with their GPA.
df <- data.frame(student_lifestyle$Sleep_Hours_Per_Day, student_lifestyle$GPA)

# Estimate the joint (sleep, GPA) density on a 100 x 100 grid.
dens <- kde2d(df$student_lifestyle.Sleep_Hours_Per_Day, df$student_lifestyle.GPA, n = 100)

# Look up the density of the grid cell each student falls into.
# The index must be a two-column (row, column) matrix so that exactly one
# value per student is returned. Indexing with two separate vectors,
# z[ix, iy], returns the full cross-product matrix instead, which is what
# produced the "4000000 rows to replace 2000 rows" warning.
df$sleep_density <- dens$z[cbind(
  findInterval(df$student_lifestyle.Sleep_Hours_Per_Day, dens$x),
  findInterval(df$student_lifestyle.GPA, dens$y)
)]

# Scatterplot colored by local point density, with a linear trend line.
ggplot(
  df,
  aes(
    x = student_lifestyle.Sleep_Hours_Per_Day,
    y = student_lifestyle.GPA,
    color = sleep_density
  )
) +
  geom_point(size = 3) +
  scale_color_gradient(low = "green", high = "red") +
  geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 1.2) +
  labs(
    title = "Sleep Hours vs GPA (Density Colored)",
    x = "Hours of Sleep",
    y = "GPA",
    color = "Density"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Pair each student's study hours with their GPA.
df <- data.frame(student_lifestyle$Study_Hours_Per_Day, student_lifestyle$GPA)

# Estimate the joint (study, GPA) density on a 100 x 100 grid.
dens <- kde2d(df$student_lifestyle.Study_Hours_Per_Day, df$student_lifestyle.GPA, n = 100)

# Look up the density of the grid cell each student falls into.
# The index must be a two-column (row, column) matrix so that exactly one
# value per student is returned. Indexing with two separate vectors,
# z[ix, iy], returns the full cross-product matrix instead, which is what
# produced the "4000000 rows to replace 2000 rows" warning.
df$study_density <- dens$z[cbind(
  findInterval(df$student_lifestyle.Study_Hours_Per_Day, dens$x),
  findInterval(df$student_lifestyle.GPA, dens$y)
)]

# Scatterplot colored by local point density, with a linear trend line.
ggplot(
  df,
  aes(
    x = student_lifestyle.Study_Hours_Per_Day,
    y = student_lifestyle.GPA,
    color = study_density
  )
) +
  geom_point(size = 3) +
  scale_color_gradient(low = "green", high = "red") +
  geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 1.2) +
  labs(
    title = "Study Hours vs GPA (Density Colored)",
    x = "Hours of Studying",
    y = "GPA",
    color = "Density"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Pair each student's social hours with their GPA.
df <- data.frame(student_lifestyle$Social_Hours_Per_Day, student_lifestyle$GPA)

# Estimate the joint (social, GPA) density on a 100 x 100 grid.
dens <- kde2d(df$student_lifestyle.Social_Hours_Per_Day, df$student_lifestyle.GPA, n = 100)

# Look up the density of the grid cell each student falls into.
# The index must be a two-column (row, column) matrix so that exactly one
# value per student is returned. Indexing with two separate vectors,
# z[ix, iy], returns the full cross-product matrix instead, which is what
# produced the "4000000 rows to replace 2000 rows" warning.
df$social_density <- dens$z[cbind(
  findInterval(df$student_lifestyle.Social_Hours_Per_Day, dens$x),
  findInterval(df$student_lifestyle.GPA, dens$y)
)]

# Scatterplot colored by local point density, with a linear trend line.
ggplot(
  df,
  aes(
    x = student_lifestyle.Social_Hours_Per_Day,
    y = student_lifestyle.GPA,
    color = social_density
  )
) +
  geom_point(size = 3) +
  scale_color_gradient(low = "green", high = "red") +
  geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 1.2) +
  labs(
    title = "Social Hours vs GPA (Density Colored)",
    x = "Hours of Socializing",
    y = "GPA",
    color = "Density"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Box plot of GPA within each stress-level group, one fill color per group.
ggplot(
  data = student_lifestyle,
  mapping = aes(x = Stress_Level, y = GPA, fill = Stress_Level)
) +
  geom_boxplot() +
  ggtitle("Distribution of GPA by Stress Level") +
  xlab("Stress Level") +
  ylab("GPA") +
  theme_minimal()

Step 3 — Multiple regression

GPA ~ Study + Sleep + Social + Stress

This tells us:

o which variables are statistically significant

o which have the strongest impact

# Additive linear model: GPA regressed on the three daily-hours variables
# and stress level, with no interaction terms.
model <- lm(
  formula = GPA ~ Study_Hours_Per_Day + Sleep_Hours_Per_Day +
    Social_Hours_Per_Day + Stress_Level,
  data = student_lifestyle
)

# Print the coefficient table and overall fit statistics.
summary(model)
## 
## Call:
## lm(formula = GPA ~ Study_Hours_Per_Day + Sleep_Hours_Per_Day + 
##     Social_Hours_Per_Day + Stress_Level, data = student_lifestyle)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6095 -0.1331 -0.0018  0.1353  0.7897 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.988910   0.043550  45.669   <2e-16 ***
## Study_Hours_Per_Day   0.154105   0.005131  30.033   <2e-16 ***
## Sleep_Hours_Per_Day  -0.003521   0.003611  -0.975    0.330    
## Social_Hours_Per_Day  0.002170   0.002762   0.785    0.432    
## Stress_LevelLow       0.006448   0.021330   0.302    0.762    
## Stress_LevelModerate -0.016078   0.013477  -1.193    0.233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2026 on 1994 degrees of freedom
## Multiple R-squared:  0.5409, Adjusted R-squared:  0.5398 
## F-statistic: 469.9 on 5 and 1994 DF,  p-value: < 2.2e-16

| Variable | Estimate | Meaning |
|---|---|---|
| Intercept | 1.99 | Predicted GPA when all numeric predictors = 0 and stress level is at its baseline (High) |
| Study Hours | 0.154 | GPA increases by 0.154 per extra study hour |
| Sleep Hours | -0.0035 | Essentially no effect |
| Social Hours | 0.002 | Almost no effect |
| Stress Level (Low) | 0.006 | Almost no difference from baseline |
| Stress Level (Moderate) | -0.016 | Slightly lower GPA than baseline |

| Variable | p-value | Significant? |
|---|---|---|
| Study Hours | <2e-16 | ✅ YES |
| Sleep Hours | 0.330 | ❌ No |
| Social Hours | 0.432 | ❌ No |
| Stress Low | 0.762 | ❌ No |
| Stress Moderate | 0.233 | ❌ No |

Interpretation: A multiple linear regression was conducted to examine whether study hours, sleep hours, social hours, and stress levels predict GPA. The model was statistically significant (F(5,1994) = 469.9, p < 0.001) and explained approximately 54% of the variance in GPA (R² = 0.54). Study hours per day was a significant positive predictor of GPA (β = 0.154, p < 0.001), indicating that students who study more tend to have higher GPAs. Sleep hours, social hours, and stress levels were not statistically significant predictors in this model. Study time is the strongest predictor of academic performance.

Step 4 — Investigate interactions

Example question:

Does studying only help GPA if students also get enough sleep?

# Same predictors as the additive model, plus a Study x Sleep interaction
# (`*` expands to both main effects and their product term).
model2 <- lm(
  formula = GPA ~ Study_Hours_Per_Day * Sleep_Hours_Per_Day +
    Social_Hours_Per_Day + Stress_Level,
  data = student_lifestyle
)

# Print the coefficient table and overall fit statistics.
summary(model2)
## 
## Call:
## lm(formula = GPA ~ Study_Hours_Per_Day * Sleep_Hours_Per_Day + 
##     Social_Hours_Per_Day + Stress_Level, data = student_lifestyle)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60794 -0.13449 -0.00325  0.13468  0.78793 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              2.248052   0.132189  17.006  < 2e-16
## Study_Hours_Per_Day                      0.119539   0.017421   6.862 9.05e-12
## Sleep_Hours_Per_Day                     -0.042862   0.019290  -2.222   0.0264
## Social_Hours_Per_Day                     0.002950   0.002786   1.059   0.2897
## Stress_LevelLow                          0.026665   0.023432   1.138   0.2553
## Stress_LevelModerate                    -0.005701   0.014364  -0.397   0.6915
## Study_Hours_Per_Day:Sleep_Hours_Per_Day  0.005091   0.002452   2.076   0.0380
##                                            
## (Intercept)                             ***
## Study_Hours_Per_Day                     ***
## Sleep_Hours_Per_Day                     *  
## Social_Hours_Per_Day                       
## Stress_LevelLow                            
## Stress_LevelModerate                       
## Study_Hours_Per_Day:Sleep_Hours_Per_Day *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2024 on 1993 degrees of freedom
## Multiple R-squared:  0.5419, Adjusted R-squared:  0.5406 
## F-statistic:   393 on 6 and 1993 DF,  p-value: < 2.2e-16

Interpretation: The effect of study hours on GPA depends on how much sleep a student gets.

Because the interaction coefficient (Study Hours × Sleep Hours) is positive, it means:

o Studying helps GPA more when students sleep more.

o Students who study a lot but sleep very little get less benefit.

The interaction between study hours and sleep hours was statistically significant (β = 0.0051, p = 0.038), suggesting that the positive effect of studying on GPA becomes stronger when students get more sleep.

Step 5 — Stress analysis

Because the dataset includes stress levels, you can explore whether lifestyle affects stress, which then affects GPA.

# Multinomial logistic regression of stress level on the three daily-hours
# variables.
stress_model <- multinom(
  formula = Stress_Level ~ Study_Hours_Per_Day + Sleep_Hours_Per_Day +
    Social_Hours_Per_Day,
  data = student_lifestyle
)
## # weights:  15 (8 variable)
## initial  value 2197.224577 
## iter  10 value 1079.494453
## iter  20 value 798.341838
## iter  30 value 796.741437
## iter  30 value 796.741436
## iter  30 value 796.741436
## final  value 796.741436 
## converged
# Print coefficients and standard errors for each non-reference stress level.
summary(stress_model)
## Call:
## multinom(formula = Stress_Level ~ Study_Hours_Per_Day + Sleep_Hours_Per_Day + 
##     Social_Hours_Per_Day, data = student_lifestyle)
## 
## Coefficients:
##          (Intercept) Study_Hours_Per_Day Sleep_Hours_Per_Day
## Low        37.429430           -8.967273            2.324130
## Moderate    5.302923           -1.898731            1.150286
##          Social_Hours_Per_Day
## Low               -0.01127429
## Moderate           0.04669286
## 
## Std. Errors:
##          (Intercept) Study_Hours_Per_Day Sleep_Hours_Per_Day
## Low        2.7694110          0.58145990          0.15013715
## Moderate   0.5682766          0.09368542          0.06923321
##          Social_Hours_Per_Day
## Low                0.09087493
## Moderate           0.04409839
## 
## Residual Deviance: 1593.483 
## AIC: 1609.483

Low Stress vs High Stress

| Variable | Effect |
|---|---|
| Study Hours | -8.97 |
| Sleep Hours | +2.32 |
| Social Hours | ~0 |

Students are more likely to have low stress when they:

o Sleep more

o Study less

Moderate Stress vs High Stress

| Variable | Effect |
|---|---|
| Study Hours | -1.90 |
| Sleep Hours | +1.15 |
| Social Hours | Small |

More sleep is also associated with a lower likelihood of high stress relative to moderate stress.

Step 6 — Relationship between Sleep and Studying

# Create sleep categories from daily sleep hours:
#   <= 6 hours        -> "Low Sleep"
#   > 6 and <= 8      -> "Moderate Sleep"
#   > 8               -> "High Sleep"
# Every branch is an explicit condition so that a missing
# Sleep_Hours_Per_Day stays NA. A catch-all `TRUE ~ "High Sleep"` would
# silently label missing values as "High Sleep".
student_lifestyle <- student_lifestyle %>%
  mutate(Sleep_Group = case_when(
    Sleep_Hours_Per_Day <= 6 ~ "Low Sleep",
    Sleep_Hours_Per_Day <= 8 ~ "Moderate Sleep",
    Sleep_Hours_Per_Day > 8 ~ "High Sleep"
  ))

# Fit the Study x Sleep interaction model used for the plot below.
model3 <- lm(
  formula = GPA ~ Study_Hours_Per_Day * Sleep_Hours_Per_Day +
    Social_Hours_Per_Day + Stress_Level,
  data = student_lifestyle
)

# Plot the fitted GPA-vs-study-hours relationship, with sleep hours as the
# moderator.
interact_plot(
  model = model3,
  pred = Study_Hours_Per_Day,
  modx = Sleep_Hours_Per_Day
)

Summary

So students who:

o Study consistently

o Get adequate sleep

tend to perform best academically.

Multiple regression analysis showed that study hours per day were a strong positive predictor of GPA (β = 0.154, p < 0.001). Sleep hours and social hours were not individually significant predictors of GPA. However, an interaction effect between study hours and sleep hours was observed (β = 0.0051, p = 0.038), suggesting that the benefits of studying are greater when students obtain sufficient sleep.

A multinomial logistic regression examining predictors of stress levels indicated that increased sleep hours were associated with lower levels of stress, while higher study hours were associated with increased stress.

Overall, these results suggest that while studying is the primary driver of academic performance, adequate sleep plays an important role in reducing stress and enhancing the effectiveness of studying.