testrunquarto

##Load Libraries and dataset

echo=FALSE

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelsummary)
`modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
  backend. Learn more at: https://vincentarelbundock.github.io/tinytable/

Revert to `kableExtra` for one session:

  options(modelsummary_factory_default = 'kableExtra')
  options(modelsummary_factory_latex = 'kableExtra')
  options(modelsummary_factory_html = 'kableExtra')

Silence this message forever:

  config_modelsummary(startup_message = FALSE)
library(patchwork)
library(mgcv)
Loading required package: nlme

Attaching package: 'nlme'

The following object is masked from 'package:dplyr':

    collapse

This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
library(party)
Loading required package: grid
Loading required package: mvtnorm
Loading required package: modeltools
Loading required package: stats4
Loading required package: strucchange
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

Loading required package: sandwich

Attaching package: 'strucchange'

The following object is masked from 'package:stringr':

    boundary


Attaching package: 'party'

The following object is masked from 'package:dplyr':

    where
library(ggfortify)
library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(ggrepel)

##load dataset 

echo=FALSE
attendance <- read.csv("attendance.csv")
str(attendance)
'data.frame':   680 obs. of  11 variables:
 $ attend : int  27 22 30 31 32 29 30 26 24 29 ...
 $ termGPA: num  3.19 2.73 3 2.04 3.68 ...
 $ priGPA : num  2.64 3.52 2.46 2.61 3.32 ...
 $ ACT    : int  23 25 24 20 23 26 21 22 24 21 ...
 $ final  : int  28 26 30 27 34 25 10 34 26 26 ...
 $ atndrte: num  84.4 68.8 93.8 96.9 100 ...
 $ hwrte  : num  100 87.5 87.5 100 100 100 75 100 100 100 ...
 $ frosh  : int  0 0 0 0 0 0 1 0 1 0 ...
 $ soph   : int  1 0 0 1 1 1 0 1 0 1 ...
 $ missed : int  5 10 2 1 0 3 2 6 8 3 ...
 $ stndfnl: num  0.4727 0.0525 0.8929 0.2626 1.7332 ...
summary(attendance)
     attend         termGPA          priGPA           ACT       
 Min.   : 2.00   Min.   :0.000   Min.   :0.857   Min.   :13.00  
 1st Qu.:24.00   1st Qu.:2.138   1st Qu.:2.190   1st Qu.:20.00  
 Median :28.00   Median :2.670   Median :2.560   Median :22.00  
 Mean   :26.15   Mean   :2.601   Mean   :2.587   Mean   :22.51  
 3rd Qu.:30.00   3rd Qu.:3.120   3rd Qu.:2.942   3rd Qu.:25.00  
 Max.   :32.00   Max.   :4.000   Max.   :3.930   Max.   :32.00  
                                                                
     final          atndrte           hwrte            frosh       
 Min.   :10.00   Min.   :  6.25   Min.   : 12.50   Min.   :0.0000  
 1st Qu.:22.00   1st Qu.: 75.00   1st Qu.: 87.50   1st Qu.:0.0000  
 Median :26.00   Median : 87.50   Median :100.00   Median :0.0000  
 Mean   :25.89   Mean   : 81.71   Mean   : 87.91   Mean   :0.2324  
 3rd Qu.:29.00   3rd Qu.: 93.75   3rd Qu.:100.00   3rd Qu.:0.0000  
 Max.   :39.00   Max.   :100.00   Max.   :100.00   Max.   :1.0000  
                                  NA's   :6                        
      soph            missed          stndfnl        
 Min.   :0.0000   Min.   : 0.000   Min.   :-3.30882  
 1st Qu.:0.0000   1st Qu.: 2.000   1st Qu.:-0.78782  
 Median :1.0000   Median : 4.000   Median : 0.05252  
 Mean   :0.5765   Mean   : 5.853   Mean   : 0.02966  
 3rd Qu.:1.0000   3rd Qu.: 8.000   3rd Qu.: 0.68277  
 Max.   :1.0000   Max.   :30.000   Max.   : 2.78361  
                                                     

You can add options to executable code like this

attendance %>% 
  ggplot(aes(x=atndrte, y=priGPA)) +
  geom_point(aes(color=termGPA), alpha=0.6) +
  scale_color_viridis_c(option = "plasma") +
  theme_minimal() +
  labs(title="Scatterplot of Attendance Rate vs Prior GPA",
       x="Attendance Rate (%)",
       y="Prior GPA",
       color="Term GPA")

##Histogram to viualize the distribution of Term GPA identifying patterns like whether data is skewed

attendance %>% 
  ggplot(aes(x=termGPA)) +
  geom_histogram(bins=20, fill="blue", color="black") +
  theme_minimal() +
  labs(title="Distribution of Term GPA",
       x="GPA",
       y="Frequency")

##Attendance and Performance scatterplot diagram relationship (positive/negitive correlation)

attendance %>% 
  ggplot(aes(x=atndrte, y=termGPA, color=termGPA)) +
  geom_point(alpha=0.7) +
  scale_color_viridis_c(option = "plasma") +
  geom_smooth(method="lm", se=FALSE, color="red") +
  theme_minimal() +
  labs(title="Attendance vs Term GPA",
       x="Attendance Rate (%)",
       y="Term GPA",
       color="Term GPA")
`geom_smooth()` using formula = 'y ~ x'

##Missed classes percentage and standardized final scores 

attendance <- attendance %>% 
  mutate(missed_pct = (missed / 32) * 100,
         stnd_final = scale(final))
##Summarize key variables
attendance %>% 
  group_by(frosh, soph) %>% 
  summarize(mean_GPA = mean(termGPA),
            mean_attendance = mean(atndrte), .groups = "drop") %>% 
  arrange(desc(mean_GPA))
# A tibble: 3 × 4
  frosh  soph mean_GPA mean_attendance
  <int> <int>    <dbl>           <dbl>
1     0     1     2.68            82.5
2     0     0     2.56            80.2
3     1     0     2.43            81.1
##regression

lm1=lm(termGPA ~ atndrte + priGPA + ACT, data = attendance)

lm2=lm(termGPA ~ atndrte, data = attendance)

modelsummary(list("Model 1" = lm1, "Model 2" = lm2), stars = TRUE)
Model 1 Model 2
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) -1.060*** 0.625***
(0.169) (0.115)
atndrte 0.017*** 0.024***
(0.001) (0.001)
priGPA 0.575***
(0.044)
ACT 0.033***
(0.006)
Num.Obs. 680 680
R2 0.542 0.313
R2 Adj. 0.540 0.312
AIC 991.9 1263.3
BIC 1014.5 1276.9
Log.Lik. -490.931 -628.666
RMSE 0.50 0.61
modelsummary(list("Model 1" = lm1, "Model 2" = lm2),
             vcov = "robust",
             stars = TRUE,
             gof_map = c("nobs", "r.squared"),
             notes = list("Robust standard errors in parentheses."))
Model 1 Model 2
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Robust standard errors in parentheses.
(Intercept) -1.060*** 0.625***
(0.185) (0.134)
atndrte 0.017*** 0.024***
(0.002) (0.002)
priGPA 0.575***
(0.049)
ACT 0.033***
(0.007)
Num.Obs. 680 680
R2 0.542 0.313
mods <- list(AttendanceRate = lm1, FullModel = lm2)
var_names <- c('atndrte' = 'Attendance Rate (%)',
               'priGPA' = 'Prior GPA',
               'ACT' = 'ACT Score')

modelsummary(mods, coef_map = var_names)
AttendanceRate FullModel
Attendance Rate (%) 0.017 0.024
(0.001) (0.001)
Prior GPA 0.575
(0.044)
ACT Score 0.033
(0.006)
Num.Obs. 680 680
R2 0.542 0.313
R2 Adj. 0.540 0.312
AIC 991.9 1263.3
BIC 1014.5 1276.9
Log.Lik. -490.931 -628.666
RMSE 0.50 0.61
modelsummary(mods,
             coef_map = var_names,
             stars = TRUE,
             vcov="robust",
             gof_map = c("nobs", "r.squared"),
             notes = list("Standard errors are robust."))
AttendanceRate FullModel
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Standard errors are robust.
Attendance Rate (%) 0.017*** 0.024***
(0.002) (0.002)
Prior GPA 0.575***
(0.049)
ACT Score 0.033***
(0.007)
Num.Obs. 680 680
R2 0.542 0.313
##regression plotted

modelplot(mods, coef_map = var_names) +
  theme_bw(base_size = 16) +
  labs(title="Impact of Attendance on GPA",
       x="95% Confidence Interval") +
  geom_vline(xintercept=0, linetype=2, colour="red", linewidth=0.35)

Based on the analysis attendance seems to have a strong correlation with academic performance, however other variables seem to have a significant effect.