##Load Libraries and datasetecho=FALSElibrary(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelsummary)
`modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
Revert to `kableExtra` for one session:
options(modelsummary_factory_default = 'kableExtra')
options(modelsummary_factory_latex = 'kableExtra')
options(modelsummary_factory_html = 'kableExtra')
Silence this message forever:
config_modelsummary(startup_message = FALSE)
library(patchwork)library(mgcv)
Loading required package: nlme
Attaching package: 'nlme'
The following object is masked from 'package:dplyr':
collapse
This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
library(party)
Loading required package: grid
Loading required package: mvtnorm
Loading required package: modeltools
Loading required package: stats4
Loading required package: strucchange
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
Loading required package: sandwich
Attaching package: 'strucchange'
The following object is masked from 'package:stringr':
boundary
Attaching package: 'party'
The following object is masked from 'package:dplyr':
where
library(ggfortify)library(GGally)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
attend termGPA priGPA ACT
Min. : 2.00 Min. :0.000 Min. :0.857 Min. :13.00
1st Qu.:24.00 1st Qu.:2.138 1st Qu.:2.190 1st Qu.:20.00
Median :28.00 Median :2.670 Median :2.560 Median :22.00
Mean :26.15 Mean :2.601 Mean :2.587 Mean :22.51
3rd Qu.:30.00 3rd Qu.:3.120 3rd Qu.:2.942 3rd Qu.:25.00
Max. :32.00 Max. :4.000 Max. :3.930 Max. :32.00
final atndrte hwrte frosh
Min. :10.00 Min. : 6.25 Min. : 12.50 Min. :0.0000
1st Qu.:22.00 1st Qu.: 75.00 1st Qu.: 87.50 1st Qu.:0.0000
Median :26.00 Median : 87.50 Median :100.00 Median :0.0000
Mean :25.89 Mean : 81.71 Mean : 87.91 Mean :0.2324
3rd Qu.:29.00 3rd Qu.: 93.75 3rd Qu.:100.00 3rd Qu.:0.0000
Max. :39.00 Max. :100.00 Max. :100.00 Max. :1.0000
NA's :6
soph missed stndfnl
Min. :0.0000 Min. : 0.000 Min. :-3.30882
1st Qu.:0.0000 1st Qu.: 2.000 1st Qu.:-0.78782
Median :1.0000 Median : 4.000 Median : 0.05252
Mean :0.5765 Mean : 5.853 Mean : 0.02966
3rd Qu.:1.0000 3rd Qu.: 8.000 3rd Qu.: 0.68277
Max. :1.0000 Max. :30.000 Max. : 2.78361
You can add options to executable code like this
attendance %>%ggplot(aes(x=atndrte, y=priGPA)) +geom_point(aes(color=termGPA), alpha=0.6) +scale_color_viridis_c(option ="plasma") +theme_minimal() +labs(title="Scatterplot of Attendance Rate vs Prior GPA",x="Attendance Rate (%)",y="Prior GPA",color="Term GPA")
##Histogram to viualize the distribution of Term GPA identifying patterns like whether data is skewedattendance %>%ggplot(aes(x=termGPA)) +geom_histogram(bins=20, fill="blue", color="black") +theme_minimal() +labs(title="Distribution of Term GPA",x="GPA",y="Frequency")
##Attendance and Performance scatterplot diagram relationship (positive/negitive correlation)attendance %>%ggplot(aes(x=atndrte, y=termGPA, color=termGPA)) +geom_point(alpha=0.7) +scale_color_viridis_c(option ="plasma") +geom_smooth(method="lm", se=FALSE, color="red") +theme_minimal() +labs(title="Attendance vs Term GPA",x="Attendance Rate (%)",y="Term GPA",color="Term GPA")
`geom_smooth()` using formula = 'y ~ x'
##Missed classes percentage and standardized final scores attendance <- attendance %>%mutate(missed_pct = (missed /32) *100,stnd_final =scale(final))
modelsummary(mods,coef_map = var_names,stars =TRUE,vcov="robust",gof_map =c("nobs", "r.squared"),notes =list("Standard errors are robust."))
AttendanceRate
FullModel
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Standard errors are robust.
Attendance Rate (%)
0.017***
0.024***
(0.002)
(0.002)
Prior GPA
0.575***
(0.049)
ACT Score
0.033***
(0.007)
Num.Obs.
680
680
R2
0.542
0.313
##regression plottedmodelplot(mods, coef_map = var_names) +theme_bw(base_size =16) +labs(title="Impact of Attendance on GPA",x="95% Confidence Interval") +geom_vline(xintercept=0, linetype=2, colour="red", linewidth=0.35)
Based on the analysis attendance seems to have a strong correlation with academic performance, however other variables seem to have a significant effect.