library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the student performance data; the first row of the CSV holds the
# column names.
student_performance <- read.csv(
  file = "student_performance.csv",
  header = TRUE
)

# Descriptive summary limited to age, study hours and test score. `include`
# restricts the table to these columns without subsetting the data first.
# NOTE(review): age is tabulated level by level in the rendered table; if a
# median (Q1, Q3) is wanted instead, set `type = age ~ "continuous"`.
student_performance %>%
  tbl_summary(include = c(age, study_hours, test_score))
Characteristic N = 200¹
age
    18 24 (12%)
    19 27 (14%)
    20 26 (13%)
    21 34 (17%)
    22 29 (15%)
    23 23 (12%)
    24 37 (19%)
study_hours 4.90 (3.60, 6.20)
test_score 67 (59, 73)
1 n (%); Median (Q1, Q3)
# Table 1: summarise the columns of the data split by `group`, then append an
# overall column and a p-value column comparing the groups.
# NOTE(review): no columns are excluded, so the `id` column is summarised and
# tested as well; consider dropping it (e.g. `include = -id`).
tbl_summary(data = student_performance, by = group) %>%
  add_overall() %>%
  add_p() %>%
  modify_caption(caption = "Table 1: Student Characteristics by Group")
Table 1: Student Characteristics by Group
Characteristic Overall, N = 200¹ A, N = 105¹ B, N = 95¹ p-value²
id 101 (51, 151) 108 (62, 143) 90 (48, 158) 0.8
age 0.3
    18 24 (12%) 12 (11%) 12 (13%)
    19 27 (14%) 15 (14%) 12 (13%)
    20 26 (13%) 12 (11%) 14 (15%)
    21 34 (17%) 23 (22%) 11 (12%)
    22 29 (15%) 15 (14%) 14 (15%)
    23 23 (12%) 14 (13%) 9 (9.5%)
    24 37 (19%) 14 (13%) 23 (24%)
gender 0.5
    Female 107 (54%) 54 (51%) 53 (56%)
    Male 93 (47%) 51 (49%) 42 (44%)
study_hours 4.90 (3.60, 6.20) 5.10 (4.00, 6.20) 4.80 (3.40, 6.30) 0.3
test_score 67 (59, 73) 68 (58, 73) 67 (59, 73) 0.8
passed 145 (73%) 76 (72%) 69 (73%) >0.9
1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test
# Binomial GLM (logistic regression) of `passed` on `group` and `study_hours`.
model <- glm(
  formula = passed ~ group + study_hours,
  family = "binomial",
  data = student_performance
)

# Show the fitted call, coefficients and deviance summary.
print(model)
## 
## Call:  glm(formula = passed ~ group + study_hours, family = "binomial", 
##     data = student_performance)
## 
## Coefficients:
## (Intercept)       groupB  study_hours  
##     0.72632      0.02297      0.04686  
## 
## Degrees of Freedom: 199 Total (i.e. Null);  197 Residual
## Null Deviance:       235.3 
## Residual Deviance: 234.9     AIC: 240.9
# Table 2: regression table for the fitted model, with a caption.
# NOTE(review): estimates are rendered on the log(OR) scale; passing
# `exponentiate = TRUE` to tbl_regression() would report odds ratios directly.
model %>%
  tbl_regression() %>%
  modify_caption(caption = "Table 2: Logistic Regression Predicting Passed")
Table 2: Logistic Regression Predicting Passed
Characteristic log(OR) 95% CI p-value
group


    A
    B 0.02 -0.60, 0.65 >0.9
study_hours 0.05 -0.11, 0.20 0.5
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
# Odds-ratio scale: exponentiate the coefficients and the limits returned by
# confint(), bound side by side with the point estimates in column `OR`.
cbind(OR = exp(coef(model)), exp(confint(model)))
## Waiting for profiling to be done...
##                   OR     2.5 %   97.5 %
## (Intercept) 2.067460 0.8614112 5.054507
## groupB      1.023236 0.5485734 1.915696
## study_hours 1.047980 0.9002521 1.225140