Setup

Download the dataset from our google sheets document and then upload it to the R environment

setwd("/Users/shmoops/Desktop/UNC 2019-2020/FALL/EPID 600/")

# read in data
library(readxl)
data = read_xlsx("Data_Part3.xlsx")

# making headers lower case
colnames(data) = tolower(colnames(data))

Tasks

1. Create a contingency table of exposed vs unexposed and outcome vs no outcome

# Number of participants
nrow(data)

## [1] 117

# Describe data
library(Hmisc)
Hmisc::describe(data)

## data 
## 
##  20  Variables      117  Observations
## ---------------------------------------------------------------------------
## duration (seconds) 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      117        0       75        1    430.2    700.9     55.6     57.0 
##      .25      .50      .75      .90      .95 
##     67.0     88.0    118.0    176.0    240.2 
##                                                                       
## Value         50   100   150   200   300   350   700  2150 13100 23200
## Frequency     41    49    15     6     1     1     1     1     1     1
## Proportion 0.350 0.419 0.128 0.051 0.009 0.009 0.009 0.009 0.009 0.009
## ---------------------------------------------------------------------------
## q1 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        4    0.589    1.803   0.4515 
##                                   
## Value          1     2     3     4
## Frequency     28    86     1     2
## Proportion 0.239 0.735 0.009 0.017
## ---------------------------------------------------------------------------
## q2 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.933     2021    1.332 
##                                         
## Value       2019  2020  2021  2022  2023
## Frequency      1    38    25    25    28
## Proportion 0.009 0.325 0.214 0.214 0.239
## ---------------------------------------------------------------------------
## q3 
##        n  missing distinct 
##      117        0       15 
## 
## lowest : Biochemistry           Biology                Biomedical Engineering Chemistry              Comparative Literature
## highest: Philosophy             Political Science      Psychology             Sociology              Studio Art            
## ---------------------------------------------------------------------------
## stem1 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      117        0        2    0.169      110   0.9402   0.1135 
## 
## ---------------------------------------------------------------------------
## q5 
##        n  missing distinct 
##       30       87       18 
## 
## lowest : Anthropology          Asian Studies         Biology               Chemistry             Communications       
## highest: Music                 Neuroscience          Peace War and Defense Political science     Psychology           
## ---------------------------------------------------------------------------
## stem2 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##       30       87        2    0.697       11   0.3667   0.4805 
## 
## ---------------------------------------------------------------------------
## q6 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      117        0        2    0.746       63   0.5385   0.5013 
## 
## ---------------------------------------------------------------------------
## q7 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      117        0        2    0.316       14   0.1197   0.2125 
## 
## ---------------------------------------------------------------------------
## q8 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      117        0       32    0.991    18.22       13        4        6 
##      .25      .50      .75      .90      .95 
##       10       15       25       35       40 
## 
## lowest :  0  1  2  3  4, highest: 35 40 42 50 63
## ---------------------------------------------------------------------------
## q9 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      117        0       15    0.876    1.794    2.579      0.0      0.0 
##      .25      .50      .75      .90      .95 
##      0.0      0.2      3.0      5.0      8.0 
##                                                                       
## Value        0.0   0.2   0.5   1.0   1.5   2.0   2.5   3.0   3.5   4.0
## Frequency     58     2     4    13     1     4     1     8     1     6
## Proportion 0.496 0.017 0.034 0.111 0.009 0.034 0.009 0.068 0.009 0.051
##                                         
## Value        5.0   6.0   8.0  10.0  15.0
## Frequency     10     2     3     3     1
## Proportion 0.085 0.017 0.026 0.026 0.009
## ---------------------------------------------------------------------------
## q10 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      117        0       12    0.945    2.406    2.752      0.0      0.0 
##      .25      .50      .75      .90      .95 
##      0.0      2.0      4.0      6.0      6.2 
##                                                                       
## Value        0.0   0.5   1.0   2.0   3.0   4.0   5.0   6.0   7.0   8.0
## Frequency     43     1    12    10    11    14    12     8     2     2
## Proportion 0.368 0.009 0.103 0.085 0.094 0.120 0.103 0.068 0.017 0.017
##                       
## Value       10.0  12.0
## Frequency      1     1
## Proportion 0.009 0.009
## ---------------------------------------------------------------------------
## q11 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.931    3.385    1.286 
##                                         
## Value          1     2     3     4     5
## Frequency      7    23    25    42    20
## Proportion 0.060 0.197 0.214 0.359 0.171
## ---------------------------------------------------------------------------
## q12 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.914    3.641    1.133 
##                                         
## Value          1     2     3     4     5
## Frequency      3    15    28    46    25
## Proportion 0.026 0.128 0.239 0.393 0.214
## ---------------------------------------------------------------------------
## q13 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        4    0.904     3.65    1.022 
##                                   
## Value          2     3     4     5
## Frequency     15    33    47    22
## Proportion 0.128 0.282 0.402 0.188
## ---------------------------------------------------------------------------
## q14 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.856    4.197   0.9425 
##                                         
## Value          1     2     3     4     5
## Frequency      1     7    13    43    53
## Proportion 0.009 0.060 0.111 0.368 0.453
## ---------------------------------------------------------------------------
## q15 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.861    3.521   0.9304 
##                                         
## Value          1     2     3     4     5
## Frequency      1    16    32    57    11
## Proportion 0.009 0.137 0.274 0.487 0.094
## ---------------------------------------------------------------------------
## q15reversed 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.861    2.479   0.9304 
##                                         
## Value          1     2     3     4     5
## Frequency     11    57    32    16     1
## Proportion 0.094 0.487 0.274 0.137 0.009
## ---------------------------------------------------------------------------
## q16 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.934    2.513    1.549 
##                                         
## Value          1     2     3     4     5
## Frequency     35    37    10    20    15
## Proportion 0.299 0.316 0.085 0.171 0.128
## ---------------------------------------------------------------------------
## q17 
##        n  missing distinct     Info     Mean      Gmd 
##      117        0        5    0.916    3.778    1.336 
##                                         
## Value          1     2     3     4     5
## Frequency      9    12    16    39    41
## Proportion 0.077 0.103 0.137 0.333 0.350
## ---------------------------------------------------------------------------

# Making dichotomous variables for exposure and outcome 
library(dplyr)

# Exposure
# We will calculate the mean stress score for all participants using the aforementioned scale. Then, if a participant falls above one standard deviation of the mean among all participants, we will classify them as having “high stress” and they will be coded as a 1. If they fall below the mean, they will be categorized as “low stress” and coded as a 0.

# Two data points are missing for two individuals. I will use a random number generator to fill in these numbers

# Q11-Q17 (Q15 will be reversed)
# Making a total stress scale score
data = data %>% mutate(stress = q11 + q12 + q13 + q14 + q15reversed + q16 + q17)

# mean stress score
mean(data$stress)

## [1] 23.64103

# One standard deviation
sqrt(var(data$stress))

## [1] 4.756958

# Creating a stress factor variable based off of whether participants are above/below the mean + 1 SD
data$stress_f = ifelse(data$stress > (mean(data$stress)+sqrt(var(data$stress))) , 1, 0)

# stress_f table
table(data$stress_f)

## 
##  0  1 
## 98 19

# Outcome
# Our outcome of interest is binge drinking, or the consumption of 4 or more alcoholic beverages by a female and 5 or more by a male in a 2 hour time period. 

# Based off of q1 and q10
data$binge_drinking = ifelse(data$q1 == 1 & data$q10 >= 5, 1, ifelse(data$q1 == 2 &  data$q10 >= 4,1,0))

# binge_drinking table
table(data$binge_drinking)

## 
##  0  1 
## 81 36

# Contingency Table 

disease_exp = sum(ifelse(data$stress_f == 1 & data$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data$stress_f == 1 & data$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data$stress_f == 0 & data$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data$stress_f == 0 & data$binge_drinking == 0, 1, 0), na.rm = T)

a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))

table

##              Binge Drinking No Binge Brinking
## Stressed                  7                12
## Not Stressed             29                69

2. Prevalence/Prevalence Ratio/Prevalence Odds Ratio analysis

# Prevalence in exposed pop
prev_exp = table[1,1]/sum(table[1,])
prev_exp

## [1] 0.3684211

# Prevalence in unexposed pop
prev_unexp = table[2,1]/sum(table[2,])
prev_unexp

## [1] 0.2959184

# Prevalence in total cohort
prev_tot = sum(table[,1])/sum(table)
prev_tot

## [1] 0.3076923

# Prevalence ratio 
prev_ratio = (prev_exp/ prev_unexp)
prev_ratio

## [1] 1.245009

exp(log(prev_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,1]))))

## [1] 0.5453937

exp(log(prev_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,1]))))

## [1] 2.842071

# confidence interval crosses the null value!

# Prevalence Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.387931

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.4964221

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 3.880473

# confidence interval crosses the null value!

3. Covariate analysis

(confounder and EMM)

Varibles to assess:

Stem major (a) Two majors(b) Live on/off campus(c) Greek life(d) Year in school(e) Hours studied per week(f) Gender(g)

Steps for each variable: stratification and modelling

a) Stem Majors

# subsetting data
data_stem = data %>% filter(stem1 == 1)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_stem$stress_f == 1 & data_stem$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_stem$stress_f == 1 & data_stem$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_stem$stress_f == 0 & data_stem$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_stem$stress_f == 0 & data_stem$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  7                11
## Not Stressed             25                67

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.705455

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.5949506

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 4.888768

# non-stem majors
data_nonstem = data %>% filter(stem1 == 0)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_nonstem$stress_f == 1 & data_nonstem$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_nonstem$stress_f == 1 & data_nonstem$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_nonstem$stress_f == 0 & data_nonstem$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_nonstem$stress_f == 0 & data_nonstem$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  0                 1
## Not Stressed              4                 2

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 0

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] NaN

# Because there are no non-stem majors that are academically stressed and binge drink, we are unable to do this analysis

# percent change with modeling approach
m_crude_or = glm(binge_drinking ~ stress_f, family = binomial("logit"), data = data)
m_crude_or_results = cbind(broom::tidy(m_crude_or), broom::confint_tidy(m_crude_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_crude_or_results

##          term  estimate std.error  statistic      p.value  conf.low
## 1 (Intercept) 0.4202899 0.2213040 -3.9168324 8.972008e-05 0.2684130
## 2    stress_f 1.3879310 0.5245626  0.6249286 5.320179e-01 0.4749929
##   conf.high
## 1 0.6413797
## 2 3.8178005

m_adj_or = glm(binge_drinking ~ stress_f + stem1, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term  estimate std.error  statistic   p.value  conf.low conf.high
## 1 (Intercept) 1.2707355 0.7683818  0.3118187 0.7551783 0.2773065  6.494869
## 2    stress_f 1.4131672 0.5297752  0.6527928 0.5138899 0.4788882  3.928208
## 3       stem1 0.3040961 0.7938509 -1.4995404 0.1337335 0.0569942  1.457051

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 1.818261

# Less than 10%, therefore not a confounder.

b) Two Majors

# subsetting data
data_two_majors = data %>% filter(!is.na(data$stem2))

# inserting a two majors dichotomous variable into the original dataset 
data$two_majors = ifelse(!is.na(data$stem2),1,0) 

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_two_majors$stress_f == 1 & data_two_majors$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_two_majors$stress_f == 1 & data_two_majors$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_two_majors$stress_f == 0 & data_two_majors$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_two_majors$stress_f == 0 & data_two_majors$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  1                 5
## Not Stressed              7                17

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 0.4857143

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.04770927

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 4.944917

# one major
data_one_major = data %>% filter(is.na(data$stem2))

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_one_major$stress_f == 1 & data_one_major$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_one_major$stress_f == 1 & data_one_major$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_one_major$stress_f == 0 & data_one_major$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_one_major$stress_f == 0 & data_one_major$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  6                 7
## Not Stressed             22                52

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 2.025974

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.6108322

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 6.719638

# Because the stratified estimates fall on either side of the crude estimate, having two majors serves as an effect measure modifier. 

# percent change with modeling approach
m_adj_or = glm(binge_drinking ~ stress_f + two_majors, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term  estimate std.error  statistic     p.value  conf.low
## 1 (Intercept) 0.4493323 0.2454759 -3.2589463 0.001118268 0.2732728
## 2    stress_f 1.4173617 0.5267888  0.6621196 0.507894560 0.4834509
## 3  two_majors 0.7514332 0.4744156 -0.6023683 0.546928974 0.2828000
##   conf.high
## 1 0.7185785
## 2 3.9209564
## 3 1.8517556

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 2.120473

# Less than 10%, therefore not a confounder.

c) Living on/off campus

# subsetting data
data_on_campus = data %>% filter(data$q6 == 1)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_on_campus$stress_f == 1 & data_on_campus$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_on_campus$stress_f == 1 & data_on_campus$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_on_campus$stress_f == 0 & data_on_campus$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_on_campus$stress_f == 0 & data_on_campus$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  2                 9
## Not Stressed             13                39

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 0.6666667

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.1272936

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 3.491492

# live off campus 
data_off_campus = data %>% filter(data$q6 == 0)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_off_campus$stress_f == 1 & data_off_campus$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_off_campus$stress_f == 1 & data_off_campus$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_off_campus$stress_f == 0 & data_off_campus$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_off_campus$stress_f == 0 & data_off_campus$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  5                 3
## Not Stressed             16                30

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 3.125

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.660183

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 14.7923

# Because the stratified estimates fall on either side of the crude estimate, living on/off campus serves as an effect measure modifier. 

# percent change with modeling approach
m_adj_or = glm(binge_drinking ~ stress_f + q6, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term  estimate std.error  statistic    p.value  conf.low
## 1 (Intercept) 0.6011445 0.2918416 -1.7438228 0.08119000 0.3339018
## 2    stress_f 1.4527207 0.5333983  0.7001112 0.48385786 0.4899876
## 3          q6 0.4845195 0.4083236 -1.7745672 0.07596938 0.2143978
##   conf.high
## 1  1.055898
## 2  4.079001
## 3  1.071045

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 4.668073

# Less than 10%, therefore living on/off campus is not a confounder.

d) Involvement in Greek Life

# subsetting data
data_greek = data %>% filter(data$q7 == 1)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_greek$stress_f == 1 & data_greek$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_greek$stress_f == 1 & data_greek$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_greek$stress_f == 0 & data_greek$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_greek$stress_f == 0 & data_greek$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  1                 3
## Not Stressed              6                 4

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 0.2222222

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.0166239

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 2.970585

# not involved in greek life
data_non_greek = data %>% filter(data$q7 == 0)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_non_greek$stress_f == 1 & data_non_greek$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_non_greek$stress_f == 1 & data_non_greek$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_non_greek$stress_f == 0 & data_non_greek$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_non_greek$stress_f == 0 & data_non_greek$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  6                 9
## Not Stressed             23                65

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.884058

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.6042431

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 5.87458

# Because the stratified estimates fall on either side of the crude estimate, being in/out of greek life serves as an effect measure modifier.

# percent change with modeling approach
m_adj_or = glm(binge_drinking ~ stress_f + q7, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term  estimate std.error  statistic      p.value  conf.low
## 1 (Intercept) 0.3785166 0.2349692 -4.1345657 3.556265e-05 0.2346493
## 2    stress_f 1.2572868 0.5367035  0.4265969 6.696729e-01 0.4182929
## 3          q7 2.4748179 0.5824019  1.5559133 1.197287e-01 0.7756704
##   conf.high
## 1 0.5918922
## 2 3.5265009
## 3 7.9081584

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 9.412878

# Less than 10%, therefore involvement in greek life is not a confounder.

e) Year in School

# create an upper classman (junior/senior)  (coded as 1) 
# versus lower classman (freshman/sophomore) (coded as 0)
data$class = ifelse(data$q2 > 2021, 0, 1)

# upperclassman
data_upperclassman = data %>% filter(data$class == 1)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_upperclassman$stress_f == 1 & data_upperclassman$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_upperclassman$stress_f == 1 & data_upperclassman$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_upperclassman$stress_f == 0 & data_upperclassman$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_upperclassman$stress_f == 0 & data_upperclassman$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  4                 6
## Not Stressed             14                40

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.904762

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.4678516

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 7.754847

# lowerclassman
data_lowerclassman = data %>% filter(data$class == 0)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_lowerclassman$stress_f == 1 & data_lowerclassman$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_lowerclassman$stress_f == 1 & data_lowerclassman$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_lowerclassman$stress_f == 0 & data_lowerclassman$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_lowerclassman$stress_f == 0 & data_lowerclassman$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  3                 6
## Not Stressed             15                29

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 0.9666667

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.2114928

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 4.418328

# Because the stratified estimates fall on either side of the crude estimate, being an lower/upperclassman serves as an effect measure modifier.

# percent change with modeling approach
m_adj_or = glm(binge_drinking ~ stress_f + class, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term  estimate std.error  statistic    p.value  conf.low
## 1 (Intercept) 0.4858342 0.3059936 -2.3591595 0.01831638 0.2606412
## 2    stress_f 1.3804848 0.5256871  0.6133587 0.53963919 0.4714151
## 3       class 0.7636566 0.4024577 -0.6699762 0.50287298 0.3452862
##   conf.high
## 1 0.8721452
## 2 3.8055889
## 3 1.6847882

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 0.5364971

# less than 10%, therefore being a lower/upper classman is not a confounder.

f) Hourse Studied per Week

# stratifying by median (16 hours)
# Those that study 16 or more hours are coded as 1 
data$studyhours = ifelse(data$q8 >= 16, 1, 0)

# committed students
data_committed = data %>% filter(data$studyhours == 1)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_committed$stress_f == 1 & data_committed$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_committed$stress_f == 1 & data_committed$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_committed$stress_f == 0 & data_committed$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_committed$stress_f == 0 & data_committed$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  3                 7
## Not Stressed             10                36

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.542857

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.3363558

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 7.077054

# non committed students 
data_noncommitted = data %>% filter(data$studyhours == 0)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_noncommitted$stress_f == 1 & data_noncommitted$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_noncommitted$stress_f == 1 & data_noncommitted$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_noncommitted$stress_f == 0 & data_noncommitted$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_noncommitted$stress_f == 0 & data_noncommitted$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  4                 5
## Not Stressed             19                33

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.389474

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.3322321

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 5.811109

# I'm not entirley sure how to interpret this one. Because the stratified estimates fall on either side of the crude estimate (one of which is very close), being a committed student could serve as both a confounder and an effect measure modifier.

# percent change with modeling approach
m_adj_or = glm(binge_drinking ~ stress_f + studyhours, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term  estimate std.error  statistic    p.value  conf.low
## 1 (Intercept) 0.5713834 0.2776038 -2.0161646 0.04378277 0.3265159
## 2    stress_f 1.4590712 0.5332366  0.7085037 0.47863253 0.4923834
## 3  studyhours 0.4918705 0.4140616 -1.7136091 0.08660053 0.2138743
##   conf.high
## 1 0.9755478
## 2 4.0968751
## 3 1.0940928

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 5.12563

# less than 10%, therefore being a committed student is not a confounder.

g) Sex

# males
data_male = data %>% filter(data$q1 == 1)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_male$stress_f == 1 & data_male$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_male$stress_f == 1 & data_male$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_male$stress_f == 0 & data_male$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_male$stress_f == 0 & data_male$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  1                 2
## Not Stressed              6                19

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.583333

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.1211823

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 20.68738

# females 
data_female = data %>% filter(data$q1 == 2)

# Find the number of those that are exposed and do/do not have the disease
disease_exp = sum(ifelse(data_female$stress_f == 1 & data_female$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_exp = sum(ifelse(data_female$stress_f == 1 & data_female$binge_drinking == 0, 1, 0), na.rm = T)

disease_noexp = sum(ifelse(data_female$stress_f == 0 & data_female$binge_drinking == 1, 1, 0), na.rm = T)
nodisease_noexp = sum(ifelse(data_female$stress_f == 0 & data_female$binge_drinking == 0, 1, 0), na.rm = T)

# Create table variables
a = disease_exp; b = nodisease_exp
c = disease_noexp; d = nodisease_noexp

# Create table
table = matrix(c(a, b, c, d), nrow = 2, ncol = 2, byrow = T)
dimnames(table) = list (c("Stressed", "Not Stressed"), c("Binge Drinking", "No Binge Brinking"))
table

##              Binge Drinking No Binge Brinking
## Stressed                  6                 9
## Not Stressed             23                48

# Odds Ratio (cross product)
odds_ratio = (table[1,1]*table[2,2])/(table[2,1]*table[1,2])
odds_ratio

## [1] 1.391304

exp(log(odds_ratio)-(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 0.4421389

exp(log(odds_ratio)+(1.96*sqrt((1/table[1,1])+(1/table[2,2])+(1/table[2,1])+(1/table[1,2]))))

## [1] 4.378098

# Because the stratified estimates fall on either side of the crude estimate, sex serves as an effect measure modifier.

# percent change with modeling approach
# recoding sex as 0 and 1 (excluding those that who put 3 and 4)
data$sex = ifelse(data$q1 == 1, 0, ifelse(data$q1 == 2,1,NA))
m_adj_or = glm(binge_drinking ~ stress_f + sex, family = binomial("logit"), data = data)

m_adj_or_results = cbind(broom::tidy(m_adj_or), broom::confint_tidy(m_adj_or)) %>% 
  mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))

m_adj_or_results

##          term estimate std.error  statistic     p.value  conf.low
## 1 (Intercept) 0.320098 0.4419179 -2.5776920 0.009946261 0.1247787
## 2    stress_f 1.421096 0.5348450  0.6570665 0.511138134 0.4781331
## 3         sex 1.490975 0.4944038  0.8079035 0.419146139 0.5853140
##   conf.high
## 1 0.7253392
## 2 4.0036940
## 3 4.1643951

# percent difference calculation 
(abs((m_crude_or_results$estimate[2] - m_adj_or_results$estimate[2]))/m_crude_or_results$estimate[2])*100

## [1] 2.389561

# less than 10%, therefore sex is not a confounder.

4. Table One

library(tableone)
a = tableone::CreateTableOne(vars = colnames(data), data = data, factorVars = c("q1", "q2", "q3", "q5", "q6", "q7", "stem1", "studyhours", "binge_drinking", "stress_f", "class"))
print(summary(a))

## 
##      ### Summary of continuous variables ###
## 
## strata: Overall
##                      n miss p.miss  mean    sd median p25 p75 min   max
## duration (seconds) 117    0      0 430.2 2e+03   88.0  67 118  52 23202
## stem2              117   87     74   0.4 5e-01    0.0   0   1   0     1
## q8                 117    0      0  18.2 1e+01   15.0  10  25   0    63
## q9                 117    0      0   1.8 3e+00    0.2   0   3   0    15
## q10                117    0      0   2.4 3e+00    2.0   0   4   0    12
## q11                117    0      0   3.4 1e+00    4.0   2   4   1     5
## q12                117    0      0   3.6 1e+00    4.0   3   4   1     5
## q13                117    0      0   3.6 9e-01    4.0   3   4   2     5
## q14                117    0      0   4.2 9e-01    4.0   4   5   1     5
## q15                117    0      0   3.5 9e-01    4.0   3   4   1     5
## q15reversed        117    0      0   2.5 9e-01    2.0   2   3   1     5
## q16                117    0      0   2.5 1e+00    2.0   1   4   1     5
## q17                117    0      0   3.8 1e+00    4.0   3   5   1     5
## stress             117    0      0  23.6 5e+00   23.0  20  27  10    35
## two_majors         117    0      0   0.3 4e-01    0.0   0   1   0     1
## sex                117    3      3   0.8 4e-01    1.0   1   1   0     1
##                     skew kurt
## duration (seconds)  8.29 71.5
## stem2               0.58 -1.8
## q8                  1.05  1.0
## q9                  2.04  4.9
## q10                 0.98  0.8
## q11                -0.36 -0.8
## q12                -0.50 -0.4
## q13                -0.22 -0.8
## q14                -1.14  0.9
## q15                -0.46 -0.3
## q15reversed         0.46 -0.3
## q16                 0.54 -1.1
## q17                -0.87 -0.2
## stress             -0.07 -0.1
## two_majors          1.13 -0.7
## sex                -1.20 -0.6
## 
## =======================================================================================
## 
##      ### Summary of categorical variables ### 
## 
## strata: Overall
##             var   n miss p.miss                       level freq percent
##              q1 117    0    0.0                           1   28    23.9
##                                                           2   86    73.5
##                                                           3    1     0.9
##                                                           4    2     1.7
##                                                                         
##              q2 117    0    0.0                        2019    1     0.9
##                                                        2020   38    32.5
##                                                        2021   25    21.4
##                                                        2022   25    21.4
##                                                        2023   28    23.9
##                                                                         
##              q3 117    0    0.0                Biochemistry    3     2.6
##                                                     Biology   93    79.5
##                                      Biomedical Engineering    2     1.7
##                                                   Chemistry    2     1.7
##                                      Comparative Literature    1     0.9
##                                            Computer Science    1     0.9
##                                 Excercise and Sport Science    2     1.7
##                                                Neuroscience    4     3.4
##                                                     Nursing    1     0.9
##                                     Peace, War, and Defense    1     0.9
##                                                  Philosophy    2     1.7
##                                           Political Science    1     0.9
##                                                  Psychology    2     1.7
##                                                   Sociology    1     0.9
##                                                  Studio Art    1     0.9
##                                                                         
##           stem1 117    0    0.0                           0    7     6.0
##                                                           1  110    94.0
##                                                                         
##              q5 117   87   74.4                Anthropology    1     3.3
##                                               Asian Studies    1     3.3
##                                                     Biology    2     6.7
##                                                   Chemistry    3    10.0
##                                              Communications    1     3.3
##                                                     English    2     6.7
##                                 Exercise and Sports Science    1     3.3
##                                                      German    4    13.3
##                                              Global Studies    1     3.3
##                                        Hispanic Linguistics    1     3.3
##                                                     History    3    10.0
##                                                 Mathematics    2     6.7
##                                        Medical Anthropology    1     3.3
##                                                       Music    2     6.7
##                                                Neuroscience    2     6.7
##                                       Peace War and Defense    1     3.3
##                                           Political science    1     3.3
##                                                  Psychology    1     3.3
##                                                                         
##              q6 117    0    0.0                           0   54    46.2
##                                                           1   63    53.8
##                                                                         
##              q7 117    0    0.0                           0  103    88.0
##                                                           1   14    12.0
##                                                                         
##        stress_f 117    0    0.0                           0   98    83.8
##                                                           1   19    16.2
##                                                                         
##  binge_drinking 117    0    0.0                           0   81    69.2
##                                                           1   36    30.8
##                                                                         
##           class 117    0    0.0                           0   53    45.3
##                                                           1   64    54.7
##                                                                         
##      studyhours 117    0    0.0                           0   61    52.1
##                                                           1   56    47.9
##                                                                         
##  cum.percent
##         23.9
##         97.4
##         98.3
##        100.0
##             
##          0.9
##         33.3
##         54.7
##         76.1
##        100.0
##             
##          2.6
##         82.1
##         83.8
##         85.5
##         86.3
##         87.2
##         88.9
##         92.3
##         93.2
##         94.0
##         95.7
##         96.6
##         98.3
##         99.1
##        100.0
##             
##          6.0
##        100.0
##             
##          3.3
##          6.7
##         13.3
##         23.3
##         26.7
##         33.3
##         36.7
##         50.0
##         53.3
##         56.7
##         66.7
##         73.3
##         76.7
##         83.3
##         90.0
##         93.3
##         96.7
##        100.0
##             
##         46.2
##        100.0
##             
##         88.0
##        100.0
##             
##         83.8
##        100.0
##             
##         69.2
##        100.0
##             
##         45.3
##        100.0
##             
##         52.1
##        100.0
##             
## NULL

5. Crude versus adjusted model

library(broom)
library(purrr)
# Writing a function for model results
model_results = function(m){bind_cols(tidy(m), confint_tidy(m)) %>% filter(term == "stress_f") %>% mutate(estimate = exp(estimate), conf.low = exp(conf.low), conf.high = exp(conf.high))}

m1 = glm(binge_drinking ~ stress_f, family = binomial("logit"), data = data)

m2 = glm(binge_drinking ~ stress_f + q7, family = binomial("logit"), data = data)

# using dplyr and purrr functions
model_df = tibble(models = list(m1, m2), 
                  model_name = c("Crude", "Adjusted for Greek Life"), 
                  results = map(models, model_results), 
                  estimate = map_dbl(results, "estimate"))

results_df = bind_rows(model_df$results) %>% 
  mutate(model_name = model_df$model_name)

# plotting all the model estimates and confidence intervals
 ggplot(results_df, aes(model_name, color=std.error)) + 
  geom_point(aes(y=estimate)) + 
  geom_linerange(aes(ymin = conf.low, ymax=conf.high)) +
   ylab("Estimate") + xlab("Models") + geom_hline(yintercept=1, linetype="dashed", color = "black") +
   ggtitle("Crude and Adjusted Models")

Team Project Part 3 (EPID 600)

Samuel Goldstein

12/2/2019

Setup

Download the dataset from our google sheets document and then upload it to the R environment

Tasks

1. Create a contingency table of exposed vs unexposed and outcome vs no outcome

2. Prevalence/Prevalence Ratio/Prevalence Odds Ratio analysis

3. Covariate analysis

(confounder and EMM)

Varibles to assess:

Stem major (a) Two majors(b) Live on/off campus(c) Greek life(d) Year in school(e) Hours studied per week(f) Gender(g)

Steps for each variable: stratification and modelling

a) Stem Majors

b) Two Majors

c) Living on/off campus

d) Involvement in Greek Life

e) Year in School

f) Hourse Studied per Week

g) Sex

4. Table One

5. Crude versus adjusted model