module1_day3_RScript-hypertonic.R

# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 Part 1 Day 3 ~
# ~ Creating Data from a   ~
# ~ Manuscript to check    ~
# ~ Results                ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# 
# Nebulized Hypertonic Saline for Bronchiolitis
# A Randomized Clinical Trial
# Author: Wu et al.
# Citation:
# JAMA Pediatrics, 168(7), 657-63. 
# URL: https://doi.org/10.1001/jamapediatrics.2014.301
#

# We want to verify the primary unadjusted analysis
# for admission rate (admitted/not admitted)
# Odds ratio of admission (HS Group vs NS Group)
# OR = 0.55 (95% CI, 0.36-0.83)
# meaning that odds of admission are 45% less in HS
# group than the NS group.

# Creating Dataset
# Let's create a dataset that contains the following variables:
#    patientID -- ID number from 1 to 408
#    admitted -- 0=Not Admitted, 1=Admitted
#    hsgroup -- 0=NS Group; 1=HS Group
# 
# create a sequence of integers from 1 to 408

patientID <- seq(1,408,1)

# Total in NS Group = 197
# NS Admitted = 84 (42.6%)
# NS Not Admitted = 197-84 = 113

# Total in HS Group = 211
# HS Admitted = 61 (28.9%)
# HS Not Admitted = 211-61 = 150

# STEP 1a:
# Make a vector with 145 ones (84NS+61HS=145)
# representing the admitted patients in
# both of the treatment groups. 

ones <- rep(1,145)

# STEP 1b: 
# Make a vector with 263 zeroes (113NS+150HS=263)
# representing everyone else in the sample, 
# all of whom were not admitted.

zeroes <- rep(0,263)
  
# STEP 1c: 
# Let's combine these using the c() function
# into a single vector called 'admitted'
# of length 408 equal to the total sample size

admitted <- c(ones,zeroes)

# STEP 2:
# The next variable to create is the treatment 
# group variable which we will call hsgroup.
# we need a series of vectors representing
# those in each group who were admitted and 
# not admitted.

# STEP 2a:
# Let's make hsgroup=0 be the NS group
#            hsgroup=1 be the HS group
# For the admitted (n=145) we have
# n=84 in the nsgroup and n=61 in the hsgroup
# We can represent this by two vectors
# for the NS group (hsgroup=0),

ns_admit <- rep(0,84)

# for the HS group (hsgroup=1),

hs_admit <- rep(1,61)

# For those NOT admitted (n=263) we have
# n=113 n the nsgroup and n=150 in the hsgroup
# We can represent this by two vectors
# for the NS group (hsgroup=0),

ns_noadmit <- rep(0,113)

# for the HS group (hsgroup=1),

hs_noadmit <- rep(1,150)

# Now let's connect these vectors into one 
# variable that matches the admitted variable
# created above in the following order:
# HS group admitted, NS group admitted, HS group not admitted,
# and HS group not admitted

hsgroup <- c(hs_admit,ns_admit,hs_noadmit,ns_noadmit)

# Lastly, pull these three vectors we have created
# together into a data frame.

hyper <- data.frame(patientID,admitted,hsgroup)

#

#
# Now let's summaryize/visualize the data

str(hyper)

## 'data.frame':    408 obs. of  3 variables:
##  $ patientID: num  1 2 3 4 5 6 7 8 9 10 ...
##  $ admitted : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ hsgroup  : num  1 1 1 1 1 1 1 1 1 1 ...

summary(hyper)

##    patientID        admitted         hsgroup      
##  Min.   :  1.0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:102.8   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :204.5   Median :0.0000   Median :1.0000  
##  Mean   :204.5   Mean   :0.3554   Mean   :0.5172  
##  3rd Qu.:306.2   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :408.0   Max.   :1.0000   Max.   :1.0000

# Recall the table1 package from CRP241
# install.packages("table1") # only run this if you need to

library(table1)

## Warning: package 'table1' was built under R version 4.0.3

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

table.data <- hyper

# we need to change our variables to factors for 
# compatibility with table1 package

table.data$hsgroup <- factor(table.data$hsgroup,
                             labels = c("Normal","Hypertonic"))

table.data$admitted <- factor(table.data$admitted,
                               labels = c("NO","YES"))

# We can create a nicely formatted table with 
# a single function call to table1

table1(~ admitted | hsgroup, data=table.data)

	Normal (N=197)	Hypertonic (N=211)	Overall (N=408)
admitted
NO	113 (57.4%)	150 (71.1%)	263 (64.5%)
YES	84 (42.6%)	61 (28.9%)	145 (35.5%)

#
# Unadjusted Analyses using Chi-square
# lets create the 2 x 2 outcome table
# for admission variable
admission.table <- table(admitted,hsgroup)
rownames(admission.table) <- c("NO","YES")
colnames(admission.table) <- c("NS","HS")
admission.table

##         hsgroup
## admitted  NS  HS
##      NO  113 150
##      YES  84  61

# what if we wanted to create this by hand?
byhand.table <- rbind(c(113,150),c(84,61))
rownames(byhand.table) <- c("NO","YES")
colnames(byhand.table) <- c("NS","HS")
byhand.table

##      NS  HS
## NO  113 150
## YES  84  61

# Now let's analyze the 2 x 2 table 
# using the 1st table we created (admissioni.table)
# Here is the standard Pearson Chi-Square Test
chisq.test(admission.table,correct = FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  admission.table
## X-squared = 8.3831, df = 1, p-value = 0.003787

# We can use epitools to calculate the Odds Ratio (OR)
# remember that you may need to run the 
# install.packages function if you haven't installed
# epitools before
# install.packages("epitools")
library(epitools)

## Warning: package 'epitools' was built under R version 4.0.3

oddsratio(admission.table)

## $data
##         hsgroup
## admitted  NS  HS Total
##    NO    113 150   263
##    YES    84  61   145
##    Total 197 211   408
## 
## $measure
##         odds ratio with 95% C.I.
## admitted  estimate     lower     upper
##      NO  1.0000000        NA        NA
##      YES 0.5482102 0.3623922 0.8253729
## 
## $p.value
##         two-sided
## admitted  midp.exact fisher.exact  chi.square
##      NO           NA           NA          NA
##      YES 0.003933746  0.005114295 0.003787306
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "median-unbiased estimate & mid-p exact CI"

# Odds ratio of admission (HS Group vs NS Group)
# OR = 0.55 (95% CI, 0.36-0.83)
# meaning that odds of admission are 45% less in HS
# group than the NS group.

# We can also generate the unadjusted odds ratio using 
# logistic regression:

hyper.fit <- glm(admitted ~ hsgroup , data=hyper, family='binomial')
summary(hyper.fit)

## 
## Call:
## glm(formula = admitted ~ hsgroup, family = "binomial", data = hyper)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0543  -1.0543  -0.8261   1.3057   1.5754  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -0.2966     0.1441  -2.059  0.03953 * 
## hsgroup      -0.6032     0.2093  -2.882  0.00396 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 530.99  on 407  degrees of freedom
## Residual deviance: 522.58  on 406  degrees of freedom
## AIC: 526.58
## 
## Number of Fisher Scoring iterations: 4

# This yields the coefficient table:
# Coefficients:
#             Estimate Std. Error z value Pr(>|z|)   
# (Intercept)  -0.2966     0.1441  -2.059  0.03953 * 
# nsgroup      -0.6032     0.2093  -2.882  0.00396 **

# We can exponentiate to generate the desired odds ratio

exp(hyper.fit$coefficients)

## (Intercept)     hsgroup 
##   0.7433628   0.5470635

# This yields:
# (Intercept)     hsgroup 
#   0.7433628   0.5470635
#
# We can exponentiate the confidence interval to get the 
# limits on the OR:

exp(confint(hyper.fit))

## Waiting for profiling to be done...

##                 2.5 %    97.5 %
## (Intercept) 0.5591328 0.9844228
## hsgroup     0.3619429 0.8229722

# This yields:
#                   2.5 %    97.5 %
#   (Intercept) 0.5591328 0.9844228
#   hsgroup     0.3619429 0.8229722
#
# Thus, we have the unadjusted OR from the 
# paper:
# OR = 0.55 with 95% CI = [0.36, 0.82]
#
# What about predictions?
predict(hyper.fit,data.frame(hsgroup=0),type="response")

##         1 
## 0.4263959

predict(hyper.fit,data.frame(hsgroup=1),type="response")

##         1 
## 0.2890995

# Can we convert the difference in these probabilities
# back to odds scale and logit scale?
# OR = odds p2/odds p1

p1 <- 0.2890995 # HS
p2 <- 0.4263959 # NS

OR_HSvsNS <- (p1/(1-p1)/(p2/(1-p2)))
OR_HSvsNS

## [1] 0.5470635

# logit scale
# need to install 1 time
# install.packages("faraway")

library(faraway)

## Warning: package 'faraway' was built under R version 4.0.3

diff.logit <- logit(p1)-logit(p2)
diff.logit

## [1] -0.6031904

# this yields -0.603 which is equal to the slope coefficient 
# from our logistic model

# Estimated Probability Plot
# plot with ggplot2
# on the x axis is treatment group (0 vs 1)
# on the y axis is probability of admitted (0 vs 1)
# use jitter to remind that points are stacked at 
# each combination of 0 and 1
# overlay logistic fitted line

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.3

ggplot(hyper, aes(x=hsgroup, y=admitted)) + 
  geom_point() +
  ggtitle("Plot of Admission Probability by Treatment Group") +
  scale_y_continuous(name="Probability of Admitted",breaks = seq(0,1,.1)) +
  scale_x_continuous(name="Treatment Group -- 0=NS -- 1=HS",breaks=seq(0,1,1)) +
  geom_jitter(width = 0.03, height = 0.03)+
  stat_smooth(method="glm", method.args=list(family="binomial"), se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

# NOW LETS LOOK AT LOS OUTCOME and ANALYSIS
# looking at LOS
# From Abstract
# Mean (SD) LOS for NS 3.92 (5.24) [n=84] (n not quite right?)
# Mean (SD) LOS for HS 3.16 (2.11) [n=61] (n not quite right?)
# Note that 4 participants were missing LOS data
# so sample size is slightly off

# We can calculate t-test using summary statistics
# via function in R tsum.test
# arguments for function
# tsum.test(mean.x,s.x,n.x,mean.y,s.y,n.y,var.equal=TRUE)

# need to run install 1 time!
# install.packages("BSDA")

library(BSDA)

## Warning: package 'BSDA' was built under R version 4.0.3

## Loading required package: lattice

## 
## Attaching package: 'lattice'

## The following object is masked from 'package:faraway':
## 
##     melanoma

## 
## Attaching package: 'BSDA'

## The following object is masked from 'package:datasets':
## 
##     Orange

tsum.test(mean.x = 3.92,s.x = 5.24,n.x = 84,mean.y = 3.16,s.y = 2.11,n.y = 61,
          var.equal = FALSE)

## 
##  Welch Modified Two-Sample t-Test
## 
## data:  Summarized x and y
## t = 1.2019, df = 116.19, p-value = 0.2319
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4924194  2.0124194
## sample estimates:
## mean of x mean of y 
##      3.92      3.16

3.16 - 3.92 # diff in means

## [1] -0.76

# can also do this using web-based calculator like
# https://www.graphpad.com/quickcalcs/ttest1/

# End of Program

module1_day3_RScript-hypertonic.R

sgrambow

2021-01-30