# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 Part 1 Day 3 ~
# ~ Creating Data from a ~
# ~ Manuscript to check ~
# ~ Results ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Nebulized Hypertonic Saline for Bronchiolitis
# A Randomized Clinical Trial
# Author: Wu et al.
# Citation:
# JAMA Pediatrics, 168(7), 657-63.
# URL: https://doi.org/10.1001/jamapediatrics.2014.301
#
# We want to verify the primary unadjusted analysis
# for admission rate (admitted/not admitted)
# Odds ratio of admission (HS Group vs NS Group)
# OR = 0.55 (95% CI, 0.36-0.83)
# meaning that odds of admission are 45% less in HS
# group than the NS group.
# Creating Dataset
# Let's create a dataset that contains the following variables:
# patientID -- ID number from 1 to 408
# admitted -- 0=Not Admitted, 1=Admitted
# hsgroup -- 0=NS Group; 1=HS Group
#
# create a sequence of integers from 1 to 408
patientID <- seq(1,408,1)
# Total in NS Group = 197
# NS Admitted = 84 (42.6%)
# NS Not Admitted = 197-84 = 113
# Total in HS Group = 211
# HS Admitted = 61 (28.9%)
# HS Not Admitted = 211-61 = 150
# STEP 1a:
# Make a vector with 145 ones (84NS+61HS=145)
# representing the admitted patients in
# both of the treatment groups.
ones <- rep(1,145)
# STEP 1b:
# Make a vector with 263 zeroes (113NS+150HS=263)
# representing everyone else in the sample,
# all of whom were not admitted.
zeroes <- rep(0,263)
# STEP 1c:
# Let's combine these using the c() function
# into a single vector called 'admitted'
# of length 408 equal to the total sample size
admitted <- c(ones,zeroes)
# STEP 2:
# The next variable to create is the treatment
# group variable which we will call hsgroup.
# we need a series of vectors representing
# those in each group who were admitted and
# not admitted.
# STEP 2a:
# Let's make hsgroup=0 be the NS group
# hsgroup=1 be the HS group
# For the admitted (n=145) we have
# n=84 in the nsgroup and n=61 in the hsgroup
# We can represent this by two vectors
# for the NS group (hsgroup=0),
ns_admit <- rep(0,84)
# for the HS group (hsgroup=1),
hs_admit <- rep(1,61)
# For those NOT admitted (n=263) we have
# n=113 n the nsgroup and n=150 in the hsgroup
# We can represent this by two vectors
# for the NS group (hsgroup=0),
ns_noadmit <- rep(0,113)
# for the HS group (hsgroup=1),
hs_noadmit <- rep(1,150)
# Now let's connect these vectors into one
# variable that matches the admitted variable
# created above in the following order:
# HS group admitted, NS group admitted, HS group not admitted,
# and HS group not admitted
hsgroup <- c(hs_admit,ns_admit,hs_noadmit,ns_noadmit)
# Lastly, pull these three vectors we have created
# together into a data frame.
hyper <- data.frame(patientID,admitted,hsgroup)
#
#
# Now let's summaryize/visualize the data
str(hyper)
## 'data.frame': 408 obs. of 3 variables:
## $ patientID: num 1 2 3 4 5 6 7 8 9 10 ...
## $ admitted : num 1 1 1 1 1 1 1 1 1 1 ...
## $ hsgroup : num 1 1 1 1 1 1 1 1 1 1 ...
summary(hyper)
## patientID admitted hsgroup
## Min. : 1.0 Min. :0.0000 Min. :0.0000
## 1st Qu.:102.8 1st Qu.:0.0000 1st Qu.:0.0000
## Median :204.5 Median :0.0000 Median :1.0000
## Mean :204.5 Mean :0.3554 Mean :0.5172
## 3rd Qu.:306.2 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :408.0 Max. :1.0000 Max. :1.0000
# Recall the table1 package from CRP241
# install.packages("table1") # only run this if you need to
library(table1)
## Warning: package 'table1' was built under R version 4.0.3
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table.data <- hyper
# we need to change our variables to factors for
# compatibility with table1 package
table.data$hsgroup <- factor(table.data$hsgroup,
labels = c("Normal","Hypertonic"))
table.data$admitted <- factor(table.data$admitted,
labels = c("NO","YES"))
# We can create a nicely formatted table with
# a single function call to table1
table1(~ admitted | hsgroup, data=table.data)
| Normal (N=197) |
Hypertonic (N=211) |
Overall (N=408) |
|
|---|---|---|---|
| admitted | |||
| NO | 113 (57.4%) | 150 (71.1%) | 263 (64.5%) |
| YES | 84 (42.6%) | 61 (28.9%) | 145 (35.5%) |
#
#
# Unadjusted Analyses using Chi-square
# lets create the 2 x 2 outcome table
# for admission variable
admission.table <- table(admitted,hsgroup)
rownames(admission.table) <- c("NO","YES")
colnames(admission.table) <- c("NS","HS")
admission.table
## hsgroup
## admitted NS HS
## NO 113 150
## YES 84 61
# what if we wanted to create this by hand?
byhand.table <- rbind(c(113,150),c(84,61))
rownames(byhand.table) <- c("NO","YES")
colnames(byhand.table) <- c("NS","HS")
byhand.table
## NS HS
## NO 113 150
## YES 84 61
# Now let's analyze the 2 x 2 table
# using the 1st table we created (admissioni.table)
# Here is the standard Pearson Chi-Square Test
chisq.test(admission.table,correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: admission.table
## X-squared = 8.3831, df = 1, p-value = 0.003787
# We can use epitools to calculate the Odds Ratio (OR)
# remember that you may need to run the
# install.packages function if you haven't installed
# epitools before
# install.packages("epitools")
library(epitools)
## Warning: package 'epitools' was built under R version 4.0.3
oddsratio(admission.table)
## $data
## hsgroup
## admitted NS HS Total
## NO 113 150 263
## YES 84 61 145
## Total 197 211 408
##
## $measure
## odds ratio with 95% C.I.
## admitted estimate lower upper
## NO 1.0000000 NA NA
## YES 0.5482102 0.3623922 0.8253729
##
## $p.value
## two-sided
## admitted midp.exact fisher.exact chi.square
## NO NA NA NA
## YES 0.003933746 0.005114295 0.003787306
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "median-unbiased estimate & mid-p exact CI"
# Odds ratio of admission (HS Group vs NS Group)
# OR = 0.55 (95% CI, 0.36-0.83)
# meaning that odds of admission are 45% less in HS
# group than the NS group.
# We can also generate the unadjusted odds ratio using
# logistic regression:
hyper.fit <- glm(admitted ~ hsgroup , data=hyper, family='binomial')
summary(hyper.fit)
##
## Call:
## glm(formula = admitted ~ hsgroup, family = "binomial", data = hyper)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0543 -1.0543 -0.8261 1.3057 1.5754
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2966 0.1441 -2.059 0.03953 *
## hsgroup -0.6032 0.2093 -2.882 0.00396 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 530.99 on 407 degrees of freedom
## Residual deviance: 522.58 on 406 degrees of freedom
## AIC: 526.58
##
## Number of Fisher Scoring iterations: 4
# This yields the coefficient table:
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.2966 0.1441 -2.059 0.03953 *
# nsgroup -0.6032 0.2093 -2.882 0.00396 **
# We can exponentiate to generate the desired odds ratio
exp(hyper.fit$coefficients)
## (Intercept) hsgroup
## 0.7433628 0.5470635
# This yields:
# (Intercept) hsgroup
# 0.7433628 0.5470635
#
# We can exponentiate the confidence interval to get the
# limits on the OR:
exp(confint(hyper.fit))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.5591328 0.9844228
## hsgroup 0.3619429 0.8229722
# This yields:
# 2.5 % 97.5 %
# (Intercept) 0.5591328 0.9844228
# hsgroup 0.3619429 0.8229722
#
# Thus, we have the unadjusted OR from the
# paper:
# OR = 0.55 with 95% CI = [0.36, 0.82]
#
# What about predictions?
predict(hyper.fit,data.frame(hsgroup=0),type="response")
## 1
## 0.4263959
predict(hyper.fit,data.frame(hsgroup=1),type="response")
## 1
## 0.2890995
# Can we convert the difference in these probabilities
# back to odds scale and logit scale?
# OR = odds p2/odds p1
p1 <- 0.2890995 # HS
p2 <- 0.4263959 # NS
OR_HSvsNS <- (p1/(1-p1)/(p2/(1-p2)))
OR_HSvsNS
## [1] 0.5470635
# logit scale
# need to install 1 time
# install.packages("faraway")
library(faraway)
## Warning: package 'faraway' was built under R version 4.0.3
diff.logit <- logit(p1)-logit(p2)
diff.logit
## [1] -0.6031904
# this yields -0.603 which is equal to the slope coefficient
# from our logistic model
# Estimated Probability Plot
# plot with ggplot2
# on the x axis is treatment group (0 vs 1)
# on the y axis is probability of admitted (0 vs 1)
# use jitter to remind that points are stacked at
# each combination of 0 and 1
# overlay logistic fitted line
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
ggplot(hyper, aes(x=hsgroup, y=admitted)) +
geom_point() +
ggtitle("Plot of Admission Probability by Treatment Group") +
scale_y_continuous(name="Probability of Admitted",breaks = seq(0,1,.1)) +
scale_x_continuous(name="Treatment Group -- 0=NS -- 1=HS",breaks=seq(0,1,1)) +
geom_jitter(width = 0.03, height = 0.03)+
stat_smooth(method="glm", method.args=list(family="binomial"), se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
# NOW LETS LOOK AT LOS OUTCOME and ANALYSIS
# looking at LOS
# From Abstract
# Mean (SD) LOS for NS 3.92 (5.24) [n=84] (n not quite right?)
# Mean (SD) LOS for HS 3.16 (2.11) [n=61] (n not quite right?)
# Note that 4 participants were missing LOS data
# so sample size is slightly off
# We can calculate t-test using summary statistics
# via function in R tsum.test
# arguments for function
# tsum.test(mean.x,s.x,n.x,mean.y,s.y,n.y,var.equal=TRUE)
# need to run install 1 time!
# install.packages("BSDA")
library(BSDA)
## Warning: package 'BSDA' was built under R version 4.0.3
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
##
## melanoma
##
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
tsum.test(mean.x = 3.92,s.x = 5.24,n.x = 84,mean.y = 3.16,s.y = 2.11,n.y = 61,
var.equal = FALSE)
##
## Welch Modified Two-Sample t-Test
##
## data: Summarized x and y
## t = 1.2019, df = 116.19, p-value = 0.2319
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4924194 2.0124194
## sample estimates:
## mean of x mean of y
## 3.92 3.16
3.16 - 3.92 # diff in means
## [1] -0.76
# can also do this using web-based calculator like
# https://www.graphpad.com/quickcalcs/ttest1/
# End of Program