# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 Final Fall 2021 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Statistical Report | Relationship between AFib and PRU Levels
# Download and load the data file = prevend
load(url("http://people.duke.edu/~sgrambow/crp241data/prudata.RData"))
# if the above load command doesnt run for you try the
# set of commands below to load the data frame
# highlight and run all 3 lines of code
download.file("http://people.duke.edu/~sgrambow/crp241data/prudata.RData",
destfile = "prudata.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("prudata.RData")
# Data Dictionary:
# (1) subjno = subject number
# (2) pru = PRU level
# (3) afib = afib status (0 = no / 1 = yes)
# (4) apt = taking antiplatelet therapy (0 = no / 1 = yes)
# examine structure of the data
str(prudata)
## 'data.frame': 40 obs. of 4 variables:
## $ subjno: int 1 2 3 4 5 6 7 8 9 10 ...
## $ pru : num 357 358 358 369 328 ...
## $ afib : num 1 1 1 1 1 1 1 1 1 1 ...
## $ apt : int 1 1 1 1 1 1 1 1 1 1 ...
# create subgroups by afib status. These subgroups
# can be used as inputs to some of the functions we want
# to use to describe and test the data
yes.afib <- subset(prudata,afib==1)
no.afib <- subset(prudata,afib==0)
# Deciding on Analytic Method to be used
# Summary Measures for overall cohort
summary(prudata)
## subjno pru afib apt
## Min. : 1.00 Min. :288.9 Min. :0.0 Min. :0.0
## 1st Qu.:10.75 1st Qu.:299.8 1st Qu.:0.0 1st Qu.:0.0
## Median :20.50 Median :322.1 Median :0.5 Median :0.5
## Mean :20.50 Mean :324.3 Mean :0.5 Mean :0.5
## 3rd Qu.:30.25 3rd Qu.:352.1 3rd Qu.:1.0 3rd Qu.:1.0
## Max. :40.00 Max. :368.5 Max. :1.0 Max. :1.0
# Summary measures provided by afib status
by(prudata$pru,prudata$afib,summary)
## prudata$afib: 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 288.9 296.5 301.9 308.6 310.5 352.6
## ------------------------------------------------------------
## prudata$afib: 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 290.9 332.7 348.6 340.1 357.8 368.5
# Graphical summary by afib status
# (3) afib = afib status (0 = no / 1 = yes)
boxplot(prudata$pru~prudata$afib,
main='PRU Levels by AFib Status',
ylab='PRU Levels',
xlab='AFib Status',
names=c("No","Yes"),
col="light blue")
# add mean to each boxplot with points function
points(c(mean(no.afib$pru),mean(yes.afib$pru)),cex=1,pch=16,col="red")

# Overall cohort analysis
# Comparing the mean PRU
# level between patients with vs. without AFib.
# two-sample t-test results
t.test(prudata$pru ~ prudata$afib, var.equal=TRUE)
##
## Two Sample t-test
##
## data: prudata$pru by prudata$afib
## t = -4.6514, df = 38, p-value = 3.92e-05
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -45.22857 -17.79792
## sample estimates:
## mean in group 0 mean in group 1
## 308.5638 340.0770
# mean diff (afibYES - abfibNO) = 340.1 - 308.6 = 31.5
340.1 - 308.6
## [1] 31.5
# Unadjusted Linear Regression results
unadjusted.model <- lm(prudata$pru ~ prudata$afib)
summary(unadjusted.model)
##
## Call:
## lm(formula = prudata$pru ~ prudata$afib)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.197 -11.741 -2.176 17.219 44.035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 308.564 4.791 64.409 < 2e-16 ***
## prudata$afib 31.513 6.775 4.651 3.92e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.42 on 38 degrees of freedom
## Multiple R-squared: 0.3628, Adjusted R-squared: 0.346
## F-statistic: 21.64 on 1 and 38 DF, p-value: 3.92e-05
# Concern about confounding of antiplatelet therapy use
# Create subsets based on APT use:
# (4) apt = taking antiplatelet therapy (0 = no / 1 = yes)
yes.apt <- subset(prudata,prudata$apt==1)
no.apt <- subset(prudata,prudata$apt==0)
# Among patients taking an APT:
t.test(yes.apt$pru ~ yes.apt$afib, var.equal=TRUE)
##
## Two Sample t-test
##
## data: yes.apt$pru by yes.apt$afib
## t = -1.4837, df = 18, p-value = 0.1552
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -22.780694 3.922564
## sample estimates:
## mean in group 0 mean in group 1
## 341.1409 350.5700
# mean diff (afibYES - abfibNO) = 350.6 - 341.1 = 9.5
350.6 - 341.1
## [1] 9.5
# Among patinets not taking an APT:
t.test(no.apt$pru ~ no.apt$afib, var.equal=TRUE)
##
## Two Sample t-test
##
## data: no.apt$pru by no.apt$afib
## t = 0.55501, df = 18, p-value = 0.5857
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -6.446194 11.074827
## sample estimates:
## mean in group 0 mean in group 1
## 300.4195 298.1052
# mean diff (afibYES - abfibNO) = 298.1 - 300.4 = -2.3
298.1 - 300.4
## [1] -2.3
# compare models
# Unadjusted Linear Regression
unadjusted.model <- lm(prudata$pru ~ prudata$afib)
summary(unadjusted.model)
##
## Call:
## lm(formula = prudata$pru ~ prudata$afib)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.197 -11.741 -2.176 17.219 44.035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 308.564 4.791 64.409 < 2e-16 ***
## prudata$afib 31.513 6.775 4.651 3.92e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.42 on 38 degrees of freedom
## Multiple R-squared: 0.3628, Adjusted R-squared: 0.346
## F-statistic: 21.64 on 1 and 38 DF, p-value: 3.92e-05
# check regression diagnostics
plot(unadjusted.model)


## hat values (leverages) are all = 0.05
## and there are no factor predictors; no plot no. 5

# covariate adjusted model
# run model with adjustment for apt
adjusted.model <- lm(prudata$pru ~ prudata$afib + prudata$apt)
summary(adjusted.model)
##
## Call:
## lm(formula = prudata$pru ~ prudata$afib + prudata$apt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.357 -7.177 1.269 7.757 19.147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 299.245 2.323 128.838 < 2e-16 ***
## prudata$afib 3.557 3.871 0.919 0.364
## prudata$apt 46.593 3.871 12.036 2.33e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.793 on 37 degrees of freedom
## Multiple R-squared: 0.8704, Adjusted R-squared: 0.8634
## F-statistic: 124.2 on 2 and 37 DF, p-value: < 2.2e-16
# check regression diagnostics
plot(adjusted.model)




# End of Program