# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 Final Fall 2021 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 
# Statistical Report | Relationship between AFib and PRU Levels

# Download and load the data file = prevend
load(url("http://people.duke.edu/~sgrambow/crp241data/prudata.RData"))

# if the above load command doesnt run for you try the
# set of commands below to load the data frame
# highlight and run all 3 lines of code

download.file("http://people.duke.edu/~sgrambow/crp241data/prudata.RData",
              destfile = "prudata.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("prudata.RData")


# Data Dictionary: 
# (1) subjno  = subject number
# (2) pru     = PRU level 
# (3) afib    = afib status (0 = no / 1 = yes)
# (4) apt     = taking antiplatelet therapy (0 = no / 1 = yes)

# examine structure of the data 
str(prudata)
## 'data.frame':    40 obs. of  4 variables:
##  $ subjno: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ pru   : num  357 358 358 369 328 ...
##  $ afib  : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ apt   : int  1 1 1 1 1 1 1 1 1 1 ...
# create subgroups by afib status.  These subgroups
# can be used as inputs to some of the functions we want
# to use to describe and test the data
yes.afib <- subset(prudata,afib==1)  
no.afib <- subset(prudata,afib==0) 

# Deciding on Analytic Method to be used
# Summary Measures for overall cohort 
summary(prudata)
##      subjno           pru             afib          apt     
##  Min.   : 1.00   Min.   :288.9   Min.   :0.0   Min.   :0.0  
##  1st Qu.:10.75   1st Qu.:299.8   1st Qu.:0.0   1st Qu.:0.0  
##  Median :20.50   Median :322.1   Median :0.5   Median :0.5  
##  Mean   :20.50   Mean   :324.3   Mean   :0.5   Mean   :0.5  
##  3rd Qu.:30.25   3rd Qu.:352.1   3rd Qu.:1.0   3rd Qu.:1.0  
##  Max.   :40.00   Max.   :368.5   Max.   :1.0   Max.   :1.0
# Summary measures provided by afib status
by(prudata$pru,prudata$afib,summary)
## prudata$afib: 0
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   288.9   296.5   301.9   308.6   310.5   352.6 
## ------------------------------------------------------------ 
## prudata$afib: 1
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   290.9   332.7   348.6   340.1   357.8   368.5
# Graphical summary by afib status
# (3) afib    = afib status (0 = no / 1 = yes)
boxplot(prudata$pru~prudata$afib,
        main='PRU Levels by AFib Status',
        ylab='PRU Levels',
        xlab='AFib Status',
        names=c("No","Yes"),
        col="light blue")

# add mean to each boxplot with points function
points(c(mean(no.afib$pru),mean(yes.afib$pru)),cex=1,pch=16,col="red")

# Overall cohort analysis 
# Comparing the mean PRU 
# level between patients with vs. without AFib. 

# two-sample t-test results
t.test(prudata$pru ~ prudata$afib, var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  prudata$pru by prudata$afib
## t = -4.6514, df = 38, p-value = 3.92e-05
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -45.22857 -17.79792
## sample estimates:
## mean in group 0 mean in group 1 
##        308.5638        340.0770
# mean diff (afibYES - abfibNO) = 340.1 - 308.6 = 31.5
340.1 - 308.6
## [1] 31.5
# Unadjusted Linear Regression results
unadjusted.model <- lm(prudata$pru ~ prudata$afib)
summary(unadjusted.model)
## 
## Call:
## lm(formula = prudata$pru ~ prudata$afib)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.197 -11.741  -2.176  17.219  44.035 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   308.564      4.791  64.409  < 2e-16 ***
## prudata$afib   31.513      6.775   4.651 3.92e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.42 on 38 degrees of freedom
## Multiple R-squared:  0.3628, Adjusted R-squared:  0.346 
## F-statistic: 21.64 on 1 and 38 DF,  p-value: 3.92e-05
# Concern about confounding of antiplatelet therapy use

# Create subsets based on APT use: 
# (4) apt     = taking antiplatelet therapy (0 = no / 1 = yes)
yes.apt <- subset(prudata,prudata$apt==1)
no.apt <- subset(prudata,prudata$apt==0)

# Among patients taking an APT: 
t.test(yes.apt$pru ~ yes.apt$afib, var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  yes.apt$pru by yes.apt$afib
## t = -1.4837, df = 18, p-value = 0.1552
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -22.780694   3.922564
## sample estimates:
## mean in group 0 mean in group 1 
##        341.1409        350.5700
# mean diff (afibYES - abfibNO) = 350.6 - 341.1 = 9.5
350.6 - 341.1
## [1] 9.5
# Among patinets not taking an APT: 
t.test(no.apt$pru ~ no.apt$afib, var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  no.apt$pru by no.apt$afib
## t = 0.55501, df = 18, p-value = 0.5857
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -6.446194 11.074827
## sample estimates:
## mean in group 0 mean in group 1 
##        300.4195        298.1052
# mean diff (afibYES - abfibNO) = 298.1 - 300.4 = -2.3
298.1 - 300.4
## [1] -2.3
# compare models
# Unadjusted Linear Regression
unadjusted.model <- lm(prudata$pru ~ prudata$afib)
summary(unadjusted.model)
## 
## Call:
## lm(formula = prudata$pru ~ prudata$afib)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.197 -11.741  -2.176  17.219  44.035 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   308.564      4.791  64.409  < 2e-16 ***
## prudata$afib   31.513      6.775   4.651 3.92e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.42 on 38 degrees of freedom
## Multiple R-squared:  0.3628, Adjusted R-squared:  0.346 
## F-statistic: 21.64 on 1 and 38 DF,  p-value: 3.92e-05
# check regression diagnostics
plot(unadjusted.model)

## hat values (leverages) are all = 0.05
##  and there are no factor predictors; no plot no. 5

# covariate adjusted model

# run model with adjustment for apt
adjusted.model <- lm(prudata$pru ~ prudata$afib + prudata$apt)
summary(adjusted.model)
## 
## Call:
## lm(formula = prudata$pru ~ prudata$afib + prudata$apt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.357  -7.177   1.269   7.757  19.147 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   299.245      2.323 128.838  < 2e-16 ***
## prudata$afib    3.557      3.871   0.919    0.364    
## prudata$apt    46.593      3.871  12.036 2.33e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.793 on 37 degrees of freedom
## Multiple R-squared:  0.8704, Adjusted R-squared:  0.8634 
## F-statistic: 124.2 on 2 and 37 DF,  p-value: < 2.2e-16
# check regression diagnostics
plot(adjusted.model)

# End of Program