Analysis for Dr. Keith Hallbourg, PI of the study.

Load data

df <- read.csv("HALL_PCS_Data.csv")
df$CASE <- as.factor(df$CASE)
df$Group <- as.factor(df$Group)
df$Performance.Category <- as.factor(df$Performance.Category)

Summary

summary(df)
##    Subject          Group      CASE       HISTORY    HOME.SAFETY    
##  Length:35          1:17   Falls :12   Min.   : 0   Min.   :  0.00  
##  Class :character   2:18   Stroke:12   1st Qu.:20   1st Qu.:  0.00  
##  Mode  :character          TKR   :11   Median :40   Median : 25.00  
##                                        Mean   :36   Mean   : 37.14  
##                                        3rd Qu.:50   3rd Qu.: 50.00  
##                                        Max.   :80   Max.   :100.00  
##  SUPPORT.SYSTEMS ASSISTIVE.DEVICE     SAFETY        PLAN.OF.CARE
##  Min.   :  0     Min.   : 50      Min.   : 50.00   Min.   :  0  
##  1st Qu.: 40     1st Qu.: 80      1st Qu.:100.00   1st Qu.:100  
##  Median : 60     Median :100      Median :100.00   Median :100  
##  Mean   : 60     Mean   : 92      Mean   : 97.14   Mean   : 80  
##  3rd Qu.: 90     3rd Qu.:100      3rd Qu.:100.00   3rd Qu.:100  
##  Max.   :100     Max.   :100      Max.   :100.00   Max.   :100  
##    INTERVIEW      PREPARATION     PATIENT.INSTRUCT   FON.SAFETY 
##  Min.   :18.00   Min.   : 75.00   Min.   : 50.00   Min.   :100  
##  1st Qu.:45.00   1st Qu.: 87.50   1st Qu.:100.00   1st Qu.:100  
##  Median :55.00   Median : 92.00   Median :100.00   Median :100  
##  Mean   :54.54   Mean   : 92.66   Mean   : 94.29   Mean   :100  
##  3rd Qu.:64.00   3rd Qu.:100.00   3rd Qu.:100.00   3rd Qu.:100  
##  Max.   :91.00   Max.   :100.00   Max.   :100.00   Max.   :100  
##    EXECUTION         OVERALL        Total       Performance.Category
##  Min.   : 75.00   Min.   :100   Min.   :49.00   Pass:35             
##  1st Qu.: 91.00   1st Qu.:100   1st Qu.:70.00                       
##  Median : 94.00   Median :100   Median :78.00                       
##  Mean   : 93.54   Mean   :100   Mean   :76.14                       
##  3rd Qu.:100.00   3rd Qu.:100   3rd Qu.:82.50                       
##  Max.   :100.00   Max.   :100   Max.   :92.00

Cleaning

It’s a bit easier without un needed variables - Performance.Category, FON.SAFETY, and OVERALL have no variance - everyone passed or had a 100 so they are not variables

# Drop Performance.Category Variable
df <- select(df, -c(Performance.Category, FON.SAFETY, OVERALL))

Numeric data distributions

# Pull just the numeric rows
dfNumeric <- df[,4:14]

Histograms on numeric variables

hist.data.frame(dfNumeric, nclass = 6)

Run a Shapiro-Wilk test on each numeric variable

Tests the null hypothesis that there is a normal distribution If p < 0.05, by convention, we reject the null hypothesis that the distribution is normal Therefore, it is not a normal distribution

apply(dfNumeric,2,shapiro.test)
## $HISTORY
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.92239, p-value = 0.01671
## 
## 
## $HOME.SAFETY
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.78609, p-value = 1.101e-05
## 
## 
## $SUPPORT.SYSTEMS
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.88772, p-value = 0.001879
## 
## 
## $ASSISTIVE.DEVICE
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.61671, p-value = 2.371e-08
## 
## 
## $SAFETY
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.31832, p-value = 1.345e-11
## 
## 
## $PLAN.OF.CARE
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.49143, p-value = 7.195e-10
## 
## 
## $INTERVIEW
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.98013, p-value = 0.7635
## 
## 
## $PREPARATION
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.81527, p-value = 4.123e-05
## 
## 
## $PATIENT.INSTRUCT
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.47356, p-value = 4.587e-10
## 
## 
## $EXECUTION
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.82854, p-value = 7.794e-05
## 
## 
## $Total
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.9568, p-value = 0.183

Correlations

dfMatrix <- as.matrix(as.data.frame(lapply(dfNumeric, as.numeric)))
rcorr(dfMatrix, type = "spearman")
##                  HISTORY HOME.SAFETY SUPPORT.SYSTEMS ASSISTIVE.DEVICE SAFETY
## HISTORY             1.00        0.14           -0.02             0.12   0.22
## HOME.SAFETY         0.14        1.00            0.10             0.32  -0.27
## SUPPORT.SYSTEMS    -0.02        0.10            1.00             0.38   0.03
## ASSISTIVE.DEVICE    0.12        0.32            0.38             1.00   0.01
## SAFETY              0.22       -0.27            0.03             0.01   1.00
## PLAN.OF.CARE        0.04        0.16            0.09             0.14   0.12
## INTERVIEW           0.09        0.09            0.31             0.04   0.30
## PREPARATION         0.10       -0.01            0.06             0.01   0.06
## PATIENT.INSTRUCT    0.26       -0.29            0.24             0.03   0.18
## EXECUTION          -0.18       -0.02            0.18            -0.20  -0.11
## Total               0.38        0.60            0.53             0.45   0.01
##                  PLAN.OF.CARE INTERVIEW PREPARATION PATIENT.INSTRUCT EXECUTION
## HISTORY                  0.04      0.09        0.10             0.26     -0.18
## HOME.SAFETY              0.16      0.09       -0.01            -0.29     -0.02
## SUPPORT.SYSTEMS          0.09      0.31        0.06             0.24      0.18
## ASSISTIVE.DEVICE         0.14      0.04        0.01             0.03     -0.20
## SAFETY                   0.12      0.30        0.06             0.18     -0.11
## PLAN.OF.CARE             1.00      0.21        0.47             0.16      0.13
## INTERVIEW                0.21      1.00        0.02             0.11      0.27
## PREPARATION              0.47      0.02        1.00             0.31      0.05
## PATIENT.INSTRUCT         0.16      0.11        0.31             1.00      0.14
## EXECUTION                0.13      0.27        0.05             0.14      1.00
## Total                    0.61      0.44        0.30             0.22      0.14
##                  Total
## HISTORY           0.38
## HOME.SAFETY       0.60
## SUPPORT.SYSTEMS   0.53
## ASSISTIVE.DEVICE  0.45
## SAFETY            0.01
## PLAN.OF.CARE      0.61
## INTERVIEW         0.44
## PREPARATION       0.30
## PATIENT.INSTRUCT  0.22
## EXECUTION         0.14
## Total             1.00
## 
## n= 35 
## 
## 
## P
##                  HISTORY HOME.SAFETY SUPPORT.SYSTEMS ASSISTIVE.DEVICE SAFETY
## HISTORY                  0.4366      0.9207          0.4788           0.2110
## HOME.SAFETY      0.4366              0.5751          0.0599           0.1112
## SUPPORT.SYSTEMS  0.9207  0.5751                      0.0243           0.8637
## ASSISTIVE.DEVICE 0.4788  0.0599      0.0243                           0.9468
## SAFETY           0.2110  0.1112      0.8637          0.9468                 
## PLAN.OF.CARE     0.8049  0.3568      0.5928          0.4107           0.5050
## INTERVIEW        0.6176  0.6136      0.0670          0.8194           0.0842
## PREPARATION      0.5491  0.9388      0.7402          0.9725           0.7181
## PATIENT.INSTRUCT 0.1377  0.0922      0.1720          0.8432           0.3039
## EXECUTION        0.3120  0.8907      0.2902          0.2553           0.5126
## Total            0.0236  0.0001      0.0012          0.0061           0.9330
##                  PLAN.OF.CARE INTERVIEW PREPARATION PATIENT.INSTRUCT EXECUTION
## HISTORY          0.8049       0.6176    0.5491      0.1377           0.3120   
## HOME.SAFETY      0.3568       0.6136    0.9388      0.0922           0.8907   
## SUPPORT.SYSTEMS  0.5928       0.0670    0.7402      0.1720           0.2902   
## ASSISTIVE.DEVICE 0.4107       0.8194    0.9725      0.8432           0.2553   
## SAFETY           0.5050       0.0842    0.7181      0.3039           0.5126   
## PLAN.OF.CARE                  0.2183    0.0047      0.3531           0.4664   
## INTERVIEW        0.2183                 0.9295      0.5430           0.1202   
## PREPARATION      0.0047       0.9295                0.0696           0.7848   
## PATIENT.INSTRUCT 0.3531       0.5430    0.0696                       0.4189   
## EXECUTION        0.4664       0.1202    0.7848      0.4189                    
## Total            0.0001       0.0080    0.0768      0.1951           0.4213   
##                  Total 
## HISTORY          0.0236
## HOME.SAFETY      0.0001
## SUPPORT.SYSTEMS  0.0012
## ASSISTIVE.DEVICE 0.0061
## SAFETY           0.9330
## PLAN.OF.CARE     0.0001
## INTERVIEW        0.0080
## PREPARATION      0.0768
## PATIENT.INSTRUCT 0.1951
## EXECUTION        0.4213
## Total
d<-cor(dfNumeric)
corrplot(d, method = 'ellipse', type = 'upper')

qplot(data = df, INTERVIEW, Total)

Any differences between the Cases?

Case_plot <- ggplot(data = df, aes(x=CASE, y=Total)) + 
  geom_boxplot() 
Case_plot

Case_plot <- ggplot(data = df, aes(x=CASE, y=HISTORY)) + 
  geom_boxplot() 
Case_plot

One way ANOVA

ezANOVA( df, Total, wid = Subject, between = CASE, type=3)
## Warning: Converting "Subject" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Coefficient covariances computed by hccm()
## $ANOVA
##   Effect DFn DFd        F          p p<.05       ges
## 2   CASE   2  32 4.782238 0.01523501     * 0.2301118
## 
## $`Levene's Test for Homogeneity of Variance`
##   DFn DFd      SSn      SSd         F         p p<.05
## 1   2  32 20.15628 1142.015 0.2823959 0.7558346

Turns out the “Falls” Case seemed to be more difficult

Cross tab - Chi Square

Is there a difference between each group and the case they had? In other words, do we have to control for “case” in the analysis?

crosstabs(Group ~ CASE, df)
##        CASE
## Group   Falls Stroke TKR Total
##   1         4      8   5    17
##   2         8      4   6    18
##   Total    12     12  11    35
## Chisq = 2.7312  df = 2   p-value = 0.25522

No significant difference between the groups based on the Chi Square analysis.

Hypothesis testing

Total score on Physical Therapy Interventions I lab final

Boxplot for Total score

Total_plot <- ggplot(data = df, aes(x=Group, y=Total)) + 
  geom_boxplot() 
Total_plot

T-Test of Total score

t.test(Total ~ Group, data = df)
## 
##  Welch Two Sample t-test
## 
## data:  Total by Group
## t = -1.136, df = 25.832, p-value = 0.2664
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -10.10123   2.91169
## sample estimates:
## mean in group 1 mean in group 2 
##        74.29412        77.88889

ANOVA with co-variate of case

ezANOVA( df , Total , wid = Subject, between = Group , between_covariates = CASE , type = 3)
## Warning: Converting "Subject" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Warning: Implementation of ANCOVA in this version of ez is experimental and
## not yet fully validated. Also, note that ANCOVA is intended purely as a tool
## to increase statistical power; ANCOVA can not eliminate confounds in the data.
## Specifically, covariates should: (1) be uncorrelated with other predictors and
## (2) should have effects on the DV that are independent of other predictors.
## Failure to meet these conditions may dramatically increase the rate of false-
## positives.
## Coefficient covariances computed by hccm()
## $ANOVA
##   Effect DFn DFd        F          p p<.05       ges
## 2  Group   1  33 4.588171 0.03966596     * 0.1220642
## 
## $`Levene's Test for Homogeneity of Variance`
##   DFn DFd      SSn      SSd        F          p p<.05
## 1   1  33 117.4381 811.3047 4.776821 0.03604666     *

Scatter plot and linear model approach to testing differences between cases

sc_plot <- df %>%
  ggplot(aes(x=as.integer(Group), 
             y=Total, color = CASE)) +
  geom_point() +
  geom_smooth(method = lm)

sc_plot + geom_smooth(method = lm)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

sc_plot <- df %>%
  ggplot(aes(x=INTERVIEW, 
             y=Total)) +
  geom_point()

Boxplot for Interview

Interview_plot <- ggplot(data = df, aes(x=Group, y=INTERVIEW)) + 
  geom_boxplot() 
Interview_plot

T-Test of Interview

t.test(INTERVIEW ~ Group, data = df)
## 
##  Welch Two Sample t-test
## 
## data:  INTERVIEW by Group
## t = -0.91867, df = 32.993, p-value = 0.3649
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -15.159317   5.727944
## sample estimates:
## mean in group 1 mean in group 2 
##        52.11765        56.83333

History

Boxplot for History

HISTORY_plot <- ggplot(data = df, aes(x=Group, y=HISTORY)) + 
  geom_boxplot() 
HISTORY_plot

Mann Whitney U for History

wilcox.test(HISTORY ~ Group, data = df)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  HISTORY by Group
## W = 121, p-value = 0.2888
## alternative hypothesis: true location shift is not equal to 0

History

Boxplot

HOME.SAFETY_plot <- ggplot(data = df, aes(x=Group, y=HOME.SAFETY)) + 
  geom_boxplot() 
HOME.SAFETY_plot

Mann Whitney U for Home Safety

wilcox.test(HOME.SAFETY ~ Group, data = df)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  HOME.SAFETY by Group
## W = 158, p-value = 0.8754
## alternative hypothesis: true location shift is not equal to 0

PLAN.OF.CARE

Boxplot

PLAN.OF.CARE_plot <- ggplot(data = df, aes(x=Group, y=PLAN.OF.CARE)) + 
  geom_boxplot() 
PLAN.OF.CARE_plot

Mann Whitney U for PLAN.OF.CARE

wilcox.test(PLAN.OF.CARE ~ Group, data = df)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  PLAN.OF.CARE by Group
## W = 107.5, p-value = 0.03212
## alternative hypothesis: true location shift is not equal to 0

SUPPORT.SYSTEMS

Boxplot

SUPPORT.SYSTEMS_plot <- ggplot(data = df, aes(x=Group, y=SUPPORT.SYSTEMS)) + 
  geom_boxplot() 
SUPPORT.SYSTEMS_plot

Mann Whitney U for SUPPORT.SYSTEMS

wilcox.test(SUPPORT.SYSTEMS ~ Group, data = df)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  SUPPORT.SYSTEMS by Group
## W = 185, p-value = 0.2899
## alternative hypothesis: true location shift is not equal to 0

ASSISTIVE.DEVICE

Boxplot

ASSISTIVE.DEVICE_plot <- ggplot(data = df, aes(x=Group, y=ASSISTIVE.DEVICE)) + 
  geom_boxplot() 
ASSISTIVE.DEVICE_plot

Mann Whitney U for ASSISTIVE.DEVICE

wilcox.test(ASSISTIVE.DEVICE ~ Group, data = df)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ASSISTIVE.DEVICE by Group
## W = 153, p-value = 1
## alternative hypothesis: true location shift is not equal to 0

Factor Analysis

Is there a factor that may better represent?

Based on an exploratory PCA and looking at the scree plot (below), it is not worthwhile to use this approach to combine variables. This does not mean we cannot combine them - just that a factor analytic approach would not be useful.

Scree Plot

#perform PCA
results <- prcomp(dfMatrix, scale = TRUE)
#calculate total variance explained by each principal component
var_explained = results$sdev^2 / sum(results$sdev^2)
#create scree plot
library(ggplot2)

qplot(c(1:11), var_explained) + 
  geom_line() + 
  xlab("Principal Component") + 
  ylab("Variance Explained") +
  ggtitle("Scree Plot") +
  ylim(0, 1)