Analysis for Dr. Keith Hallbourg, PI of the study.
# Packages used below: dplyr (select, %>%), Hmisc (hist.data.frame, rcorr), corrplot, ggplot2, ez (ezANOVA)
library(dplyr); library(Hmisc); library(corrplot); library(ggplot2); library(ez)

df <- read.csv("HALL_PCS_Data.csv")
# Convert the grouping variables to factors
df$CASE <- as.factor(df$CASE)
df$Group <- as.factor(df$Group)
df$Performance.Category <- as.factor(df$Performance.Category)
summary(df)
## Subject Group CASE HISTORY HOME.SAFETY
## Length:35 1:17 Falls :12 Min. : 0 Min. : 0.00
## Class :character 2:18 Stroke:12 1st Qu.:20 1st Qu.: 0.00
## Mode :character TKR :11 Median :40 Median : 25.00
## Mean :36 Mean : 37.14
## 3rd Qu.:50 3rd Qu.: 50.00
## Max. :80 Max. :100.00
## SUPPORT.SYSTEMS ASSISTIVE.DEVICE SAFETY PLAN.OF.CARE
## Min. : 0 Min. : 50 Min. : 50.00 Min. : 0
## 1st Qu.: 40 1st Qu.: 80 1st Qu.:100.00 1st Qu.:100
## Median : 60 Median :100 Median :100.00 Median :100
## Mean : 60 Mean : 92 Mean : 97.14 Mean : 80
## 3rd Qu.: 90 3rd Qu.:100 3rd Qu.:100.00 3rd Qu.:100
## Max. :100 Max. :100 Max. :100.00 Max. :100
## INTERVIEW PREPARATION PATIENT.INSTRUCT FON.SAFETY
## Min. :18.00 Min. : 75.00 Min. : 50.00 Min. :100
## 1st Qu.:45.00 1st Qu.: 87.50 1st Qu.:100.00 1st Qu.:100
## Median :55.00 Median : 92.00 Median :100.00 Median :100
## Mean :54.54 Mean : 92.66 Mean : 94.29 Mean :100
## 3rd Qu.:64.00 3rd Qu.:100.00 3rd Qu.:100.00 3rd Qu.:100
## Max. :91.00 Max. :100.00 Max. :100.00 Max. :100
## EXECUTION OVERALL Total Performance.Category
## Min. : 75.00 Min. :100 Min. :49.00 Pass:35
## 1st Qu.: 91.00 1st Qu.:100 1st Qu.:70.00
## Median : 94.00 Median :100 Median :78.00
## Mean : 93.54 Mean :100 Mean :76.14
## 3rd Qu.:100.00 3rd Qu.:100 3rd Qu.:82.50
## Max. :100.00 Max. :100 Max. :92.00
The analysis is easier without unneeded variables: Performance.Category, FON.SAFETY, and OVERALL have no variance (everyone passed or scored 100), so they carry no information.
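As a quick check before dropping them, the constant columns can be identified programmatically (a minimal sketch, not part of the original script):
# Sanity check: list columns with a single unique value (zero variance)
names(df)[sapply(df, function(x) length(unique(x)) <= 1)]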
# Drop the three zero-variance variables
df <- select(df, -c(Performance.Category, FON.SAFETY, OVERALL))
# Pull just the numeric columns (subscores and Total)
dfNumeric <- df[, 4:14]
# Histograms of each numeric variable
hist.data.frame(dfNumeric, nclass = 6)
The Shapiro-Wilk test evaluates the null hypothesis that a variable is normally distributed. If p < 0.05, by convention, we reject that null hypothesis and conclude the distribution is not normal.
# Apply the Shapiro-Wilk test to each numeric column
apply(dfNumeric, 2, shapiro.test)
## $HISTORY
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.92239, p-value = 0.01671
##
##
## $HOME.SAFETY
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.78609, p-value = 1.101e-05
##
##
## $SUPPORT.SYSTEMS
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.88772, p-value = 0.001879
##
##
## $ASSISTIVE.DEVICE
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.61671, p-value = 2.371e-08
##
##
## $SAFETY
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.31832, p-value = 1.345e-11
##
##
## $PLAN.OF.CARE
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.49143, p-value = 7.195e-10
##
##
## $INTERVIEW
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.98013, p-value = 0.7635
##
##
## $PREPARATION
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.81527, p-value = 4.123e-05
##
##
## $PATIENT.INSTRUCT
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.47356, p-value = 4.587e-10
##
##
## $EXECUTION
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.82854, p-value = 7.794e-05
##
##
## $Total
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.9568, p-value = 0.183
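For a compact view of these results, the p-values can be collected into one sorted table (a small convenience sketch, equivalent to the apply() call above):
# Collect the Shapiro-Wilk p-values into a single sorted vector
shapiro_p <- sapply(dfNumeric, function(x) shapiro.test(x)$p.value)
round(sort(shapiro_p), 4)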
# Coerce to a numeric matrix for rcorr()
dfMatrix <- as.matrix(as.data.frame(lapply(dfNumeric, as.numeric)))
# Spearman correlations, since most of the subscores are non-normal
rcorr(dfMatrix, type = "spearman")
## HISTORY HOME.SAFETY SUPPORT.SYSTEMS ASSISTIVE.DEVICE SAFETY
## HISTORY 1.00 0.14 -0.02 0.12 0.22
## HOME.SAFETY 0.14 1.00 0.10 0.32 -0.27
## SUPPORT.SYSTEMS -0.02 0.10 1.00 0.38 0.03
## ASSISTIVE.DEVICE 0.12 0.32 0.38 1.00 0.01
## SAFETY 0.22 -0.27 0.03 0.01 1.00
## PLAN.OF.CARE 0.04 0.16 0.09 0.14 0.12
## INTERVIEW 0.09 0.09 0.31 0.04 0.30
## PREPARATION 0.10 -0.01 0.06 0.01 0.06
## PATIENT.INSTRUCT 0.26 -0.29 0.24 0.03 0.18
## EXECUTION -0.18 -0.02 0.18 -0.20 -0.11
## Total 0.38 0.60 0.53 0.45 0.01
## PLAN.OF.CARE INTERVIEW PREPARATION PATIENT.INSTRUCT EXECUTION
## HISTORY 0.04 0.09 0.10 0.26 -0.18
## HOME.SAFETY 0.16 0.09 -0.01 -0.29 -0.02
## SUPPORT.SYSTEMS 0.09 0.31 0.06 0.24 0.18
## ASSISTIVE.DEVICE 0.14 0.04 0.01 0.03 -0.20
## SAFETY 0.12 0.30 0.06 0.18 -0.11
## PLAN.OF.CARE 1.00 0.21 0.47 0.16 0.13
## INTERVIEW 0.21 1.00 0.02 0.11 0.27
## PREPARATION 0.47 0.02 1.00 0.31 0.05
## PATIENT.INSTRUCT 0.16 0.11 0.31 1.00 0.14
## EXECUTION 0.13 0.27 0.05 0.14 1.00
## Total 0.61 0.44 0.30 0.22 0.14
## Total
## HISTORY 0.38
## HOME.SAFETY 0.60
## SUPPORT.SYSTEMS 0.53
## ASSISTIVE.DEVICE 0.45
## SAFETY 0.01
## PLAN.OF.CARE 0.61
## INTERVIEW 0.44
## PREPARATION 0.30
## PATIENT.INSTRUCT 0.22
## EXECUTION 0.14
## Total 1.00
##
## n= 35
##
##
## P
## HISTORY HOME.SAFETY SUPPORT.SYSTEMS ASSISTIVE.DEVICE SAFETY
## HISTORY 0.4366 0.9207 0.4788 0.2110
## HOME.SAFETY 0.4366 0.5751 0.0599 0.1112
## SUPPORT.SYSTEMS 0.9207 0.5751 0.0243 0.8637
## ASSISTIVE.DEVICE 0.4788 0.0599 0.0243 0.9468
## SAFETY 0.2110 0.1112 0.8637 0.9468
## PLAN.OF.CARE 0.8049 0.3568 0.5928 0.4107 0.5050
## INTERVIEW 0.6176 0.6136 0.0670 0.8194 0.0842
## PREPARATION 0.5491 0.9388 0.7402 0.9725 0.7181
## PATIENT.INSTRUCT 0.1377 0.0922 0.1720 0.8432 0.3039
## EXECUTION 0.3120 0.8907 0.2902 0.2553 0.5126
## Total 0.0236 0.0001 0.0012 0.0061 0.9330
## PLAN.OF.CARE INTERVIEW PREPARATION PATIENT.INSTRUCT EXECUTION
## HISTORY 0.8049 0.6176 0.5491 0.1377 0.3120
## HOME.SAFETY 0.3568 0.6136 0.9388 0.0922 0.8907
## SUPPORT.SYSTEMS 0.5928 0.0670 0.7402 0.1720 0.2902
## ASSISTIVE.DEVICE 0.4107 0.8194 0.9725 0.8432 0.2553
## SAFETY 0.5050 0.0842 0.7181 0.3039 0.5126
## PLAN.OF.CARE 0.2183 0.0047 0.3531 0.4664
## INTERVIEW 0.2183 0.9295 0.5430 0.1202
## PREPARATION 0.0047 0.9295 0.0696 0.7848
## PATIENT.INSTRUCT 0.3531 0.5430 0.0696 0.4189
## EXECUTION 0.4664 0.1202 0.7848 0.4189
## Total 0.0001 0.0080 0.0768 0.1951 0.4213
## Total
## HISTORY 0.0236
## HOME.SAFETY 0.0001
## SUPPORT.SYSTEMS 0.0012
## ASSISTIVE.DEVICE 0.0061
## SAFETY 0.9330
## PLAN.OF.CARE 0.0001
## INTERVIEW 0.0080
## PREPARATION 0.0768
## PATIENT.INSTRUCT 0.1951
## EXECUTION 0.4213
## Total
# Correlation plot; use Spearman to match the rcorr() analysis above
d <- cor(dfNumeric, method = "spearman")
corrplot(d, method = 'ellipse', type = 'upper')
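Since rcorr() already returns the Spearman p-values, the non-significant cells can also be blanked out of the plot (a sketch using corrplot's p.mat argument; not part of the original script):
# Same plot, but blank the cells whose Spearman correlation is not significant
sp <- rcorr(dfMatrix, type = "spearman")
corrplot(sp$r, p.mat = sp$P, sig.level = 0.05, insig = "blank",
         method = 'ellipse', type = 'upper')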
# Scatterplot of INTERVIEW against Total
qplot(data = df, INTERVIEW, Total)
# Total score by case
Case_plot <- ggplot(data = df, aes(x = CASE, y = Total)) +
  geom_boxplot()
Case_plot
# HISTORY subscore by case
History_case_plot <- ggplot(data = df, aes(x = CASE, y = HISTORY)) +
  geom_boxplot()
History_case_plot
# One-way ANOVA: does Total differ by case?
ezANOVA(data = df, dv = Total, wid = Subject, between = CASE, type = 3)
## Warning: Converting "Subject" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Coefficient covariances computed by hccm()
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 CASE 2 32 4.782238 0.01523501 * 0.2301118
##
## $`Levene's Test for Homogeneity of Variance`
## DFn DFd SSn SSd F p p<.05
## 1 2 32 20.15628 1142.015 0.2823959 0.7558346
The ANOVA shows a significant effect of case (p = .015); the “Falls” case appears to have been more difficult than the others.
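To see which cases differ on Total, a pairwise follow-up could be run (a sketch using base R's pairwise.t.test with Holm adjustment; this was not part of the original analysis):
# Hypothetical follow-up: Holm-adjusted pairwise comparisons of Total across cases
pairwise.t.test(df$Total, df$CASE, p.adjust.method = "holm")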
Is there an association between group and the case each subject received? In other words, do we need to control for case in the analysis?
# Cross-tabulation of Group by CASE with a chi-square test
# (crosstabs() is not base R; it comes from a helper package loaded in the original session)
crosstabs(Group ~ CASE, df)
## CASE
## Group Falls Stroke TKR Total
## 1 4 8 5 17
## 2 8 4 6 18
## Total 12 12 11 35
## Chisq = 2.7312 df = 2 p-value = 0.25522
There is no significant association between group and case based on the chi-square analysis (p = .26).
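The same association can be checked in base R if crosstabs() is unavailable (a sketch using table() and chisq.test()):
# Base-R equivalent: contingency table with a chi-square test of independence
tbl <- table(df$Group, df$CASE)
addmargins(tbl)  # counts with row and column totals
chisq.test(tbl)  # small expected counts may trigger a warning with n = 35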
# Total score by group
Total_plot <- ggplot(data = df, aes(x = Group, y = Total)) +
  geom_boxplot()
Total_plot
t.test(Total ~ Group, data = df)
##
## Welch Two Sample t-test
##
## data: Total by Group
## t = -1.136, df = 25.832, p-value = 0.2664
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -10.10123 2.91169
## sample estimates:
## mean in group 1 mean in group 2
## 74.29412 77.88889
# ANCOVA: effect of Group on Total, controlling for CASE
ezANOVA(data = df, dv = Total, wid = Subject, between = Group, between_covariates = CASE, type = 3)
## Warning: Converting "Subject" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Warning: Implementation of ANCOVA in this version of ez is experimental and
## not yet fully validated. Also, note that ANCOVA is intended purely as a tool
## to increase statistical power; ANCOVA can not eliminate confounds in the data.
## Specifically, covariates should: (1) be uncorrelated with other predictors and
## (2) should have effects on the DV that are independent of other predictors.
## Failure to meet these conditions may dramatically increase the rate of false-
## positives.
## Coefficient covariances computed by hccm()
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 Group 1 33 4.588171 0.03966596 * 0.1220642
##
## $`Levene's Test for Homogeneity of Variance`
## DFn DFd SSn SSd F p p<.05
## 1 1 33 117.4381 811.3047 4.776821 0.03604666 *
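After controlling for case, the group effect reaches significance (p = .040), although Levene's test also flags unequal variances (p = .036). Given the experimental status of ANCOVA in ez noted in the warnings, a linear-model cross-check may be worthwhile (a sketch using car::Anova for Type-III tests; the car package is an assumption here, not part of the original script):
# Cross-check the ANCOVA as a linear model with Type-III sums of squares
library(car)                                       # assumed available
options(contrasts = c("contr.sum", "contr.poly"))  # sum contrasts, needed for Type III
Anova(lm(Total ~ Group + CASE, data = df), type = 3)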
# Total by group (x coerced to integer so a regression line can be fit), colored by case
sc_plot <- df %>%
  ggplot(aes(x = as.integer(Group), y = Total, color = CASE)) +
  geom_point() +
  geom_smooth(method = "lm")
sc_plot
## `geom_smooth()` using formula 'y ~ x'
# Scatterplot of INTERVIEW against Total (the original built this object without displaying it)
sc_plot <- df %>%
  ggplot(aes(x = INTERVIEW, y = Total)) +
  geom_point()
sc_plot
# INTERVIEW subscore by group
Interview_plot <- ggplot(data = df, aes(x = Group, y = INTERVIEW)) +
  geom_boxplot()
Interview_plot
t.test(INTERVIEW ~ Group, data = df)
##
## Welch Two Sample t-test
##
## data: INTERVIEW by Group
## t = -0.91867, df = 32.993, p-value = 0.3649
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -15.159317 5.727944
## sample estimates:
## mean in group 1 mean in group 2
## 52.11765 56.83333
HISTORY_plot <- ggplot(data = df, aes(x=Group, y=HISTORY)) +
geom_boxplot()
HISTORY_plot
# HISTORY is non-normal (Shapiro p = .017), so use the nonparametric Wilcoxon rank-sum test
wilcox.test(HISTORY ~ Group, data = df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: HISTORY by Group
## W = 121, p-value = 0.2888
## alternative hypothesis: true location shift is not equal to 0
HOME.SAFETY_plot <- ggplot(data = df, aes(x=Group, y=HOME.SAFETY)) +
geom_boxplot()
HOME.SAFETY_plot
wilcox.test(HOME.SAFETY ~ Group, data = df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: HOME.SAFETY by Group
## W = 158, p-value = 0.8754
## alternative hypothesis: true location shift is not equal to 0
PLAN.OF.CARE_plot <- ggplot(data = df, aes(x=Group, y=PLAN.OF.CARE)) +
geom_boxplot()
PLAN.OF.CARE_plot
wilcox.test(PLAN.OF.CARE ~ Group, data = df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: PLAN.OF.CARE by Group
## W = 107.5, p-value = 0.03212
## alternative hypothesis: true location shift is not equal to 0
SUPPORT.SYSTEMS_plot <- ggplot(data = df, aes(x=Group, y=SUPPORT.SYSTEMS)) +
geom_boxplot()
SUPPORT.SYSTEMS_plot
wilcox.test(SUPPORT.SYSTEMS ~ Group, data = df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: SUPPORT.SYSTEMS by Group
## W = 185, p-value = 0.2899
## alternative hypothesis: true location shift is not equal to 0
ASSISTIVE.DEVICE_plot <- ggplot(data = df, aes(x=Group, y=ASSISTIVE.DEVICE)) +
geom_boxplot()
ASSISTIVE.DEVICE_plot
wilcox.test(ASSISTIVE.DEVICE ~ Group, data = df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: ASSISTIVE.DEVICE by Group
## W = 153, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
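Five subscore comparisons were run above, so a multiplicity adjustment is worth considering before leaning on the PLAN.OF.CARE result (a sketch collecting the Wilcoxon p-values and applying Holm's correction; not part of the original analysis):
# Holm-adjust the five Wilcoxon p-values from the subscore comparisons
subscores <- c("HISTORY", "HOME.SAFETY", "PLAN.OF.CARE",
               "SUPPORT.SYSTEMS", "ASSISTIVE.DEVICE")
pvals <- sapply(subscores, function(v) wilcox.test(df[[v]] ~ df$Group)$p.value)
round(p.adjust(pvals, method = "holm"), 4)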
Is there a latent factor that might better represent these subscores?
Based on an exploratory PCA and the scree plot below, it is not worthwhile to use this approach to combine variables. This does not mean the variables cannot be combined, only that a factor-analytic approach would not be useful.
# Perform PCA on the standardized subscores
results <- prcomp(dfMatrix, scale. = TRUE)
# Proportion of total variance explained by each principal component
var_explained <- results$sdev^2 / sum(results$sdev^2)
# Scree plot of the 11 components
qplot(c(1:11), var_explained) +
  geom_line() +
  xlab("Principal Component") +
  ylab("Variance Explained") +
  ggtitle("Scree Plot") +
  ylim(0, 1)
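The same information as the scree plot can also be read numerically; summary() of a prcomp object reports the proportion and cumulative variance per component directly:
# Numeric counterpart of the scree plot
summary(results)
round(cumsum(var_explained), 3)  # cumulative proportion of variance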