library(magrittr)
library(knitr)
library(kableExtra)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
Load in subjective attribute data
gpm <- read.csv("Geisinger-PennMed/Subjective_Clean_Geis.csv")
Cleaning and transforming subjective attribute data
colnames(gpm) <- gsub("_1", "_Pos", colnames(gpm))
colnames(gpm) <- gsub("_2", "_Neg", colnames(gpm))
colnames(gpm) <- gsub("_3", "_Remind", colnames(gpm))
colnames(gpm) <- gsub("_4", "_Casual", colnames(gpm))
colnames(gpm) <- gsub("_5", "_Surprise", colnames(gpm))
colnames(gpm) <- gsub("Q84", "S100.1", colnames(gpm))
colnames(gpm) <- gsub("Q80", "S100.2", colnames(gpm))
colnames(gpm) <- gsub("Q85", "S100.3", colnames(gpm))
colnames(gpm) <- gsub("Q86", "S4.1", colnames(gpm))
colnames(gpm) <- gsub("Q87", "S4.2", colnames(gpm))
colnames(gpm) <- gsub("Q88", "S4.3", colnames(gpm))
colnames(gpm) <- gsub("Q89", "S6.1", colnames(gpm))
colnames(gpm) <- gsub("Q90", "S6.2", colnames(gpm))
colnames(gpm) <- gsub("Q91", "S16.1", colnames(gpm))
colnames(gpm) <- gsub("Q92", "S16.2", colnames(gpm))
colnames(gpm) <- gsub("Q93", "S16.3", colnames(gpm))
colnames(gpm) <- gsub("Q94", "S27.1", colnames(gpm))
colnames(gpm) <- gsub("Q95", "S27.2", colnames(gpm))
colnames(gpm) <- gsub("Q96", "S43.1", colnames(gpm))
colnames(gpm) <- gsub("Q97", "S43.2", colnames(gpm))
colnames(gpm) <- gsub("Q102", "S50.1", colnames(gpm))
colnames(gpm) <- gsub("Q103", "S50.3", colnames(gpm))
colnames(gpm) <- gsub("Q104", "S50.2", colnames(gpm))
colnames(gpm) <- gsub("Q105", "S50.4", colnames(gpm))
colnames(gpm) <- gsub("Q106", "S63.1", colnames(gpm))
colnames(gpm) <- gsub("Q107", "S63.2", colnames(gpm))
gpm %<>% filter(!grepl("Excluded", Excluded.observations))
gpm %<>% select(ResponseId, S100.1_Pos:S63.2_Surprise)
gpm_long <- gpm %>%
pivot_longer(
!ResponseId,
names_to = c("study",".value"),
names_sep = "_",
values_drop_na = TRUE
)
gpm_long_avg <- gpm_long %>%
group_by(study) %>%
summarise(Pos_avg = mean(Pos, na.rm = TRUE),
Neg_avg = mean(Neg, na.rm = TRUE),
Remind_avg = mean(Remind, na.rm = TRUE),
Casual_avg = mean(Casual, na.rm = TRUE),
Surprise_avg = mean(Surprise, na.rm = TRUE)) %>%
ungroup()
## `summarise()` ungrouping output (override with `.groups` argument)
for(i in 2:6){
gpm_long_avg[22,i] <- colMeans(gpm_long_avg[c(14, 16), i])
}
gpm_long_avg[22, 1] <- "S50_1_3"
for(i in 2:6){
gpm_long_avg[23,i] <- colMeans(gpm_long_avg[c(15, 17), i])
}
gpm_long_avg[23, 1] <- "S50_2_4"
gpm_long_avg %<>% mutate(
study2 = gsub("\\.", "_", study),
study2 = gsub("S", "", study2)
)
gpm_long_avg <- gpm_long_avg[-c(14:17), ]
Load in objective attribute data and combine with subjective attribute data
#colnames(gpm_o)
gpm_o <- read.csv("Geisinger-PennMed/ObjectiveEfficacy_Geis.csv") %>% rename(study2 = `ï..ID..`)
gpm_o[15, 1] <- "50_2_4"
gpm_all <- merge(gpm_o, gpm_long_avg, by = "study2")
Analysis I: Correlations
q_all <- quos(Impact,
Surprise_avg, Remind_avg, Casual_avg, Pos_avg, Neg_avg,
ReservedForYou, AnyInteractive, Is_Control, Multimedia, ExclamationMarks, Imperative_FullConvo, FR_ReadingEase_First, Hours_PreAppt, Total_Messages, Words_First, Interrogative_FullConvo, FR_ReadingLevel_First)
gpm_all %>% clnR2::table.corr(q_all, copy = FALSE)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
|
var
|
Impact
|
Surprise_avg
|
Remind_avg
|
Casual_avg
|
Pos_avg
|
Neg_avg
|
ReservedForYou
|
AnyInteractive
|
Is_Control
|
Multimedia
|
ExclamationMarks
|
Imperative_FullConvo
|
FR_ReadingEase_First
|
Hours_PreAppt
|
Total_Messages
|
Words_First
|
Interrogative_FullConvo
|
|
Surprise_avg
|
-0.373 .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Remind_avg
|
0.304
|
-0.010
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Casual_avg
|
-0.300
|
0.413 †
|
0.343
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Pos_avg
|
-0.172
|
-0.083
|
0.379 .
|
0.522 *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Neg_avg
|
-0.127
|
0.432 †
|
-0.558 *
|
-0.359 .
|
-0.850 ***
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ReservedForYou
|
0.671 **
|
-0.338
|
0.406 †
|
0.005
|
0.320
|
-0.469 *
|
|
|
|
|
|
|
|
|
|
|
|
|
AnyInteractive
|
-0.318
|
0.493 *
|
-0.167
|
0.343
|
0.022
|
0.201
|
-0.268
|
|
|
|
|
|
|
|
|
|
|
|
Is_Control
|
0.249
|
-0.447 †
|
0.060
|
-0.192
|
0.067
|
-0.224
|
0.016
|
-0.655 **
|
|
|
|
|
|
|
|
|
|
|
Multimedia
|
-0.189
|
0.180
|
-0.288
|
0.174
|
0.383 .
|
-0.148
|
0.016
|
0.519 *
|
-0.218
|
|
|
|
|
|
|
|
|
|
ExclamationMarks
|
0.181
|
-0.216
|
0.048
|
0.218
|
-0.147
|
0.032
|
0.268
|
-0.095
|
-0.049
|
-0.519 *
|
|
|
|
|
|
|
|
|
Imperative_FullConvo
|
0.141
|
0.464 *
|
-0.311
|
-0.302
|
-0.797 ***
|
0.858 ***
|
-0.159
|
0.070
|
-0.249
|
-0.331
|
0.244
|
|
|
|
|
|
|
|
FR_ReadingEase_First
|
-0.127
|
-0.037
|
-0.128
|
0.249
|
0.426 †
|
-0.266
|
0.281
|
-0.148
|
-0.003
|
-0.085
|
0.258
|
-0.207
|
|
|
|
|
|
|
Hours_PreAppt
|
0.108
|
-0.087
|
-0.309
|
0.322
|
0.214
|
-0.100
|
0.133
|
0.184
|
0.165
|
0.542 *
|
0.010
|
-0.238
|
0.181
|
|
|
|
|
|
Total_Messages
|
0.065
|
0.452 †
|
-0.176
|
0.095
|
-0.533 *
|
0.608 **
|
-0.241
|
0.634 **
|
-0.486 *
|
-0.051
|
0.309
|
0.646 **
|
-0.135
|
0.045
|
|
|
|
|
Words_First
|
-0.063
|
0.177
|
0.037
|
-0.308
|
-0.210
|
0.241
|
0.125
|
-0.111
|
-0.203
|
-0.455 †
|
0.149
|
0.409 †
|
0.017
|
-0.697 ***
|
-0.062
|
|
|
|
Interrogative_FullConvo
|
0.050
|
0.413 †
|
-0.259
|
0.288
|
-0.395 †
|
0.508 *
|
-0.224
|
0.394 †
|
-0.258
|
0.019
|
0.319
|
0.524 *
|
0.085
|
0.412 †
|
0.828 ***
|
-0.418 †
|
|
|
FR_ReadingLevel_First
|
-0.019
|
0.286
|
-0.060
|
-0.022
|
-0.498 *
|
0.456 *
|
-0.350 .
|
0.319
|
-0.192
|
0.058
|
-0.044
|
0.428 †
|
-0.869 ***
|
-0.107
|
0.338
|
0.088
|
0.167
|
Analysis II: Principal Components Analysis
gpm_pca <- gpm_all %>% select(Impact, Surprise_avg, Remind_avg, Casual_avg, ReservedForYou, AnyInteractive)
psych::scree(gpm_pca[,-1])

psych::pca(gpm_pca[,-1], nfactors = 2, rotate = "promax")
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## Surprise_avg 0.83 -0.08 0.70 0.30 1.0
## Remind_avg 0.14 0.88 0.78 0.22 1.0
## Casual_avg 0.76 0.51 0.75 0.25 1.7
## ReservedForYou -0.33 0.70 0.66 0.34 1.4
## AnyInteractive 0.74 -0.20 0.63 0.37 1.1
##
## RC1 RC2
## SS loadings 1.94 1.58
## Proportion Var 0.39 0.32
## Cumulative Var 0.39 0.70
## Proportion Explained 0.55 0.45
## Cumulative Proportion 0.55 1.00
##
## With component correlations of
## RC1 RC2
## RC1 1.00 -0.11
## RC2 -0.11 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.12
## with the empirical chi square 5.79 with prob < 0.016
##
## Fit based upon off diagonal values = 0.85
gpm_pca_results <- psych::pca(gpm_pca[,-1], nfactors = 2, rotate = "promax")
gpm_loadings <- cbind.data.frame(gpm_pca, gpm_pca_results$scores)
Analysis III: Regressing impact on PCA factors
gpm_loadings %$% lm(Impact ~ RC1 + RC2) %>% summary()
##
## Call:
## lm(formula = Impact ~ RC1 + RC2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0174499 -0.0045377 0.0002867 0.0041519 0.0128286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.021158 0.001835 11.532 3.65e-09 ***
## RC1 -0.004506 0.001896 -2.376 0.0303 *
## RC2 0.003952 0.001896 2.084 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007997 on 16 degrees of freedom
## Multiple R-squared: 0.4121, Adjusted R-squared: 0.3386
## F-statistic: 5.608 on 2 and 16 DF, p-value: 0.01427
gpm_loadings %$% lm(Impact ~ RC1 + RC2) %>% lm.beta::lm.beta()
##
## Call:
## lm(formula = Impact ~ RC1 + RC2)
##
## Standardized Coefficients::
## (Intercept) RC1 RC2
## 0.0000000 -0.4582601 0.4018713
ATTIC
new.fun <- function(x){
if(is.na(x)){
0
}else{
x
}
}
gpm %<>% mutate(across(S100_1.Pos:S63_2.Surprise, new.fun))
gpm %<>% mutate(
Pos = rowSums(across(ends_with("Pos")), na.rm = TRUE),
Neg = rowSums(across(ends_with("Neg")), na.rm = TRUE),
Remind = rowSums(across(ends_with("Remind")), na.rm = TRUE),
Casual = rowSums(across(ends_with("Casual")), na.rm = TRUE),
Surprise = rowSums(across(ends_with("Surprise")), na.rm = TRUE)
)