library(magrittr)
library(knitr)
library(kableExtra)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()    masks magrittr::extract()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x purrr::set_names()  masks magrittr::set_names()

Load in subjective attribute data

gpm <- read.csv("Geisinger-PennMed/Subjective_Clean_Geis.csv")

Cleaning and transforming subjective attribute data

colnames(gpm) <- gsub("_1", "_Pos", colnames(gpm))
colnames(gpm) <- gsub("_2", "_Neg", colnames(gpm))
colnames(gpm) <- gsub("_3", "_Remind", colnames(gpm))
colnames(gpm) <- gsub("_4", "_Casual", colnames(gpm))
colnames(gpm) <- gsub("_5", "_Surprise", colnames(gpm))

colnames(gpm) <- gsub("Q84", "S100.1", colnames(gpm))
colnames(gpm) <- gsub("Q80", "S100.2", colnames(gpm))
colnames(gpm) <- gsub("Q85", "S100.3", colnames(gpm))
colnames(gpm) <- gsub("Q86", "S4.1", colnames(gpm))
colnames(gpm) <- gsub("Q87", "S4.2", colnames(gpm))
colnames(gpm) <- gsub("Q88", "S4.3", colnames(gpm))
colnames(gpm) <- gsub("Q89", "S6.1", colnames(gpm))
colnames(gpm) <- gsub("Q90", "S6.2", colnames(gpm))
colnames(gpm) <- gsub("Q91", "S16.1", colnames(gpm))
colnames(gpm) <- gsub("Q92", "S16.2", colnames(gpm))
colnames(gpm) <- gsub("Q93", "S16.3", colnames(gpm))
colnames(gpm) <- gsub("Q94", "S27.1", colnames(gpm))
colnames(gpm) <- gsub("Q95", "S27.2", colnames(gpm))
colnames(gpm) <- gsub("Q96", "S43.1", colnames(gpm))
colnames(gpm) <- gsub("Q97", "S43.2", colnames(gpm))
colnames(gpm) <- gsub("Q102", "S50.1", colnames(gpm))
colnames(gpm) <- gsub("Q103", "S50.3", colnames(gpm))
colnames(gpm) <- gsub("Q104", "S50.2", colnames(gpm))
colnames(gpm) <- gsub("Q105", "S50.4", colnames(gpm))
colnames(gpm) <- gsub("Q106", "S63.1", colnames(gpm))
colnames(gpm) <- gsub("Q107", "S63.2", colnames(gpm))
gpm %<>% filter(!grepl("Excluded", Excluded.observations))

gpm %<>% select(ResponseId, S100.1_Pos:S63.2_Surprise)

gpm_long <- gpm %>%
  pivot_longer(
    !ResponseId,
    names_to = c("study",".value"),
    names_sep = "_",
    values_drop_na = TRUE
  )

gpm_long_avg <- gpm_long %>% 
  group_by(study) %>% 
  summarise(Pos_avg = mean(Pos, na.rm = TRUE),
            Neg_avg = mean(Neg, na.rm = TRUE),
            Remind_avg = mean(Remind, na.rm = TRUE),
            Casual_avg = mean(Casual, na.rm = TRUE),
            Surprise_avg = mean(Surprise, na.rm = TRUE)) %>% 
  ungroup()
## `summarise()` ungrouping output (override with `.groups` argument)
for(i in 2:6){
  gpm_long_avg[22,i] <- colMeans(gpm_long_avg[c(14, 16), i])
}
gpm_long_avg[22, 1] <- "S50_1_3"

for(i in 2:6){
  gpm_long_avg[23,i] <- colMeans(gpm_long_avg[c(15, 17), i])
}
gpm_long_avg[23, 1] <- "S50_2_4"

gpm_long_avg %<>% mutate(
  
  study2 = gsub("\\.", "_", study),
  study2 = gsub("S", "", study2)
)

gpm_long_avg <- gpm_long_avg[-c(14:17), ]

Load in objective attribute data and combine with subjective attribute data

#colnames(gpm_o)
gpm_o <- read.csv("Geisinger-PennMed/ObjectiveEfficacy_Geis.csv") %>% rename(study2 = `ï..ID..`)

gpm_o[15, 1] <- "50_2_4"

gpm_all <- merge(gpm_o, gpm_long_avg, by = "study2")

Analysis I: Correlations

q_all <- quos(Impact, 
              Surprise_avg, Remind_avg, Casual_avg, Pos_avg, Neg_avg, 
              ReservedForYou, AnyInteractive, Is_Control, Multimedia, ExclamationMarks, Imperative_FullConvo, FR_ReadingEase_First, Hours_PreAppt, Total_Messages, Words_First, Interrogative_FullConvo, FR_ReadingLevel_First)

gpm_all %>% clnR2::table.corr(q_all, copy = FALSE)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
var Impact Surprise_avg Remind_avg Casual_avg Pos_avg Neg_avg ReservedForYou AnyInteractive Is_Control Multimedia ExclamationMarks Imperative_FullConvo FR_ReadingEase_First Hours_PreAppt Total_Messages Words_First Interrogative_FullConvo
Surprise_avg -0.373 .
Remind_avg 0.304 -0.010
Casual_avg -0.300 0.413 † 0.343
Pos_avg -0.172 -0.083 0.379 . 0.522 *
Neg_avg -0.127 0.432 † -0.558 * -0.359 . -0.850 ***
ReservedForYou 0.671 ** -0.338 0.406 † 0.005 0.320 -0.469 *
AnyInteractive -0.318 0.493 * -0.167 0.343 0.022 0.201 -0.268
Is_Control 0.249 -0.447 † 0.060 -0.192 0.067 -0.224 0.016 -0.655 **
Multimedia -0.189 0.180 -0.288 0.174 0.383 . -0.148 0.016 0.519 * -0.218
ExclamationMarks 0.181 -0.216 0.048 0.218 -0.147 0.032 0.268 -0.095 -0.049 -0.519 *
Imperative_FullConvo 0.141 0.464 * -0.311 -0.302 -0.797 *** 0.858 *** -0.159 0.070 -0.249 -0.331 0.244
FR_ReadingEase_First -0.127 -0.037 -0.128 0.249 0.426 † -0.266 0.281 -0.148 -0.003 -0.085 0.258 -0.207
Hours_PreAppt 0.108 -0.087 -0.309 0.322 0.214 -0.100 0.133 0.184 0.165 0.542 * 0.010 -0.238 0.181
Total_Messages 0.065 0.452 † -0.176 0.095 -0.533 * 0.608 ** -0.241 0.634 ** -0.486 * -0.051 0.309 0.646 ** -0.135 0.045
Words_First -0.063 0.177 0.037 -0.308 -0.210 0.241 0.125 -0.111 -0.203 -0.455 † 0.149 0.409 † 0.017 -0.697 *** -0.062
Interrogative_FullConvo 0.050 0.413 † -0.259 0.288 -0.395 † 0.508 * -0.224 0.394 † -0.258 0.019 0.319 0.524 * 0.085 0.412 † 0.828 *** -0.418 †
FR_ReadingLevel_First -0.019 0.286 -0.060 -0.022 -0.498 * 0.456 * -0.350 . 0.319 -0.192 0.058 -0.044 0.428 † -0.869 *** -0.107 0.338 0.088 0.167

Analysis II: Principal Components Analysis

gpm_pca <- gpm_all %>% select(Impact, Surprise_avg, Remind_avg, Casual_avg, ReservedForYou, AnyInteractive)

psych::scree(gpm_pca[,-1])

psych::pca(gpm_pca[,-1], nfactors = 2, rotate = "promax")
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals, 
##     rotate = rotate, n.obs = n.obs, covar = covar, scores = scores, 
##     missing = missing, impute = impute, oblique.scores = oblique.scores, 
##     method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  RC1   RC2   h2   u2 com
## Surprise_avg    0.83 -0.08 0.70 0.30 1.0
## Remind_avg      0.14  0.88 0.78 0.22 1.0
## Casual_avg      0.76  0.51 0.75 0.25 1.7
## ReservedForYou -0.33  0.70 0.66 0.34 1.4
## AnyInteractive  0.74 -0.20 0.63 0.37 1.1
## 
##                        RC1  RC2
## SS loadings           1.94 1.58
## Proportion Var        0.39 0.32
## Cumulative Var        0.39 0.70
## Proportion Explained  0.55 0.45
## Cumulative Proportion 0.55 1.00
## 
##  With component correlations of 
##       RC1   RC2
## RC1  1.00 -0.11
## RC2 -0.11  1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.12 
##  with the empirical chi square  5.79  with prob <  0.016 
## 
## Fit based upon off diagonal values = 0.85
gpm_pca_results <- psych::pca(gpm_pca[,-1], nfactors = 2, rotate = "promax")

gpm_loadings <- cbind.data.frame(gpm_pca, gpm_pca_results$scores)

Analysis III: Regressing impact on PCA factors

gpm_loadings %$% lm(Impact ~ RC1 + RC2) %>% summary()
## 
## Call:
## lm(formula = Impact ~ RC1 + RC2)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0174499 -0.0045377  0.0002867  0.0041519  0.0128286 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.021158   0.001835  11.532 3.65e-09 ***
## RC1         -0.004506   0.001896  -2.376   0.0303 *  
## RC2          0.003952   0.001896   2.084   0.0536 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007997 on 16 degrees of freedom
## Multiple R-squared:  0.4121, Adjusted R-squared:  0.3386 
## F-statistic: 5.608 on 2 and 16 DF,  p-value: 0.01427
gpm_loadings %$% lm(Impact ~ RC1 + RC2) %>% lm.beta::lm.beta()
## 
## Call:
## lm(formula = Impact ~ RC1 + RC2)
## 
## Standardized Coefficients::
## (Intercept)         RC1         RC2 
##   0.0000000  -0.4582601   0.4018713

ATTIC

new.fun <- function(x){
  
  if(is.na(x)){
    0
  }else{
    
    x
  }
}



gpm %<>% mutate(across(S100_1.Pos:S63_2.Surprise, new.fun))

gpm %<>% mutate(
  
  Pos = rowSums(across(ends_with("Pos")), na.rm = TRUE),
  Neg = rowSums(across(ends_with("Neg")), na.rm = TRUE),
  Remind = rowSums(across(ends_with("Remind")), na.rm = TRUE),
  Casual = rowSums(across(ends_with("Casual")), na.rm = TRUE),
  Surprise = rowSums(across(ends_with("Surprise")), na.rm = TRUE)
)