library(magrittr)
library(knitr)
library(kableExtra)
library(tidyverse)

## -- Attaching packages --------------------------------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()    masks magrittr::extract()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x purrr::set_names()  masks magrittr::set_names()

Load in subjective attribute data

gpm <- read.csv("Geisinger-PennMed/Subjective_Clean_Geis.csv")

Cleaning and transforming subjective attribute data

colnames(gpm) <- gsub("_1", "_Pos", colnames(gpm))
colnames(gpm) <- gsub("_2", "_Neg", colnames(gpm))
colnames(gpm) <- gsub("_3", "_Remind", colnames(gpm))
colnames(gpm) <- gsub("_4", "_Casual", colnames(gpm))
colnames(gpm) <- gsub("_5", "_Surprise", colnames(gpm))

colnames(gpm) <- gsub("Q84", "S100.1", colnames(gpm))
colnames(gpm) <- gsub("Q80", "S100.2", colnames(gpm))
colnames(gpm) <- gsub("Q85", "S100.3", colnames(gpm))
colnames(gpm) <- gsub("Q86", "S4.1", colnames(gpm))
colnames(gpm) <- gsub("Q87", "S4.2", colnames(gpm))
colnames(gpm) <- gsub("Q88", "S4.3", colnames(gpm))
colnames(gpm) <- gsub("Q89", "S6.1", colnames(gpm))
colnames(gpm) <- gsub("Q90", "S6.2", colnames(gpm))
colnames(gpm) <- gsub("Q91", "S16.1", colnames(gpm))
colnames(gpm) <- gsub("Q92", "S16.2", colnames(gpm))
colnames(gpm) <- gsub("Q93", "S16.3", colnames(gpm))
colnames(gpm) <- gsub("Q94", "S27.1", colnames(gpm))
colnames(gpm) <- gsub("Q95", "S27.2", colnames(gpm))
colnames(gpm) <- gsub("Q96", "S43.1", colnames(gpm))
colnames(gpm) <- gsub("Q97", "S43.2", colnames(gpm))
colnames(gpm) <- gsub("Q102", "S50.1", colnames(gpm))
colnames(gpm) <- gsub("Q103", "S50.3", colnames(gpm))
colnames(gpm) <- gsub("Q104", "S50.2", colnames(gpm))
colnames(gpm) <- gsub("Q105", "S50.4", colnames(gpm))
colnames(gpm) <- gsub("Q106", "S63.1", colnames(gpm))
colnames(gpm) <- gsub("Q107", "S63.2", colnames(gpm))

gpm %<>% filter(!grepl("Excluded", Excluded.observations))

gpm %<>% select(ResponseId, S100.1_Pos:S63.2_Surprise)

gpm_long <- gpm %>%
  pivot_longer(
    !ResponseId,
    names_to = c("study",".value"),
    names_sep = "_",
    values_drop_na = TRUE
  )

gpm_long_avg <- gpm_long %>% 
  group_by(study) %>% 
  summarise(Pos_avg = mean(Pos, na.rm = TRUE),
            Neg_avg = mean(Neg, na.rm = TRUE),
            Remind_avg = mean(Remind, na.rm = TRUE),
            Casual_avg = mean(Casual, na.rm = TRUE),
            Surprise_avg = mean(Surprise, na.rm = TRUE)) %>% 
  ungroup()

## `summarise()` ungrouping output (override with `.groups` argument)

for(i in 2:6){
  gpm_long_avg[22,i] <- colMeans(gpm_long_avg[c(14, 16), i])
}
gpm_long_avg[22, 1] <- "S50_1_3"

for(i in 2:6){
  gpm_long_avg[23,i] <- colMeans(gpm_long_avg[c(15, 17), i])
}
gpm_long_avg[23, 1] <- "S50_2_4"

gpm_long_avg %<>% mutate(
  
  study2 = gsub("\\.", "_", study),
  study2 = gsub("S", "", study2)
)

gpm_long_avg <- gpm_long_avg[-c(14:17), ]

Load in objective attribute data and combine with subjective attribute data

#colnames(gpm_o)
gpm_o <- read.csv("Geisinger-PennMed/ObjectiveEfficacy_Geis.csv") %>% rename(study2 = `ï..ID..`)

gpm_o[15, 1] <- "50_2_4"

gpm_all <- merge(gpm_o, gpm_long_avg, by = "study2")

Analysis I: Correlations

q_all <- quos(Impact, 
              Surprise_avg, Remind_avg, Casual_avg, Pos_avg, Neg_avg, 
              ReservedForYou, AnyInteractive, Is_Control, Multimedia, ExclamationMarks, Imperative_FullConvo, FR_ReadingEase_First, Hours_PreAppt, Total_Messages, Words_First, Interrogative_FullConvo, FR_ReadingLevel_First)

gpm_all %>% clnR2::table.corr(q_all, copy = FALSE)

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

var	Impact	Surprise_avg	Remind_avg	Casual_avg	Pos_avg	Neg_avg	ReservedForYou	AnyInteractive	Is_Control	Multimedia	ExclamationMarks	Imperative_FullConvo	FR_ReadingEase_First	Hours_PreAppt	Total_Messages	Words_First	Interrogative_FullConvo
Surprise_avg	-0.373 .
Remind_avg	0.304	-0.010
Casual_avg	-0.300	0.413 †	0.343
Pos_avg	-0.172	-0.083	0.379 .	0.522 *
Neg_avg	-0.127	0.432 †	-0.558 *	-0.359 .	-0.850 ***
ReservedForYou	0.671 **	-0.338	0.406 †	0.005	0.320	-0.469 *
AnyInteractive	-0.318	0.493 *	-0.167	0.343	0.022	0.201	-0.268
Is_Control	0.249	-0.447 †	0.060	-0.192	0.067	-0.224	0.016	-0.655 **
Multimedia	-0.189	0.180	-0.288	0.174	0.383 .	-0.148	0.016	0.519 *	-0.218
ExclamationMarks	0.181	-0.216	0.048	0.218	-0.147	0.032	0.268	-0.095	-0.049	-0.519 *
Imperative_FullConvo	0.141	0.464 *	-0.311	-0.302	-0.797 ***	0.858 ***	-0.159	0.070	-0.249	-0.331	0.244
FR_ReadingEase_First	-0.127	-0.037	-0.128	0.249	0.426 †	-0.266	0.281	-0.148	-0.003	-0.085	0.258	-0.207
Hours_PreAppt	0.108	-0.087	-0.309	0.322	0.214	-0.100	0.133	0.184	0.165	0.542 *	0.010	-0.238	0.181
Total_Messages	0.065	0.452 †	-0.176	0.095	-0.533 *	0.608 **	-0.241	0.634 **	-0.486 *	-0.051	0.309	0.646 **	-0.135	0.045
Words_First	-0.063	0.177	0.037	-0.308	-0.210	0.241	0.125	-0.111	-0.203	-0.455 †	0.149	0.409 †	0.017	-0.697 ***	-0.062
Interrogative_FullConvo	0.050	0.413 †	-0.259	0.288	-0.395 †	0.508 *	-0.224	0.394 †	-0.258	0.019	0.319	0.524 *	0.085	0.412 †	0.828 ***	-0.418 †
FR_ReadingLevel_First	-0.019	0.286	-0.060	-0.022	-0.498 *	0.456 *	-0.350 .	0.319	-0.192	0.058	-0.044	0.428 †	-0.869 ***	-0.107	0.338	0.088	0.167

Analysis II: Principal Components Analysis

gpm_pca <- gpm_all %>% select(Impact, Surprise_avg, Remind_avg, Casual_avg, ReservedForYou, AnyInteractive)

psych::scree(gpm_pca[,-1])

psych::pca(gpm_pca[,-1], nfactors = 2, rotate = "promax")

## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals, 
##     rotate = rotate, n.obs = n.obs, covar = covar, scores = scores, 
##     missing = missing, impute = impute, oblique.scores = oblique.scores, 
##     method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  RC1   RC2   h2   u2 com
## Surprise_avg    0.83 -0.08 0.70 0.30 1.0
## Remind_avg      0.14  0.88 0.78 0.22 1.0
## Casual_avg      0.76  0.51 0.75 0.25 1.7
## ReservedForYou -0.33  0.70 0.66 0.34 1.4
## AnyInteractive  0.74 -0.20 0.63 0.37 1.1
## 
##                        RC1  RC2
## SS loadings           1.94 1.58
## Proportion Var        0.39 0.32
## Cumulative Var        0.39 0.70
## Proportion Explained  0.55 0.45
## Cumulative Proportion 0.55 1.00
## 
##  With component correlations of 
##       RC1   RC2
## RC1  1.00 -0.11
## RC2 -0.11  1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.12 
##  with the empirical chi square  5.79  with prob <  0.016 
## 
## Fit based upon off diagonal values = 0.85

gpm_pca_results <- psych::pca(gpm_pca[,-1], nfactors = 2, rotate = "promax")

gpm_loadings <- cbind.data.frame(gpm_pca, gpm_pca_results$scores)

Analysis III: Regressing impact on PCA factors

gpm_loadings %$% lm(Impact ~ RC1 + RC2) %>% summary()

## 
## Call:
## lm(formula = Impact ~ RC1 + RC2)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0174499 -0.0045377  0.0002867  0.0041519  0.0128286 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.021158   0.001835  11.532 3.65e-09 ***
## RC1         -0.004506   0.001896  -2.376   0.0303 *  
## RC2          0.003952   0.001896   2.084   0.0536 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007997 on 16 degrees of freedom
## Multiple R-squared:  0.4121, Adjusted R-squared:  0.3386 
## F-statistic: 5.608 on 2 and 16 DF,  p-value: 0.01427

gpm_loadings %$% lm(Impact ~ RC1 + RC2) %>% lm.beta::lm.beta()

## 
## Call:
## lm(formula = Impact ~ RC1 + RC2)
## 
## Standardized Coefficients::
## (Intercept)         RC1         RC2 
##   0.0000000  -0.4582601   0.4018713

ATTIC

new.fun <- function(x){
  
  if(is.na(x)){
    0
  }else{
    
    x
  }
}



gpm %<>% mutate(across(S100_1.Pos:S63_2.Surprise, new.fun))

gpm %<>% mutate(
  
  Pos = rowSums(across(ends_with("Pos")), na.rm = TRUE),
  Neg = rowSums(across(ends_with("Neg")), na.rm = TRUE),
  Remind = rowSums(across(ends_with("Remind")), na.rm = TRUE),
  Casual = rowSums(across(ends_with("Casual")), na.rm = TRUE),
  Surprise = rowSums(across(ends_with("Surprise")), na.rm = TRUE)
)

replicating_BCFG analysis

Chayce Baldwin

2/8/2021

Load in subjective attribute data

Cleaning and transforming subjective attribute data

Load in objective attribute data and combine with subjective attribute data

Analysis I: Correlations

Analysis II: Principal Components Analysis

Analysis III: Regressing impact on PCA factors

ATTIC