library(magrittr)
library(knitr)
library(kableExtra)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()    masks magrittr::extract()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x purrr::set_names()  masks magrittr::set_names()

Load in subjective attribute data

wal <- read.csv("Walmart/Subjective_Clean_Wmt.csv")

Cleaning and transforming subjective attribute data

wal %<>% rename(Positive.1 = Positive,
                Negative.1 = Negative,
                Casual.1 = Casual,
                Remind.1 = Remind,
                Surprise.1 = Surprise)
wal %<>% dplyr::filter(!grepl("Excluded", Excluded.observations))

wal %<>% dplyr::select(ResponseId, Stimulus, Positive.1:Surprise.22)

colnames(wal) <- gsub("\\.", "_", colnames(wal))

erase.zeros <- function(x){
  
  ifelse(x==0, NA, x)
  
}

wal %<>% mutate(across(Positive_1:Surprise_22, erase.zeros))

wal_long <- wal %>%
  pivot_longer(
    -c(ResponseId, Stimulus),
    names_to = c(".value", "study"),
    names_sep = "_",
    values_drop_na = TRUE
  )

wal_long_avg <- wal_long %>% 
  group_by(Stimulus) %>% 
  summarise(Pos_avg = mean(Positive, na.rm = TRUE),
            Neg_avg = mean(Negative, na.rm = TRUE),
            Remind_avg = mean(Remind, na.rm = TRUE),
            Casual_avg = mean(Casual, na.rm = TRUE),
            Surprise_avg = mean(Surprise, na.rm = TRUE)) %>% 
  ungroup()
## `summarise()` ungrouping output (override with `.groups` argument)
wal_long_avg[10, 1] <- "50_1"
wal_long_avg[11, 1] <- "50_2"
wal_long_avg[12, 1] <- "50_3"
wal_long_avg[13, 1] <- "50_4"

Load in objective attribute data and combine with subjective attribute data

#colnames(wal_o)
wal_o <- read.csv("Walmart/Objective_Efficacy_Wmt.csv") %>% rename(Stimulus = `ï..ID_Here`)

wal_all <- merge(wal_o, wal_long_avg, by = "Stimulus")

Analysis I: Correlations

q_all <- quos(Efficacy, 
              Remind_avg, Surprise_avg, Neg_avg, Pos_avg, Casual_avg,
              Interrogative_FullConvo, Total_Messages,  WaitingForYou, Imperative_FullConvo, AnyInteractive, FR_ReadingLevel_First, FR_ReadingEase_First, ExclamationMarks, Words_First, Is_Control)

wal_all %>% clnR2::table.corr(q_all, copy = FALSE)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
var Efficacy Remind_avg Surprise_avg Neg_avg Pos_avg Casual_avg Interrogative_FullConvo Total_Messages WaitingForYou Imperative_FullConvo AnyInteractive FR_ReadingLevel_First FR_ReadingEase_First ExclamationMarks Words_First
Remind_avg 0.637 **
Surprise_avg -0.500 * -0.285
Neg_avg -0.439 * -0.407 † 0.324 .
Pos_avg 0.329 . 0.512 * -0.080 -0.801 ***
Casual_avg 0.019 0.449 * 0.354 . -0.385 † 0.540 **
Interrogative_FullConvo -0.480 * -0.207 0.879 *** 0.429 * -0.224 0.364 †
Total_Messages 0.461 * 0.057 -0.370 † 0.074 -0.169 -0.537 ** -0.349 .
WaitingForYou 0.450 * 0.462 * -0.175 -0.074 0.003 0.084 0.000 0.166
Imperative_FullConvo -0.384 † -0.161 0.376 † 0.506 * -0.261 0.096 0.490 * 0.157 -0.015
AnyInteractive -0.253 -0.339 . 0.318 . 0.287 -0.227 -0.228 0.371 † 0.556 ** -0.123 0.674 ***
FR_ReadingLevel_First 0.250 0.303 0.054 -0.138 0.326 . 0.004 -0.032 -0.020 0.071 -0.221 -0.218
FR_ReadingEase_First -0.245 -0.422 † 0.145 0.065 -0.258 -0.002 0.115 0.153 -0.224 0.233 0.435 * -0.896 ***
ExclamationMarks 0.167 0.289 0.023 0.072 0.191 0.126 0.144 0.396 † 0.090 0.635 ** 0.516 * 0.130 -0.122
Words_First 0.078 -0.112 0.523 * -0.148 0.293 0.098 0.328 . 0.282 -0.157 0.141 0.538 ** 0.348 . 0.059 0.273
Is_Control -0.043 -0.031 -0.455 * -0.286 -0.129 0.085 -0.288 -0.285 0.090 -0.226 -0.295 -0.539 ** 0.327 . -0.375 † -0.658 ***

Analysis II: Principal Components Analysis

wal_pca <- wal_all %>% select(Efficacy, Surprise_avg, Remind_avg, Neg_avg, Pos_avg, Interrogative_FullConvo, Total_Messages,  WaitingForYou, Imperative_FullConvo, AnyInteractive, FR_ReadingLevel_First)

psych::scree(wal_pca[,-1])

psych::pca(wal_pca[,-1], nfactors = 4, use = "complete.obs")
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals, 
##     rotate = rotate, n.obs = n.obs, covar = covar, scores = scores, 
##     missing = missing, impute = impute, oblique.scores = oblique.scores, 
##     method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                           RC2   RC4   RC1   RC3   h2    u2 com
## Surprise_avg             0.93 -0.02  0.05 -0.16 0.90 0.101 1.1
## Remind_avg              -0.14  0.51 -0.07  0.70 0.78 0.218 1.9
## Neg_avg                  0.34 -0.83  0.15  0.03 0.83 0.171 1.4
## Pos_avg                 -0.04  0.96 -0.08  0.03 0.92 0.077 1.0
## Interrogative_FullConvo  0.94 -0.17  0.11  0.03 0.93 0.074 1.1
## Total_Messages          -0.50 -0.09  0.77  0.16 0.87 0.129 1.9
## WaitingForYou           -0.07 -0.07  0.02  0.90 0.81 0.189 1.0
## Imperative_FullConvo     0.48 -0.26  0.66  0.03 0.73 0.271 2.2
## AnyInteractive           0.25 -0.12  0.91 -0.20 0.95 0.050 1.3
## FR_ReadingLevel_First    0.13  0.43 -0.15  0.30 0.32 0.678 2.3
## 
##                        RC2  RC4  RC1  RC3
## SS loadings           2.45 2.17 1.93 1.49
## Proportion Var        0.25 0.22 0.19 0.15
## Cumulative Var        0.25 0.46 0.66 0.80
## Proportion Explained  0.31 0.27 0.24 0.18
## Cumulative Proportion 0.31 0.58 0.82 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  10.64  with prob <  0.47 
## 
## Fit based upon off diagonal values = 0.95
wal_pca_results <- psych::pca(wal_pca[,-1], nfactors = 4, rotate = "promax")

wal_loadings <- cbind.data.frame(wal_pca, wal_pca_results$scores)
q_pca <- quos(Efficacy, RC1, RC2, RC3, RC4)

wal_loadings %>% clnR2::table.corr(q_pca, copy = FALSE)
var Efficacy RC1 RC2 RC3
RC1 -0.073
RC2 -0.574 ** 0.143
RC3 0.631 ** -0.043 -0.180
RC4 0.459 * -0.389 † -0.253 0.297

Analysis III: Regressing impact on PCA factors

wal_loadings %$% lm(Efficacy ~ RC1 + RC2 + RC3 + RC4) %>% summary()
## 
## Call:
## lm(formula = Efficacy ~ RC1 + RC2 + RC3 + RC4)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0047168 -0.0029172  0.0007565  0.0026165  0.0058126 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0101500  0.0008068  12.580 4.87e-10 ***
## RC1          0.0006247  0.0009011   0.693  0.49754    
## RC2         -0.0025829  0.0008607  -3.001  0.00804 ** 
## RC3          0.0028356  0.0008740   3.245  0.00477 ** 
## RC4          0.0014401  0.0009558   1.507  0.15025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003784 on 17 degrees of freedom
## Multiple R-squared:  0.663,  Adjusted R-squared:  0.5837 
## F-statistic: 8.363 on 4 and 17 DF,  p-value: 0.0006403
wal_loadings %$% lm(Efficacy ~ RC1 + RC2 + RC3 + RC4) %>% lm.beta::lm.beta()
## 
## Call:
## lm(formula = Efficacy ~ RC1 + RC2 + RC3 + RC4)
## 
## Standardized Coefficients::
## (Intercept)         RC1         RC2         RC3         RC4 
##   0.0000000   0.1065004  -0.4403510   0.4834410   0.2455275