library(magrittr)
library(knitr)
library(kableExtra)
library(tidyverse)

## -- Attaching packages --------------------------------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()    masks magrittr::extract()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x purrr::set_names()  masks magrittr::set_names()

Load in subjective attribute data

wal <- read.csv("Walmart/Subjective_Clean_Wmt.csv")

Cleaning and transforming subjective attribute data

wal %<>% rename(Positive.1 = Positive,
                Negative.1 = Negative,
                Casual.1 = Casual,
                Remind.1 = Remind,
                Surprise.1 = Surprise)

wal %<>% dplyr::filter(!grepl("Excluded", Excluded.observations))

wal %<>% dplyr::select(ResponseId, Stimulus, Positive.1:Surprise.22)

colnames(wal) <- gsub("\\.", "_", colnames(wal))

erase.zeros <- function(x){
  
  ifelse(x==0, NA, x)
  
}

wal %<>% mutate(across(Positive_1:Surprise_22, erase.zeros))

wal_long <- wal %>%
  pivot_longer(
    -c(ResponseId, Stimulus),
    names_to = c(".value", "study"),
    names_sep = "_",
    values_drop_na = TRUE
  )

wal_long_avg <- wal_long %>% 
  group_by(Stimulus) %>% 
  summarise(Pos_avg = mean(Positive, na.rm = TRUE),
            Neg_avg = mean(Negative, na.rm = TRUE),
            Remind_avg = mean(Remind, na.rm = TRUE),
            Casual_avg = mean(Casual, na.rm = TRUE),
            Surprise_avg = mean(Surprise, na.rm = TRUE)) %>% 
  ungroup()

## `summarise()` ungrouping output (override with `.groups` argument)

wal_long_avg[10, 1] <- "50_1"
wal_long_avg[11, 1] <- "50_2"
wal_long_avg[12, 1] <- "50_3"
wal_long_avg[13, 1] <- "50_4"

Load in objective attribute data and combine with subjective attribute data

#colnames(wal_o)
wal_o <- read.csv("Walmart/Objective_Efficacy_Wmt.csv") %>% rename(Stimulus = `ï..ID_Here`)

wal_all <- merge(wal_o, wal_long_avg, by = "Stimulus")

Analysis I: Correlations

q_all <- quos(Efficacy, 
              Remind_avg, Surprise_avg, Neg_avg, Pos_avg, Casual_avg,
              Interrogative_FullConvo, Total_Messages,  WaitingForYou, Imperative_FullConvo, AnyInteractive, FR_ReadingLevel_First, FR_ReadingEase_First, ExclamationMarks, Words_First, Is_Control)

wal_all %>% clnR2::table.corr(q_all, copy = FALSE)

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

var	Efficacy	Remind_avg	Surprise_avg	Neg_avg	Pos_avg	Casual_avg	Interrogative_FullConvo	Total_Messages	WaitingForYou	Imperative_FullConvo	AnyInteractive	FR_ReadingLevel_First	FR_ReadingEase_First	ExclamationMarks	Words_First
Remind_avg	0.637 **
Surprise_avg	-0.500 *	-0.285
Neg_avg	-0.439 *	-0.407 †	0.324 .
Pos_avg	0.329 .	0.512 *	-0.080	-0.801 ***
Casual_avg	0.019	0.449 *	0.354 .	-0.385 †	0.540 **
Interrogative_FullConvo	-0.480 *	-0.207	0.879 ***	0.429 *	-0.224	0.364 †
Total_Messages	0.461 *	0.057	-0.370 †	0.074	-0.169	-0.537 **	-0.349 .
WaitingForYou	0.450 *	0.462 *	-0.175	-0.074	0.003	0.084	0.000	0.166
Imperative_FullConvo	-0.384 †	-0.161	0.376 †	0.506 *	-0.261	0.096	0.490 *	0.157	-0.015
AnyInteractive	-0.253	-0.339 .	0.318 .	0.287	-0.227	-0.228	0.371 †	0.556 **	-0.123	0.674 ***
FR_ReadingLevel_First	0.250	0.303	0.054	-0.138	0.326 .	0.004	-0.032	-0.020	0.071	-0.221	-0.218
FR_ReadingEase_First	-0.245	-0.422 †	0.145	0.065	-0.258	-0.002	0.115	0.153	-0.224	0.233	0.435 *	-0.896 ***
ExclamationMarks	0.167	0.289	0.023	0.072	0.191	0.126	0.144	0.396 †	0.090	0.635 **	0.516 *	0.130	-0.122
Words_First	0.078	-0.112	0.523 *	-0.148	0.293	0.098	0.328 .	0.282	-0.157	0.141	0.538 **	0.348 .	0.059	0.273
Is_Control	-0.043	-0.031	-0.455 *	-0.286	-0.129	0.085	-0.288	-0.285	0.090	-0.226	-0.295	-0.539 **	0.327 .	-0.375 †	-0.658 ***

Analysis II: Principal Components Analysis

wal_pca <- wal_all %>% select(Efficacy, Surprise_avg, Remind_avg, Neg_avg, Pos_avg, Interrogative_FullConvo, Total_Messages,  WaitingForYou, Imperative_FullConvo, AnyInteractive, FR_ReadingLevel_First)

psych::scree(wal_pca[,-1])

psych::pca(wal_pca[,-1], nfactors = 4, use = "complete.obs")

## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals, 
##     rotate = rotate, n.obs = n.obs, covar = covar, scores = scores, 
##     missing = missing, impute = impute, oblique.scores = oblique.scores, 
##     method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                           RC2   RC4   RC1   RC3   h2    u2 com
## Surprise_avg             0.93 -0.02  0.05 -0.16 0.90 0.101 1.1
## Remind_avg              -0.14  0.51 -0.07  0.70 0.78 0.218 1.9
## Neg_avg                  0.34 -0.83  0.15  0.03 0.83 0.171 1.4
## Pos_avg                 -0.04  0.96 -0.08  0.03 0.92 0.077 1.0
## Interrogative_FullConvo  0.94 -0.17  0.11  0.03 0.93 0.074 1.1
## Total_Messages          -0.50 -0.09  0.77  0.16 0.87 0.129 1.9
## WaitingForYou           -0.07 -0.07  0.02  0.90 0.81 0.189 1.0
## Imperative_FullConvo     0.48 -0.26  0.66  0.03 0.73 0.271 2.2
## AnyInteractive           0.25 -0.12  0.91 -0.20 0.95 0.050 1.3
## FR_ReadingLevel_First    0.13  0.43 -0.15  0.30 0.32 0.678 2.3
## 
##                        RC2  RC4  RC1  RC3
## SS loadings           2.45 2.17 1.93 1.49
## Proportion Var        0.25 0.22 0.19 0.15
## Cumulative Var        0.25 0.46 0.66 0.80
## Proportion Explained  0.31 0.27 0.24 0.18
## Cumulative Proportion 0.31 0.58 0.82 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  10.64  with prob <  0.47 
## 
## Fit based upon off diagonal values = 0.95

wal_pca_results <- psych::pca(wal_pca[,-1], nfactors = 4, rotate = "promax")

wal_loadings <- cbind.data.frame(wal_pca, wal_pca_results$scores)

q_pca <- quos(Efficacy, RC1, RC2, RC3, RC4)

wal_loadings %>% clnR2::table.corr(q_pca, copy = FALSE)

var	Efficacy	RC1	RC2	RC3
RC1	-0.073
RC2	-0.574 **	0.143
RC3	0.631 **	-0.043	-0.180
RC4	0.459 *	-0.389 †	-0.253	0.297

Analysis III: Regressing impact on PCA factors

wal_loadings %$% lm(Efficacy ~ RC1 + RC2 + RC3 + RC4) %>% summary()

## 
## Call:
## lm(formula = Efficacy ~ RC1 + RC2 + RC3 + RC4)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0047168 -0.0029172  0.0007565  0.0026165  0.0058126 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0101500  0.0008068  12.580 4.87e-10 ***
## RC1          0.0006247  0.0009011   0.693  0.49754    
## RC2         -0.0025829  0.0008607  -3.001  0.00804 ** 
## RC3          0.0028356  0.0008740   3.245  0.00477 ** 
## RC4          0.0014401  0.0009558   1.507  0.15025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003784 on 17 degrees of freedom
## Multiple R-squared:  0.663,  Adjusted R-squared:  0.5837 
## F-statistic: 8.363 on 4 and 17 DF,  p-value: 0.0006403

wal_loadings %$% lm(Efficacy ~ RC1 + RC2 + RC3 + RC4) %>% lm.beta::lm.beta()

## 
## Call:
## lm(formula = Efficacy ~ RC1 + RC2 + RC3 + RC4)
## 
## Standardized Coefficients::
## (Intercept)         RC1         RC2         RC3         RC4 
##   0.0000000   0.1065004  -0.4403510   0.4834410   0.2455275

replicating_walmart analysis

Chayce Baldwin

2/9/2021

Load in subjective attribute data

Cleaning and transforming subjective attribute data

Load in objective attribute data and combine with subjective attribute data

Analysis I: Correlations

Analysis II: Principal Components Analysis

Analysis III: Regressing impact on PCA factors