Data Cleaning

data <- read.csv("/Users/elaineyoung/Documents/projects/followup/study1/data.csv")

data <- data[-c(1, 2), ]

colnames(data) <- gsub("\\.", "", colnames(data))
colnames(data) <- gsub("_1", "", colnames(data))
colnames(data)[colnames(data) == "FL4_DO"] <- "condition"

data[, !(names(data) %in% c("condition"))] <- 
  lapply(data[, !(names(data) %in% c("condition"))], as.numeric)

names(data) <- make.names(names(data), unique = TRUE)

data <- data %>%
  filter(Status == 0,
         consent == 1,
         attention == 2)

names(data)[names(data) == "s_awkard"] <- "s_awkward"

data$condition <- as.factor(data$condition)

Create Variables

vars <- c("appreciate", "reply", "appropriate", "thoughtful", 
          "awkward", "annoying", "valued", "positive")

for (v in vars) {
  cols <- c(paste0("s_", v), paste0("r_", v))
  
  if(all(cols %in% names(data))) {
    data[[v]] <- rowMeans(data[, cols], na.rm = TRUE)
  }
}

Regression (Individual Outcomes)

outcomes <- vars

models <- lapply(outcomes, function(v) {
  lm(as.formula(paste(v, "~ condition")), data = data)
})

for (i in seq_along(outcomes)) {
  cat("\n\n============================\n")
  cat("Outcome:", outcomes[i], "\n")
  cat("============================\n")
  print(summary(models[[i]]))
}
## 
## 
## ============================
## Outcome: appreciate 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.575 -1.437  0.425  1.425  3.563 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.5750     0.1131  40.447  < 2e-16 ***
## conditionsender  -1.1378     0.1602  -7.104 5.64e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.6 on 397 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1128, Adjusted R-squared:  0.1106 
## F-statistic: 50.47 on 1 and 397 DF,  p-value: 5.644e-12
## 
## 
## 
## ============================
## Outcome: reply 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3333 -0.7085  0.2915  1.2915  3.2915 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.3333     0.1122   47.52   <2e-16 ***
## conditionsender  -1.6248     0.1591  -10.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.591 on 398 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2076, Adjusted R-squared:  0.2056 
## F-statistic: 104.3 on 1 and 398 DF,  p-value: < 2.2e-16
## 
## 
## 
## ============================
## Outcome: appropriate 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -3.55  -0.74   0.26   1.26   3.26 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.5500     0.1019  44.667  < 2e-16 ***
## conditionsender  -0.8100     0.1441  -5.623 3.55e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.441 on 398 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07359,    Adjusted R-squared:  0.07126 
## F-statistic: 31.61 on 1 and 398 DF,  p-value: 3.545e-08
## 
## 
## 
## ============================
## Outcome: thoughtful 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8308 -0.8308  0.2010  1.1692  3.2010 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.8308     0.1083  44.608  < 2e-16 ***
## conditionsender  -1.0319     0.1535  -6.721 6.29e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.535 on 398 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1019, Adjusted R-squared:  0.09966 
## F-statistic: 45.17 on 1 and 398 DF,  p-value: 6.29e-11
## 
## 
## 
## ============================
## Outcome: awkward 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9141 -1.3317  0.0859  1.0859  2.6683 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.3317     0.1160  37.343  < 2e-16 ***
## conditionsender   0.5825     0.1643   3.546 0.000438 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.636 on 395 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.03086,    Adjusted R-squared:  0.0284 
## F-statistic: 12.58 on 1 and 395 DF,  p-value: 0.0004376
## 
## 
## 
## ============================
## Outcome: annoying 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.94444 -0.94444  0.05556  1.10000  3.10000 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.90000    0.12229  31.892   <2e-16 ***
## conditionsender  0.04444    0.17338   0.256    0.798    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.729 on 396 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.0001659,  Adjusted R-squared:  -0.002359 
## F-statistic: 0.06571 on 1 and 396 DF,  p-value: 0.7978
## 
## 
## 
## ============================
## Outcome: valued 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8955 -0.8955  0.1045  1.1045  2.1045 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.8955     0.0915  53.504  < 2e-16 ***
## conditionsender  -0.6493     0.1297  -5.005  8.4e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.297 on 398 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.05922,    Adjusted R-squared:  0.05685 
## F-statistic: 25.05 on 1 and 398 DF,  p-value: 8.395e-07
## 
## 
## 
## ============================
## Outcome: positive 
## ============================
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6517 -0.8894  0.1106  1.1106  3.1106 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      4.65174    0.09793   47.50  < 2e-16 ***
## conditionsender -0.76229    0.13884   -5.49 7.16e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.388 on 398 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07041,    Adjusted R-squared:  0.06807 
## F-statistic: 30.14 on 1 and 398 DF,  p-value: 7.157e-08

Plot

long_data <- data %>%
  select(condition, all_of(outcomes)) %>%
  pivot_longer(cols = all_of(outcomes),
               names_to = "outcome",
               values_to = "value")

summary_data <- long_data %>%
  group_by(condition, outcome) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    se = sd(value, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

summary_data$outcome <- factor(summary_data$outcome, levels = outcomes)

ggplot(summary_data, aes(x = outcome, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
                position = position_dodge(width = 0.8),
                width = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

EFA

efa_data <- data %>%
  select(all_of(outcomes)) %>%
  na.omit()

cor(efa_data)
##             appreciate      reply appropriate thoughtful     awkward   annoying
## appreciate   1.0000000  0.7070932   0.6528334  0.7586159 -0.38091414 0.17939020
## reply        0.7070932  1.0000000   0.5863903  0.6350890 -0.31975306 0.14537262
## appropriate  0.6528334  0.5863903   1.0000000  0.6880923 -0.30540042 0.07504290
## thoughtful   0.7586159  0.6350890   0.6880923  1.0000000 -0.36530687 0.17633043
## awkward     -0.3809141 -0.3197531  -0.3054004 -0.3653069  1.00000000 0.09944816
## annoying     0.1793902  0.1453726   0.0750429  0.1763304  0.09944816 1.00000000
## valued       0.5970306  0.4940450   0.4893799  0.6551880 -0.26194619 0.16695120
## positive     0.7560018  0.6096866   0.6342138  0.7867451 -0.41437951 0.17567290
##                 valued   positive
## appreciate   0.5970306  0.7560018
## reply        0.4940450  0.6096866
## appropriate  0.4893799  0.6342138
## thoughtful   0.6551880  0.7867451
## awkward     -0.2619462 -0.4143795
## annoying     0.1669512  0.1756729
## valued       1.0000000  0.7066499
## positive     0.7066499  1.0000000
KMO(efa_data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = efa_data)
## Overall MSA =  0.9
## MSA for each item = 
##  appreciate       reply appropriate  thoughtful     awkward    annoying 
##        0.90        0.92        0.93        0.91        0.89        0.71 
##      valued    positive 
##        0.91        0.88
cortest.bartlett(efa_data)
## $chisq
## [1] 1765.545
## 
## $p.value
## [1] 0
## 
## $df
## [1] 28
fa.parallel(efa_data, fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
efa_result <- fa(efa_data,
                 nfactors = 2,
                 rotate = "oblimin",
                 fm = "ml",
                 scores = "regression")

print(efa_result, cut = 0.3)
## Factor Analysis using method =  ml
## Call: fa(r = efa_data, nfactors = 2, rotate = "oblimin", scores = "regression", 
##     fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##               ML1   ML2    h2   u2 com
## appreciate   0.87       0.787 0.21 1.1
## reply        0.74       0.638 0.36 1.3
## appropriate  0.74       0.568 0.43 1.1
## thoughtful   0.88       0.770 0.23 1.0
## awkward     -0.43       0.184 0.82 1.0
## annoying                0.037 0.96 1.1
## valued       0.73       0.589 0.41 1.2
## positive     0.90       0.851 0.15 1.1
## 
##                        ML1  ML2
## SS loadings           4.20 0.23
## Proportion Var        0.52 0.03
## Cumulative Var        0.52 0.55
## Proportion Explained  0.95 0.05
## Cumulative Proportion 0.95 1.00
## 
##  With factor correlations of 
##      ML1  ML2
## ML1 1.00 0.02
## ML2 0.02 1.00
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  28  with the objective function =  4.54 with Chi Square =  1765.54
## df of  the model are 13  and the objective function was  0.1 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic n.obs is  393 with the empirical chi square  34.68  with prob <  0.00095 
## The total n.obs was  393  with Likelihood Chi Square =  39.7  with prob <  0.00015 
## 
## Tucker Lewis Index of factoring reliability =  0.967
## RMSEA index =  0.072  and the 90 % confidence intervals are  0.047 0.099
## BIC =  -37.96
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1   ML2
## Correlation of (regression) scores with factors   0.97  0.67
## Multiple R square of scores with factors          0.94  0.45
## Minimum correlation of possible factor scores     0.89 -0.11
print(efa_result$loadings, cutoff = 0.3)
## 
## Loadings:
##             ML1    ML2   
## appreciate   0.871       
## reply        0.742       
## appropriate  0.739       
## thoughtful   0.878       
## awkward     -0.430       
## annoying                 
## valued       0.734       
## positive     0.902       
## 
##                  ML1   ML2
## SS loadings    4.199 0.224
## Proportion Var 0.525 0.028
## Cumulative Var 0.525 0.553
fa.diagram(efa_result)

Factor Regression

data <- data %>%
  drop_na(all_of(outcomes))


data$factor1 <- efa_result$scores[,1]
data$factor2 <- efa_result$scores[,2]


model_f1 <- lm(factor1 ~ condition, data = data)
model_f2 <- lm(factor2 ~ condition, data = data)

summary(model_f1)
## 
## Call:
## lm(formula = factor1 ~ condition, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6111 -0.5397  0.0861  0.6512  1.9882 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.34595    0.06492   5.329 1.67e-07 ***
## conditionsender -0.69014    0.09169  -7.527 3.63e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9089 on 391 degrees of freedom
## Multiple R-squared:  0.1265, Adjusted R-squared:  0.1243 
## F-statistic: 56.65 on 1 and 391 DF,  p-value: 3.626e-13
summary(model_f2)
## 
## Call:
## lm(formula = factor2 ~ condition, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.96248 -0.40231  0.01062  0.36954  2.21583 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.15010    0.04655   3.224  0.00137 ** 
## conditionsender -0.29944    0.06575  -4.554 7.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6517 on 391 degrees of freedom
## Multiple R-squared:  0.05037,    Adjusted R-squared:  0.04794 
## F-statistic: 20.74 on 1 and 391 DF,  p-value: 7.037e-06

Factor Plots

ggplot(data, aes(x = condition, y = factor1, fill = condition)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  theme_minimal()

ggplot(data, aes(x = condition, y = factor2, fill = condition)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  theme_minimal()