Data Cleaning
data <- read.csv("/Users/elaineyoung/Documents/projects/followup/study1/data.csv")
data <- data[-c(1, 2), ]
colnames(data) <- gsub("\\.", "", colnames(data))
colnames(data) <- gsub("_1", "", colnames(data))
colnames(data)[colnames(data) == "FL4_DO"] <- "condition"
data[, !(names(data) %in% c("condition"))] <-
lapply(data[, !(names(data) %in% c("condition"))], as.numeric)
names(data) <- make.names(names(data), unique = TRUE)
data <- data %>%
filter(Status == 0,
consent == 1,
attention == 2)
names(data)[names(data) == "s_awkard"] <- "s_awkward"
data$condition <- as.factor(data$condition)
Create Variables
vars <- c("appreciate", "reply", "appropriate", "thoughtful",
"awkward", "annoying", "valued", "positive")
for (v in vars) {
cols <- c(paste0("s_", v), paste0("r_", v))
if(all(cols %in% names(data))) {
data[[v]] <- rowMeans(data[, cols], na.rm = TRUE)
}
}
Regression (Individual Outcomes)
outcomes <- vars
models <- lapply(outcomes, function(v) {
lm(as.formula(paste(v, "~ condition")), data = data)
})
for (i in seq_along(outcomes)) {
cat("\n\n============================\n")
cat("Outcome:", outcomes[i], "\n")
cat("============================\n")
print(summary(models[[i]]))
}
##
##
## ============================
## Outcome: appreciate
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.575 -1.437 0.425 1.425 3.563
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5750 0.1131 40.447 < 2e-16 ***
## conditionsender -1.1378 0.1602 -7.104 5.64e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.6 on 397 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1128, Adjusted R-squared: 0.1106
## F-statistic: 50.47 on 1 and 397 DF, p-value: 5.644e-12
##
##
##
## ============================
## Outcome: reply
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3333 -0.7085 0.2915 1.2915 3.2915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.3333 0.1122 47.52 <2e-16 ***
## conditionsender -1.6248 0.1591 -10.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.591 on 398 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2076, Adjusted R-squared: 0.2056
## F-statistic: 104.3 on 1 and 398 DF, p-value: < 2.2e-16
##
##
##
## ============================
## Outcome: appropriate
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.55 -0.74 0.26 1.26 3.26
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5500 0.1019 44.667 < 2e-16 ***
## conditionsender -0.8100 0.1441 -5.623 3.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.441 on 398 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.07359, Adjusted R-squared: 0.07126
## F-statistic: 31.61 on 1 and 398 DF, p-value: 3.545e-08
##
##
##
## ============================
## Outcome: thoughtful
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8308 -0.8308 0.2010 1.1692 3.2010
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8308 0.1083 44.608 < 2e-16 ***
## conditionsender -1.0319 0.1535 -6.721 6.29e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.535 on 398 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1019, Adjusted R-squared: 0.09966
## F-statistic: 45.17 on 1 and 398 DF, p-value: 6.29e-11
##
##
##
## ============================
## Outcome: awkward
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9141 -1.3317 0.0859 1.0859 2.6683
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3317 0.1160 37.343 < 2e-16 ***
## conditionsender 0.5825 0.1643 3.546 0.000438 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.636 on 395 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.03086, Adjusted R-squared: 0.0284
## F-statistic: 12.58 on 1 and 395 DF, p-value: 0.0004376
##
##
##
## ============================
## Outcome: annoying
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.94444 -0.94444 0.05556 1.10000 3.10000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.90000 0.12229 31.892 <2e-16 ***
## conditionsender 0.04444 0.17338 0.256 0.798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.729 on 396 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.0001659, Adjusted R-squared: -0.002359
## F-statistic: 0.06571 on 1 and 396 DF, p-value: 0.7978
##
##
##
## ============================
## Outcome: valued
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8955 -0.8955 0.1045 1.1045 2.1045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8955 0.0915 53.504 < 2e-16 ***
## conditionsender -0.6493 0.1297 -5.005 8.4e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.297 on 398 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.05922, Adjusted R-squared: 0.05685
## F-statistic: 25.05 on 1 and 398 DF, p-value: 8.395e-07
##
##
##
## ============================
## Outcome: positive
## ============================
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6517 -0.8894 0.1106 1.1106 3.1106
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.65174 0.09793 47.50 < 2e-16 ***
## conditionsender -0.76229 0.13884 -5.49 7.16e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.388 on 398 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.07041, Adjusted R-squared: 0.06807
## F-statistic: 30.14 on 1 and 398 DF, p-value: 7.157e-08
Plot
long_data <- data %>%
select(condition, all_of(outcomes)) %>%
pivot_longer(cols = all_of(outcomes),
names_to = "outcome",
values_to = "value")
summary_data <- long_data %>%
group_by(condition, outcome) %>%
summarise(
mean = mean(value, na.rm = TRUE),
se = sd(value, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
summary_data$outcome <- factor(summary_data$outcome, levels = outcomes)
ggplot(summary_data, aes(x = outcome, y = mean, fill = condition)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position = position_dodge(width = 0.8),
width = 0.2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

EFA
efa_data <- data %>%
select(all_of(outcomes)) %>%
na.omit()
cor(efa_data)
## appreciate reply appropriate thoughtful awkward annoying
## appreciate 1.0000000 0.7070932 0.6528334 0.7586159 -0.38091414 0.17939020
## reply 0.7070932 1.0000000 0.5863903 0.6350890 -0.31975306 0.14537262
## appropriate 0.6528334 0.5863903 1.0000000 0.6880923 -0.30540042 0.07504290
## thoughtful 0.7586159 0.6350890 0.6880923 1.0000000 -0.36530687 0.17633043
## awkward -0.3809141 -0.3197531 -0.3054004 -0.3653069 1.00000000 0.09944816
## annoying 0.1793902 0.1453726 0.0750429 0.1763304 0.09944816 1.00000000
## valued 0.5970306 0.4940450 0.4893799 0.6551880 -0.26194619 0.16695120
## positive 0.7560018 0.6096866 0.6342138 0.7867451 -0.41437951 0.17567290
## valued positive
## appreciate 0.5970306 0.7560018
## reply 0.4940450 0.6096866
## appropriate 0.4893799 0.6342138
## thoughtful 0.6551880 0.7867451
## awkward -0.2619462 -0.4143795
## annoying 0.1669512 0.1756729
## valued 1.0000000 0.7066499
## positive 0.7066499 1.0000000
KMO(efa_data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = efa_data)
## Overall MSA = 0.9
## MSA for each item =
## appreciate reply appropriate thoughtful awkward annoying
## 0.90 0.92 0.93 0.91 0.89 0.71
## valued positive
## 0.91 0.88
cortest.bartlett(efa_data)
## $chisq
## [1] 1765.545
##
## $p.value
## [1] 0
##
## $df
## [1] 28
fa.parallel(efa_data, fa = "fa")

## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
efa_result <- fa(efa_data,
nfactors = 2,
rotate = "oblimin",
fm = "ml",
scores = "regression")
print(efa_result, cut = 0.3)
## Factor Analysis using method = ml
## Call: fa(r = efa_data, nfactors = 2, rotate = "oblimin", scores = "regression",
## fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## appreciate 0.87 0.787 0.21 1.1
## reply 0.74 0.638 0.36 1.3
## appropriate 0.74 0.568 0.43 1.1
## thoughtful 0.88 0.770 0.23 1.0
## awkward -0.43 0.184 0.82 1.0
## annoying 0.037 0.96 1.1
## valued 0.73 0.589 0.41 1.2
## positive 0.90 0.851 0.15 1.1
##
## ML1 ML2
## SS loadings 4.20 0.23
## Proportion Var 0.52 0.03
## Cumulative Var 0.52 0.55
## Proportion Explained 0.95 0.05
## Cumulative Proportion 0.95 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 0.02
## ML2 0.02 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 28 with the objective function = 4.54 with Chi Square = 1765.54
## df of the model are 13 and the objective function was 0.1
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 393 with the empirical chi square 34.68 with prob < 0.00095
## The total n.obs was 393 with Likelihood Chi Square = 39.7 with prob < 0.00015
##
## Tucker Lewis Index of factoring reliability = 0.967
## RMSEA index = 0.072 and the 90 % confidence intervals are 0.047 0.099
## BIC = -37.96
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.97 0.67
## Multiple R square of scores with factors 0.94 0.45
## Minimum correlation of possible factor scores 0.89 -0.11
print(efa_result$loadings, cutoff = 0.3)
##
## Loadings:
## ML1 ML2
## appreciate 0.871
## reply 0.742
## appropriate 0.739
## thoughtful 0.878
## awkward -0.430
## annoying
## valued 0.734
## positive 0.902
##
## ML1 ML2
## SS loadings 4.199 0.224
## Proportion Var 0.525 0.028
## Cumulative Var 0.525 0.553
fa.diagram(efa_result)

Factor Regression
data <- data %>%
drop_na(all_of(outcomes))
data$factor1 <- efa_result$scores[,1]
data$factor2 <- efa_result$scores[,2]
model_f1 <- lm(factor1 ~ condition, data = data)
model_f2 <- lm(factor2 ~ condition, data = data)
summary(model_f1)
##
## Call:
## lm(formula = factor1 ~ condition, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6111 -0.5397 0.0861 0.6512 1.9882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.34595 0.06492 5.329 1.67e-07 ***
## conditionsender -0.69014 0.09169 -7.527 3.63e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9089 on 391 degrees of freedom
## Multiple R-squared: 0.1265, Adjusted R-squared: 0.1243
## F-statistic: 56.65 on 1 and 391 DF, p-value: 3.626e-13
summary(model_f2)
##
## Call:
## lm(formula = factor2 ~ condition, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.96248 -0.40231 0.01062 0.36954 2.21583
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.15010 0.04655 3.224 0.00137 **
## conditionsender -0.29944 0.06575 -4.554 7.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6517 on 391 degrees of freedom
## Multiple R-squared: 0.05037, Adjusted R-squared: 0.04794
## F-statistic: 20.74 on 1 and 391 DF, p-value: 7.037e-06
Factor Plots
ggplot(data, aes(x = condition, y = factor1, fill = condition)) +
stat_summary(fun = mean, geom = "bar") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
theme_minimal()

ggplot(data, aes(x = condition, y = factor2, fill = condition)) +
stat_summary(fun = mean, geom = "bar") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
theme_minimal()
