R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# =========================
# Load and clean data
# =========================
data <- read.csv("/Users/elaineyoung/Documents/projects/followup/study1/data.csv")

data <- data[-c(1, 2), ]

colnames(data) <- gsub("\\.", "", colnames(data))
colnames(data) <- gsub("_1", "", colnames(data))
colnames(data)[colnames(data) == "FL4_DO"] <- "condition"

data[, !(names(data) %in% c("condition"))] <- 
  lapply(data[, !(names(data) %in% c("condition"))], as.numeric)
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
names(data) <- make.names(names(data), unique = TRUE)

data <- data %>%
  filter(Status == 0,
         consent == 1,
         attention == 2)

names(data)[names(data) == "s_awkard"] <- "s_awkward"

# =========================
# Create outcome variables
# =========================
vars <- c("appreciate", "reply", "appropriate", "thoughtful",
          "awkward", "annoying", "valued", "positive")

for (v in vars) {
  cols <- c(paste0("s_", v), paste0("r_", v))
  if(all(cols %in% names(data))) {
    data[[v]] <- rowSums(data[, cols], na.rm = TRUE)
  }
}

outcomes <- vars

# =========================
# Regression models
# =========================
models <- lapply(outcomes, function(v) {
  lm(as.formula(paste(v, "~ condition")), data = data)
})

lapply(models, summary)
## [[1]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5522 -1.4200  0.4478  1.4478  3.5800 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.5522     0.1143  39.815  < 2e-16 ***
## conditionsender  -1.1322     0.1619  -6.994 1.14e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.621 on 399 degrees of freedom
## Multiple R-squared:  0.1092, Adjusted R-squared:  0.107 
## F-statistic: 48.91 on 1 and 399 DF,  p-value: 1.136e-11
## 
## 
## [[2]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.333 -0.690  0.310  1.310  3.310 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.3333     0.1129   47.26   <2e-16 ***
## conditionsender  -1.6433     0.1598  -10.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.6 on 399 degrees of freedom
## Multiple R-squared:  0.2095, Adjusted R-squared:  0.2075 
## F-statistic: 105.8 on 1 and 399 DF,  p-value: < 2.2e-16
## 
## 
## [[3]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.527 -0.740  0.260  1.260  3.260 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.5274     0.1027  44.065  < 2e-16 ***
## conditionsender  -0.7874     0.1455  -5.412 1.08e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.457 on 399 degrees of freedom
## Multiple R-squared:  0.06839,    Adjusted R-squared:  0.06606 
## F-statistic: 29.29 on 1 and 399 DF,  p-value: 1.077e-07
## 
## 
## [[4]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8308 -0.8308  0.2200  1.1692  3.2200 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.8308     0.1090   44.33  < 2e-16 ***
## conditionsender  -1.0508     0.1543   -6.81 3.61e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.545 on 399 degrees of freedom
## Multiple R-squared:  0.1041, Adjusted R-squared:  0.1019 
## F-statistic: 46.37 on 1 and 399 DF,  p-value: 3.613e-11
## 
## 
## [[5]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.865 -1.289  0.135  1.135  2.711 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.2886     0.1194  35.929  < 2e-16 ***
## conditionsender   0.5764     0.1690   3.411 0.000714 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.692 on 399 degrees of freedom
## Multiple R-squared:  0.02833,    Adjusted R-squared:  0.02589 
## F-statistic: 11.63 on 1 and 399 DF,  p-value: 0.0007143
## 
## 
## [[6]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.905 -1.881  0.095  1.119  3.119 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       3.8806     0.1239  31.331   <2e-16 ***
## conditionsender   0.0244     0.1754   0.139    0.889    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.756 on 399 degrees of freedom
## Multiple R-squared:  4.852e-05,  Adjusted R-squared:  -0.002458 
## F-statistic: 0.01936 on 1 and 399 DF,  p-value: 0.8894
## 
## 
## [[7]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2250 -0.8955  0.1045  1.1045  2.1045 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.8955     0.0926  52.868  < 2e-16 ***
## conditionsender  -0.6705     0.1311  -5.114 4.91e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.313 on 399 degrees of freedom
## Multiple R-squared:  0.06151,    Adjusted R-squared:  0.05916 
## F-statistic: 26.15 on 1 and 399 DF,  p-value: 4.911e-07
## 
## 
## [[8]]
## 
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -3.87  -0.87   0.13   1.13   3.13 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      4.65174    0.09876   47.10  < 2e-16 ***
## conditionsender -0.78174    0.13984   -5.59 4.21e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.4 on 399 degrees of freedom
## Multiple R-squared:  0.07263,    Adjusted R-squared:  0.07031 
## F-statistic: 31.25 on 1 and 399 DF,  p-value: 4.214e-08
# =========================
# Plot (mean + SE by condition)
# =========================
long_data <- data %>%
  select(condition, all_of(outcomes)) %>%
  pivot_longer(cols = all_of(outcomes),
               names_to = "outcome",
               values_to = "value")

summary_data <- long_data %>%
  group_by(condition, outcome) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    se = sd(value, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

summary_data$outcome <- factor(summary_data$outcome, levels = outcomes)

ggplot(summary_data, aes(x = outcome, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
                position = position_dodge(width = 0.8),
                width = 0.2) +
  labs(x = "Outcome", y = "Mean Rating") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# =========================
# Exploratory Factor Analysis
# =========================
efa_data <- data %>%
  select(all_of(outcomes)) %>%
  na.omit()

cor(efa_data)
##             appreciate      reply appropriate thoughtful    awkward   annoying
## appreciate   1.0000000  0.6913405  0.60499796  0.7299199 -0.3775135 0.14664180
## reply        0.6913405  1.0000000  0.57793185  0.6277471 -0.3201702 0.11744736
## appropriate  0.6049980  0.5779319  1.00000000  0.6749322 -0.3096619 0.06032786
## thoughtful   0.7299199  0.6277471  0.67493221  1.0000000 -0.3602767 0.15578972
## awkward     -0.3775135 -0.3201702 -0.30966186 -0.3602767  1.0000000 0.15361047
## annoying     0.1466418  0.1174474  0.06032786  0.1557897  0.1536105 1.00000000
## valued       0.5715837  0.4916595  0.48124463  0.6695384 -0.2549000 0.14968459
## positive     0.7159076  0.5987105  0.62690352  0.7938382 -0.3950888 0.16320021
##                 valued   positive
## appreciate   0.5715837  0.7159076
## reply        0.4916595  0.5987105
## appropriate  0.4812446  0.6269035
## thoughtful   0.6695384  0.7938382
## awkward     -0.2549000 -0.3950888
## annoying     0.1496846  0.1632002
## valued       1.0000000  0.7187711
## positive     0.7187711  1.0000000
KMO(efa_data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = efa_data)
## Overall MSA =  0.9
## MSA for each item = 
##  appreciate       reply appropriate  thoughtful     awkward    annoying 
##        0.91        0.91        0.93        0.90        0.87        0.62 
##      valued    positive 
##        0.90        0.88
cortest.bartlett(efa_data)
## R was not square, finding R from data
## $chisq
## [1] 1742.41
## 
## $p.value
## [1] 0
## 
## $df
## [1] 28
fa.parallel(efa_data, fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
efa_result <- fa(efa_data,
                 nfactors = 2,
                 rotate = "oblimin",
                 fm = "ml",
                 scores = "regression")
## Loading required namespace: GPArotation
print(efa_result, cut = 0.3)
## Factor Analysis using method =  ml
## Call: fa(r = efa_data, nfactors = 2, rotate = "oblimin", scores = "regression", 
##     fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##               ML1   ML2    h2   u2 com
## appreciate   0.79       0.737 0.26 1.2
## reply        0.66  0.35 0.645 0.36 1.5
## appropriate  0.69       0.543 0.46 1.1
## thoughtful   0.88       0.781 0.22 1.0
## awkward     -0.40       0.185 0.82 1.1
## annoying                0.034 0.97 1.4
## valued       0.79       0.617 0.38 1.2
## positive     0.93       0.841 0.16 1.1
## 
##                        ML1  ML2
## SS loadings           4.06 0.33
## Proportion Var        0.51 0.04
## Cumulative Var        0.51 0.55
## Proportion Explained  0.93 0.07
## Cumulative Proportion 0.93 1.00
## 
##  With factor correlations of 
##      ML1  ML2
## ML1 1.00 0.18
## ML2 0.18 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  28  with the objective function =  4.39 with Chi Square =  1742.41
## df of  the model are 13  and the objective function was  0.12 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic n.obs is  401 with the empirical chi square  45.86  with prob <  1.5e-05 
## The total n.obs was  401  with Likelihood Chi Square =  47.87  with prob <  6.9e-06 
## 
## Tucker Lewis Index of factoring reliability =  0.956
## RMSEA index =  0.082  and the 90 % confidence intervals are  0.058 0.107
## BIC =  -30.05
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1   ML2
## Correlation of (regression) scores with factors   0.97  0.70
## Multiple R square of scores with factors          0.94  0.48
## Minimum correlation of possible factor scores     0.88 -0.03
fa.diagram(efa_result)

data$factor1 <- efa_result$scores[,1]
data$factor2 <- efa_result$scores[,2]