This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# =========================
# Load and clean data
# =========================
data <- read.csv("/Users/elaineyoung/Documents/projects/followup/study1/data.csv")
data <- data[-c(1, 2), ]
colnames(data) <- gsub("\\.", "", colnames(data))
colnames(data) <- gsub("_1", "", colnames(data))
colnames(data)[colnames(data) == "FL4_DO"] <- "condition"
data[, !(names(data) %in% c("condition"))] <-
lapply(data[, !(names(data) %in% c("condition"))], as.numeric)
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
## Warning in lapply(data[, !(names(data) %in% c("condition"))], as.numeric): NAs
## introduced by coercion
names(data) <- make.names(names(data), unique = TRUE)
data <- data %>%
filter(Status == 0,
consent == 1,
attention == 2)
names(data)[names(data) == "s_awkard"] <- "s_awkward"
# =========================
# Create outcome variables
# =========================
vars <- c("appreciate", "reply", "appropriate", "thoughtful",
"awkward", "annoying", "valued", "positive")
for (v in vars) {
cols <- c(paste0("s_", v), paste0("r_", v))
if(all(cols %in% names(data))) {
data[[v]] <- rowSums(data[, cols], na.rm = TRUE)
}
}
outcomes <- vars
# =========================
# Regression models
# =========================
models <- lapply(outcomes, function(v) {
lm(as.formula(paste(v, "~ condition")), data = data)
})
lapply(models, summary)
## [[1]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5522 -1.4200 0.4478 1.4478 3.5800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5522 0.1143 39.815 < 2e-16 ***
## conditionsender -1.1322 0.1619 -6.994 1.14e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.621 on 399 degrees of freedom
## Multiple R-squared: 0.1092, Adjusted R-squared: 0.107
## F-statistic: 48.91 on 1 and 399 DF, p-value: 1.136e-11
##
##
## [[2]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.333 -0.690 0.310 1.310 3.310
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.3333 0.1129 47.26 <2e-16 ***
## conditionsender -1.6433 0.1598 -10.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.6 on 399 degrees of freedom
## Multiple R-squared: 0.2095, Adjusted R-squared: 0.2075
## F-statistic: 105.8 on 1 and 399 DF, p-value: < 2.2e-16
##
##
## [[3]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.527 -0.740 0.260 1.260 3.260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5274 0.1027 44.065 < 2e-16 ***
## conditionsender -0.7874 0.1455 -5.412 1.08e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.457 on 399 degrees of freedom
## Multiple R-squared: 0.06839, Adjusted R-squared: 0.06606
## F-statistic: 29.29 on 1 and 399 DF, p-value: 1.077e-07
##
##
## [[4]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8308 -0.8308 0.2200 1.1692 3.2200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8308 0.1090 44.33 < 2e-16 ***
## conditionsender -1.0508 0.1543 -6.81 3.61e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.545 on 399 degrees of freedom
## Multiple R-squared: 0.1041, Adjusted R-squared: 0.1019
## F-statistic: 46.37 on 1 and 399 DF, p-value: 3.613e-11
##
##
## [[5]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.865 -1.289 0.135 1.135 2.711
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2886 0.1194 35.929 < 2e-16 ***
## conditionsender 0.5764 0.1690 3.411 0.000714 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.692 on 399 degrees of freedom
## Multiple R-squared: 0.02833, Adjusted R-squared: 0.02589
## F-statistic: 11.63 on 1 and 399 DF, p-value: 0.0007143
##
##
## [[6]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.905 -1.881 0.095 1.119 3.119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8806 0.1239 31.331 <2e-16 ***
## conditionsender 0.0244 0.1754 0.139 0.889
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.756 on 399 degrees of freedom
## Multiple R-squared: 4.852e-05, Adjusted R-squared: -0.002458
## F-statistic: 0.01936 on 1 and 399 DF, p-value: 0.8894
##
##
## [[7]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2250 -0.8955 0.1045 1.1045 2.1045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8955 0.0926 52.868 < 2e-16 ***
## conditionsender -0.6705 0.1311 -5.114 4.91e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.313 on 399 degrees of freedom
## Multiple R-squared: 0.06151, Adjusted R-squared: 0.05916
## F-statistic: 26.15 on 1 and 399 DF, p-value: 4.911e-07
##
##
## [[8]]
##
## Call:
## lm(formula = as.formula(paste(v, "~ condition")), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.87 -0.87 0.13 1.13 3.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.65174 0.09876 47.10 < 2e-16 ***
## conditionsender -0.78174 0.13984 -5.59 4.21e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.4 on 399 degrees of freedom
## Multiple R-squared: 0.07263, Adjusted R-squared: 0.07031
## F-statistic: 31.25 on 1 and 399 DF, p-value: 4.214e-08
# =========================
# Plot (mean + SE by condition)
# =========================
long_data <- data %>%
select(condition, all_of(outcomes)) %>%
pivot_longer(cols = all_of(outcomes),
names_to = "outcome",
values_to = "value")
summary_data <- long_data %>%
group_by(condition, outcome) %>%
summarise(
mean = mean(value, na.rm = TRUE),
se = sd(value, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
summary_data$outcome <- factor(summary_data$outcome, levels = outcomes)
ggplot(summary_data, aes(x = outcome, y = mean, fill = condition)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position = position_dodge(width = 0.8),
width = 0.2) +
labs(x = "Outcome", y = "Mean Rating") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# =========================
# Exploratory Factor Analysis
# =========================
efa_data <- data %>%
select(all_of(outcomes)) %>%
na.omit()
cor(efa_data)
## appreciate reply appropriate thoughtful awkward annoying
## appreciate 1.0000000 0.6913405 0.60499796 0.7299199 -0.3775135 0.14664180
## reply 0.6913405 1.0000000 0.57793185 0.6277471 -0.3201702 0.11744736
## appropriate 0.6049980 0.5779319 1.00000000 0.6749322 -0.3096619 0.06032786
## thoughtful 0.7299199 0.6277471 0.67493221 1.0000000 -0.3602767 0.15578972
## awkward -0.3775135 -0.3201702 -0.30966186 -0.3602767 1.0000000 0.15361047
## annoying 0.1466418 0.1174474 0.06032786 0.1557897 0.1536105 1.00000000
## valued 0.5715837 0.4916595 0.48124463 0.6695384 -0.2549000 0.14968459
## positive 0.7159076 0.5987105 0.62690352 0.7938382 -0.3950888 0.16320021
## valued positive
## appreciate 0.5715837 0.7159076
## reply 0.4916595 0.5987105
## appropriate 0.4812446 0.6269035
## thoughtful 0.6695384 0.7938382
## awkward -0.2549000 -0.3950888
## annoying 0.1496846 0.1632002
## valued 1.0000000 0.7187711
## positive 0.7187711 1.0000000
KMO(efa_data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = efa_data)
## Overall MSA = 0.9
## MSA for each item =
## appreciate reply appropriate thoughtful awkward annoying
## 0.91 0.91 0.93 0.90 0.87 0.62
## valued positive
## 0.90 0.88
cortest.bartlett(efa_data)
## R was not square, finding R from data
## $chisq
## [1] 1742.41
##
## $p.value
## [1] 0
##
## $df
## [1] 28
fa.parallel(efa_data, fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
efa_result <- fa(efa_data,
nfactors = 2,
rotate = "oblimin",
fm = "ml",
scores = "regression")
## Loading required namespace: GPArotation
print(efa_result, cut = 0.3)
## Factor Analysis using method = ml
## Call: fa(r = efa_data, nfactors = 2, rotate = "oblimin", scores = "regression",
## fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## appreciate 0.79 0.737 0.26 1.2
## reply 0.66 0.35 0.645 0.36 1.5
## appropriate 0.69 0.543 0.46 1.1
## thoughtful 0.88 0.781 0.22 1.0
## awkward -0.40 0.185 0.82 1.1
## annoying 0.034 0.97 1.4
## valued 0.79 0.617 0.38 1.2
## positive 0.93 0.841 0.16 1.1
##
## ML1 ML2
## SS loadings 4.06 0.33
## Proportion Var 0.51 0.04
## Cumulative Var 0.51 0.55
## Proportion Explained 0.93 0.07
## Cumulative Proportion 0.93 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 0.18
## ML2 0.18 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 28 with the objective function = 4.39 with Chi Square = 1742.41
## df of the model are 13 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic n.obs is 401 with the empirical chi square 45.86 with prob < 1.5e-05
## The total n.obs was 401 with Likelihood Chi Square = 47.87 with prob < 6.9e-06
##
## Tucker Lewis Index of factoring reliability = 0.956
## RMSEA index = 0.082 and the 90 % confidence intervals are 0.058 0.107
## BIC = -30.05
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.97 0.70
## Multiple R square of scores with factors 0.94 0.48
## Minimum correlation of possible factor scores 0.88 -0.03
fa.diagram(efa_result)
data$factor1 <- efa_result$scores[,1]
data$factor2 <- efa_result$scores[,2]