library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: polycor
library(sjPlot)
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv("~/Downloads/Outliers & Gender (Study 1 - Full)_September 13, 2024_11.17.csv") %>%
filter(vid_attn_check == "SURVEY" | vid_attn_check == "survey" | vid_attn_check == "Survey") %>%
mutate(Condition = ifelse(outlier == "Adam Cooke 4M4W", "Man Outlier", "Woman Outlier"))
location <- data %>%
group_by(LocationLatitude, LocationLongitude) %>% # Group by LocationLatitude and LocationLongitude
summarise(Count = n()) %>% # Count the number of occurrences and store in Count column
ungroup() # Ungroup to return to a normal dataframe structure
## `summarise()` has grouped output by 'LocationLatitude'. You can override using
## the `.groups` argument.
data <- data %>%
left_join(location, by = c("LocationLatitude", "LocationLongitude")) %>%
filter(Count < 3)
Note: Ask Derek how to deal with manipulation checks in cleaning
data_clean <- data %>%
rename("F_time" = "F") %>%
select(c(ResponseId, Condition, A, B, C, D, E, F_time, G, H, presc_comfort_2, presc_accept_2, presc_approp_2, typicality_E, align_E, typicality_outlier, align_outlier, status_outlier_1, status_outlier_2, status_outlier_3, status_E_1, status_E_2, status_E_3, gender, race, age)) %>%
mutate_at(c(3:25), as.numeric) %>%
mutate(A_time_from_mean = A - 40,
B_time_from_mean = B - 40,
C_time_from_mean = C - 40,
D_time_from_mean = D - 40,
E_time_from_mean = E - 40,
F_time_from_mean = F_time - 40,
G_time_from_mean = G - 40,
H_time_from_mean = H - 40) %>%
rowwise() %>%
mutate(mean_time_descr = mean(A_time_from_mean:H_time_from_mean, na.rm = T)) %>%
ungroup()
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(mean_time_descr ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = mean_time_descr ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.786 -1.786 2.714 4.107 15.607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2857 0.5963 2.156 0.0316 *
## ConditionWoman Outlier -0.3929 0.8362 -0.470 0.6387
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.496 on 411 degrees of freedom
## Multiple R-squared: 0.0005367, Adjusted R-squared: -0.001895
## F-statistic: 0.2207 on 1 and 411 DF, p-value: 0.6387
ggplot(data = data_clean,
aes(x = Condition, y = mean_time_descr)) +
geom_point(alpha = 0.1,
size = 2,
position = position_jitter(0.1)) +
stat_summary(fun.data = "mean_cl_boot",
size = 1,
geom = "linerange",
color = "grey50")+
stat_summary(fun = "mean",
size = 0.3)+
theme_bw() +
labs(title = "Descriptive Norm by Condition",
x = "Condition",
y = "Time Relative to Objective Mean")+
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).
data_clean <- data_clean %>%
mutate(p1_time_from_mean = presc_comfort_2 - 40,
p2_time_from_mean = presc_accept_2 - 40,
p3_time_from_mean = presc_approp_2 - 40) %>%
rowwise() %>%
mutate(mean_time_presc = mean(p1_time_from_mean:p3_time_from_mean, na.rm = T)) %>%
ungroup()
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(mean_time_presc ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = mean_time_presc ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.376 -5.376 -0.376 7.124 28.624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.8005 0.7385 9.208 <2e-16 ***
## ConditionWoman Outlier -0.4243 1.0357 -0.410 0.682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.52 on 411 degrees of freedom
## Multiple R-squared: 0.0004082, Adjusted R-squared: -0.002024
## F-statistic: 0.1678 on 1 and 411 DF, p-value: 0.6822
ggplot(data = data_clean,
aes(x = Condition, y = mean_time_presc)) +
geom_point(alpha = 0.1,
size = 2,
position = position_jitter(0.1)) +
stat_summary(fun.data = "mean_cl_boot",
size = 1,
geom = "linerange",
color = "grey50")+
stat_summary(fun = "mean",
size = 0.3)+
theme_bw() +
labs(title = "Prescriptive Norm by Condition",
x = "Condition",
y = "Time Relative to Objective Mean")+
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).
presc <- data_clean %>%
dplyr::select(c(presc_comfort_2:presc_approp_2))
cronbach.alpha(presc)
##
## Cronbach's alpha for the 'presc' data-set
##
## Items: 3
## Sample units: 413
## alpha: 0.915
data_clean <- data_clean %>%
rowwise() %>%
mutate(nonoutlier_score = mean(typicality_E:align_E, na.rm = T)) %>%
mutate(outlier_score = mean(typicality_outlier:align_outlier, na.rm = T)) %>%
mutate(diff_typicality_score = outlier_score - nonoutlier_score) %>%
ungroup()
Negative typicality difference score = outlier is less typical than the non-outlier
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(outlier_score ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = outlier_score ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06158 -0.96190 -0.06158 0.93842 3.03810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.06158 0.07673 26.869 <2e-16 ***
## ConditionWoman Outlier -0.09967 0.10760 -0.926 0.355
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.093 on 411 degrees of freedom
## Multiple R-squared: 0.002083, Adjusted R-squared: -0.0003447
## F-statistic: 0.8581 on 1 and 411 DF, p-value: 0.3548
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(diff_typicality_score ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = diff_typicality_score ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2167 -1.0214 -0.0214 0.9786 4.7833
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.78325 0.09768 -8.018 1.13e-14 ***
## ConditionWoman Outlier -0.19532 0.13699 -1.426 0.155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.392 on 411 degrees of freedom
## Multiple R-squared: 0.004922, Adjusted R-squared: 0.002501
## F-statistic: 2.033 on 1 and 411 DF, p-value: 0.1547
table(data_clean$outlier_score, data_clean$Condition)
##
## Man Outlier Woman Outlier
## 1 83 90
## 1.5 9 11
## 2 38 45
## 2.5 6 12
## 3 38 24
## 3.5 8 4
## 4 14 19
## 4.5 4 3
## 5 3 2
table(data_clean$nonoutlier_score, data_clean$Condition)
##
## Man Outlier Woman Outlier
## 1 17 11
## 1.5 7 6
## 2 43 40
## 2.5 11 18
## 3 59 67
## 3.5 25 17
## 4 31 42
## 4.5 4 2
## 5 6 7
df_long <- data_clean %>%
select(c(ResponseId, Condition, outlier_score, nonoutlier_score)) %>%
pivot_longer(cols = outlier_score:nonoutlier_score,
names_to = "outlier",
values_to = "values")
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
# Fit model
model <- df_long %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lmer(values ~ Condition * outlier + (1|ResponseId), .)
# Display model summary
summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: values ~ Condition * outlier + (1 | ResponseId)
## Data: .
##
## REML criterion at convergence: 2394.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.97918 -0.85805 0.00624 0.80350 2.90279
##
## Random effects:
## Groups Name Variance Std.Dev.
## ResponseId (Intercept) 0.08507 0.2917
## Residual 0.96851 0.9841
## Number of obs: 826, groups: ResponseId, 413
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 2.84483 0.07204 816.67574
## ConditionWoman Outlier 0.09565 0.10103 816.67574
## outlieroutlier_score -0.78325 0.09768 410.99999
## ConditionWoman Outlier:outlieroutlier_score -0.19532 0.13699 410.99999
## t value Pr(>|t|)
## (Intercept) 39.488 < 2e-16 ***
## ConditionWoman Outlier 0.947 0.344
## outlieroutlier_score -8.018 1.13e-14 ***
## ConditionWoman Outlier:outlieroutlier_score -1.426 0.155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndtWO otlrt_
## CndtnWmnOtl -0.713
## otlrtlr_scr -0.678 0.483
## CndtnWOtl:_ 0.483 -0.678 -0.713
plot_model(model, type="pred", terms=c("Condition", "outlier"))
# Define the SEM model with specified coefficients
library(lavaan)
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
library(parallel)
model <- '
# Regression coefficients
outlier_score ~ a*Condition
mean_time_descr ~ cprime*Condition + b*outlier_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 413
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## outlier_score ~
## Conditn (a) -0.100 0.107 -0.929 0.353
## mean_time_descr ~
## Conditn (cprm) -0.606 0.803 -0.755 0.450
## otlr_sc (b) -2.141 0.368 -5.823 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .outlier_score 1.189 0.083 14.370 0.000
## .mean_time_dscr 66.379 4.619 14.370 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.213 0.233 0.917 0.359
model <- '
# Regression coefficients
outlier_score ~ a*Condition
mean_time_presc ~ cprime*Condition + b*outlier_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 413
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## outlier_score ~
## Conditn (a) -0.100 0.107 -0.929 0.353
## mean_time_presc ~
## Conditn (cprm) -0.360 1.032 -0.349 0.727
## otlr_sc (b) 0.646 0.473 1.367 0.172
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .outlier_score 1.189 0.083 14.370 0.000
## .mean_time_prsc 109.683 7.633 14.370 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect -0.064 0.084 -0.768 0.442
data_clean <- data_clean %>%
rowwise() %>%
mutate(nonoutlier_status = mean(status_E_1:status_E_3, na.rm = T)) %>%
mutate(outlier_status = mean(status_outlier_1:status_outlier_3, na.rm = T)) %>%
mutate(diff_status_score = outlier_status - nonoutlier_status) %>%
ungroup()
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(outlier_status ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = outlier_status ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2143 -0.7143 -0.1619 0.7857 2.8381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.21429 0.06582 33.643 <2e-16 ***
## ConditionWoman Outlier -0.05238 0.09230 -0.568 0.571
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9378 on 411 degrees of freedom
## Multiple R-squared: 0.000783, Adjusted R-squared: -0.001648
## F-statistic: 0.3221 on 1 and 411 DF, p-value: 0.5707
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(diff_status_score ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = diff_status_score ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2048 -0.7048 0.1478 0.6478 2.2952
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.64778 0.06900 -9.388 <2e-16 ***
## ConditionWoman Outlier -0.14745 0.09677 -1.524 0.128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9831 on 411 degrees of freedom
## Multiple R-squared: 0.005618, Adjusted R-squared: 0.003198
## F-statistic: 2.322 on 1 and 411 DF, p-value: 0.1283
df_long <- data_clean %>%
select(c(ResponseId, Condition, outlier_status, nonoutlier_status)) %>%
pivot_longer(cols = outlier_status:nonoutlier_status,
names_to = "outlier",
values_to = "values")
library(lmerTest)
# Fit model
model <- df_long %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lmer(values ~ Condition * outlier + (1|ResponseId), .)
# Display model summary
summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: values ~ Condition * outlier + (1 | ResponseId)
## Data: .
##
## REML criterion at convergence: 2125.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.04682 -0.56820 -0.06259 0.53860 2.56310
##
## Random effects:
## Groups Name Variance Std.Dev.
## ResponseId (Intercept) 0.3515 0.5929
## Residual 0.4833 0.6952
## Number of obs: 826, groups: ResponseId, 413
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 2.86207 0.06413 698.21784
## ConditionWoman Outlier 0.09507 0.08993 698.21784
## outlieroutlier_status -0.64778 0.06900 411.00002
## ConditionWoman Outlier:outlieroutlier_status -0.14745 0.09677 411.00002
## t value Pr(>|t|)
## (Intercept) 44.632 <2e-16 ***
## ConditionWoman Outlier 1.057 0.291
## outlieroutlier_status -9.388 <2e-16 ***
## ConditionWoman Outlier:outlieroutlier_status -1.524 0.128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndtWO otlrt_
## CndtnWmnOtl -0.713
## otlrtlr_stt -0.538 0.384
## CndtnWOtl:_ 0.384 -0.538 -0.713
plot_model(model, type="pred", terms=c("Condition", "outlier"))
# Define the SEM model with specified coefficients
model <- '
# Regression coefficients
diff_status_score ~ a*Condition
mean_time_descr ~ cprime*Condition + b*diff_status_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 413
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## diff_status_score ~
## Conditn (a) -0.147 0.097 -1.527 0.127
## mean_time_descr ~
## Conditn (cprm) -0.653 0.819 -0.797 0.425
## dff_st_ (b) -1.762 0.416 -4.233 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .diff_stats_scr 0.962 0.067 14.370 0.000
## .mean_time_dscr 68.842 4.791 14.370 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.260 0.181 1.437 0.151
# Define the SEM model with specified coefficients
model <- '
# Regression coefficients
diff_status_score ~ a*Condition
mean_time_presc ~ cprime*Condition + b*diff_status_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 413
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## diff_status_score ~
## Conditn (a) -0.147 0.097 -1.527 0.127
## mean_time_presc ~
## Conditn (cprm) -0.505 1.035 -0.488 0.625
## dff_st_ (b) -0.549 0.526 -1.043 0.297
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .diff_stats_scr 0.962 0.067 14.370 0.000
## .mean_time_prsc 109.890 7.647 14.370 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.081 0.094 0.861 0.389