library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv("~/Google drive/My Drive/YEAR 2/PROJECTS/DEREK/Outliers Small Pilot 9:6:24/pilot_data.csv") %>%
filter(vid_attn_check == "SURVEY" | vid_attn_check == "survey" | vid_attn_check == "Survey") %>%
mutate(Condition = ifelse(outlier == "Adam Cooke 4M4W", "Man Outlier", "Woman Outlier"))
Note: Ask Derek how to deal with manipulation checks in cleaning
data_clean <- data %>%
select(c(ResponseId, Condition, A, B, C, D, E, "F", G, H, presc_1, presc_2, presc_3, typicality_E, align_E, typicality_outlier, align_outlier, status_outlier_1, status_outlier_2, status_outlier_3, status_E_1, status_E_2, status_E_3, gender, race, age)) %>%
rename("F_time" = "F") %>%
mutate_at(c(3:25), as.numeric) %>%
mutate(A_time_from_mean = A - 20,
B_time_from_mean = B - 20,
C_time_from_mean = C - 20,
D_time_from_mean = D - 20,
E_time_from_mean = E - 20,
F_time_from_mean = F_time - 20,
G_time_from_mean = G - 20,
H_time_from_mean = H - 20) %>%
rowwise() %>%
mutate(mean_time_descr = mean(A_time_from_mean:H_time_from_mean, na.rm = T)) %>%
ungroup()
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(mean_time_descr ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = mean_time_descr ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.740 -1.610 1.760 2.760 6.265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.735 1.172 3.186 0.0028 **
## ConditionWoman Outlier -1.495 1.520 -0.984 0.3310
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.834 on 40 degrees of freedom
## Multiple R-squared: 0.02364, Adjusted R-squared: -0.0007733
## F-statistic: 0.9683 on 1 and 40 DF, p-value: 0.331
Finding: Male outliers shift perceptions of descriptive norms slightly more than female outliers
ggplot(data = data_clean,
aes(x = Condition, y = mean_time_descr)) +
geom_point(alpha = 0.3,
size = 2,
position = position_jitter(0.1)) +
stat_summary(fun.data = "mean_cl_boot",
size = 1,
geom = "linerange",
color = "grey50")+
stat_summary(fun = "mean",
size = 0.3)+
theme_bw() +
labs(title = "Descriptive Norm by Condition",
x = "Condition",
y = "Time Relative to Objective Mean")+
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).
data_clean <- data_clean %>%
mutate(p1_time_from_mean = presc_1 - 20,
p2_time_from_mean = presc_2 - 20,
p3_time_from_mean = presc_3 - 20) %>%
rowwise() %>%
mutate(mean_time_presc = mean(p1_time_from_mean:p3_time_from_mean, na.rm = T)) %>%
ungroup()
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(mean_time_presc ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = mean_time_presc ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.540 -4.501 -1.382 3.618 17.960
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.1176 1.6491 -3.710 0.00063 ***
## ConditionWoman Outlier 0.6576 2.1374 0.308 0.75992
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.799 on 40 degrees of freedom
## Multiple R-squared: 0.002361, Adjusted R-squared: -0.02258
## F-statistic: 0.09467 on 1 and 40 DF, p-value: 0.7599
Finding: No difference between prescriptive norm perceptions based on gender of outlier
ggplot(data = data_clean,
aes(x = Condition, y = mean_time_presc)) +
geom_point(alpha = 0.3,
size = 2,
position = position_jitter(0.1)) +
stat_summary(fun.data = "mean_cl_boot",
size = 1,
geom = "linerange",
color = "grey50")+
stat_summary(fun = "mean",
size = 0.3)+
theme_bw() +
labs(title = "Prescriptive Norm by Condition",
x = "Condition",
y = "Time Relative to Objective Mean")+
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).
data_clean <- data_clean %>%
rowwise() %>%
mutate(nonoutlier_score = mean(typicality_E:align_E, na.rm = T)) %>%
mutate(outlier_score = mean(typicality_outlier:align_outlier, na.rm = T)) %>%
mutate(diff_typicality_score = outlier_score - nonoutlier_score) %>%
ungroup()
Negative typicality difference score = outlier is less typical than the non-outlier
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(outlier_score ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = outlier_score ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2647 -0.9000 0.1000 0.7353 2.6000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2647 0.2672 8.475 1.82e-10 ***
## ConditionWoman Outlier -0.3647 0.3464 -1.053 0.299
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.102 on 40 degrees of freedom
## Multiple R-squared: 0.02697, Adjusted R-squared: 0.002644
## F-statistic: 1.109 on 1 and 40 DF, p-value: 0.2987
Finding: Women outliers are seen as slightly less typical than male outliers
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(diff_typicality_score ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = diff_typicality_score ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7800 -0.7800 0.2200 0.5588 2.2200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5588 0.2831 -1.974 0.0553 .
## ConditionWoman Outlier -0.6612 0.3669 -1.802 0.0791 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.167 on 40 degrees of freedom
## Multiple R-squared: 0.0751, Adjusted R-squared: 0.05197
## F-statistic: 3.248 on 1 and 40 DF, p-value: 0.07906
Finding: Women outliers are seen as slightly less typical as compared to male outliers using a difference score measure between perceptions of a non-outlier and perceptions of an outlier. (How to describe this…?)
# Define the SEM model with specified coefficients
library(lavaan)
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
library(parallel)
model <- '
# Regression coefficients
diff_typicality_score ~ a*Condition
mean_time_descr ~ cprime*Condition + b*diff_typicality_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 42
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## diff_typicality_score ~
## Conditn (a) -0.661 0.358 -1.847 0.065
## mean_time_descr ~
## Conditn (cprm) -2.038 1.511 -1.348 0.178
## dff_ty_ (b) -0.820 0.626 -1.310 0.190
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .dff_typclty_sc 1.297 0.283 4.583 0.000
## .mean_time_dscr 21.380 4.665 4.583 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.542 0.508 1.068 0.285
model <- '
# Regression coefficients
diff_typicality_score ~ a*Condition
mean_time_presc ~ cprime*Condition + b*diff_typicality_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 42
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## diff_typicality_score ~
## Conditn (a) -0.661 0.358 -1.847 0.065
## mean_time_presc ~
## Conditn (cprm) -0.876 1.990 -0.440 0.660
## dff_ty_ (b) -2.320 0.825 -2.813 0.005
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .dff_typclty_sc 1.297 0.283 4.583 0.000
## .mean_time_prsc 37.047 8.084 4.583 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 1.534 0.994 1.544 0.123
Open question: Should we use the difference score between the non-outlier and outlier? Or just the outlier’s typicality here?
data_clean <- data_clean %>%
rowwise() %>%
mutate(nonoutlier_status = mean(status_E_1:status_E_3, na.rm = T)) %>%
mutate(outlier_status = mean(status_outlier_1:status_outlier_3, na.rm = T)) %>%
mutate(diff_status_score = outlier_status - nonoutlier_status) %>%
ungroup()
Negative status difference score = outlier has less status than the non-outlier
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(outlier_status ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = outlier_status ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11765 -0.96000 -0.03882 0.54000 2.04000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1176 0.2177 9.728 4.25e-12 ***
## ConditionWoman Outlier -0.1576 0.2822 -0.559 0.579
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8976 on 40 degrees of freedom
## Multiple R-squared: 0.007744, Adjusted R-squared: -0.01706
## F-statistic: 0.3122 on 1 and 40 DF, p-value: 0.5795
Finding: Women outliers are seen as slightly lower status than male outliers
# Fit model
model <- data_clean %>%
mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>%
lm(diff_status_score ~ Condition, .)
# Display model summary
summary(model)
##
## Call:
## lm(formula = diff_status_score ~ Condition, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3235 -0.5400 0.1765 0.6765 2.4600
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6765 0.2498 -2.708 0.00991 **
## ConditionWoman Outlier -0.2835 0.3238 -0.876 0.38642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.03 on 40 degrees of freedom
## Multiple R-squared: 0.01881, Adjusted R-squared: -0.005719
## F-statistic: 0.7669 on 1 and 40 DF, p-value: 0.3864
Finding: Women outliers are seen as slightly lower status as compared to male outliers using a difference score measure between perceptions of a non-outlier and perceptions of an outlier. (Again…How to describe this…?)
# Define the SEM model with specified coefficients
library(lavaan)
library(parallel)
model <- '
# Regression coefficients
diff_status_score ~ a*Condition
mean_time_descr ~ cprime*Condition + b*diff_status_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 42
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## diff_status_score ~
## Conditn (a) -0.284 0.316 -0.897 0.370
## mean_time_descr ~
## Conditn (cprm) -1.948 1.408 -1.384 0.166
## dff_st_ (b) -1.598 0.681 -2.346 0.019
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .diff_stats_scr 1.010 0.220 4.583 0.000
## .mean_time_dscr 19.675 4.293 4.583 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.453 0.540 0.838 0.402
# Define the SEM model with specified coefficients
library(lavaan)
library(parallel)
model <- '
# Regression coefficients
diff_status_score ~ a*Condition
mean_time_presc ~ cprime*Condition + b*diff_status_score
# Indirect effect
indirect := a*b
'
# Fit the model
fit <- sem(model, data = data_clean)
# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 42
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## diff_status_score ~
## Conditn (a) -0.284 0.316 -0.897 0.370
## mean_time_presc ~
## Conditn (cprm) 0.495 2.098 0.236 0.814
## dff_st_ (b) -0.575 1.015 -0.567 0.571
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .diff_stats_scr 1.010 0.220 4.583 0.000
## .mean_time_prsc 43.695 9.535 4.583 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.163 0.340 0.479 0.632
Open question: Should we use the difference score between the non-outlier and outlier? Or just the outlier’s status here?