Import data

library(ltm)

## Loading required package: MASS

## Loading required package: msm

## Loading required package: polycor

library(sjPlot)

## Learn more about sjPlot with 'browseVignettes("sjPlot")'.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

data <- read.csv("~/Downloads/Outliers & Gender (Study 1 - Full)_September 13, 2024_11.17.csv") %>% 
  filter(vid_attn_check == "SURVEY" | vid_attn_check == "survey" | vid_attn_check == "Survey") %>% 
  mutate(Condition = ifelse(outlier == "Adam Cooke 4M4W", "Man Outlier", "Woman Outlier")) 

location <- data %>%
  group_by(LocationLatitude, LocationLongitude) %>%  # Group by LocationLatitude and LocationLongitude
  summarise(Count = n()) %>%  # Count the number of occurrences and store in Count column
  ungroup()  # Ungroup to return to a normal dataframe structure

## `summarise()` has grouped output by 'LocationLatitude'. You can override using
## the `.groups` argument.

data <- data %>% 
  left_join(location, by = c("LocationLatitude", "LocationLongitude")) %>% 
  filter(Count < 3)

Descriptive norm

Cleaning

Note: Ask Derek how to deal with manipulation checks in cleaning

data_clean <- data %>% 
  rename("F_time" = "F") %>% 
  select(c(ResponseId, Condition, A, B, C, D, E, F_time, G, H, presc_comfort_2, presc_accept_2, presc_approp_2, typicality_E, align_E, typicality_outlier, align_outlier, status_outlier_1, status_outlier_2, status_outlier_3, status_E_1, status_E_2, status_E_3, gender, race, age)) %>% 
  mutate_at(c(3:25), as.numeric) %>% 
  mutate(A_time_from_mean = A - 40,
         B_time_from_mean = B - 40,
         C_time_from_mean = C - 40,
         D_time_from_mean = D - 40,
         E_time_from_mean = E - 40,
         F_time_from_mean = F_time - 40,
         G_time_from_mean = G - 40,
         H_time_from_mean = H - 40) %>% 
  rowwise() %>% 
  mutate(mean_time_descr = mean(A_time_from_mean:H_time_from_mean, na.rm = T)) %>% 
  ungroup()

Analysis

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(mean_time_descr ~ Condition, .)

# Display model summary
summary(model)

## 
## Call:
## lm(formula = mean_time_descr ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.786  -1.786   2.714   4.107  15.607 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)              1.2857     0.5963   2.156   0.0316 *
## ConditionWoman Outlier  -0.3929     0.8362  -0.470   0.6387  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.496 on 411 degrees of freedom
## Multiple R-squared:  0.0005367,  Adjusted R-squared:  -0.001895 
## F-statistic: 0.2207 on 1 and 411 DF,  p-value: 0.6387

Figure

ggplot(data = data_clean, 
       aes(x = Condition, y = mean_time_descr)) +
  geom_point(alpha = 0.1,
             size = 2,
             position = position_jitter(0.1)) +
  stat_summary(fun.data = "mean_cl_boot",
               size = 1,
               geom = "linerange",
               color = "grey50")+
  stat_summary(fun = "mean",
               size = 0.3)+
  theme_bw() +
  labs(title = "Descriptive Norm by Condition",
       x = "Condition",
       y = "Time Relative to Objective Mean")+
  geom_hline(yintercept = 0, linetype = "dashed", color = "red")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Prescriptive norm

Cleaning

data_clean <- data_clean %>% 
  mutate(p1_time_from_mean = presc_comfort_2 - 40,
         p2_time_from_mean = presc_accept_2 - 40,
         p3_time_from_mean = presc_approp_2 - 40) %>% 
  rowwise() %>% 
  mutate(mean_time_presc = mean(p1_time_from_mean:p3_time_from_mean, na.rm = T)) %>% 
  ungroup()

Analysis

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(mean_time_presc ~ Condition, .)

# Display model summary
summary(model)

## 
## Call:
## lm(formula = mean_time_presc ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.376  -5.376  -0.376   7.124  28.624 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              6.8005     0.7385   9.208   <2e-16 ***
## ConditionWoman Outlier  -0.4243     1.0357  -0.410    0.682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.52 on 411 degrees of freedom
## Multiple R-squared:  0.0004082,  Adjusted R-squared:  -0.002024 
## F-statistic: 0.1678 on 1 and 411 DF,  p-value: 0.6822

Figure

ggplot(data = data_clean, 
       aes(x = Condition, y = mean_time_presc)) +
  geom_point(alpha = 0.1,
             size = 2,
             position = position_jitter(0.1)) +
  stat_summary(fun.data = "mean_cl_boot",
               size = 1,
               geom = "linerange",
               color = "grey50")+
  stat_summary(fun = "mean",
               size = 0.3)+
  theme_bw() +
  labs(title = "Prescriptive Norm by Condition",
       x = "Condition",
       y = "Time Relative to Objective Mean")+
  geom_hline(yintercept = 0, linetype = "dashed", color = "red")

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Scale loading

presc <- data_clean %>% 
  dplyr::select(c(presc_comfort_2:presc_approp_2))

cronbach.alpha(presc)

## 
## Cronbach's alpha for the 'presc' data-set
## 
## Items: 3
## Sample units: 413
## alpha: 0.915

Outlier mediation model

Clean data

data_clean <- data_clean %>% 
  rowwise() %>% 
  mutate(nonoutlier_score = mean(typicality_E:align_E, na.rm = T)) %>% 
  mutate(outlier_score = mean(typicality_outlier:align_outlier, na.rm = T)) %>% 
  mutate(diff_typicality_score = outlier_score - nonoutlier_score) %>% 
  ungroup()

Negative typicality difference score = outlier is less typical than the non-outlier

Analysis

Outlier typicality mean

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(outlier_score ~ Condition, .)

# Display model summary
summary(model)

## 
## Call:
## lm(formula = outlier_score ~ Condition, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.06158 -0.96190 -0.06158  0.93842  3.03810 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2.06158    0.07673  26.869   <2e-16 ***
## ConditionWoman Outlier -0.09967    0.10760  -0.926    0.355    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.093 on 411 degrees of freedom
## Multiple R-squared:  0.002083,   Adjusted R-squared:  -0.0003447 
## F-statistic: 0.8581 on 1 and 411 DF,  p-value: 0.3548

Difference score

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(diff_typicality_score ~ Condition, .)

# Display model summary
summary(model)

## 
## Call:
## lm(formula = diff_typicality_score ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2167 -1.0214 -0.0214  0.9786  4.7833 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -0.78325    0.09768  -8.018 1.13e-14 ***
## ConditionWoman Outlier -0.19532    0.13699  -1.426    0.155    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.392 on 411 degrees of freedom
## Multiple R-squared:  0.004922,   Adjusted R-squared:  0.002501 
## F-statistic: 2.033 on 1 and 411 DF,  p-value: 0.1547

Mechanism check

table(data_clean$outlier_score, data_clean$Condition)

##      
##       Man Outlier Woman Outlier
##   1            83            90
##   1.5           9            11
##   2            38            45
##   2.5           6            12
##   3            38            24
##   3.5           8             4
##   4            14            19
##   4.5           4             3
##   5             3             2

table(data_clean$nonoutlier_score, data_clean$Condition)

##      
##       Man Outlier Woman Outlier
##   1            17            11
##   1.5           7             6
##   2            43            40
##   2.5          11            18
##   3            59            67
##   3.5          25            17
##   4            31            42
##   4.5           4             2
##   5             6             7

df_long <- data_clean %>% 
  select(c(ResponseId, Condition, outlier_score, nonoutlier_score)) %>% 
  pivot_longer(cols = outlier_score:nonoutlier_score,
               names_to = "outlier",
               values_to = "values")

library(lmerTest)

## Loading required package: lme4

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

# Fit model
model <- df_long %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lmer(values ~ Condition * outlier + (1|ResponseId), .)

# Display model summary
summary(model)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: values ~ Condition * outlier + (1 | ResponseId)
##    Data: .
## 
## REML criterion at convergence: 2394.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.97918 -0.85805  0.00624  0.80350  2.90279 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  ResponseId (Intercept) 0.08507  0.2917  
##  Residual               0.96851  0.9841  
## Number of obs: 826, groups:  ResponseId, 413
## 
## Fixed effects:
##                                              Estimate Std. Error        df
## (Intercept)                                   2.84483    0.07204 816.67574
## ConditionWoman Outlier                        0.09565    0.10103 816.67574
## outlieroutlier_score                         -0.78325    0.09768 410.99999
## ConditionWoman Outlier:outlieroutlier_score  -0.19532    0.13699 410.99999
##                                             t value Pr(>|t|)    
## (Intercept)                                  39.488  < 2e-16 ***
## ConditionWoman Outlier                        0.947    0.344    
## outlieroutlier_score                         -8.018 1.13e-14 ***
## ConditionWoman Outlier:outlieroutlier_score  -1.426    0.155    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) CndtWO otlrt_
## CndtnWmnOtl -0.713              
## otlrtlr_scr -0.678  0.483       
## CndtnWOtl:_  0.483 -0.678 -0.713

plot_model(model, type="pred", terms=c("Condition", "outlier"))

# Define the SEM model with specified coefficients
library(lavaan)

## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.

library(parallel)


model <- '
  # Regression coefficients
  outlier_score ~ a*Condition
  mean_time_descr ~ cprime*Condition + b*outlier_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)

## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           413
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                     Estimate  Std.Err  z-value  P(>|z|)
##   outlier_score ~                                      
##     Conditn    (a)    -0.100    0.107   -0.929    0.353
##   mean_time_descr ~                                    
##     Conditn (cprm)    -0.606    0.803   -0.755    0.450
##     otlr_sc    (b)    -2.141    0.368   -5.823    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .outlier_score     1.189    0.083   14.370    0.000
##    .mean_time_dscr   66.379    4.619   14.370    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.213    0.233    0.917    0.359

Outlier Mediation Model: Prescriptive Norms

model <- '
  # Regression coefficients
  outlier_score ~ a*Condition
  mean_time_presc ~ cprime*Condition + b*outlier_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)

## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           413
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                     Estimate  Std.Err  z-value  P(>|z|)
##   outlier_score ~                                      
##     Conditn    (a)    -0.100    0.107   -0.929    0.353
##   mean_time_presc ~                                    
##     Conditn (cprm)    -0.360    1.032   -0.349    0.727
##     otlr_sc    (b)     0.646    0.473    1.367    0.172
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .outlier_score     1.189    0.083   14.370    0.000
##    .mean_time_prsc  109.683    7.633   14.370    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect         -0.064    0.084   -0.768    0.442

Status mediation model

Clean data

data_clean <- data_clean %>% 
  rowwise() %>% 
  mutate(nonoutlier_status = mean(status_E_1:status_E_3, na.rm = T)) %>% 
  mutate(outlier_status = mean(status_outlier_1:status_outlier_3, na.rm = T)) %>% 
  mutate(diff_status_score = outlier_status - nonoutlier_status) %>% 
  ungroup()

Analysis

Outlier status mean

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(outlier_status ~ Condition, .)

# Display model summary
summary(model)

## 
## Call:
## lm(formula = outlier_status ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2143 -0.7143 -0.1619  0.7857  2.8381 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2.21429    0.06582  33.643   <2e-16 ***
## ConditionWoman Outlier -0.05238    0.09230  -0.568    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9378 on 411 degrees of freedom
## Multiple R-squared:  0.000783,   Adjusted R-squared:  -0.001648 
## F-statistic: 0.3221 on 1 and 411 DF,  p-value: 0.5707

Difference score

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(diff_status_score ~ Condition, .)

# Display model summary
summary(model)

## 
## Call:
## lm(formula = diff_status_score ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2048 -0.7048  0.1478  0.6478  2.2952 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -0.64778    0.06900  -9.388   <2e-16 ***
## ConditionWoman Outlier -0.14745    0.09677  -1.524    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9831 on 411 degrees of freedom
## Multiple R-squared:  0.005618,   Adjusted R-squared:  0.003198 
## F-statistic: 2.322 on 1 and 411 DF,  p-value: 0.1283

Mechanism check

df_long <- data_clean %>% 
  select(c(ResponseId, Condition, outlier_status, nonoutlier_status)) %>% 
  pivot_longer(cols = outlier_status:nonoutlier_status,
               names_to = "outlier",
               values_to = "values")

library(lmerTest)
# Fit model
model <- df_long %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lmer(values ~ Condition * outlier + (1|ResponseId), .)

# Display model summary
summary(model)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: values ~ Condition * outlier + (1 | ResponseId)
##    Data: .
## 
## REML criterion at convergence: 2125.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.04682 -0.56820 -0.06259  0.53860  2.56310 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  ResponseId (Intercept) 0.3515   0.5929  
##  Residual               0.4833   0.6952  
## Number of obs: 826, groups:  ResponseId, 413
## 
## Fixed effects:
##                                               Estimate Std. Error        df
## (Intercept)                                    2.86207    0.06413 698.21784
## ConditionWoman Outlier                         0.09507    0.08993 698.21784
## outlieroutlier_status                         -0.64778    0.06900 411.00002
## ConditionWoman Outlier:outlieroutlier_status  -0.14745    0.09677 411.00002
##                                              t value Pr(>|t|)    
## (Intercept)                                   44.632   <2e-16 ***
## ConditionWoman Outlier                         1.057    0.291    
## outlieroutlier_status                         -9.388   <2e-16 ***
## ConditionWoman Outlier:outlieroutlier_status  -1.524    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) CndtWO otlrt_
## CndtnWmnOtl -0.713              
## otlrtlr_stt -0.538  0.384       
## CndtnWOtl:_  0.384 -0.538 -0.713

plot_model(model, type="pred", terms=c("Condition", "outlier"))

Outlier Mediation Model: Descriptive Norms

# Define the SEM model with specified coefficients
model <- '
  # Regression coefficients
  diff_status_score ~ a*Condition
  mean_time_descr ~ cprime*Condition + b*diff_status_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)

## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           413
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   diff_status_score ~                                    
##     Conditn    (a)      -0.147    0.097   -1.527    0.127
##   mean_time_descr ~                                      
##     Conditn (cprm)      -0.653    0.819   -0.797    0.425
##     dff_st_    (b)      -1.762    0.416   -4.233    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .diff_stats_scr    0.962    0.067   14.370    0.000
##    .mean_time_dscr   68.842    4.791   14.370    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.260    0.181    1.437    0.151

Outlier Mediation Model: Prescriptive Norms

# Define the SEM model with specified coefficients
model <- '
  # Regression coefficients
  diff_status_score ~ a*Condition
  mean_time_presc ~ cprime*Condition + b*diff_status_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)

## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           413
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   diff_status_score ~                                    
##     Conditn    (a)      -0.147    0.097   -1.527    0.127
##   mean_time_presc ~                                      
##     Conditn (cprm)      -0.505    1.035   -0.488    0.625
##     dff_st_    (b)      -0.549    0.526   -1.043    0.297
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .diff_stats_scr    0.962    0.067   14.370    0.000
##    .mean_time_prsc  109.890    7.647   14.370    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.081    0.094    0.861    0.389

Outliers Study 9/13/24

Samantha Grayson

2024-09-13

Import data

Descriptive norm

Cleaning

Analysis

Figure

Prescriptive norm

Cleaning

Analysis

Figure

Scale loading

Outlier mediation model

Clean data

Analysis

Outlier typicality mean

Difference score

Mechanism check

Outlier Mediation Model: Prescriptive Norms

Status mediation model

Clean data

Analysis

Outlier status mean

Difference score

Mechanism check

Outlier Mediation Model: Descriptive Norms

Outlier Mediation Model: Prescriptive Norms