Import data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv("~/Google drive/My Drive/YEAR 2/PROJECTS/DEREK/Outliers Small Pilot 9:6:24/pilot_data.csv") %>% 
  filter(vid_attn_check == "SURVEY" | vid_attn_check == "survey" | vid_attn_check == "Survey") %>% 
  mutate(Condition = ifelse(outlier == "Adam Cooke 4M4W", "Man Outlier", "Woman Outlier"))

Descriptive norm

Cleaning

Note: Ask Derek how to deal with manipulation checks in cleaning

data_clean <- data %>% 
  select(c(ResponseId, Condition, A, B, C, D, E, "F", G, H, presc_1, presc_2, presc_3, typicality_E, align_E, typicality_outlier, align_outlier, status_outlier_1, status_outlier_2, status_outlier_3, status_E_1, status_E_2, status_E_3, gender, race, age)) %>% 
  rename("F_time" = "F") %>% 
  mutate_at(c(3:25), as.numeric) %>% 
  mutate(A_time_from_mean = A - 20,
         B_time_from_mean = B - 20,
         C_time_from_mean = C - 20,
         D_time_from_mean = D - 20,
         E_time_from_mean = E - 20,
         F_time_from_mean = F_time - 20,
         G_time_from_mean = G - 20,
         H_time_from_mean = H - 20) %>% 
  rowwise() %>% 
  mutate(mean_time_descr = mean(A_time_from_mean:H_time_from_mean, na.rm = T)) %>% 
  ungroup()

Analysis

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(mean_time_descr ~ Condition, .)

# Display model summary
summary(model)
## 
## Call:
## lm(formula = mean_time_descr ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.740  -1.610   1.760   2.760   6.265 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               3.735      1.172   3.186   0.0028 **
## ConditionWoman Outlier   -1.495      1.520  -0.984   0.3310   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.834 on 40 degrees of freedom
## Multiple R-squared:  0.02364,    Adjusted R-squared:  -0.0007733 
## F-statistic: 0.9683 on 1 and 40 DF,  p-value: 0.331

Finding: Male outliers shift perceptions of descriptive norms slightly more than female outliers

Figure

ggplot(data = data_clean, 
       aes(x = Condition, y = mean_time_descr)) +
  geom_point(alpha = 0.3,
             size = 2,
             position = position_jitter(0.1)) +
  stat_summary(fun.data = "mean_cl_boot",
               size = 1,
               geom = "linerange",
               color = "grey50")+
  stat_summary(fun = "mean",
               size = 0.3)+
  theme_bw() +
  labs(title = "Descriptive Norm by Condition",
       x = "Condition",
       y = "Time Relative to Objective Mean")+
  geom_hline(yintercept = 0, linetype = "dashed", color = "red")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Prescriptive norm

Cleaning

data_clean <- data_clean %>% 
  mutate(p1_time_from_mean = presc_1 - 20,
         p2_time_from_mean = presc_2 - 20,
         p3_time_from_mean = presc_3 - 20) %>% 
  rowwise() %>% 
  mutate(mean_time_presc = mean(p1_time_from_mean:p3_time_from_mean, na.rm = T)) %>% 
  ungroup()

Analysis

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(mean_time_presc ~ Condition, .)

# Display model summary
summary(model)
## 
## Call:
## lm(formula = mean_time_presc ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.540  -4.501  -1.382   3.618  17.960 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -6.1176     1.6491  -3.710  0.00063 ***
## ConditionWoman Outlier   0.6576     2.1374   0.308  0.75992    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.799 on 40 degrees of freedom
## Multiple R-squared:  0.002361,   Adjusted R-squared:  -0.02258 
## F-statistic: 0.09467 on 1 and 40 DF,  p-value: 0.7599

Finding: No difference between prescriptive norm perceptions based on gender of outlier

Figure

ggplot(data = data_clean, 
       aes(x = Condition, y = mean_time_presc)) +
  geom_point(alpha = 0.3,
             size = 2,
             position = position_jitter(0.1)) +
  stat_summary(fun.data = "mean_cl_boot",
               size = 1,
               geom = "linerange",
               color = "grey50")+
  stat_summary(fun = "mean",
               size = 0.3)+
  theme_bw() +
  labs(title = "Prescriptive Norm by Condition",
       x = "Condition",
       y = "Time Relative to Objective Mean")+
  geom_hline(yintercept = 0, linetype = "dashed", color = "red")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Outlier mediation model

Clean data

data_clean <- data_clean %>% 
  rowwise() %>% 
  mutate(nonoutlier_score = mean(typicality_E:align_E, na.rm = T)) %>% 
  mutate(outlier_score = mean(typicality_outlier:align_outlier, na.rm = T)) %>% 
  mutate(diff_typicality_score = outlier_score - nonoutlier_score) %>% 
  ungroup()

Negative typicality difference score = outlier is less typical than the non-outlier

Analysis

Outlier typicality mean

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(outlier_score ~ Condition, .)

# Display model summary
summary(model)
## 
## Call:
## lm(formula = outlier_score ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2647 -0.9000  0.1000  0.7353  2.6000 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.2647     0.2672   8.475 1.82e-10 ***
## ConditionWoman Outlier  -0.3647     0.3464  -1.053    0.299    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.102 on 40 degrees of freedom
## Multiple R-squared:  0.02697,    Adjusted R-squared:  0.002644 
## F-statistic: 1.109 on 1 and 40 DF,  p-value: 0.2987

Finding: Women outliers are seen as slightly less typical than male outliers

Difference score

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(diff_typicality_score ~ Condition, .)

# Display model summary
summary(model)
## 
## Call:
## lm(formula = diff_typicality_score ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7800 -0.7800  0.2200  0.5588  2.2200 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -0.5588     0.2831  -1.974   0.0553 .
## ConditionWoman Outlier  -0.6612     0.3669  -1.802   0.0791 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.167 on 40 degrees of freedom
## Multiple R-squared:  0.0751, Adjusted R-squared:  0.05197 
## F-statistic: 3.248 on 1 and 40 DF,  p-value: 0.07906

Finding: Women outliers are seen as slightly less typical as compared to male outliers using a difference score measure between perceptions of a non-outlier and perceptions of an outlier. (How to describe this…?)

Outlier Mediation Model: Descriptive Norms

# Define the SEM model with specified coefficients
library(lavaan)
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
library(parallel)


model <- '
  # Regression coefficients
  diff_typicality_score ~ a*Condition
  mean_time_descr ~ cprime*Condition + b*diff_typicality_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            42
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                           Estimate  Std.Err  z-value  P(>|z|)
##   diff_typicality_score ~                                    
##     Conditn    (a)          -0.661    0.358   -1.847    0.065
##   mean_time_descr ~                                          
##     Conditn (cprm)          -2.038    1.511   -1.348    0.178
##     dff_ty_    (b)          -0.820    0.626   -1.310    0.190
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .dff_typclty_sc    1.297    0.283    4.583    0.000
##    .mean_time_dscr   21.380    4.665    4.583    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.542    0.508    1.068    0.285

Outlier Mediation Model: Prescriptive Norms

model <- '
  # Regression coefficients
  diff_typicality_score ~ a*Condition
  mean_time_presc ~ cprime*Condition + b*diff_typicality_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            42
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                           Estimate  Std.Err  z-value  P(>|z|)
##   diff_typicality_score ~                                    
##     Conditn    (a)          -0.661    0.358   -1.847    0.065
##   mean_time_presc ~                                          
##     Conditn (cprm)          -0.876    1.990   -0.440    0.660
##     dff_ty_    (b)          -2.320    0.825   -2.813    0.005
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .dff_typclty_sc    1.297    0.283    4.583    0.000
##    .mean_time_prsc   37.047    8.084    4.583    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          1.534    0.994    1.544    0.123

Open question: Should we use the difference score between the non-outlier and outlier? Or just the outlier’s typicality here?

Status mediation model

Clean data

data_clean <- data_clean %>% 
  rowwise() %>% 
  mutate(nonoutlier_status = mean(status_E_1:status_E_3, na.rm = T)) %>% 
  mutate(outlier_status = mean(status_outlier_1:status_outlier_3, na.rm = T)) %>% 
  mutate(diff_status_score = outlier_status - nonoutlier_status) %>% 
  ungroup()

Negative status difference score = outlier has less status than the non-outlier

Analysis

Outlier status mean

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(outlier_status ~ Condition, .)

# Display model summary
summary(model)
## 
## Call:
## lm(formula = outlier_status ~ Condition, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.11765 -0.96000 -0.03882  0.54000  2.04000 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.1176     0.2177   9.728 4.25e-12 ***
## ConditionWoman Outlier  -0.1576     0.2822  -0.559    0.579    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8976 on 40 degrees of freedom
## Multiple R-squared:  0.007744,   Adjusted R-squared:  -0.01706 
## F-statistic: 0.3122 on 1 and 40 DF,  p-value: 0.5795

Finding: Women outliers are seen as slightly lower status than male outliers

Difference score

# Fit model
model <- data_clean %>% 
  mutate(Condition = relevel(as.factor(Condition), ref = "Man Outlier")) %>% 
  lm(diff_status_score ~ Condition, .)

# Display model summary
summary(model)
## 
## Call:
## lm(formula = diff_status_score ~ Condition, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3235 -0.5400  0.1765  0.6765  2.4600 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             -0.6765     0.2498  -2.708  0.00991 **
## ConditionWoman Outlier  -0.2835     0.3238  -0.876  0.38642   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.03 on 40 degrees of freedom
## Multiple R-squared:  0.01881,    Adjusted R-squared:  -0.005719 
## F-statistic: 0.7669 on 1 and 40 DF,  p-value: 0.3864

Finding: Women outliers are seen as slightly lower status as compared to male outliers using a difference score measure between perceptions of a non-outlier and perceptions of an outlier. (Again…How to describe this…?)

Outlier Mediation Model: Descriptive Norms

# Define the SEM model with specified coefficients
library(lavaan)
library(parallel)


model <- '
  # Regression coefficients
  diff_status_score ~ a*Condition
  mean_time_descr ~ cprime*Condition + b*diff_status_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            42
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   diff_status_score ~                                    
##     Conditn    (a)      -0.284    0.316   -0.897    0.370
##   mean_time_descr ~                                      
##     Conditn (cprm)      -1.948    1.408   -1.384    0.166
##     dff_st_    (b)      -1.598    0.681   -2.346    0.019
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .diff_stats_scr    1.010    0.220    4.583    0.000
##    .mean_time_dscr   19.675    4.293    4.583    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.453    0.540    0.838    0.402

Outlier Mediation Model: Prescriptive Norms

# Define the SEM model with specified coefficients
library(lavaan)
library(parallel)


model <- '
  # Regression coefficients
  diff_status_score ~ a*Condition
  mean_time_presc ~ cprime*Condition + b*diff_status_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = data_clean)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            42
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   diff_status_score ~                                    
##     Conditn    (a)      -0.284    0.316   -0.897    0.370
##   mean_time_presc ~                                      
##     Conditn (cprm)       0.495    2.098    0.236    0.814
##     dff_st_    (b)      -0.575    1.015   -0.567    0.571
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .diff_stats_scr    1.010    0.220    4.583    0.000
##    .mean_time_prsc   43.695    9.535    4.583    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.163    0.340    0.479    0.632

Open question: Should we use the difference score between the non-outlier and outlier? Or just the outlier’s status here?