Reddit Pilot

Libraries

library(lme4)
library(readr)
library(dplyr)
library(lmerTest)
library(sjPlot)
library(glmmTMB)
library(MuMIn)
library(emmeans)

Load Data

# Read the archival Reddit data; lazy = TRUE defers reading until columns are accessed.
df <- read_csv(
  "In Paper/Archival Reddit/main_data_with_text.csv",
  lazy = TRUE
)

## Preprocessing
# Standardise (z-score) the continuous variables and recode parent incivility.
# NOTE(review): an earlier note here described a log-modulus transformation of
# karma; no such transformation is applied in this step, only z-scoring.
### There could be better ways that don't require transformation
# scale() returns a one-column matrix with centring/scaling attributes;
# as.numeric() stores each z-score as a plain numeric column instead, which
# avoids matrix columns inside the data frame.
df_responses_full <- df %>%
  mutate(
    z_score = as.numeric(scale(score)),
    z_score_initial = as.numeric(scale(parent_score)),
    z_parent_num_comments = as.numeric(scale(parent_num_comments)),
    # Numeric coding of parent incivility: 1 when uncivil, -1 otherwise.
    parent_incivility_coded_num = ifelse(parent_incivility == 1, 1, -1),
    # Text coding; the "1."/"2." prefixes fix the alphabetical level order.
    parent_incivility_coded_text = ifelse(
      parent_incivility == 1, "1. Uncivil", "2. Civil"
    ),
    z_text_length = as.numeric(scale(text_length)),
    z_text_length_parent = as.numeric(scale(parent_text_length)),
    # Same values as z_parent_num_comments; kept so existing references to
    # this column name continue to work.
    z_parent_num = as.numeric(scale(parent_num_comments))
  ) %>%
  # Drop raw text and identifier/time columns not used in the analyses below.
  select(-text, -parent_text, -comment_id, -reference_time, -incivility)

# Subset of responses whose parent post is coded as uncivil.
df_parent_uncivil <- filter(df_responses_full, parent_incivility == 1)

Data overview

Number of unique initial posts

# Count distinct parent_post_id values.
nrow(distinct(df_responses_full, parent_post_id))
## [1] 784457

Percentage of unique respondent posts

# Share of rows that remain after dropping exact duplicate rows.
nrow(distinct(df_responses_full)) / nrow(df_responses_full)
## [1] 0.9995

Number of Unique Users

# Count distinct user_id values.
nrow(distinct(df_responses_full, user_id))
## [1] 966977

Mean number of unique responses per user

The initial posts are mostly unique (meaning the vast majority of responses are to different initial posts). Therefore, we cannot group by initial post (parent_post_id). There are several responses per user; hence, we nest responses within users.

# Ratio of distinct rows to distinct users, i.e. the mean number of unique
# responses per user (a count, not a percentage).
nrow(distinct(df_responses_full)) / nrow(distinct(df_responses_full, user_id))
## [1] 2.3284

Percentage of uncivil initial posts

# Share of responses whose parent post is coded uncivil.
NROW(df_parent_uncivil) / NROW(df_responses_full)
## [1] 0.19496

Statistical Analyses

Baseline model (incivility only, no covariates)

# Random-intercept model: z-scored response score on parent incivility,
# with a random intercept per user and no covariates.
m1_empty <- lmer(
  z_score ~ parent_incivility_coded_text + (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)

Summary of model

# Fixed-effect t-tests with Satterthwaite denominator degrees of freedom.
summary(m1_empty, ddf = "Satterthwaite")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text + (1 | user_id)
##    Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 6390365
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -3.64  -0.06  -0.05  -0.05 143.78 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  user_id  (Intercept) 0.0178   0.134   
##  Residual             0.9827   0.991   
## Number of obs: 2252607, groups:  user_id, 966977
## 
## Fixed effects:
##                                            Estimate     Std. Error             df t value Pr(>|t|)
## (Intercept)                                0.000386       0.001534 1778386.153284    0.25     0.80
## parent_incivility_coded_text2. Civil      -0.001216       0.001692 2152619.552533   -0.72     0.47
## 
## Correlation of Fixed Effects:
##             (Intr)
## prnt_n__2.C -0.885

Estimated marginal means

All pre-registered covariates

# Random-intercept model with the four covariates: parent score, response
# length, number of parent comments and parent text length (all z-scored).
m1_cov <- lmer(
  z_score ~ parent_incivility_coded_text +
    z_score_initial + z_text_length + z_parent_num_comments +
    z_text_length_parent +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
# ddf selects Satterthwaite's approximation for the t-test degrees of freedom.
summary(m1_cov, ddf = "Satterthwaite")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text + z_score_initial + z_text_length +      z_parent_num_comments + z_text_length_parent + (1 | user_id)
##    Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 6384653
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -3.75  -0.07  -0.03  -0.01 143.65 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  user_id  (Intercept) 0.0187   0.137   
##  Residual             0.9794   0.990   
## Number of obs: 2252607, groups:  user_id, 966977
## 
## Fixed effects:
##                                            Estimate     Std. Error             df t value            Pr(>|t|)    
## (Intercept)                               -0.001136       0.001559 1796328.337420   -0.73              0.4662    
## parent_incivility_coded_text2. Civil      -0.000158       0.001727 2166110.996410   -0.09              0.9273    
## z_score_initial                            0.031378       0.000724 1893147.121864   43.33 <0.0000000000000002 ***
## z_text_length                              0.032587       0.000727 1737499.737154   44.84 <0.0000000000000002 ***
## z_parent_num_comments                      0.016569       0.000726 2200661.663690   22.81 <0.0000000000000002 ***
## z_text_length_parent                      -0.001766       0.000672 2175299.284675   -2.63              0.0086 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) p___2C z_scr_ z_txt_ z_pr__
## prnt_n__2.C -0.889                            
## z_score_ntl  0.122 -0.146                     
## z_txt_lngth  0.003  0.006  0.044              
## z_prnt_nm_c -0.169  0.183 -0.352 -0.111       
## z_txt_lngt_ -0.006  0.010  0.038 -0.071 -0.042

Estimated marginal means

With covariates that worked

# Reduced model keeping only response length and parent score as covariates.
covariatesthatworked <- lmer(
  z_score ~ parent_incivility_coded_text + z_text_length + z_score_initial +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
# ddf selects Satterthwaite's approximation for the t-test degrees of freedom.
summary(covariatesthatworked, ddf = "Satterthwaite")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text + z_text_length + z_score_initial +      (1 | user_id)
##    Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 6385150
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -3.74  -0.07  -0.03  -0.02 143.65 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  user_id  (Intercept) 0.0186   0.136   
##  Residual             0.9797   0.990   
## Number of obs: 2252607, groups:  user_id, 966977
## 
## Fixed effects:
##                                            Estimate     Std. Error             df t value             Pr(>|t|)    
## (Intercept)                                0.004850       0.001536 1822858.053064    3.16               0.0016 ** 
## parent_incivility_coded_text2. Civil      -0.007309       0.001698 2162408.745338   -4.30             0.000017 ***
## z_text_length                              0.034330       0.000720 1716201.472924   47.66 < 0.0000000000000002 ***
## z_score_initial                            0.037211       0.000678 1771163.557324   54.90 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) p___2C z_txt_
## prnt_n__2.C -0.885              
## z_txt_lngth -0.018  0.028       
## z_score_ntl  0.069 -0.090  0.007

Estimated marginal means

Interactions

Incivility by Parent’s Karma (z-scored)

Regression

# Parent incivility x parent score (z-scored), random intercept per user.
inter_initialscore <- lmer(
  z_score ~ parent_incivility_coded_text * z_score_initial + (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
summary(inter_initialscore)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text * z_score_initial + (1 |      user_id)
##    Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 6387293
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -3.65  -0.07  -0.03  -0.02 143.74 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  user_id  (Intercept) 0.0191   0.138   
##  Residual             0.9802   0.990   
## Number of obs: 2252607, groups:  user_id, 966977
## 
## Fixed effects:
##                                                           Estimate    Std. Error            df t value             Pr(>|t|)    
## (Intercept)                                                0.00967       0.00157 1876503.97506    6.16    0.000000000722102 ***
## parent_incivility_coded_text2. Civil                      -0.01301       0.00173 2178115.62348   -7.54    0.000000000000048 ***
## z_score_initial                                            0.05640       0.00186 2213938.42202   30.32 < 0.0000000000000002 ***
## parent_incivility_coded_text2. Civil:z_score_initial      -0.02226       0.00199 2245920.25216  -11.18 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pr___2.C z_scr_
## prnt_n__2.C -0.889                
## z_score_ntl  0.212 -0.198         
## pr___2.C:__ -0.202  0.178   -0.931

Simple Slopes of Interaction

# Simple slopes for the incivility x parent-score model; store, then print.
inter_initialscoress <- reghelper::simple_slopes(inter_initialscore)
inter_initialscoress
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

Graph

# Interaction plot for the incivility x parent-score model.
# NOTE(review): "meansd" is understood to place a continuous moderator at its
# mean and mean +/- 1 SD; confirm against the sjPlot documentation.
sjPlot::plot_model(inter_initialscore, type = "int", mdrt.values = "meansd")

Incivility by Number of Parent comments (z-scored)

Regressions

# Parent incivility x number of parent comments (z-scored), random intercept
# per user.
inter_num <- lmer(
  z_score ~ parent_incivility_coded_text * z_parent_num_comments +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)

Simple Slopes of Interaction

# Simple slopes for the incivility x parent-comment-count model; store, then
# print.
inter_numss <- reghelper::simple_slopes(inter_num)
inter_numss
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

Graph

# Interaction plot for the incivility x parent-comment-count model.
sjPlot::plot_model(inter_num, type = "int", mdrt.values = "meansd")

Incivility by Length of text in the responder’s comment (z-scored)

Regressions

# Parent incivility x response text length (z-scored), random intercept per
# user.
inter_resplenghth <- lmer(
  z_score ~ parent_incivility_coded_text * z_text_length + (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
summary(inter_resplenghth)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text * z_text_length + (1 |      user_id)
##    Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 6388095
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -3.73  -0.05  -0.04  -0.04 143.71 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  user_id  (Intercept) 0.0174   0.132   
##  Residual             0.9821   0.991   
## Number of obs: 2252607, groups:  user_id, 966977
## 
## Fixed effects:
##                                                         Estimate    Std. Error            df t value             Pr(>|t|)    
## (Intercept)                                             -0.00146       0.00153 1778335.46805   -0.95                 0.34    
## parent_incivility_coded_text2. Civil                     0.00155       0.00169 2151125.90441    0.92                 0.36    
## z_text_length                                            0.04420       0.00151 2232118.63755   29.25 < 0.0000000000000002 ***
## parent_incivility_coded_text2. Civil:z_text_length      -0.01296       0.00170 2242134.81422   -7.63    0.000000000000024 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pr___2.C z_txt_
## prnt_n__2.C -0.886                
## z_txt_lngth -0.049  0.046         
## pr___2.C:__  0.046 -0.036   -0.879

Simple Slopes of Interaction

# Simple slopes for the incivility x response-length model; store, then print.
inter_resplenghthss <- reghelper::simple_slopes(inter_resplenghth)
inter_resplenghthss
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

Graph

# Interaction plot for the incivility x response-length model.
sjPlot::plot_model(inter_resplenghth, type = "int", mdrt.values = "meansd")