Libraries
Load Data
# Read the archival Reddit data set (responses plus parent-post fields).
# `lazy = TRUE` is forwarded to read_csv() unchanged.
df <- read_csv(
  file = "In Paper/Archival Reddit/main_data_with_text.csv",
  lazy = TRUE
)
## Preprocessing
# Standardise (z-score) the numeric variables and recode parent incivility.
# NOTE(review): this chunk was previously labelled "log modulus transformation
# of karma", but no log-modulus transform is applied here -- only z-scoring.
# Confirm whether such a transform happens upstream.
### There could be better ways that don't require transformation
df_responses_full <- df %>%
  mutate(
    # scale() returns a one-column matrix carrying "scaled:center" and
    # "scaled:scale" attributes. as.numeric() strips these so that every
    # z-scored column is stored as a plain numeric vector rather than a
    # matrix column inside the data frame.
    z_score = as.numeric(scale(score)),
    z_score_initial = as.numeric(scale(parent_score)),
    z_parent_num_comments = as.numeric(scale(parent_num_comments)),
    # Effect-style numeric code: 1 = uncivil parent, -1 = otherwise.
    parent_incivility_coded_num = ifelse(parent_incivility == 1, 1, -1),
    # Labelled version; the "1."/"2." prefixes fix the alphabetical order,
    # so "1. Uncivil" is the reference level when used in a model formula.
    parent_incivility_coded_text = ifelse(
      parent_incivility == 1, "1. Uncivil", "2. Civil"
    ),
    z_text_length = as.numeric(scale(text_length)),
    z_text_length_parent = as.numeric(scale(parent_text_length)),
    # Same source column as z_parent_num_comments; kept so that any code
    # referring to z_parent_num keeps working.
    z_parent_num = as.numeric(scale(parent_num_comments))
  ) %>%
  # Drop the raw text and identifier/time columns listed here.
  select(-text, -parent_text, -comment_id, -reference_time, -incivility)

# Subset of responses whose parent post is coded as uncivil.
df_parent_uncivil <- df_responses_full %>%
  filter(parent_incivility == 1)
Data overview
Number of unique initial posts
## [1] 784457
Percentage of unique respondent posts
## [1] 0.9995
Average number of responses per user
Initial posts are mostly unique (meaning the vast majority of responses belong to different initial posts). Therefore, we cannot group by initial post (parent_post_id). There are several responses per user; hence, we nest responses within users.
# Ratio of distinct response rows to distinct users, i.e. the mean number of
# distinct rows per user (a ratio, not a percentage).
nrow(distinct(df_responses_full)) /
  nrow(distinct(df_responses_full, user_id))
## [1] 2.3284
Statistical Analyses
Empty set
# Baseline mixed model: standardised response score predicted by parent
# incivility only, with a random intercept per user. Fitted with the
# "bobyqa" optimizer.
m1_empty <- lmer(
  formula = z_score ~ parent_incivility_coded_text + (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
Summary of model
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text + (1 | user_id)
## Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 6390365
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.64 -0.06 -0.05 -0.05 143.78
##
## Random effects:
## Groups Name Variance Std.Dev.
## user_id (Intercept) 0.0178 0.134
## Residual 0.9827 0.991
## Number of obs: 2252607, groups: user_id, 966977
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.000386 0.001534 1778386.153284 0.25 0.80
## parent_incivility_coded_text2. Civil -0.001216 0.001692 2152619.552533 -0.72 0.47
##
## Correlation of Fixed Effects:
## (Intr)
## prnt_n__2.C -0.885
Estimated marginal means
All pre-registered covariates
# Mixed model with parent incivility plus the four covariates below, and a
# random intercept per user. Fitted with the "bobyqa" optimizer.
m1_cov <- lmer(
  formula = z_score ~ parent_incivility_coded_text +
    z_score_initial +
    z_text_length +
    z_parent_num_comments +
    z_text_length_parent +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text + z_score_initial + z_text_length + z_parent_num_comments + z_text_length_parent + (1 | user_id)
## Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 6384653
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.75 -0.07 -0.03 -0.01 143.65
##
## Random effects:
## Groups Name Variance Std.Dev.
## user_id (Intercept) 0.0187 0.137
## Residual 0.9794 0.990
## Number of obs: 2252607, groups: user_id, 966977
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.001136 0.001559 1796328.337420 -0.73 0.4662
## parent_incivility_coded_text2. Civil -0.000158 0.001727 2166110.996410 -0.09 0.9273
## z_score_initial 0.031378 0.000724 1893147.121864 43.33 <0.0000000000000002 ***
## z_text_length 0.032587 0.000727 1737499.737154 44.84 <0.0000000000000002 ***
## z_parent_num_comments 0.016569 0.000726 2200661.663690 22.81 <0.0000000000000002 ***
## z_text_length_parent -0.001766 0.000672 2175299.284675 -2.63 0.0086 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) p___2C z_scr_ z_txt_ z_pr__
## prnt_n__2.C -0.889
## z_score_ntl 0.122 -0.146
## z_txt_lngth 0.003 0.006 0.044
## z_prnt_nm_c -0.169 0.183 -0.352 -0.111
## z_txt_lngt_ -0.006 0.010 0.038 -0.071 -0.042
Estimated marginal means
With covariates that worked
# Reduced covariate model: parent incivility, response text length and
# parent score, with a random intercept per user ("bobyqa" optimizer).
covariatesthatworked <- lmer(
  formula = z_score ~ parent_incivility_coded_text +
    z_text_length +
    z_score_initial +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text + z_text_length + z_score_initial + (1 | user_id)
## Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 6385150
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.74 -0.07 -0.03 -0.02 143.65
##
## Random effects:
## Groups Name Variance Std.Dev.
## user_id (Intercept) 0.0186 0.136
## Residual 0.9797 0.990
## Number of obs: 2252607, groups: user_id, 966977
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.004850 0.001536 1822858.053064 3.16 0.0016 **
## parent_incivility_coded_text2. Civil -0.007309 0.001698 2162408.745338 -4.30 0.000017 ***
## z_text_length 0.034330 0.000720 1716201.472924 47.66 < 0.0000000000000002 ***
## z_score_initial 0.037211 0.000678 1771163.557324 54.90 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) p___2C z_txt_
## prnt_n__2.C -0.885
## z_txt_lngth -0.018 0.028
## z_score_ntl 0.069 -0.090 0.007
Estimated marginal means
Interactions
Incivility by Parent’s Karma (z-scored)
Regression
# Interaction model: parent incivility x standardised parent score, with a
# random intercept per user ("bobyqa" optimizer).
inter_initialscore <- lmer(
  formula = z_score ~ parent_incivility_coded_text * z_score_initial +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
# Print the fitted model's summary table.
summary(inter_initialscore)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text * z_score_initial + (1 | user_id)
## Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 6387293
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.65 -0.07 -0.03 -0.02 143.74
##
## Random effects:
## Groups Name Variance Std.Dev.
## user_id (Intercept) 0.0191 0.138
## Residual 0.9802 0.990
## Number of obs: 2252607, groups: user_id, 966977
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.00967 0.00157 1876503.97506 6.16 0.000000000722102 ***
## parent_incivility_coded_text2. Civil -0.01301 0.00173 2178115.62348 -7.54 0.000000000000048 ***
## z_score_initial 0.05640 0.00186 2213938.42202 30.32 < 0.0000000000000002 ***
## parent_incivility_coded_text2. Civil:z_score_initial -0.02226 0.00199 2245920.25216 -11.18 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) pr___2.C z_scr_
## prnt_n__2.C -0.889
## z_score_ntl 0.212 -0.198
## pr___2.C:__ -0.202 0.178 -0.931
Simple Slopes of Interaction
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
Incivility by Number of Parent comments (z-scored)
Regressions
Simple Slopes of Interaction
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
Incivility by Length of text in the responder’s comment (z-scored)
Regressions
# Interaction model: parent incivility x standardised response text length,
# with a random intercept per user ("bobyqa" optimizer).
# The object name (including its spelling) is kept as-is because other code
# may refer to it.
inter_resplenghth <- lmer(
  formula = z_score ~ parent_incivility_coded_text * z_text_length +
    (1 | user_id),
  data = df_responses_full,
  control = lmerControl(optimizer = "bobyqa")
)
# Print the fitted model's summary table.
summary(inter_resplenghth)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: z_score ~ parent_incivility_coded_text * z_text_length + (1 | user_id)
## Data: df_responses_full
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 6388095
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.73 -0.05 -0.04 -0.04 143.71
##
## Random effects:
## Groups Name Variance Std.Dev.
## user_id (Intercept) 0.0174 0.132
## Residual 0.9821 0.991
## Number of obs: 2252607, groups: user_id, 966977
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.00146 0.00153 1778335.46805 -0.95 0.34
## parent_incivility_coded_text2. Civil 0.00155 0.00169 2151125.90441 0.92 0.36
## z_text_length 0.04420 0.00151 2232118.63755 29.25 < 0.0000000000000002 ***
## parent_incivility_coded_text2. Civil:z_text_length -0.01296 0.00170 2242134.81422 -7.63 0.000000000000024 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) pr___2.C z_txt_
## prnt_n__2.C -0.886
## z_txt_lngth -0.049 0.046
## pr___2.C:__ 0.046 -0.036 -0.879
Simple Slopes of Interaction
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.