library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lme4)
## Loading required package: Matrix
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(ggplot2)
library(performance)
library(sjPlot)
##
## Attaching package: 'sjPlot'
## The following object is masked from 'package:ggplot2':
##
## set_theme
library(modelsummary)
library(lattice)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(influence.ME) ## influence diagnostics for groups (random-effect levels)
##
## Attaching package: 'influence.ME'
## The following object is masked from 'package:stats':
##
## influence
library(knitr)
data <- read.csv("expm_Nov.csv")
names(data) <- c("Population",
"Neighborhood_size",
"Opinion_distribution",
"Average_initial_opinion",
"Mean_number_of_interactions",
"Average_opinion",
"Max_number_of_interactions",
"Percentage_of_negative_op_holders",
"Opinion_change_rate",
"Percentage_of_positive_op_holders",
"Sum_squared_opinion_difference",
"Variance_of_opinion_change")
data$Opinion_distribution <- factor(data$Opinion_distribution)
data$Mean_number_of_interactions_centered <-
data$Mean_number_of_interactions -
mean(data$Mean_number_of_interactions, na.rm = TRUE)
The interaction ratio measures how much interaction happens per agent; it captures the intensity of social communication in the simulation.
data$interaction_ratio <- data$Mean_number_of_interactions / data$Population
The data are grouped by Opinion_distribution, which represents four types of initial opinion distributions: Moderate–Low variance, Moderate–High variance, Extreme–Low variance, and Extreme–High variance.
Different initial distributions imply different baseline rates of opinion change, so we include random intercepts: each group is allowed its own average level of the outcome instead of forcing all groups to share the same baseline. A quick check of the grouping structure is sketched below.
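As a sanity check before fitting the model, we can count how many simulation runs fall into each distribution type (a minimal sketch, not part of the original pipeline; it assumes the renamed columns above):
# Number of simulation runs per initial opinion distribution
data %>%
  dplyr::count(Opinion_distribution)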
Model assumptions
1/ Linearity: the outcome is a linear function of the fixed-effect predictors (neighborhood size and interaction ratio).
2/ Normally distributed random effects: the mean opinion-change baseline for each distribution type is normally distributed around the global mean.
3/ Residuals are independent and normally distributed within each distribution type.
4/ Homoscedasticity: equal residual variance across predicted values and across groups.
model_ratio <- lmer(
Opinion_change_rate ~
scale(Neighborhood_size) +
scale(interaction_ratio) +
(1 | Opinion_distribution),##random intercepts
data = data
)
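A compact way to eyeball all of these assumptions at once is performance::check_model(), which draws the standard diagnostic panels in one call; the manual ggplot versions built below give the same information with finer control (this call is a convenience sketch, not part of the original workflow):
# One-call diagnostics: linearity, homogeneity of variance, normality of
# residuals and random effects, and influential observations
performance::check_model(model_ratio)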
tab_model(
model_ratio,
title = "Mixed Effects Model for Opinion Change",
pred.labels = c("Intercept",
"Neighborhood Size (scaled)",
"Interaction Ratio (scaled)"),
dv.labels = "Opinion Change Rate"
)
Mixed Effects Model for Opinion Change (DV: Opinion Change Rate)

| Predictors | Estimates | CI | p |
|---|---|---|---|
| Intercept | 0.17 | 0.07 – 0.27 | 0.002 |
| Neighborhood Size (scaled) | -0.02 | -0.03 – -0.01 | <0.001 |
| Interaction Ratio (scaled) | -0.03 | -0.07 – 0.02 | 0.263 |

| Random Effects | |
|---|---|
| σ² (residual variance) | 0.00 |
| τ00 (Opinion_distribution) | 0.01 |
| ICC | 0.88 |
| N (Opinion_distribution) | 6 |
| Observations | 96 |
| Marginal R² / Conditional R² | 0.063 / 0.883 |
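The confidence intervals in the table can be reproduced directly from the fitted model. Wald intervals are a fast approximation (profile intervals are more accurate but slower); this is a sketch, not part of the original script:
# 95% Wald confidence intervals for the fixed effects only
confint(model_ratio, parm = "beta_", method = "Wald")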
modelsummary(model_ratio,
output = "model_summary.docx",
estimate = "{estimate} ({std.error})",
statistic = NULL,
stars = TRUE)
fixed_effects <- as.data.frame(summary(model_ratio)$coefficients)
names(fixed_effects) <- c("Estimate", "Std.Error", "df", "t.value", "p.value")
fixed_effects$Term <- rownames(fixed_effects)
fixed_effects <- fixed_effects[, c("Term", "Estimate", "Std.Error", "t.value", "p.value")]
str(fixed_effects)
## 'data.frame': 3 obs. of 5 variables:
## $ Term : chr "(Intercept)" "scale(Neighborhood_size)" "scale(interaction_ratio)"
## $ Estimate : num 0.1688 -0.0245 -0.0253
## $ Std.Error: num 0.05183 0.00505 0.02245
## $ t.value : num 3.26 -4.85 -1.13
## $ p.value : num 4.63e-02 5.31e-06 2.65e-01
rand_effects <- as.data.frame(VarCorr(model_ratio))
write.csv(fixed_effects, "fixed_effects.csv", row.names = FALSE)
write.csv(rand_effects, "random_effects.csv", row.names = FALSE)
print(kable(fixed_effects, digits = 4))
##
##
## | |Term | Estimate| Std.Error| t.value| p.value|
## |:------------------------|:------------------------|--------:|---------:|-------:|-------:|
## |(Intercept) |(Intercept) | 0.1688| 0.0518| 3.2563| 0.0463|
## |scale(Neighborhood_size) |scale(Neighborhood_size) | -0.0245| 0.0051| -4.8475| 0.0000|
## |scale(interaction_ratio) |scale(interaction_ratio) | -0.0253| 0.0225| -1.1263| 0.2653|
print(kable(rand_effects, digits = 4))
##
##
## |grp |var1 |var2 | vcov| sdcor|
## |:--------------------|:-----------|:----|------:|------:|
## |Opinion_distribution |(Intercept) |NA | 0.0150| 0.1223|
## |Residual |NA |NA | 0.0021| 0.0462|
Residuals vs. fitted values, to check linearity and homoscedasticity:
p1 <- ggplot(
data.frame(fitted = fitted(model_ratio),
resid = resid(model_ratio)),
aes(x = fitted, y = resid)
) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_smooth(method = "loess", se = TRUE) +
theme_minimal() +
labs(title = "Residuals vs Fitted",
x = "Fitted values", y = "Residuals")
Normal Q-Q Plot (residuals)
p2 <- ggplot(
data.frame(resid = resid(model_ratio)),
aes(sample = resid)
) +
stat_qq() +
stat_qq_line() +
theme_minimal() +
labs(title = "Normal Q-Q Plot")
Scale–Location Plot
p3 <- ggplot(
data.frame(fitted = fitted(model_ratio),
resid = sqrt(abs(scale(resid(model_ratio))))),
aes(x = fitted, y = resid)
) +
geom_point() +
geom_smooth(method = "loess") +
theme_minimal() +
labs(title = "Scale-Location Plot")
Random-effects Q-Q plot, to check whether the random intercepts (one per distribution type) are normally distributed:
re_group <- ranef(model_ratio)$Opinion_distribution
ranef_df <- data.frame(
Group = rownames(re_group),
Effect = re_group[,1]
)
p4 <- ggplot(ranef_df, aes(sample = Effect)) +
stat_qq() +
stat_qq_line() +
theme_minimal() +
labs(title = "Random Effects Q-Q Plot")
Influence diagnostics to identify influential levels of the random effect
infl <- influence(model_ratio, group = "Opinion_distribution")
cooks <- cooks.distance(infl)
p5 <- ggplot(
data.frame(index = 1:length(cooks), cooks = cooks),
aes(x = index, y = cooks)
) +
geom_point() +
theme_minimal() +
labs(title = "Cook's Distance by Opinion Distribution")
Residuals by group: checks whether each distribution type has a similar residual variance, i.e., that no single group has a much larger spread than the others.
p6 <- ggplot(
data.frame(Group = data$Opinion_distribution,
resid = resid(model_ratio)),
aes(x = Group, y = resid)
) +
geom_boxplot() +
theme_minimal() +
labs(title = "Residuals by Opinion Distribution") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Check the linearity of the residuals against each predictor (neighborhood size and interaction ratio):
p7 <- ggplot(
data.frame(N = data$Neighborhood_size, resid = resid(model_ratio)),
aes(x = N, y = resid)) +
geom_point() +
geom_smooth(method = "loess") +
theme_minimal() +
labs(title = "Residuals vs Neighborhood Size")
p8 <- ggplot(
data.frame(R = data$interaction_ratio, resid = resid(model_ratio)),
aes(x = R, y = resid)) +
geom_point() +
geom_smooth(method = "loess") +
theme_minimal() +
labs(title = "Residuals vs Interaction Ratio")
grid.arrange(p1,p2,p3,p4, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
grid.arrange(p5,p6,p7,p8, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 3.97
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 4.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.2452e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 16.241
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 3.97
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 4.03
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.2452e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 16.241
## `geom_smooth()` using formula = 'y ~ x'
Check for multicollinearity: VIFs are computed from an auxiliary linear model with the same fixed effects.
vif_model <- lm(Opinion_change_rate ~
scale(Neighborhood_size) +
scale(interaction_ratio),
data = data)
print(vif(vif_model))
## scale(Neighborhood_size) scale(interaction_ratio)
## 1.004773 1.004773
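performance can also compute VIFs directly from the mixed model, avoiding the auxiliary lm refit; both approaches should agree here (a convenience sketch):
# Collinearity check on the mixed model itself
performance::check_collinearity(model_ratio)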
# Model performance summary
print(performance::model_performance(model_ratio))
## # Indices of model performance
##
## AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## --------------------------------------------------------------------------
## -270.0 | -269.3 | -257.1 | 0.883 | 0.063 | 0.875 | 0.044 | 0.046
# Autocorrelation of residuals
print(acf(resid(model_ratio), plot = FALSE))
##
## Autocorrelations of series 'resid(model_ratio)', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 0.304 -0.226 0.102 0.519 0.207 -0.007 0.052 0.276 0.154 0.032
## 11 12 13 14 15 16 17 18 19
## 0.028 0.282 0.129 -0.102 0.107 0.611 0.233 -0.169 -0.049
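If a formal test is preferred to eyeballing the ACF, performance provides a Durbin-Watson-style check; since the simulation runs are not strictly time-ordered, treat it as a rough screen (a sketch, not part of the original script):
# Durbin-Watson-style test for autocorrelated residuals
performance::check_autocorrelation(model_ratio)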
Intraclass correlation: how much of the variation in opinion change is attributable to which initial distribution was used.
icc <- performance::icc(model_ratio)
icc
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.875
## Unadjusted ICC: 0.820
An ICC well above the common 0.3 rule of thumb (here about 0.88) indicates a strong grouping effect: most of the variance in opinion change lies between distribution types rather than within them.
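The adjusted ICC reported above can be reconstructed by hand from the variance components: the between-group intercept variance divided by the total of between-group and residual variance (matching performance::icc() up to rounding):
# Adjusted ICC by hand: tau00 / (tau00 + sigma^2)
vc     <- as.data.frame(VarCorr(model_ratio))
tau00  <- vc$vcov[vc$grp == "Opinion_distribution"]
sigma2 <- vc$vcov[vc$grp == "Residual"]
tau00 / (tau00 + sigma2)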
# Extract random intercepts for Opinion_distribution with conditional variances
re_list <- ranef(model_ratio, condVar = TRUE)
re_dist <- re_list$Opinion_distribution
# Posterior variances of random effects
postvar <- attr(re_dist, "postVar")
# Convert to data frame: effect + SE + CIs
effect <- re_dist[, 1]
se <- sqrt(postvar[1, 1, ]) # SE for each group's random intercept
re_df <- data.frame(
Opinion_distribution = rownames(re_dist),
Effect = effect,
SE = se
)
re_df$CI_lower <- re_df$Effect - 1.96 * re_df$SE
re_df$CI_upper <- re_df$Effect + 1.96 * re_df$SE
# Plot: how each distribution type shifts the baseline, with 95% CIs
ggplot(re_df, aes(x = reorder(Opinion_distribution, Effect), y = Effect)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
coord_flip() +
theme_minimal() +
labs(
title = "Baseline Opinion Change by Initial Opinion Distribution",
x = "Initial Opinion Distribution",
y = "Random Intercept (Deviation from Overall Baseline)"
)