library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lme4)
## Loading required package: Matrix
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(ggplot2)
library(performance)
library(sjPlot)
##
## Attaching package: 'sjPlot'
## The following object is masked from 'package:ggplot2':
##
## set_theme
library(modelsummary)
library(lattice)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(influence.ME) ## influence diagnostics for groups (random-effect levels)
##
## Attaching package: 'influence.ME'
## The following object is masked from 'package:stats':
##
## influence
library(knitr)
data <- read.csv("expm_Nov.csv")
names(data) <- c("Population",
"Neighborhood_size",
"Opinion_distribution",
"Average_initial_opinion",
"Mean_number_of_interactions",
"Average_opinion",
"Max_number_of_interactions",
"Percentage_of_negative_op_holders",
"Opinion_change_rate",
"Percentage_of_positive_op_holders",
"Sum_squared_opinion_difference",
"Variance_of_opinion_change")
data$Opinion_distribution <- factor(data$Opinion_distribution)
data$Mean_number_of_interactions_centered <-
data$Mean_number_of_interactions -
mean(data$Mean_number_of_interactions, na.rm = TRUE)
The interaction ratio measures how much interaction happens per agent; it captures the intensity of social communication in the simulation.
data$interaction_ratio <- data$Mean_number_of_interactions / data$Population
The data are grouped by Opinion_distribution, which represents four types of initial opinion distributions: Moderate–Low variance, Moderate–High variance, Extreme–Low variance, and Extreme–High variance.
Different initial distributions imply different baseline rates of opinion change, so we include random intercepts: each group is allowed its own average level of the outcome instead of forcing all groups to share the same baseline. A quick check of the grouping structure is sketched below.
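As a sanity check before fitting the model, we can count how many simulation runs fall into each distribution type (a minimal sketch, not part of the original pipeline; it assumes the renamed columns above):
# Number of simulation runs per initial opinion distribution
data %>%
  dplyr::count(Opinion_distribution)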
Model assumptions
1/ Linearity: the outcome is a linear function of the fixed-effect predictors (neighborhood size and interaction ratio).
2/ Normally distributed random effects: the mean opinion-change baseline for each distribution type is normally distributed around the global mean.
3/ Residuals are independent and normally distributed within each distribution type.
4/ Homoscedasticity: equal residual variance across predicted values and across groups.
model_ratio <- lmer(
Opinion_change_rate ~
scale(Neighborhood_size) +
scale(interaction_ratio) +
(1 | Opinion_distribution),##random intercepts
data = data
)
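A compact way to eyeball all of these assumptions at once is performance::check_model(), which draws the standard diagnostic panels in one call; the manual ggplot versions built below give the same information with finer control (this call is a convenience sketch, not part of the original workflow):
# One-call diagnostics: linearity, homogeneity of variance, normality of
# residuals and random effects, and influential observations
performance::check_model(model_ratio)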
tab_model(
model_ratio,
title = "Mixed Effects Model for Opinion Change",
pred.labels = c("Intercept",
"Neighborhood Size (scaled)",
"Interaction Ratio (scaled)"),
dv.labels = "Opinion Change Rate"
)
Mixed Effects Model for Opinion Change (DV: Opinion Change Rate)

| Predictors | Estimates | CI | p |
|---|---|---|---|
| Intercept | 0.17 | 0.07 – 0.27 | 0.002 |
| Neighborhood Size (scaled) | -0.02 | -0.03 – -0.01 | <0.001 |
| Interaction Ratio (scaled) | -0.03 | -0.07 – 0.02 | 0.263 |

| Random Effects | |
|---|---|
| σ² (residual variance) | 0.00 |
| τ00 (Opinion_distribution) | 0.01 |
| ICC | 0.88 |
| N (Opinion_distribution) | 6 |
| Observations | 96 |
| Marginal R² / Conditional R² | 0.063 / 0.883 |
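The confidence intervals in the table can be reproduced directly from the fitted model. Wald intervals are a fast approximation (profile intervals are more accurate but slower); this is a sketch, not part of the original script:
# 95% Wald confidence intervals for the fixed effects only
confint(model_ratio, parm = "beta_", method = "Wald")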
modelsummary(model_ratio,
output = "model_summary.docx",
estimate = "{estimate} ({std.error})",
statistic = NULL,
stars = TRUE)
fixed_effects <- as.data.frame(summary(model_ratio)$coefficients)
names(fixed_effects) <- c("Estimate", "Std.Error", "df", "t.value", "p.value")
fixed_effects$Term <- rownames(fixed_effects)
fixed_effects <- fixed_effects[, c("Term", "Estimate", "Std.Error", "t.value", "p.value")]
str(fixed_effects)
## 'data.frame': 3 obs. of 5 variables:
## $ Term : chr "(Intercept)" "scale(Neighborhood_size)" "scale(interaction_ratio)"
## $ Estimate : num 0.1688 -0.0245 -0.0253
## $ Std.Error: num 0.05183 0.00505 0.02245
## $ t.value : num 3.26 -4.85 -1.13
## $ p.value : num 4.63e-02 5.31e-06 2.65e-01
rand_effects <- as.data.frame(VarCorr(model_ratio))
write.csv(fixed_effects, "fixed_effects.csv", row.names = FALSE)
write.csv(rand_effects, "random_effects.csv", row.names = FALSE)
print(kable(fixed_effects, digits = 4))
##
##
## | |Term | Estimate| Std.Error| t.value| p.value|
## |:------------------------|:------------------------|--------:|---------:|-------:|-------:|
## |(Intercept) |(Intercept) | 0.1688| 0.0518| 3.2563| 0.0463|
## |scale(Neighborhood_size) |scale(Neighborhood_size) | -0.0245| 0.0051| -4.8475| 0.0000|
## |scale(interaction_ratio) |scale(interaction_ratio) | -0.0253| 0.0225| -1.1263| 0.2653|
print(kable(rand_effects, digits = 4))
##
##
## |grp |var1 |var2 | vcov| sdcor|
## |:--------------------|:-----------|:----|------:|------:|
## |Opinion_distribution |(Intercept) |NA | 0.0150| 0.1223|
## |Residual |NA |NA | 0.0021| 0.0462|
Residuals vs. fitted values, to check linearity and homoscedasticity:
p1 <- ggplot(
data.frame(fitted = fitted(model_ratio),
resid = resid(model_ratio)),
aes(x = fitted, y = resid)
) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_smooth(method = "loess", se = TRUE) +
theme_minimal() +
labs(title = "Residuals vs Fitted",
x = "Fitted values", y = "Residuals")
Normal Q-Q Plot (residuals)
p2 <- ggplot(
data.frame(resid = resid(model_ratio)),
aes(sample = resid)
) +
stat_qq() +
stat_qq_line() +
theme_minimal() +
labs(title = "Normal Q-Q Plot")
Scale–Location Plot
p3 <- ggplot(
data.frame(fitted = fitted(model_ratio),
resid = sqrt(abs(scale(resid(model_ratio))))),
aes(x = fitted, y = resid)
) +
geom_point() +
geom_smooth(method = "loess") +
theme_minimal() +
labs(title = "Scale-Location Plot")
Random-effects Q-Q plot, to check whether the random intercepts (one per distribution type) are normally distributed:
re_group <- ranef(model_ratio)$Opinion_distribution
ranef_df <- data.frame(
Group = rownames(re_group),
Effect = re_group[,1]
)
p4 <- ggplot(ranef_df, aes(sample = Effect)) +
stat_qq() +
stat_qq_line() +
theme_minimal() +
labs(title = "Random Effects Q-Q Plot")
Influence diagnostics to identify influential levels of the random effect
infl <- influence(model_ratio, group = "Opinion_distribution")
cooks <- cooks.distance(infl)
p5 <- ggplot(
data.frame(index = 1:length(cooks), cooks = cooks),
aes(x = index, y = cooks)
) +
geom_point() +
theme_minimal() +
labs(title = "Cook's Distance by Opinion Distribution")
Residuals by group: checks whether each distribution type has a similar residual variance, i.e., that no single group has a much larger spread than the others.
p6 <- ggplot(
data.frame(Group = data$Opinion_distribution,
resid = resid(model_ratio)),
aes(x = Group, y = resid)
) +
geom_boxplot() +
theme_minimal() +
labs(title = "Residuals by Opinion Distribution") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Check the linearity of the residuals against each predictor (neighborhood size and interaction ratio):
p7 <- ggplot(
data.frame(N = data$Neighborhood_size, resid = resid(model_ratio)),
aes(x = N, y = resid)) +
geom_point() +
geom_smooth(method = "loess") +
theme_minimal() +
labs(title = "Residuals vs Neighborhood Size")
p8 <- ggplot(
data.frame(R = data$interaction_ratio, resid = resid(model_ratio)),
aes(x = R, y = resid)) +
geom_point() +
geom_smooth(method = "loess") +
theme_minimal() +
labs(title = "Residuals vs Interaction Ratio")
grid.arrange(p1,p2,p3,p4, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
grid.arrange(p5,p6,p7,p8, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 3.97
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 4.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.2452e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 16.241
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 3.97
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 4.03
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.2452e-16
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 16.241
## `geom_smooth()` using formula = 'y ~ x'
Check for multicollinearity: VIFs are computed from an auxiliary linear model with the same fixed effects.
vif_model <- lm(Opinion_change_rate ~
scale(Neighborhood_size) +
scale(interaction_ratio),
data = data)
print(vif(vif_model))
## scale(Neighborhood_size) scale(interaction_ratio)
## 1.004773 1.004773
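performance can also compute VIFs directly from the mixed model, avoiding the auxiliary lm refit; both approaches should agree here (a convenience sketch):
# Collinearity check on the mixed model itself
performance::check_collinearity(model_ratio)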
# Model performance summary
print(performance::model_performance(model_ratio))
## # Indices of model performance
##
## AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## --------------------------------------------------------------------------
## -270.0 | -269.3 | -257.1 | 0.883 | 0.063 | 0.875 | 0.044 | 0.046
# Autocorrelation of residuals
print(acf(resid(model_ratio), plot = FALSE))
##
## Autocorrelations of series 'resid(model_ratio)', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 0.304 -0.226 0.102 0.519 0.207 -0.007 0.052 0.276 0.154 0.032
## 11 12 13 14 15 16 17 18 19
## 0.028 0.282 0.129 -0.102 0.107 0.611 0.233 -0.169 -0.049
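If a formal test is preferred to eyeballing the ACF, performance provides a Durbin-Watson-style check; since the simulation runs are not strictly time-ordered, treat it as a rough screen (a sketch, not part of the original script):
# Durbin-Watson-style test for autocorrelated residuals
performance::check_autocorrelation(model_ratio)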
Intraclass correlation: how much of the variation in opinion change is attributable to which initial distribution was used.
icc <- performance::icc(model_ratio)
icc
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.875
## Unadjusted ICC: 0.820
An ICC well above the common 0.3 rule of thumb (here about 0.88) indicates a strong grouping effect: most of the variance in opinion change lies between distribution types rather than within them.
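The adjusted ICC reported above can be reconstructed by hand from the variance components: the between-group intercept variance divided by the total of between-group and residual variance (matching performance::icc() up to rounding):
# Adjusted ICC by hand: tau00 / (tau00 + sigma^2)
vc     <- as.data.frame(VarCorr(model_ratio))
tau00  <- vc$vcov[vc$grp == "Opinion_distribution"]
sigma2 <- vc$vcov[vc$grp == "Residual"]
tau00 / (tau00 + sigma2)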
# Extract random intercepts for Opinion_distribution with conditional variances
re_list <- ranef(model_ratio, condVar = TRUE)
re_dist <- re_list$Opinion_distribution
# Posterior variances of random effects
postvar <- attr(re_dist, "postVar")
# Convert to data frame: effect + SE + CIs
effect <- re_dist[, 1]
se <- sqrt(postvar[1, 1, ]) # SE for each group's random intercept
re_df <- data.frame(
Opinion_distribution = rownames(re_dist),
Effect = effect,
SE = se
)
re_df$CI_lower <- re_df$Effect - 1.96 * re_df$SE
re_df$CI_upper <- re_df$Effect + 1.96 * re_df$SE
# Plot: how each distribution type shifts the baseline, with 95% CIs
ggplot(re_df, aes(x = reorder(Opinion_distribution, Effect), y = Effect)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
coord_flip() +
theme_minimal() +
labs(
title = "Baseline Opinion Change by Initial Opinion Distribution",
x = "Initial Opinion Distribution",
y = "Random Intercept (Deviation from Overall Baseline)"
)