This document replicates all tables, maps, and figures from Race and social inequality in France (Kirkegaard 2026). It uses publicly available INSEE / DEPP / SSMSI data — no restricted-access files.
Data inputs:
See data/SOURCES.md for download URLs.
shares <- fread(file.path(PROJ, "data/shares_dpt_year.csv"),
colClasses = list(character = "dpt"))
shares[, share_strict := share_maghrebi_arab + share_sub_saharan + share_turkish]
pop <- fread(file.path(PROJ, "data/pop_age_dpt_year.csv"),
colClasses = list(character = "dpt"))
dens <- fread(file.path(PROJ, "data/pop_density_dpt_year.csv"),
colClasses = list(character = "dpt"))
nm_dt <- fread(file.path(PROJ, "data/dept_area.csv"),
colClasses = list(character = "dpt"))[, .(dpt, name)]
cities <- data.table(
dpt = c("75","93","13","69","31","59","06","67","33","44"),
city = c("Paris (75)","Seine-Saint-Denis (93)","Marseille (13)","Lyon (69)",
"Toulouse (31)","Lille / Nord (59)","Nice (06)","Strasbourg (67)",
"Bordeaux (33)","Nantes (44)")
)
plotd <- merge(shares[dpt %in% cities$dpt, .(dpt, year, share_strict)],
cities, by = "dpt")
plotd[, city := factor(city, levels = cities$city)]
p <- ggplot(plotd, aes(year, share_strict, color = city)) +
geom_line(linewidth = 0.6, alpha = 0.85) +
scale_x_continuous(breaks = seq(1900, 2020, 20)) +
scale_y_continuous(breaks = seq(0, 0.5, 0.1), limits = c(0, 0.5)) +
scale_color_brewer(palette = "Paired") +
labs(title = "Strict Muslim/African newborn names share, 1900-2024",
subtitle = "Major city départements (INSEE prénoms file)",
x = NULL,
y = "Muslim/African names share (0-1 fraction)",
color = NULL) +
theme_minimal(base_size = 12) +
theme(plot.background = element_rect(fill = "white", color = NA),
legend.position = "right")
ggsave(file.path(PROJ, "figs/timeseries_names_cities.png"), p,
w = 12, h = 6.5, dpi = 180, bg = "white")
p
build_dept_panel <- function(y_df, y_var, log_y = FALSE) {
y_df <- copy(y_df)
setnames(y_df, y_var, "y")
if (log_y) y_df[, y := log(y)]
base <- y_df[!dpt %in% DROM]
base <- merge(base, shares[, .(dpt, year, share_strict)], by = c("dpt","year"))
base <- merge(base, dens[, .(dpt, year, log_density, popw = pop)],
by = c("dpt","year"))
base <- merge(base, pop[, .(dpt, year, share_60plus, share_under20)],
by = c("dpt","year"), all.x = TRUE)
base[!is.na(share_60plus) & !is.na(share_under20)]
}
stars_note <- "Stars: * p<0.05, ** p<0.01, *** p<0.001."
inc15 <- fread(file.path(PROJ, "data/income_dept_panel_2006_2020.csv"),
colClasses = list(character = "dpt"))
setnames(inc15, "regime", "source")
panel_inc <- merge(merge(merge(
inc15[!dpt %in% DROM, .(dpt, year, source, log_inc = log(median_inc))],
shares[, .(dpt, year, share_strict)], by = c("dpt","year")),
dens[, .(dpt, year, log_density, popw = pop)], by = c("dpt","year")),
pop[, .(dpt, year, share_60plus, share_under20)],
by = c("dpt","year"), all.x = TRUE)
panel_inc <- panel_inc[!is.na(share_60plus) & !is.na(log_density)]
xs_inc <- panel_inc[year == max(year)]
models_inc <- list(
"(1) XS, MA share only" = feols(log_inc ~ share_strict, data = xs_inc, weights = ~popw),
"(2) XS + covariates" = feols(log_inc ~ share_strict + log_density + share_60plus + share_under20,
data = xs_inc, weights = ~popw),
"(3) Panel FEs only" = feols(log_inc ~ share_strict | dpt + year + dpt^source,
data = panel_inc, weights = ~popw, cluster = ~dpt),
"(4) Panel + covariates"= feols(log_inc ~ share_strict + log_density + share_60plus + share_under20 |
dpt + year + dpt^source,
data = panel_inc, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_inc,
stars = c("*"=0.05, "**"=0.01, "***"=0.001),
coef_omit = "Intercept",
coef_rename = c(share_strict = "Muslim/African names share",
log_density = "log(pop density)",
share_60plus = "Share aged 60+",
share_under20 = "Share aged <20"),
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
title = "Median income (log €) — département cross-section + 2006-2020 panel",
notes = list(stars_note,
"Income panel stitches RFL (2006-2011) + FiLoSoFi (2012-2020); Dept × source FE absorbs ~4% per-dept break in 2012."))
| (1) XS, MA share only | (2) XS + covariates | (3) Panel FEs only | (4) Panel + covariates | |
|---|---|---|---|---|
| * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||
| Stars: * p<0.05, ** p<0.01, *** p<0.001. | ||||
| Income panel stitches RFL (2006-2011) + FiLoSoFi (2012-2020); Dept × source FE absorbs ~4% per-dept break in 2012. | ||||
| Muslim/African names share | 0.128 | -0.345* | -0.211*** | -0.187*** |
| (0.108) | (0.139) | (0.060) | (0.051) | |
| log(pop density) | 0.010 | 0.037 | ||
| (0.010) | (0.048) | |||
| Share aged 60+ | -2.028*** | 0.172 | ||
| (0.398) | (0.177) | |||
| Share aged <20 | -2.716*** | -0.791*** | ||
| (0.640) | (0.224) | |||
| Num.Obs. | 94 | 94 | 1410 | 1410 |
| R2 | 0.015 | 0.508 | 0.998 | 0.998 |
| R2 Within | 0.100 | 0.155 | ||
| Std.Errors | IID | IID | by: dpt | by: dpt |
| FE: dpt^source | X | X | ||
| FE: dpt | X | X | ||
| FE: year | X | X | ||
filo <- fread(file.path(PROJ, "data/filosofi_panel_2013_2020.csv"),
colClasses = list(character = "dpt"))
panel_pov <- build_dept_panel(filo[, .(dpt, year, poverty_frac = poverty_rate/100)],
"poverty_frac")
xs_pov <- panel_pov[year == max(year)]
models_pov <- list(
"(1) XS raw" = feols(y ~ share_strict, data = xs_pov, weights = ~popw),
"(2) XS + covariates" = feols(y ~ share_strict + log_density + share_60plus + share_under20,
data = xs_pov, weights = ~popw),
"(3) Panel FEs" = feols(y ~ share_strict | dpt + year, data = panel_pov,
weights = ~popw, cluster = ~dpt),
"(4) Panel + covariates"= feols(y ~ share_strict + log_density + share_60plus + share_under20 |
dpt + year, data = panel_pov, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_pov,
stars = c("*"=0.05, "**"=0.01, "***"=0.001),
coef_omit = "Intercept",
coef_rename = c(share_strict = "Muslim/African names share",
log_density = "log(pop density)",
share_60plus = "Share aged 60+",
share_under20 = "Share aged <20"),
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
title = "Poverty rate (0-1 fraction) — département 2013-2020 panel",
notes = list(stars_note))
| (1) XS raw | (2) XS + covariates | (3) Panel FEs | (4) Panel + covariates | |
|---|---|---|---|---|
| * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||
| Stars: * p<0.05, ** p<0.01, *** p<0.001. | ||||
| Muslim/African names share | 0.204*** | 0.315*** | 0.057** | 0.046** |
| (0.035) | (0.057) | (0.019) | (0.016) | |
| log(pop density) | 0.006 | 0.051 | ||
| (0.004) | (0.028) | |||
| Share aged 60+ | 0.536** | 0.042 | ||
| (0.164) | (0.142) | |||
| Share aged <20 | 0.179 | 0.475* | ||
| (0.263) | (0.199) | |||
| Num.Obs. | 94 | 94 | 752 | 752 |
| R2 | 0.270 | 0.415 | 0.994 | 0.995 |
| R2 Within | 0.033 | 0.137 | ||
| Std.Errors | IID | IID | by: dpt | by: dpt |
| FE: dpt | X | X | ||
| FE: year | X | X | ||
unemp <- fread(file.path(PROJ, "data/unemployment_dpt_year.csv"),
colClasses = list(character = "dpt"))
panel_une <- build_dept_panel(unemp[, .(dpt, year, unemp_frac = unemp/100)],
"unemp_frac")
xs_une <- panel_une[year == max(year)]
models_une <- list(
"(1) XS raw" = feols(y ~ share_strict, data = xs_une, weights = ~popw),
"(2) XS + covariates" = feols(y ~ share_strict + log_density + share_60plus + share_under20,
data = xs_une, weights = ~popw),
"(3) Panel FEs" = feols(y ~ share_strict | dpt + year, data = panel_une,
weights = ~popw, cluster = ~dpt),
"(4) Panel + covariates"= feols(y ~ share_strict + log_density + share_60plus + share_under20 |
dpt + year, data = panel_une, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_une,
stars = c("*"=0.05, "**"=0.01, "***"=0.001),
coef_omit = "Intercept",
coef_rename = c(share_strict = "Muslim/African names share",
log_density = "log(pop density)",
share_60plus = "Share aged 60+",
share_under20 = "Share aged <20"),
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
title = "Unemployment rate (0-1 fraction) — département 1982-2025 panel",
notes = list(stars_note))
| (1) XS raw | (2) XS + covariates | (3) Panel FEs | (4) Panel + covariates | |
|---|---|---|---|---|
| * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||
| Stars: * p<0.05, ** p<0.01, *** p<0.001. | ||||
| Muslim/African names share | 0.054*** | 0.097*** | 0.091*** | 0.089*** |
| (0.015) | (0.027) | (0.011) | (0.014) | |
| log(pop density) | 0.000 | -0.010 | ||
| (0.002) | (0.011) | |||
| Share aged 60+ | 0.214** | 0.166* | ||
| (0.076) | (0.066) | |||
| Share aged <20 | 0.233 | 0.207* | ||
| (0.132) | (0.083) | |||
| Num.Obs. | 94 | 94 | 4042 | 4042 |
| R2 | 0.126 | 0.274 | 0.884 | 0.888 |
| R2 Within | 0.110 | 0.139 | ||
| Std.Errors | IID | IID | by: dpt | by: dpt |
| FE: dpt | X | X | ||
| FE: year | X | X | ||
imm <- fread(file.path(PROJ, "data/commune/immigre_commune_year_full.csv"),
colClasses = list(character = c("codgeo","dpt")))
imm[, dpt := as.character(dpt)]
filo_com <- fread(file.path(PROJ, "data/commune/filosofi_commune_panel.csv"),
colClasses = list(character = "codgeo"))
educ_com <- fread(file.path(PROJ, "data/commune/education_com_year.csv"),
colClasses = list(character = c("codgeo","dpt")))
unemp_com <- fread(file.path(PROJ, "data/commune/unemp_commune_panel.csv"),
colClasses = list(character = "codgeo"))
age_xs <- fread(file.path(PROJ, "data/commune/age_struct_commune.csv"),
colClasses = list(character = "codgeo"))
age_p <- fread(file.path(PROJ, "data/commune/age_struct_commune_panel.csv"),
colClasses = list(character = "codgeo"))
area <- fread(file.path(PROJ, "data/commune/commune_area.csv"),
colClasses = list(character = "codgeo"))
imm <- merge(imm, area, by = "codgeo")
imm[, log_density := log(pop / area_km2)]
snap_age <- function(yr) c(2010, 2015, 2021)[which.min(abs(c(2010, 2015, 2021) - yr))]
build_com <- function(y_df, y_var, cross_year) {
y_df <- copy(y_df); setnames(y_df, y_var, "y")
m <- merge(y_df[, .(codgeo, year, y)],
imm[, .(codgeo, dpt, year, share_imm, pop, log_density)],
by = c("codgeo","year"))
cs <- merge(m[year == cross_year], age_xs, by = "codgeo", all.x = TRUE)
cs <- cs[!is.na(y) & !is.na(share_imm) & !is.na(log_density) &
!is.na(share_under15) & !is.na(share_60plus) & pop > 0 & !dpt %in% DROM]
m[, age_yr := sapply(year, snap_age)]
pm <- merge(m, age_p, by.x = c("codgeo","age_yr"), by.y = c("codgeo","year"))
pm <- pm[!is.na(y) & !is.na(share_imm) & !is.na(log_density) &
!is.na(share_under15) & !is.na(share_60plus) & pop > 0 & !dpt %in% DROM]
list(panel = pm, cross = cs)
}
inc_b <- build_com(filo_com[, .(codgeo, year, y = log(median_inc))], "y", 2020)
pov_b <- build_com(filo_com[, .(codgeo, year, y = poverty_rate/100)], "y", 2020)
une_b <- build_com(unemp_com[, .(codgeo, year, y = unemp_frac)], "y", 2021)
edu_b <- build_com(educ_com[year >= 2010, .(codgeo, year, y = share_higher_ed)], "y", 2022)
fit6 <- function(b) list(
"(1) Raw cross-section" = feols(y ~ share_imm, data = b$cross, weights = ~pop),
"(2) + Dept FE" = feols(y ~ share_imm | dpt, data = b$cross,
weights = ~pop, cluster = ~dpt),
"(3) + Dept FE + log(density)" = feols(y ~ share_imm + log_density | dpt, data = b$cross,
weights = ~pop, cluster = ~dpt),
"(4) + Dept FE + log(d) + age" = feols(y ~ share_imm + log_density + share_60plus + share_under15 | dpt,
data = b$cross, weights = ~pop, cluster = ~dpt),
"(5) Commune FE + Year FE" = feols(y ~ share_imm | codgeo + year, data = b$panel,
weights = ~pop, cluster = ~dpt),
"(6) + log(density) + age" = feols(y ~ share_imm + log_density + share_60plus + share_under15 |
codgeo + year, data = b$panel,
weights = ~pop, cluster = ~dpt)
)
m_inc <- fit6(inc_b); m_pov <- fit6(pov_b); m_une <- fit6(une_b); m_edu <- fit6(edu_b)
fmt_cell <- function(b, s, p) {
star <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
paste0(sprintf("%.3f%s", b, star),
"<br><span style='color:#666;font-size:90%;'>(",
sprintf("%.3f", s), ")</span>")
}
get_cells <- function(ml) sapply(ml, function(fit) {
b <- coef(fit)["share_imm"]; s <- se(fit)["share_imm"]
p <- 2 * (1 - pnorm(abs(b/s)))
fmt_cell(b, s, p)
})
models_lab <- c("(1) Raw cross-section","(2) + Dept FE","(3) + Dept FE + log(density)",
"(4) + Dept FE + log(d) + age","(5) Commune FE + Year FE",
"(6) + log(density) + age")
tab <- data.table(
Specification = models_lab,
`log(income)` = as.character(get_cells(m_inc)),
`Poverty rate` = as.character(get_cells(m_pov)),
`Unemployment rate` = as.character(get_cells(m_une)),
`Higher-ed share` = as.character(get_cells(m_edu))
)
gt(tab) |>
fmt_markdown(columns = 2:5) |>
cols_align(align = "center", columns = 2:5) |>
tab_header(title = "Commune-level effect of foreign-born share on SES outcomes",
subtitle = "Coefficient on share_imm only. Pop-weighted; SEs dept-clustered.") |>
tab_source_note("Each cell: β (SE). Stars: * p<0.05, ** p<0.01, *** p<0.001.")
| Commune-level effect of foreign-born share on SES outcomes | ||||
| Coefficient on share_imm only. Pop-weighted; SEs dept-clustered. | ||||
| Specification | log(income) | Poverty rate | Unemployment rate | Higher-ed share |
|---|---|---|---|---|
| (1) Raw cross-section | -0.343*** (0.011) |
0.526*** (0.011) |
0.243*** (0.003) |
0.525*** (0.008) |
| (2) + Dept FE | -1.440*** (0.147) |
1.001*** (0.053) |
0.484*** (0.025) |
0.020 (0.111) |
| (3) + Dept FE + log(density) | -1.640*** (0.209) |
0.953*** (0.061) |
0.376*** (0.026) |
-0.536*** (0.115) |
| (4) + Dept FE + log(d) + age | -1.613*** (0.195) |
0.954*** (0.058) |
0.406*** (0.026) |
-0.577*** (0.095) |
| (5) Commune FE + Year FE | -0.458*** (0.064) |
0.395*** (0.045) |
0.095** (0.035) |
0.192* (0.076) |
| (6) + log(density) + age | -0.442*** (0.064) |
0.374*** (0.047) |
0.129*** (0.031) |
0.034 (0.081) |
| Each cell: β (SE). Stars: * p<0.05, ** p<0.01, *** p<0.001. | ||||
e6 <- fread(file.path(PROJ, "data/depp/eval_6eme_dept.csv"), sep = ";")
setnames(e6, "code_departement", "dpt")
e6[, dpt := as.character(dpt)]
e6e <- e6[caracteristique == "Ensemble" & dpt != ""]
e6w <- dcast(e6e[, .(dpt, year = annee, discipline, score_moyen)],
dpt + year ~ discipline, value.var = "score_moyen")
setnames(e6w, c("Mathématiques","Français"), c("math_score","french_score"))
e6w[, score_avg := (math_score + french_score) / 2]
e6w[, iq := 100 + (score_avg - 258) * (15/50)]
# Pooled cross-section (10y MA × 8y mean IQ)
ma_pool <- shares[year %in% 2015:2024,
.(share_strict = sum(maghrebi_arab + sub_saharan + turkish) / sum(total)), by = dpt]
iq_pool <- e6w[year %in% 2017:2024, .(iq = mean(iq, na.rm = TRUE)), by = dpt]
d20 <- dens[year == 2020, .(dpt, log_density, popw = pop)]
a20 <- pop[year == 2021, .(dpt, share_60plus)]
cs6 <- merge(merge(merge(merge(ma_pool, iq_pool, by = "dpt"), d20, by = "dpt"),
a20, by = "dpt"), nm_dt, by = "dpt", all.x = TRUE)
cs6 <- cs6[!dpt %in% DROM]
panel6 <- merge(merge(merge(e6w[, .(dpt, year, iq)],
shares[, .(dpt, year, share_strict)], by = c("dpt","year")),
dens[, .(dpt, year, log_density, popw = pop)], by = c("dpt","year")),
pop[, .(dpt, year, share_60plus)], by = c("dpt","year"))
panel6 <- panel6[!dpt %in% DROM & !is.na(iq) & !is.na(log_density) & !is.na(share_60plus)]
models_iq <- list(
"(1) Cross-sec, raw" = feols(iq ~ share_strict, data = cs6, weights = ~popw),
"(2) Cross-sec + log_density" = feols(iq ~ share_strict + log_density, data = cs6, weights = ~popw),
"(3) Cross-sec + log_dens + age" = feols(iq ~ share_strict + log_density + share_60plus, data = cs6, weights = ~popw),
"(4) Dept FE + Year FE, raw" = feols(iq ~ share_strict | dpt + year, data = panel6, weights = ~popw, cluster = ~dpt),
"(5) + log_density" = feols(iq ~ share_strict + log_density | dpt + year, data = panel6, weights = ~popw, cluster = ~dpt),
"(6) + log_dens + age" = feols(iq ~ share_strict + log_density + share_60plus | dpt + year, data = panel6, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_iq,
stars = c("*"=0.05, "**"=0.01, "***"=0.001),
coef_omit = "Intercept",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
title = "6e cognitive score (IQ-like, mean=100, SD=15) — département analysis",
notes = list(stars_note,
"Cross-section uses 10y pooled MA share + 8y mean IQ (2017-2024).",
"Score scale: native French ≈ 100, SD ≈ 15. Rescaled from raw DEPP score (mean ~258, SD ~50)."))
| (1) Cross-sec, raw | (2) Cross-sec + log_density | (3) Cross-sec + log_dens + age | (4) Dept FE + Year FE, raw | (5) + log_density | (6) + log_dens + age | |
|---|---|---|---|---|---|---|
| * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||
| Stars: * p<0.05, ** p<0.01, *** p<0.001. | ||||||
| Cross-section uses 10y pooled MA share + 8y mean IQ (2017-2024). | ||||||
| Score scale: native French ≈ 100, SD ≈ 15. Rescaled from raw DEPP score (mean ~258, SD ~50). | ||||||
| share_strict | -3.728 | -19.197*** | -20.938*** | -5.383** | -5.340** | -6.797*** |
| (2.611) | (2.951) | (3.266) | (1.852) | (1.730) | (1.721) | |
| log_density | 1.247*** | 1.143*** | 5.659 | 2.530 | ||
| (0.169) | (0.189) | (6.661) | (6.568) | |||
| share_60plus | -7.713 | -54.142* | ||||
| (6.278) | (21.605) | |||||
| Num.Obs. | 94 | 94 | 94 | 752 | 752 | 752 |
| R2 | 0.022 | 0.388 | 0.398 | 0.972 | 0.973 | 0.974 |
| R2 Within | 0.015 | 0.037 | 0.093 | |||
| Std.Errors | IID | IID | IID | by: dpt | by: dpt | by: dpt |
| FE: dpt | X | X | X | |||
| FE: year | X | X | X | |||
fit <- lm(iq ~ share_strict + log_density, data = cs6, weights = popw)
beta_x <- coef(fit)["share_strict"]
mean_logd <- weighted.mean(cs6$log_density, cs6$popw)
cs6[, partial_iq := residuals(fit) + beta_x * share_strict +
coef(fit)["(Intercept)"] + coef(fit)["log_density"] * mean_logd]
cs6[, lab := ifelse(frank(-share_strict) <= 8 | frank(share_strict) <= 4 |
frank(-popw) <= 6 |
frank(-abs(partial_iq - mean(partial_iq))) <= 4,
name, NA_character_)]
ggplot(cs6, aes(share_strict, partial_iq)) +
geom_smooth(method = "lm", aes(weight = popw), formula = y ~ x, se = TRUE,
color = "steelblue", alpha = 0.15) +
geom_point(aes(size = popw), alpha = 0.8, color = "darkred") +
geom_text_repel(aes(label = lab), size = 3.0, max.overlaps = Inf, seed = 1,
min.segment.length = 0, segment.alpha = 0.4,
box.padding = 0.45, point.padding = 0.25, force = 2.5) +
scale_size_continuous(range = c(1, 8), guide = "none") +
scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "6e cognitive score vs Muslim/African names share — partial residuals",
subtitle = sprintf("Mainland départements, n = %d | β = %+.1f IQ pts (controlling log density)",
nrow(cs6), beta_x),
x = "Muslim/African names share (10-year pool, 2015-2024)",
y = "6e cognitive score (rescaled, French native ≈ 100, SD ≈ 15)\nadjusted for population density",
caption = "Component-plus-residual plot. DEPP 6e (2017-2024 mean), INSEE prénoms strict. Pop-weighted.") +
theme_minimal(base_size = 12) +
theme(plot.background = element_rect(fill = "white", color = NA),
plot.title = element_text(face = "bold", size = 12))
jdc <- fread(file.path(PROJ, "data/depp/jdc_dept_panel.csv"),
colClasses = list(character = "dpt"))
jdc[, p := diff_ens / 100]
jdc[, latent := -qnorm(p)]
jdc_pool <- jdc[, .(latent_avg = mean(latent, na.rm = TRUE)), by = dpt]
mainland_mean_2024 <- jdc[year == 2024 & !dpt %in% DROM, mean(latent, na.rm = TRUE)]
jdc_pool[, iq := 100 + (latent_avg - mainland_mean_2024) * 15]
m_jdc <- merge(merge(merge(merge(ma_pool, jdc_pool, by = "dpt"),
d20, by = "dpt"), a20, by = "dpt"), nm_dt, by = "dpt", all.x = TRUE)
m_jdc <- m_jdc[!dpt %in% DROM]
mods_jdc <- list(
"raw" = feols(iq ~ share_strict, data = m_jdc, weights = ~popw),
"+ log_density" = feols(iq ~ share_strict + log_density, data = m_jdc, weights = ~popw),
"+ log_dens + age" = feols(iq ~ share_strict + log_density + share_60plus, data = m_jdc, weights = ~popw)
)
modelsummary(mods_jdc,
stars = c("*"=0.05, "**"=0.01, "***"=0.001),
coef_omit = "Intercept",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
title = "JDC literacy (latent IQ score, 7-year mean 2014-2024) — département cross-section",
notes = list(stars_note,
"JDC % in difficulty inverse-normal-transformed → latent score on IQ scale (mean=100, SD=15)."))
| raw | + log_density | + log_dens + age | |
|---|---|---|---|
| * p < 0.05, ** p < 0.01, *** p < 0.001 | |||
| Stars: * p<0.05, ** p<0.01, *** p<0.001. | |||
| JDC % in difficulty inverse-normal-transformed → latent score on IQ scale (mean=100, SD=15). | |||
| share_strict | 2.328 | -10.994*** | -12.498*** |
| (2.228) | (2.505) | (2.772) | |
| log_density | 1.074*** | 0.984*** | |
| (0.144) | (0.160) | ||
| share_60plus | -6.665 | ||
| (5.328) | |||
| Num.Obs. | 94 | 94 | 94 |
| R2 | 0.012 | 0.388 | 0.399 |
| Std.Errors | IID | IID | IID |
cr <- fread(file.path(PROJ, "data/crime/crime_dept.csv"), sep = ";", dec = ",",
colClasses = list(character = c("Code_departement","Code_region","annee")))
cr[, year := as.integer(annee)]
setnames(cr, c("Code_departement","indicateur","taux_pour_mille","insee_pop"),
c("dpt","ind","rate","pop_v"))
pool <- cr[year %in% 2020:2024,
.(rate_avg = mean(rate, na.rm = TRUE), pop = mean(pop_v)), by = .(dpt, ind)]
m_crime <- merge(merge(pool, ma_pool, by = "dpt"),
dens[year == 2020, .(dpt, log_density)], by = "dpt")
m_crime <- m_crime[!dpt %in% DROM]
results <- m_crime[, {
fit_d <- lm(rate_avg ~ share_strict + log_density, data = .SD, weights = pop)
b <- coef(fit_d)["share_strict"]
s <- summary(fit_d)$coefficients["share_strict","Std. Error"]
p <- 2 * (1 - pnorm(abs(b/s)))
star <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
mean_r <- weighted.mean(rate_avg, pop)
.(`Mean rate /1000` = round(mean_r, 2),
`β per fraction` = sprintf("%+.2f%s", b, star),
`% of mean` = sprintf("%+.0f%%", b/mean_r*100))
}, by = ind][order(-as.numeric(gsub("[%+]","", `% of mean`)))]
results
chosen <- c("Homicides", "Tentatives d'homicide", "Vols avec armes")
vc_pool <- cr[year %in% 2020:2024 & ind %in% chosen,
.(rate = sum(rate)), by = .(dpt, year)
][, .(rate_avg = mean(rate, na.rm = TRUE)), by = dpt]
poppy <- cr[year == 2024 & ind == "Homicides", .(dpt, pop = pop_v)]
m_vc <- merge(merge(merge(merge(vc_pool, ma_pool, by = "dpt"),
poppy, by = "dpt"),
dens[year == 2020, .(dpt, log_density)], by = "dpt"),
nm_dt, by = "dpt", all.x = TRUE)
m_vc <- m_vc[!dpt %in% DROM]
fit_vc <- lm(rate_avg ~ share_strict + log_density, data = m_vc, weights = pop)
beta_x <- coef(fit_vc)["share_strict"]
mean_logd <- weighted.mean(m_vc$log_density, m_vc$pop)
m_vc[, partial_y := residuals(fit_vc) + beta_x * share_strict +
coef(fit_vc)["(Intercept)"] + coef(fit_vc)["log_density"] * mean_logd]
m_vc[, lab := ifelse(frank(-share_strict) <= 8 | frank(share_strict) <= 4 |
frank(-pop) <= 6 |
frank(-abs(partial_y - mean(partial_y))) <= 4,
name, NA_character_)]
ggplot(m_vc, aes(share_strict, partial_y)) +
geom_smooth(method = "lm", aes(weight = pop), formula = y ~ x, se = TRUE,
color = "steelblue", alpha = 0.15) +
geom_point(aes(size = pop), alpha = 0.8, color = "darkred") +
geom_text_repel(aes(label = lab), size = 3.0, max.overlaps = Inf, seed = 1,
min.segment.length = 0, segment.alpha = 0.4,
box.padding = 0.45, point.padding = 0.25, force = 2.5) +
scale_size_continuous(range = c(1, 8), guide = "none") +
scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Serious violent crime (homicides + attempted + armed robbery) vs Muslim/African names",
subtitle = sprintf("Mainland départements, n = %d | β = %+.2f per 1,000 pop / fraction (controlling log density)",
nrow(m_vc), beta_x),
x = "Muslim/African names share (10-year pool, 2015-2024)",
y = "Serious violent crime rate per 1,000 (mean 2020-2024)\nadjusted for population density",
caption = "Source: SSMSI (Ministère de l'Intérieur). Gold-standard low-reporting-bias indicators.") +
theme_minimal(base_size = 12) +
theme(plot.background = element_rect(fill = "white", color = NA),
plot.title = element_text(face = "bold", size = 12))
ref <- data.table(
Group = c("Native (no migrant ancestry)",
"All migrants (1st gen)",
" Africa-born", " Europe-born", " Asia-born",
"All 2nd-gen",
" Africa 2nd-gen", " Europe 2nd-gen", " Asia 2nd-gen"),
`Median age` = c("42", "~46", "~44", "~52", "~44", "26", "18", "50", "17"),
`Median income €` = c("22,880", "17,000", "14,850", "20,480", "16,070",
"19,970", "17,760", "22,900", "18,470"),
`Poverty %` = c("11.1", "31.5", "39.2", "19.5", "36.4",
"21.7", "26.7", "11.9", "31.3"),
`Unemployment %` = c("7", "13", "15", "~9", "11",
"12", "16", "8", "11"),
`Higher-ed %` = c("41", "32", "28", "37", "32",
"38", "37", "36", "50"),
`% no diploma` = c("16", "38", "42", "29", "43",
"18", "21", "16", "16")
)
gt(ref) |>
tab_header(title = "Individual-level SES outcomes by migrant origin and generation",
subtitle = "Source: INSEE Insee Références 2023 — Immigrés et descendants d'immigrés en France") |>
cols_align(align = "left", columns = "Group") |>
cols_align(align = "center", columns = 2:7) |>
tab_source_note("Income & poverty: 2019. Unemployment & higher-ed: 2021 (ages 30-64).") |>
tab_source_note("Severe age confound for 2nd-gen: Europe median age 50 (children of 1950s-70s labor migration) vs Africa/Asia median 17-18 (children of recent migrants).")
| Individual-level SES outcomes by migrant origin and generation | ||||||
| Source: INSEE Insee Références 2023 — Immigrés et descendants d'immigrés en France | ||||||
| Group | Median age | Median income € | Poverty % | Unemployment % | Higher-ed % | % no diploma |
|---|---|---|---|---|---|---|
| Native (no migrant ancestry) | 42 | 22,880 | 11.1 | 7 | 41 | 16 |
| All migrants (1st gen) | ~46 | 17,000 | 31.5 | 13 | 32 | 38 |
| Africa-born | ~44 | 14,850 | 39.2 | 15 | 28 | 42 |
| Europe-born | ~52 | 20,480 | 19.5 | ~9 | 37 | 29 |
| Asia-born | ~44 | 16,070 | 36.4 | 11 | 32 | 43 |
| All 2nd-gen | 26 | 19,970 | 21.7 | 12 | 38 | 18 |
| Africa 2nd-gen | 18 | 17,760 | 26.7 | 16 | 37 | 21 |
| Europe 2nd-gen | 50 | 22,900 | 11.9 | 8 | 36 | 16 |
| Asia 2nd-gen | 17 | 18,470 | 31.3 | 11 | 50 | 16 |
| Income & poverty: 2019. Unemployment & higher-ed: 2021 (ages 30-64). | ||||||
| Severe age confound for 2nd-gen: Europe median age 50 (children of 1950s-70s labor migration) vs Africa/Asia median 17-18 (children of recent migrants). | ||||||
sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.3.2.9000 scales_1.4.0 readxl_1.4.5
## [4] sf_1.0-21 ggrepel_0.9.6 ggplot2_4.0.1.9000
## [7] gt_1.3.0 modelsummary_2.5.0 fixest_0.13.2
## [10] data.table_1.17.8
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 dplyr_1.1.4 farver_2.1.2
## [4] S7_0.2.1 fastmap_1.2.0 TH.data_1.1-4
## [7] bayestestR_0.17.0 digest_0.6.39 estimability_1.5.1
## [10] lifecycle_1.0.4 dreamerr_1.5.0 survival_3.8-6
## [13] magrittr_2.0.4 compiler_4.5.3 rlang_1.1.6.9000
## [16] sass_0.4.10 tools_4.5.3 yaml_2.3.10
## [19] knitr_1.50 labeling_0.4.3 classInt_0.4-11
## [22] xml2_1.4.0 RColorBrewer_1.1-3 multcomp_1.4-28
## [25] KernSmooth_2.23-26 tinytable_0.14.0 withr_3.0.2
## [28] numDeriv_2016.8-1.1 datawizard_1.3.0 grid_4.5.3
## [31] xtable_1.8-4 e1071_1.7-16 future_1.67.0
## [34] globals_0.18.0 emmeans_1.11.2-8 MASS_7.3-65
## [37] insight_1.4.2 cli_3.6.5 mvtnorm_1.3-3
## [40] rmarkdown_2.30 ragg_1.5.0 generics_0.1.4
## [43] performance_0.15.2 future.apply_1.20.0 commonmark_2.0.0
## [46] parameters_0.28.2 DBI_1.2.3 cachem_1.1.0
## [49] proxy_0.4-27 splines_4.5.3 parallel_4.5.3
## [52] effectsize_1.0.1 cellranger_1.1.0 stringmagic_1.2.0
## [55] vctrs_0.6.5 Matrix_1.7-4 sandwich_3.1-1
## [58] jsonlite_2.0.0 litedown_0.9 Formula_1.2-5
## [61] listenv_0.9.1 systemfonts_1.3.1 jquerylib_0.1.4
## [64] units_1.0-0 parallelly_1.45.1 glue_1.8.0
## [67] codetools_0.2-20 gtable_0.3.6 tables_0.9.31
## [70] tibble_3.3.0 pillar_1.11.1 htmltools_0.5.8.1
## [73] R6_2.6.1 textshaping_1.0.4 evaluate_1.0.5
## [76] lattice_0.22-7 markdown_2.0 backports_1.5.0
## [79] bslib_0.9.0 class_7.3-23 Rcpp_1.1.0
## [82] coda_0.19-4.1 nlme_3.1-168 checkmate_2.3.3
## [85] mgcv_1.9-3 xfun_0.57 fs_1.6.6
## [88] zoo_1.8-14 pkgconfig_2.0.3