Project description

This document replicates all tables, maps, and figures from Race and social inequality in France (Kirkegaard 2026). It uses publicly available INSEE / DEPP / SSMSI data — no restricted-access files.

Data inputs:

  • INSEE prénoms file (1900-2024) — names by département × year
  • INSEE base-cc / dossier complet — commune-level immigré share
  • INSEE FiLoSoFi (2013-2020) + RFL (2006-2011) — income/poverty
  • INSEE Saphir RP — unemployment, education
  • DEPP évaluations 6e (school + département) — age-11 cognitive
  • DEPP JDC (Journée Défense Citoyenneté) 2014-2024 — age-17 literacy
  • SSMSI — département-level crime rates

See data/SOURCES.md for download URLs.

Setup: load core panels

shares <- fread(file.path(PROJ, "data/shares_dpt_year.csv"),
                colClasses = list(character = "dpt"))
shares[, share_strict := share_maghrebi_arab + share_sub_saharan + share_turkish]

pop <- fread(file.path(PROJ, "data/pop_age_dpt_year.csv"),
             colClasses = list(character = "dpt"))
dens <- fread(file.path(PROJ, "data/pop_density_dpt_year.csv"),
              colClasses = list(character = "dpt"))
nm_dt <- fread(file.path(PROJ, "data/dept_area.csv"),
               colClasses = list(character = "dpt"))[, .(dpt, name)]

1. Muslim/African names share by département (2012-2021 pool)

gdf <- st_read(file.path(PROJ, "data/depts.geojson"), quiet = TRUE)
names(gdf)[names(gdf) == "code"] <- "dpt"

pooled <- shares[year %in% 2012:2021,
  .(share_pct = sum(maghrebi_arab + sub_saharan + turkish) / sum(total) * 100),
  by = dpt]

gdf <- merge(gdf, pooled, by = "dpt", all.x = TRUE)
gdf_metro <- gdf[!startsWith(gdf$dpt, "97"), ]

CAPITAL_REGION <- c("75","77","78","91","92","93","94","95")
PETITE_COURONNE <- c("75","92","93","94")
VMIN <- 0; VMAX <- 60

# Centroid coordinates for label placement (use representative_point for non-convex shapes)
rep_pts <- st_coordinates(st_point_on_surface(st_zm(gdf_metro)))
gdf_metro$cx <- rep_pts[, 1]
gdf_metro$cy <- rep_pts[, 2]

# Main mainland map — labels for non-petite-couronne dépts
main_labels <- gdf_metro[!gdf_metro$dpt %in% PETITE_COURONNE & !is.na(gdf_metro$share_pct), ]

p_main <- ggplot(gdf_metro) +
  geom_sf(aes(fill = share_pct), color = "grey60", linewidth = 0.3) +
  scale_fill_gradient(low = "#ffffcc", high = "#bd0026",
                      limits = c(VMIN, VMAX), na.value = "grey90",
                      name = "% Muslim/African\nnewborn names\n(strict, 2012-2021)") +
  geom_text(data = main_labels,
            aes(x = cx, y = cy, label = sprintf("%.0f%%", share_pct)),
            size = 1.9, color = "black",
            label.size = 0) +
  coord_sf(expand = FALSE) +
  labs(title = "Muslim/African newborn names share by département, pooled 2012-2021",
       subtitle = "(strict: maghrebi + sub-Saharan + Turkish; INSEE prénoms file)") +
  theme_void(base_size = 11) +
  theme(plot.title = element_text(face = "bold"),
        plot.background = element_rect(fill = "white", color = NA),
        legend.position = c(0.92, 0.55))

# Capital region inset
g_idf <- gdf_metro[gdf_metro$dpt %in% CAPITAL_REGION, ]
inset <- ggplot(g_idf) +
  geom_sf(aes(fill = share_pct), color = "black", linewidth = 0.4) +
  scale_fill_gradient(low = "#ffffcc", high = "#bd0026",
                      limits = c(VMIN, VMAX), guide = "none") +
  geom_text(aes(x = cx, y = cy, label = sprintf("%.0f%%", share_pct)),
            size = 2.6, color = "black") +
  coord_sf(expand = FALSE) +
  labs(title = "Capital region (Île-de-France)") +
  theme_void(base_size = 9) +
  theme(plot.title = element_text(size = 9, hjust = 0.5),
        plot.background = element_rect(fill = "white", color = "grey50"),
        plot.margin = margin(2, 2, 2, 2))

# Compose with cowplot::ggdraw or patchwork inset
library(patchwork)
final_map <- p_main + inset_element(inset, left = 0, bottom = 0.62, right = 0.27, top = 1.0)
ggsave(file.path(PROJ, "figs/map_dept_extra_eur_2012_2021.png"), final_map,
       w = 12, h = 10, dpi = 180, bg = "white")
final_map

2. Names time series for major city départements (1900-2024)

cities <- data.table(
  dpt  = c("75","93","13","69","31","59","06","67","33","44"),
  city = c("Paris (75)","Seine-Saint-Denis (93)","Marseille (13)","Lyon (69)",
           "Toulouse (31)","Lille / Nord (59)","Nice (06)","Strasbourg (67)",
           "Bordeaux (33)","Nantes (44)")
)
plotd <- merge(shares[dpt %in% cities$dpt, .(dpt, year, share_strict)],
               cities, by = "dpt")
plotd[, city := factor(city, levels = cities$city)]

p <- ggplot(plotd, aes(year, share_strict, color = city)) +
  geom_line(linewidth = 0.6, alpha = 0.85) +
  scale_x_continuous(breaks = seq(1900, 2020, 20)) +
  scale_y_continuous(breaks = seq(0, 0.5, 0.1), limits = c(0, 0.5)) +
  scale_color_brewer(palette = "Paired") +
  labs(title = "Strict Muslim/African newborn names share, 1900-2024",
       subtitle = "Major city départements (INSEE prénoms file)",
       x = NULL,
       y = "Muslim/African names share (0-1 fraction)",
       color = NULL) +
  theme_minimal(base_size = 12) +
  theme(plot.background = element_rect(fill = "white", color = NA),
        legend.position = "right")
ggsave(file.path(PROJ, "figs/timeseries_names_cities.png"), p,
       w = 12, h = 6.5, dpi = 180, bg = "white")
p

3. Cross-section: dept-level scatters (raw, then partial residuals)

build_dept_panel <- function(y_df, y_var, log_y = FALSE) {
  y_df <- copy(y_df)
  setnames(y_df, y_var, "y")
  if (log_y) y_df[, y := log(y)]
  base <- y_df[!dpt %in% DROM]
  base <- merge(base, shares[, .(dpt, year, share_strict)], by = c("dpt","year"))
  base <- merge(base, dens[, .(dpt, year, log_density, popw = pop)],
                by = c("dpt","year"))
  base <- merge(base, pop[, .(dpt, year, share_60plus, share_under20)],
                by = c("dpt","year"), all.x = TRUE)
  base[!is.na(share_60plus) & !is.na(share_under20)]
}

stars_note <- "Stars: * p<0.05, ** p<0.01, *** p<0.001."

3a. Income (median niveau de vie, 2006-2020 panel)

inc15 <- fread(file.path(PROJ, "data/income_dept_panel_2006_2020.csv"),
               colClasses = list(character = "dpt"))
setnames(inc15, "regime", "source")
panel_inc <- merge(merge(merge(
  inc15[!dpt %in% DROM, .(dpt, year, source, log_inc = log(median_inc))],
  shares[, .(dpt, year, share_strict)], by = c("dpt","year")),
  dens[, .(dpt, year, log_density, popw = pop)], by = c("dpt","year")),
  pop[, .(dpt, year, share_60plus, share_under20)],
  by = c("dpt","year"), all.x = TRUE)
panel_inc <- panel_inc[!is.na(share_60plus) & !is.na(log_density)]
xs_inc <- panel_inc[year == max(year)]

models_inc <- list(
  "(1) XS, MA share only" = feols(log_inc ~ share_strict, data = xs_inc, weights = ~popw),
  "(2) XS + covariates"   = feols(log_inc ~ share_strict + log_density + share_60plus + share_under20,
                                  data = xs_inc, weights = ~popw),
  "(3) Panel FEs only"    = feols(log_inc ~ share_strict | dpt + year + dpt^source,
                                  data = panel_inc, weights = ~popw, cluster = ~dpt),
  "(4) Panel + covariates"= feols(log_inc ~ share_strict + log_density + share_60plus + share_under20 |
                                  dpt + year + dpt^source,
                                  data = panel_inc, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_inc,
  stars = c("*"=0.05, "**"=0.01, "***"=0.001),
  coef_omit = "Intercept",
  coef_rename = c(share_strict = "Muslim/African names share",
                  log_density = "log(pop density)",
                  share_60plus = "Share aged 60+",
                  share_under20 = "Share aged <20"),
  gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
  title = "Median income (log €) — département cross-section + 2006-2020 panel",
  notes = list(stars_note,
    "Income panel stitches RFL (2006-2011) + FiLoSoFi (2012-2020); Dept × source FE absorbs ~4% per-dept break in 2012."))
Median income (log €) — département cross-section + 2006-2020 panel
(1) XS, MA share only (2) XS + covariates (3) Panel FEs only (4) Panel + covariates
* p < 0.05, ** p < 0.01, *** p < 0.001
Stars: * p<0.05, ** p<0.01, *** p<0.001.
Income panel stitches RFL (2006-2011) + FiLoSoFi (2012-2020); Dept × source FE absorbs ~4% per-dept break in 2012.
Muslim/African names share 0.128 -0.345* -0.211*** -0.187***
(0.108) (0.139) (0.060) (0.051)
log(pop density) 0.010 0.037
(0.010) (0.048)
Share aged 60+ -2.028*** 0.172
(0.398) (0.177)
Share aged <20 -2.716*** -0.791***
(0.640) (0.224)
Num.Obs. 94 94 1410 1410
R2 0.015 0.508 0.998 0.998
R2 Within 0.100 0.155
Std.Errors IID IID by: dpt by: dpt
FE: dpt^source X X
FE: dpt X X
FE: year X X

3b. Poverty rate (FiLoSoFi 2013-2020)

filo <- fread(file.path(PROJ, "data/filosofi_panel_2013_2020.csv"),
              colClasses = list(character = "dpt"))
panel_pov <- build_dept_panel(filo[, .(dpt, year, poverty_frac = poverty_rate/100)],
                              "poverty_frac")
xs_pov <- panel_pov[year == max(year)]

models_pov <- list(
  "(1) XS raw"            = feols(y ~ share_strict, data = xs_pov, weights = ~popw),
  "(2) XS + covariates"   = feols(y ~ share_strict + log_density + share_60plus + share_under20,
                                  data = xs_pov, weights = ~popw),
  "(3) Panel FEs"         = feols(y ~ share_strict | dpt + year, data = panel_pov,
                                  weights = ~popw, cluster = ~dpt),
  "(4) Panel + covariates"= feols(y ~ share_strict + log_density + share_60plus + share_under20 |
                                  dpt + year, data = panel_pov, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_pov,
  stars = c("*"=0.05, "**"=0.01, "***"=0.001),
  coef_omit = "Intercept",
  coef_rename = c(share_strict = "Muslim/African names share",
                  log_density = "log(pop density)",
                  share_60plus = "Share aged 60+",
                  share_under20 = "Share aged <20"),
  gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
  title = "Poverty rate (0-1 fraction) — département 2013-2020 panel",
  notes = list(stars_note))
Poverty rate (0-1 fraction) — département 2013-2020 panel
(1) XS raw (2) XS + covariates (3) Panel FEs (4) Panel + covariates
* p < 0.05, ** p < 0.01, *** p < 0.001
Stars: * p<0.05, ** p<0.01, *** p<0.001.
Muslim/African names share 0.204*** 0.315*** 0.057** 0.046**
(0.035) (0.057) (0.019) (0.016)
log(pop density) 0.006 0.051
(0.004) (0.028)
Share aged 60+ 0.536** 0.042
(0.164) (0.142)
Share aged <20 0.179 0.475*
(0.263) (0.199)
Num.Obs. 94 94 752 752
R2 0.270 0.415 0.994 0.995
R2 Within 0.033 0.137
Std.Errors IID IID by: dpt by: dpt
FE: dpt X X
FE: year X X

3c. Unemployment rate (1982-2025)

unemp <- fread(file.path(PROJ, "data/unemployment_dpt_year.csv"),
               colClasses = list(character = "dpt"))
panel_une <- build_dept_panel(unemp[, .(dpt, year, unemp_frac = unemp/100)],
                              "unemp_frac")
xs_une <- panel_une[year == max(year)]

models_une <- list(
  "(1) XS raw"            = feols(y ~ share_strict, data = xs_une, weights = ~popw),
  "(2) XS + covariates"   = feols(y ~ share_strict + log_density + share_60plus + share_under20,
                                  data = xs_une, weights = ~popw),
  "(3) Panel FEs"         = feols(y ~ share_strict | dpt + year, data = panel_une,
                                  weights = ~popw, cluster = ~dpt),
  "(4) Panel + covariates"= feols(y ~ share_strict + log_density + share_60plus + share_under20 |
                                  dpt + year, data = panel_une, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_une,
  stars = c("*"=0.05, "**"=0.01, "***"=0.001),
  coef_omit = "Intercept",
  coef_rename = c(share_strict = "Muslim/African names share",
                  log_density = "log(pop density)",
                  share_60plus = "Share aged 60+",
                  share_under20 = "Share aged <20"),
  gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
  title = "Unemployment rate (0-1 fraction) — département 1982-2025 panel",
  notes = list(stars_note))
Unemployment rate (0-1 fraction) — département 1982-2025 panel
(1) XS raw (2) XS + covariates (3) Panel FEs (4) Panel + covariates
* p < 0.05, ** p < 0.01, *** p < 0.001
Stars: * p<0.05, ** p<0.01, *** p<0.001.
Muslim/African names share 0.054*** 0.097*** 0.091*** 0.089***
(0.015) (0.027) (0.011) (0.014)
log(pop density) 0.000 -0.010
(0.002) (0.011)
Share aged 60+ 0.214** 0.166*
(0.076) (0.066)
Share aged <20 0.233 0.207*
(0.132) (0.083)
Num.Obs. 94 94 4042 4042
R2 0.126 0.274 0.884 0.888
R2 Within 0.110 0.139
Std.Errors IID IID by: dpt by: dpt
FE: dpt X X
FE: year X X

3d. Higher-education share (1968-2022)

educ <- fread(file.path(PROJ, "data/education_dpt_year.csv"),
              colClasses = list(character = "dpt"))
panel_edu <- build_dept_panel(educ[, .(dpt, year, share_higher_ed)], "share_higher_ed")
xs_edu <- panel_edu[year == max(year)]

models_edu <- list(
  "(1) XS raw"            = feols(y ~ share_strict, data = xs_edu, weights = ~popw),
  "(2) XS + covariates"   = feols(y ~ share_strict + log_density + share_60plus + share_under20,
                                  data = xs_edu, weights = ~popw),
  "(3) Panel FEs"         = feols(y ~ share_strict | dpt + year, data = panel_edu,
                                  weights = ~popw, cluster = ~dpt),
  "(4) Panel + covariates"= feols(y ~ share_strict + log_density + share_60plus + share_under20 |
                                  dpt + year, data = panel_edu, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_edu,
  stars = c("*"=0.05, "**"=0.01, "***"=0.001),
  coef_omit = "Intercept",
  coef_rename = c(share_strict = "Muslim/African names share",
                  log_density = "log(pop density)",
                  share_60plus = "Share aged 60+",
                  share_under20 = "Share aged <20"),
  gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
  title = "Higher-education share (0-1 fraction) — département 1968-2022 (RP rounds)",
  notes = list(stars_note,
    "Caveat: RP rounds only (8 time points). Within-dept identification weak."))
Higher-education share (0-1 fraction) — département 1968-2022 (RP rounds)
(1) XS raw (2) XS + covariates (3) Panel FEs (4) Panel + covariates
* p < 0.05, ** p < 0.01, *** p < 0.001
Stars: * p<0.05, ** p<0.01, *** p<0.001.
Caveat: RP rounds only (8 time points). Within-dept identification weak.
Muslim/African names share 0.473*** -0.108 0.276* -0.011
(0.107) (0.098) (0.119) (0.120)
log(pop density) 0.022** 0.011
(0.007) (0.037)
Share aged 60+ -2.064*** -0.904**
(0.276) (0.323)
Share aged <20 -3.252*** 0.918*
(0.457) (0.351)
Num.Obs. 94 94 658 658
R2 0.175 0.812 0.958 0.979
R2 Within 0.092 0.543
Std.Errors IID IID by: dpt by: dpt
FE: dpt X X
FE: year X X

4. Commune-level analysis (compact 6-spec table)

imm <- fread(file.path(PROJ, "data/commune/immigre_commune_year_full.csv"),
             colClasses = list(character = c("codgeo","dpt")))
imm[, dpt := as.character(dpt)]
filo_com <- fread(file.path(PROJ, "data/commune/filosofi_commune_panel.csv"),
                  colClasses = list(character = "codgeo"))
educ_com <- fread(file.path(PROJ, "data/commune/education_com_year.csv"),
                  colClasses = list(character = c("codgeo","dpt")))
unemp_com <- fread(file.path(PROJ, "data/commune/unemp_commune_panel.csv"),
                   colClasses = list(character = "codgeo"))
age_xs <- fread(file.path(PROJ, "data/commune/age_struct_commune.csv"),
                colClasses = list(character = "codgeo"))
age_p <- fread(file.path(PROJ, "data/commune/age_struct_commune_panel.csv"),
               colClasses = list(character = "codgeo"))
area  <- fread(file.path(PROJ, "data/commune/commune_area.csv"),
               colClasses = list(character = "codgeo"))

imm <- merge(imm, area, by = "codgeo")
imm[, log_density := log(pop / area_km2)]
snap_age <- function(yr) c(2010, 2015, 2021)[which.min(abs(c(2010, 2015, 2021) - yr))]

build_com <- function(y_df, y_var, cross_year) {
  y_df <- copy(y_df); setnames(y_df, y_var, "y")
  m <- merge(y_df[, .(codgeo, year, y)],
             imm[, .(codgeo, dpt, year, share_imm, pop, log_density)],
             by = c("codgeo","year"))
  cs <- merge(m[year == cross_year], age_xs, by = "codgeo", all.x = TRUE)
  cs <- cs[!is.na(y) & !is.na(share_imm) & !is.na(log_density) &
           !is.na(share_under15) & !is.na(share_60plus) & pop > 0 & !dpt %in% DROM]
  m[, age_yr := sapply(year, snap_age)]
  pm <- merge(m, age_p, by.x = c("codgeo","age_yr"), by.y = c("codgeo","year"))
  pm <- pm[!is.na(y) & !is.na(share_imm) & !is.na(log_density) &
           !is.na(share_under15) & !is.na(share_60plus) & pop > 0 & !dpt %in% DROM]
  list(panel = pm, cross = cs)
}

inc_b <- build_com(filo_com[, .(codgeo, year, y = log(median_inc))], "y", 2020)
pov_b <- build_com(filo_com[, .(codgeo, year, y = poverty_rate/100)],  "y", 2020)
une_b <- build_com(unemp_com[, .(codgeo, year, y = unemp_frac)],       "y", 2021)
edu_b <- build_com(educ_com[year >= 2010, .(codgeo, year, y = share_higher_ed)], "y", 2022)

fit6 <- function(b) list(
  "(1) Raw cross-section"           = feols(y ~ share_imm, data = b$cross, weights = ~pop),
  "(2) + Dept FE"                   = feols(y ~ share_imm | dpt, data = b$cross,
                                            weights = ~pop, cluster = ~dpt),
  "(3) + Dept FE + log(density)"    = feols(y ~ share_imm + log_density | dpt, data = b$cross,
                                            weights = ~pop, cluster = ~dpt),
  "(4) + Dept FE + log(d) + age"    = feols(y ~ share_imm + log_density + share_60plus + share_under15 | dpt,
                                            data = b$cross, weights = ~pop, cluster = ~dpt),
  "(5) Commune FE + Year FE"        = feols(y ~ share_imm | codgeo + year, data = b$panel,
                                            weights = ~pop, cluster = ~dpt),
  "(6) + log(density) + age"        = feols(y ~ share_imm + log_density + share_60plus + share_under15 |
                                            codgeo + year, data = b$panel,
                                            weights = ~pop, cluster = ~dpt)
)

m_inc <- fit6(inc_b); m_pov <- fit6(pov_b); m_une <- fit6(une_b); m_edu <- fit6(edu_b)

fmt_cell <- function(b, s, p) {
  star <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
  paste0(sprintf("%.3f%s", b, star),
         "<br><span style='color:#666;font-size:90%;'>(",
         sprintf("%.3f", s), ")</span>")
}
get_cells <- function(ml) sapply(ml, function(fit) {
  b <- coef(fit)["share_imm"]; s <- se(fit)["share_imm"]
  p <- 2 * (1 - pnorm(abs(b/s)))
  fmt_cell(b, s, p)
})

models_lab <- c("(1) Raw cross-section","(2) + Dept FE","(3) + Dept FE + log(density)",
                "(4) + Dept FE + log(d) + age","(5) Commune FE + Year FE",
                "(6) + log(density) + age")
tab <- data.table(
  Specification = models_lab,
  `log(income)`       = as.character(get_cells(m_inc)),
  `Poverty rate`      = as.character(get_cells(m_pov)),
  `Unemployment rate` = as.character(get_cells(m_une)),
  `Higher-ed share`   = as.character(get_cells(m_edu))
)
gt(tab) |>
  fmt_markdown(columns = 2:5) |>
  cols_align(align = "center", columns = 2:5) |>
  tab_header(title = "Commune-level effect of foreign-born share on SES outcomes",
             subtitle = "Coefficient on share_imm only. Pop-weighted; SEs dept-clustered.") |>
  tab_source_note("Each cell: β (SE). Stars: * p<0.05, ** p<0.01, *** p<0.001.")
Commune-level effect of foreign-born share on SES outcomes
Coefficient on share_imm only. Pop-weighted; SEs dept-clustered.
Specification log(income) Poverty rate Unemployment rate Higher-ed share
(1) Raw cross-section -0.343***
(0.011)
0.526***
(0.011)
0.243***
(0.003)
0.525***
(0.008)
(2) + Dept FE -1.440***
(0.147)
1.001***
(0.053)
0.484***
(0.025)
0.020
(0.111)
(3) + Dept FE + log(density) -1.640***
(0.209)
0.953***
(0.061)
0.376***
(0.026)
-0.536***
(0.115)
(4) + Dept FE + log(d) + age -1.613***
(0.195)
0.954***
(0.058)
0.406***
(0.026)
-0.577***
(0.095)
(5) Commune FE + Year FE -0.458***
(0.064)
0.395***
(0.045)
0.095**
(0.035)
0.192*
(0.076)
(6) + log(density) + age -0.442***
(0.064)
0.374***
(0.047)
0.129***
(0.031)
0.034
(0.081)
Each cell: β (SE). Stars: * p<0.05, ** p<0.01, *** p<0.001.

5. Cognitive: 6e (age 11) tests

e6 <- fread(file.path(PROJ, "data/depp/eval_6eme_dept.csv"), sep = ";")
setnames(e6, "code_departement", "dpt")
e6[, dpt := as.character(dpt)]
e6e <- e6[caracteristique == "Ensemble" & dpt != ""]
e6w <- dcast(e6e[, .(dpt, year = annee, discipline, score_moyen)],
             dpt + year ~ discipline, value.var = "score_moyen")
setnames(e6w, c("Mathématiques","Français"), c("math_score","french_score"))
e6w[, score_avg := (math_score + french_score) / 2]
e6w[, iq := 100 + (score_avg - 258) * (15/50)]

# Pooled cross-section (10y MA × 8y mean IQ)
ma_pool <- shares[year %in% 2015:2024,
  .(share_strict = sum(maghrebi_arab + sub_saharan + turkish) / sum(total)), by = dpt]
iq_pool <- e6w[year %in% 2017:2024, .(iq = mean(iq, na.rm = TRUE)), by = dpt]
d20 <- dens[year == 2020, .(dpt, log_density, popw = pop)]
a20 <- pop[year == 2021, .(dpt, share_60plus)]
cs6 <- merge(merge(merge(merge(ma_pool, iq_pool, by = "dpt"), d20, by = "dpt"),
             a20, by = "dpt"), nm_dt, by = "dpt", all.x = TRUE)
cs6 <- cs6[!dpt %in% DROM]

panel6 <- merge(merge(merge(e6w[, .(dpt, year, iq)],
                  shares[, .(dpt, year, share_strict)], by = c("dpt","year")),
                  dens[, .(dpt, year, log_density, popw = pop)], by = c("dpt","year")),
                  pop[, .(dpt, year, share_60plus)], by = c("dpt","year"))
panel6 <- panel6[!dpt %in% DROM & !is.na(iq) & !is.na(log_density) & !is.na(share_60plus)]

models_iq <- list(
  "(1) Cross-sec, raw"               = feols(iq ~ share_strict, data = cs6, weights = ~popw),
  "(2) Cross-sec + log_density"      = feols(iq ~ share_strict + log_density, data = cs6, weights = ~popw),
  "(3) Cross-sec + log_dens + age"   = feols(iq ~ share_strict + log_density + share_60plus, data = cs6, weights = ~popw),
  "(4) Dept FE + Year FE, raw"       = feols(iq ~ share_strict | dpt + year, data = panel6, weights = ~popw, cluster = ~dpt),
  "(5) + log_density"                = feols(iq ~ share_strict + log_density | dpt + year, data = panel6, weights = ~popw, cluster = ~dpt),
  "(6) + log_dens + age"             = feols(iq ~ share_strict + log_density + share_60plus | dpt + year, data = panel6, weights = ~popw, cluster = ~dpt)
)
modelsummary(models_iq,
  stars = c("*"=0.05, "**"=0.01, "***"=0.001),
  coef_omit = "Intercept",
  gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
  title = "6e cognitive score (IQ-like, mean=100, SD=15) — département analysis",
  notes = list(stars_note,
    "Cross-section uses 10y pooled MA share + 8y mean IQ (2017-2024).",
    "Score scale: native French ≈ 100, SD ≈ 15. Rescaled from raw DEPP score (mean ~258, SD ~50)."))
6e cognitive score (IQ-like, mean=100, SD=15) — département analysis
(1) Cross-sec, raw (2) Cross-sec + log_density (3) Cross-sec + log_dens + age (4) Dept FE + Year FE, raw (5) + log_density (6) + log_dens + age
* p < 0.05, ** p < 0.01, *** p < 0.001
Stars: * p<0.05, ** p<0.01, *** p<0.001.
Cross-section uses 10y pooled MA share + 8y mean IQ (2017-2024).
Score scale: native French ≈ 100, SD ≈ 15. Rescaled from raw DEPP score (mean ~258, SD ~50).
share_strict -3.728 -19.197*** -20.938*** -5.383** -5.340** -6.797***
(2.611) (2.951) (3.266) (1.852) (1.730) (1.721)
log_density 1.247*** 1.143*** 5.659 2.530
(0.169) (0.189) (6.661) (6.568)
share_60plus -7.713 -54.142*
(6.278) (21.605)
Num.Obs. 94 94 94 752 752 752
R2 0.022 0.388 0.398 0.972 0.973 0.974
R2 Within 0.015 0.037 0.093
Std.Errors IID IID IID by: dpt by: dpt by: dpt
FE: dpt X X X
FE: year X X X

Partial residual plot (6e cross-section, controlling log density)

fit <- lm(iq ~ share_strict + log_density, data = cs6, weights = popw)
beta_x <- coef(fit)["share_strict"]
mean_logd <- weighted.mean(cs6$log_density, cs6$popw)
cs6[, partial_iq := residuals(fit) + beta_x * share_strict +
                     coef(fit)["(Intercept)"] + coef(fit)["log_density"] * mean_logd]
cs6[, lab := ifelse(frank(-share_strict) <= 8 | frank(share_strict) <= 4 |
                    frank(-popw) <= 6 |
                    frank(-abs(partial_iq - mean(partial_iq))) <= 4,
                    name, NA_character_)]

ggplot(cs6, aes(share_strict, partial_iq)) +
  geom_smooth(method = "lm", aes(weight = popw), formula = y ~ x, se = TRUE,
              color = "steelblue", alpha = 0.15) +
  geom_point(aes(size = popw), alpha = 0.8, color = "darkred") +
  geom_text_repel(aes(label = lab), size = 3.0, max.overlaps = Inf, seed = 1,
                  min.segment.length = 0, segment.alpha = 0.4,
                  box.padding = 0.45, point.padding = 0.25, force = 2.5) +
  scale_size_continuous(range = c(1, 8), guide = "none") +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(title = "6e cognitive score vs Muslim/African names share — partial residuals",
       subtitle = sprintf("Mainland départements, n = %d | β = %+.1f IQ pts (controlling log density)",
                          nrow(cs6), beta_x),
       x = "Muslim/African names share (10-year pool, 2015-2024)",
       y = "6e cognitive score (rescaled, French native ≈ 100, SD ≈ 15)\nadjusted for population density",
       caption = "Component-plus-residual plot. DEPP 6e (2017-2024 mean), INSEE prénoms strict. Pop-weighted.") +
  theme_minimal(base_size = 12) +
  theme(plot.background = element_rect(fill = "white", color = NA),
        plot.title = element_text(face = "bold", size = 12))

6. Cognitive: JDC (age 17) literacy, 2014-2024 panel

jdc <- fread(file.path(PROJ, "data/depp/jdc_dept_panel.csv"),
             colClasses = list(character = "dpt"))
jdc[, p := diff_ens / 100]
jdc[, latent := -qnorm(p)]
jdc_pool <- jdc[, .(latent_avg = mean(latent, na.rm = TRUE)), by = dpt]
mainland_mean_2024 <- jdc[year == 2024 & !dpt %in% DROM, mean(latent, na.rm = TRUE)]
jdc_pool[, iq := 100 + (latent_avg - mainland_mean_2024) * 15]

m_jdc <- merge(merge(merge(merge(ma_pool, jdc_pool, by = "dpt"),
                d20, by = "dpt"), a20, by = "dpt"), nm_dt, by = "dpt", all.x = TRUE)
m_jdc <- m_jdc[!dpt %in% DROM]

mods_jdc <- list(
  "raw"                = feols(iq ~ share_strict, data = m_jdc, weights = ~popw),
  "+ log_density"      = feols(iq ~ share_strict + log_density, data = m_jdc, weights = ~popw),
  "+ log_dens + age"   = feols(iq ~ share_strict + log_density + share_60plus, data = m_jdc, weights = ~popw)
)
modelsummary(mods_jdc,
  stars = c("*"=0.05, "**"=0.01, "***"=0.001),
  coef_omit = "Intercept",
  gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj",
  title = "JDC literacy (latent IQ score, 7-year mean 2014-2024) — département cross-section",
  notes = list(stars_note,
    "JDC % in difficulty inverse-normal-transformed → latent score on IQ scale (mean=100, SD=15)."))
JDC literacy (latent IQ score, 7-year mean 2014-2024) — département cross-section
raw + log_density + log_dens + age
* p < 0.05, ** p < 0.01, *** p < 0.001
Stars: * p<0.05, ** p<0.01, *** p<0.001.
JDC % in difficulty inverse-normal-transformed → latent score on IQ scale (mean=100, SD=15).
share_strict 2.328 -10.994*** -12.498***
(2.228) (2.505) (2.772)
log_density 1.074*** 0.984***
(0.144) (0.160)
share_60plus -6.665
(5.328)
Num.Obs. 94 94 94
R2 0.012 0.388 0.399
Std.Errors IID IID IID

7. Crime data (SSMSI 2020-2024 dept averages)

cr <- fread(file.path(PROJ, "data/crime/crime_dept.csv"), sep = ";", dec = ",",
            colClasses = list(character = c("Code_departement","Code_region","annee")))
cr[, year := as.integer(annee)]
setnames(cr, c("Code_departement","indicateur","taux_pour_mille","insee_pop"),
             c("dpt","ind","rate","pop_v"))

pool <- cr[year %in% 2020:2024,
           .(rate_avg = mean(rate, na.rm = TRUE), pop = mean(pop_v)), by = .(dpt, ind)]
m_crime <- merge(merge(pool, ma_pool, by = "dpt"),
                 dens[year == 2020, .(dpt, log_density)], by = "dpt")
m_crime <- m_crime[!dpt %in% DROM]

results <- m_crime[, {
  fit_d <- lm(rate_avg ~ share_strict + log_density, data = .SD, weights = pop)
  b <- coef(fit_d)["share_strict"]
  s <- summary(fit_d)$coefficients["share_strict","Std. Error"]
  p <- 2 * (1 - pnorm(abs(b/s)))
  star <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
  mean_r <- weighted.mean(rate_avg, pop)
  .(`Mean rate /1000` = round(mean_r, 2),
    `β per fraction` = sprintf("%+.2f%s", b, star),
    `% of mean` = sprintf("%+.0f%%", b/mean_r*100))
}, by = ind][order(-as.numeric(gsub("[%+]","", `% of mean`)))]
results

Violent crime composite — partial residuals plot

chosen <- c("Homicides", "Tentatives d'homicide", "Vols avec armes")
vc_pool <- cr[year %in% 2020:2024 & ind %in% chosen,
              .(rate = sum(rate)), by = .(dpt, year)
              ][, .(rate_avg = mean(rate, na.rm = TRUE)), by = dpt]
poppy <- cr[year == 2024 & ind == "Homicides", .(dpt, pop = pop_v)]

m_vc <- merge(merge(merge(merge(vc_pool, ma_pool, by = "dpt"),
              poppy, by = "dpt"),
              dens[year == 2020, .(dpt, log_density)], by = "dpt"),
              nm_dt, by = "dpt", all.x = TRUE)
m_vc <- m_vc[!dpt %in% DROM]

fit_vc <- lm(rate_avg ~ share_strict + log_density, data = m_vc, weights = pop)
beta_x <- coef(fit_vc)["share_strict"]
mean_logd <- weighted.mean(m_vc$log_density, m_vc$pop)
m_vc[, partial_y := residuals(fit_vc) + beta_x * share_strict +
                    coef(fit_vc)["(Intercept)"] + coef(fit_vc)["log_density"] * mean_logd]
m_vc[, lab := ifelse(frank(-share_strict) <= 8 | frank(share_strict) <= 4 |
                     frank(-pop) <= 6 |
                     frank(-abs(partial_y - mean(partial_y))) <= 4,
                     name, NA_character_)]

ggplot(m_vc, aes(share_strict, partial_y)) +
  geom_smooth(method = "lm", aes(weight = pop), formula = y ~ x, se = TRUE,
              color = "steelblue", alpha = 0.15) +
  geom_point(aes(size = pop), alpha = 0.8, color = "darkred") +
  geom_text_repel(aes(label = lab), size = 3.0, max.overlaps = Inf, seed = 1,
                  min.segment.length = 0, segment.alpha = 0.4,
                  box.padding = 0.45, point.padding = 0.25, force = 2.5) +
  scale_size_continuous(range = c(1, 8), guide = "none") +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(title = "Serious violent crime (homicides + attempted + armed robbery) vs Muslim/African names",
       subtitle = sprintf("Mainland départements, n = %d | β = %+.2f per 1,000 pop / fraction (controlling log density)",
                          nrow(m_vc), beta_x),
       x = "Muslim/African names share (10-year pool, 2015-2024)",
       y = "Serious violent crime rate per 1,000 (mean 2020-2024)\nadjusted for population density",
       caption = "Source: SSMSI (Ministère de l'Intérieur). Gold-standard low-reporting-bias indicators.") +
  theme_minimal(base_size = 12) +
  theme(plot.background = element_rect(fill = "white", color = NA),
        plot.title = element_text(face = "bold", size = 12))

8. Reference: published individual-level gaps (Insee Références 2023)

ref <- data.table(
  Group = c("Native (no migrant ancestry)",
            "All migrants (1st gen)",
            "  Africa-born", "  Europe-born", "  Asia-born",
            "All 2nd-gen",
            "  Africa 2nd-gen", "  Europe 2nd-gen", "  Asia 2nd-gen"),
  `Median age` = c("42", "~46", "~44", "~52", "~44", "26", "18", "50", "17"),
  `Median income €` = c("22,880", "17,000", "14,850", "20,480", "16,070",
                        "19,970", "17,760", "22,900", "18,470"),
  `Poverty %` = c("11.1", "31.5", "39.2", "19.5", "36.4",
                  "21.7", "26.7", "11.9", "31.3"),
  `Unemployment %` = c("7", "13", "15", "~9", "11",
                       "12", "16", "8", "11"),
  `Higher-ed %` = c("41", "32", "28", "37", "32",
                    "38", "37", "36", "50"),
  `% no diploma` = c("16", "38", "42", "29", "43",
                     "18", "21", "16", "16")
)
gt(ref) |>
  tab_header(title = "Individual-level SES outcomes by migrant origin and generation",
             subtitle = "Source: INSEE Insee Références 2023 — Immigrés et descendants d'immigrés en France") |>
  cols_align(align = "left",   columns = "Group") |>
  cols_align(align = "center", columns = 2:7) |>
  tab_source_note("Income & poverty: 2019. Unemployment & higher-ed: 2021 (ages 30-64).") |>
  tab_source_note("Severe age confound for 2nd-gen: Europe median age 50 (children of 1950s-70s labor migration) vs Africa/Asia median 17-18 (children of recent migrants).")
Individual-level SES outcomes by migrant origin and generation
Source: INSEE Insee Références 2023 — Immigrés et descendants d'immigrés en France
Group Median age Median income € Poverty % Unemployment % Higher-ed % % no diploma
Native (no migrant ancestry) 42 22,880 11.1 7 41 16
All migrants (1st gen) ~46 17,000 31.5 13 32 38
Africa-born ~44 14,850 39.2 15 28 42
Europe-born ~52 20,480 19.5 ~9 37 29
Asia-born ~44 16,070 36.4 11 32 43
All 2nd-gen 26 19,970 21.7 12 38 18
Africa 2nd-gen 18 17,760 26.7 16 37 21
Europe 2nd-gen 50 22,900 11.9 8 36 16
Asia 2nd-gen 17 18,470 31.3 11 50 16
Income & poverty: 2019. Unemployment & higher-ed: 2021 (ages 30-64).
Severe age confound for 2nd-gen: Europe median age 50 (children of 1950s-70s labor migration) vs Africa/Asia median 17-18 (children of recent migrants).

Session info

sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.3.2.9000 scales_1.4.0         readxl_1.4.5        
##  [4] sf_1.0-21            ggrepel_0.9.6        ggplot2_4.0.1.9000  
##  [7] gt_1.3.0             modelsummary_2.5.0   fixest_0.13.2       
## [10] data.table_1.17.8   
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1    dplyr_1.1.4         farver_2.1.2       
##  [4] S7_0.2.1            fastmap_1.2.0       TH.data_1.1-4      
##  [7] bayestestR_0.17.0   digest_0.6.39       estimability_1.5.1 
## [10] lifecycle_1.0.4     dreamerr_1.5.0      survival_3.8-6     
## [13] magrittr_2.0.4      compiler_4.5.3      rlang_1.1.6.9000   
## [16] sass_0.4.10         tools_4.5.3         yaml_2.3.10        
## [19] knitr_1.50          labeling_0.4.3      classInt_0.4-11    
## [22] xml2_1.4.0          RColorBrewer_1.1-3  multcomp_1.4-28    
## [25] KernSmooth_2.23-26  tinytable_0.14.0    withr_3.0.2        
## [28] numDeriv_2016.8-1.1 datawizard_1.3.0    grid_4.5.3         
## [31] xtable_1.8-4        e1071_1.7-16        future_1.67.0      
## [34] globals_0.18.0      emmeans_1.11.2-8    MASS_7.3-65        
## [37] insight_1.4.2       cli_3.6.5           mvtnorm_1.3-3      
## [40] rmarkdown_2.30      ragg_1.5.0          generics_0.1.4     
## [43] performance_0.15.2  future.apply_1.20.0 commonmark_2.0.0   
## [46] parameters_0.28.2   DBI_1.2.3           cachem_1.1.0       
## [49] proxy_0.4-27        splines_4.5.3       parallel_4.5.3     
## [52] effectsize_1.0.1    cellranger_1.1.0    stringmagic_1.2.0  
## [55] vctrs_0.6.5         Matrix_1.7-4        sandwich_3.1-1     
## [58] jsonlite_2.0.0      litedown_0.9        Formula_1.2-5      
## [61] listenv_0.9.1       systemfonts_1.3.1   jquerylib_0.1.4    
## [64] units_1.0-0         parallelly_1.45.1   glue_1.8.0         
## [67] codetools_0.2-20    gtable_0.3.6        tables_0.9.31      
## [70] tibble_3.3.0        pillar_1.11.1       htmltools_0.5.8.1  
## [73] R6_2.6.1            textshaping_1.0.4   evaluate_1.0.5     
## [76] lattice_0.22-7      markdown_2.0        backports_1.5.0    
## [79] bslib_0.9.0         class_7.3-23        Rcpp_1.1.0         
## [82] coda_0.19-4.1       nlme_3.1-168        checkmate_2.3.3    
## [85] mgcv_1.9-3          xfun_0.57           fs_1.6.6           
## [88] zoo_1.8-14          pkgconfig_2.0.3