options(digits = 3)
library(pacman)
p_load(kirkegaard, readxl, rms, sf, googlesheets, ggeffects, spatialstatstools, plm, future, doFuture, ggrepel)
#default theme
theme_set(theme_bw())
#parallel
registerDoFuture()
plan(multisession(workers = 4))
options('future.globals.maxSize' = 99999999999999)
#SAC correlations, easy summary
SAC_cors = function(x) {
#which variables?
lags = str_subset(names(x), "_lag$")
vars = lags %>% str_replace("_lag$", "")
y = map2_dbl(vars, lags, function(v, l) {
wtd.cors(x[[v]],
x[[l]])
})
tibble(
variable = vars,
SAC_r = y
)
}
#wrapper to get lag variable
add_lag = function(x, var, template = "{v}_lag") {
#ID var
x$..tmp_id_var = seq_along_rows(x)
for (v in var) {
#subset to no-NA, function limitation
x_sub = x[!is.na(x[[v]]) & !is.na(x$lat), ]
#drop geo if needed
if ("sf" %in% class(x_sub)) x_sub = x_sub %>% st_drop_geometry()
#new var name
new_var = str_glue(template) %>% as.character()
#calculate
x_sub[[new_var]] = suppressMessages(spatialstatstools::SAC_knsnr(x_sub, dependent = v, lat_var = "lat", lon_var = "lon"))
#join to d2, erase if exists
x[[new_var]] = NULL
x = left_join(x, x_sub[, c("..tmp_id_var", new_var)], by = "..tmp_id_var")
}
#remove id
x$..tmp_id_var = NULL
x
}
#values with dups
duplicated_vals = function(x) {
x[duplicated(x)] %>% unique()
}
#test
c(1, 1, 2, 3) %>% duplicated_vals()
## [1] 1
#avoid bug
describe = function(...) psych::describe(...) %>% as.matrix()
Data were collected from a variety of French and Dutch-language documents, both public and privately in nature. The data covers various subdivisions of Belgium (provinces, arrondissements and municipalities/communes/gemeenten), including districts of cities.
#load other datasets of national data
dutch2019 = read_rds("data/Dutch_study_2020.rds")
mega = read_csv("data/Megadataset_v2.0p.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## LVnotes = col_character(),
## Names = col_character(),
## Capital = col_character(),
## se_Land = col_character(),
## se_abbrev = col_character(),
## de_Country = col_character(),
## de_abbrev = col_character(),
## de_Country_EN = col_character(),
## de_Western_neighbor = col_logical(),
## de_NW_European = col_logical(),
## dk_Origin = col_character(),
## Country_EN = col_character()
## )
## See spec(...) for full column specifications.
mega$ISO = mega$X1
chess2019 = read_rds("data/chess_kirkegaard2019.rds")
#merge Muslim variable
natl = full_join(mega %>% select(ISO, IslamPewResearch2010, IslamPewResearch1990, LV2012estimatedIQ, Names),
dutch2019 %>% select(ISO, IQ, Muslim)) %>%
full_join(chess2019 %>% select(ISO, UN_macroregion, UN_region, UN_continent))
## Joining, by = "ISO"
## Joining, by = "ISO"
#merge columns
natl$Muslim %<>% miss_fill(natl$IslamPewResearch1990, natl$Muslim)
natl$IQ %<>% miss_fill(natl$IQ, natl$LV2012estimatedIQ)
#no dups
assert_that(!any(duplicated(natl$ISO)))
## [1] TRUE
#set Muslim in NW Euro origins to 0, all of these is recent migration
natl$Muslim[natl$UN_macroregion == "N & W Europe + offshoots"] = 0
#fill in a few values manually
#Yugoslavia
natl$UN_continent[natl$ISO == "YUG"] = "Europe"
natl$UN_region[natl$ISO == "YUG"] = "Southern Europe"
natl$UN_macroregion[natl$ISO == "YUG"] = "Southern Europe"
#Kosovo
natl$UN_continent[natl$ISO == "KSV"] = "Europe"
natl$UN_region[natl$ISO == "KSV"] = "Southern Europe"
natl$UN_macroregion[natl$ISO == "KSV"] = "Southern Europe"
#serbia montenegro
natl$UN_continent[natl$ISO == "SRBM"] = "Europe"
natl$UN_region[natl$ISO == "SRBM"] = "Southern Europe"
natl$UN_macroregion[natl$ISO == "SRBM"] = "Southern Europe"
natl$IQ[natl$ISO == "SRBM"] = 91
natl$Muslim[natl$ISO == "SRBM"] = 0.056
#Soviet Union
natl$UN_continent[natl$ISO == "SUN"] = "Europe"
natl$UN_region[natl$ISO == "SUN"] = "Eastern Europe"
natl$UN_macroregion[natl$ISO == "SUN"] = "Eastern Europe"
#north Vietnam
#use Vietnam values
natl = bind_rows(
natl,
natl %>% filter(ISO == "VNM") %>% mutate(ISO = "VNM_N")
)
#unknown
#guess at random middle eastern, use Iraq values
natl = bind_rows(
natl,
natl %>% filter(ISO == "IRQ") %>% mutate(ISO = "UKN")
)
#czechoslovakia
natl$UN_continent[natl$ISO == "CSK"] = "Europe"
natl$UN_region[natl$ISO == "CSK"] = "Eastern Europe"
natl$UN_macroregion[natl$ISO == "CSK"] = "Eastern Europe"
#monaco
natl$IQ[natl$ISO == "MCO"] = 110 #Guess similar to Singapore
#San Marino
natl$IQ[natl$ISO == "SMR"] = 96.6 #Spain
#Guadeloupe (France)
natl$IQ[natl$ISO == "GLP"] = 83.7 #based on Fuerst and Kirkegaard 2016
#OECD aid data
OECD_aid = read_csv("data/OECD aid DP_LIVE_30062020030508192.csv")
## Parsed with column specification:
## cols(
## LOCATION = col_character(),
## INDICATOR = col_character(),
## SUBJECT = col_character(),
## MEASURE = col_character(),
## FREQUENCY = col_character(),
## TIME = col_double(),
## Value = col_double(),
## `Flag Codes` = col_logical()
## )
#names in both languages / metadata
muni_meta = readxl::read_excel("data/REFNIS_2019.xls") %>%
df_legalize_names()
#age data
muni_age = readxl::read_xls("data/population/Bevolking-leeftijd-gemeente-2014-65plus.xls", skip = 2) %>%
{colnames(.) = str_replace(colnames(.), "\\.+\\d+$", ""); .} %>%
df_legalize_names() %>%
rename(
NIS_code = NIS_CODE
) %>%
mutate(
NIS_code = NIS_code %>% as.numeric(),
#age vars
age_65ao_pct = x65plus_Tot / Tot,
age_0_19_pct = (Totale_bevolking_Minder_dan_5_jaar + Van_5_tot_9_jaar_2 + Van_10_tot_14_jaar_2 + Van_15_tot_19_jaar_2) / Tot,
) %>%
filter(!is.na(NIS_code))
## New names:
## * `10-24 jr` -> `10-24 jr...12`
## * `50-64 jr` -> `50-64 jr...13`
## * Verschil -> Verschil...14
## * `Van 5 tot 9 jaar` -> `Van 5 tot 9 jaar...16`
## * `Van 10 tot 14 jaar` -> `Van 10 tot 14 jaar...17`
## * ...
## Warning in function_list[[k]](value): NAs introduced by coercion
#population density
muni_density = readxl::read_xls("data/population/density.xls", sheet = 2) %>%
df_legalize_names() %>%
mutate(
NIS_code = NIS_code %>% as.numeric()
) %>%
filter(!is.na(NIS_code))
#Musilm data up to 2017
#there were problems with NIS codes which were fixed manually
#we use muni_age for reference
muni_muslims = readxl::read_xls("data/population/Moslims-2017-NIS-CODE.xls", sheet = 2) %>%
df_legalize_names() %>%
mutate(
NIS_code = as.numeric(NIS_code),
NIS_code_fix = as.numeric(NIS_code_fix),
not_unique = NIS_code %in% duplicated_vals(NIS_code),
nonmatch = !NIS_code %in% muni_age$NIS_code,
nonmatch2 = !NIS_code_fix %in% muni_age$NIS_code,
NIS_code_old = NIS_code,
NIS_code = NIS_code_fix
)
#any not unique?
muni_muslims %>% filter(not_unique) %>% select(1:2, NIS_code)
#population data
# gs_auth()
# gs_muni_pop = gs_url("https://docs.google.com/spreadsheets/d/1w55OpRLypkX9EEQDYk26-0BKxCTgIKxtEGtNCj46C88/edit#gid=707936744")
# muni_pop = gs_read(gs_muni_pop, ws = "1989fixed", col_types = cols(Year = col_number(),
# Name = col_character(),
# .default = col_double()))
muni_pop = readxl::excel_sheets("data/population/Years-fixed.xlsx") %>%
map_df(function(ws) {
# browser()
#read sheet
y = readxl::read_excel(path = "data/population/Years-fixed.xlsx",
sheet = ws)
#fix
#
y
}) %>%
#fix types
{
#everything after first two is numeric
for (i in 3:ncol(.)) {
suppressWarnings(.[[i]] %<>% as.numeric())
}
.
}
#reformat data
muni_pop_long = muni_pop %>%
#gather to long format
gather(key = country, value = population, -Year, -Name) %>%
#separate codes from names
mutate(
#municipalities
muni_code = str_match(Name, "^(\\d+)")[, 2],
muni_name = str_match(Name, "^\\d+ (.+)")[, 2],
#countries
country_code = str_match(country, "^(\\d+)")[, 2],
country_name = str_match(country, "^\\d+ (.+)")[, 2],
country_name2 = country_name %>% plyr::mapvalues(c("Onbepaald", "Vaderlandsloos"), c(NA, "Palestine")),
country_ISO = pu_translate(country_name2) %>% plyr::mapvalues(NA, "UKN"),
#recode missing to 0
population = population %>% plyr::mapvalues(NA, 0)
) %>%
#merge data for unknown origins
{
#we dont need to do anything for other countries
y1 = filter(., country_ISO != "UKN")
#merge unknowns within year and units
y2 = filter(., country_ISO == "UKN") %>%
plyr::ddply(c("muni_name", "Year"), function(dd){
#use row 1 as default
z = dd[1, ]
#sum populations
z$population = sum(dd$population, na.rm = T)
z
})
#bind rows
bind_rows(y1, y2)
}
## No exact match: Polen(Republiek)
## No exact match: Congo (Dem. Rep.)
## No exact match: Tanzania /Verenigde Rep./
## No exact match: Seychellen(Eilanden)
## Best fuzzy match found: Polen(Republiek) -> Polen (Republiek) with distance 1.00
## Best fuzzy match found: Congo (Dem. Rep.) -> Congo, Dem. Rep. with distance 3.00
## Best fuzzy match found: Tanzania /Verenigde Rep./ -> Tanzania (Verenigde Rep.) with distance 2.00
## Best fuzzy match found: Seychellen(Eilanden) -> Seychellen (Eilanden) with distance 1.00
#estimate population proportions for year x muni combos
#this one takes a long time, so we cache it
muni_pop_summary = cache_object({
muni_pop_long %>%
left_join(natl, by = c("country_ISO" = "ISO")) %>%
#loop combos and estimate
plyr::ddply(c("muni_name", "Year"), .parallel = T, .fun = function(dd) {
#mean estimated IQ and Muslim by origins
y = tibble(
NIS_code = dd$muni_code[1] %>% as.numeric(),
population = wtd_sum(dd$population),
Muslim = wtd_mean(dd$Muslim, dd$population),
IQ = wtd_mean(dd$IQ, dd$population)
)
#add in region proportions
for (v in natl$UN_continent %>% unique() %>% na.omit()) {
y[[str_glue("UN_continent_{v}") %>% str_legalize()]] = wtd_mean(dd$UN_continent == v, dd$population)
}
for (v in natl$UN_region %>% unique() %>% na.omit()) {
y[[str_glue("UN_region_{v}") %>% str_legalize()]] = wtd_mean(dd$UN_region == v, dd$population)
}
for (v in natl$UN_macroregion %>% unique() %>% na.omit()) {
y[[str_glue("UN_macroregion_{v}") %>% str_legalize()]] = wtd_mean(dd$UN_macroregion == v, dd$population)
}
y
})
}, filename = "cache/muni_pop_summary.rds")
## Cache found, reading object from disk
#income - multiple files
#2009
muni_income09 = readxl::read_xls("data/income/fisc2009_c_nl_tcm325-151868.xls", sheet = "per indiv. aang.", skip = 6) %>%
df_legalize_names() %>%
select(NIS_code, x10_000_EUR, meer_dan_50_000_EUR, Gemiddeld_inkomen_per_aangifte, Mediaan_inkomen_per_aangifte, Interkwartiele_co_ffici_nt) %>%
set_colnames(c("NIS_code", "low_income_2009", "high_income_2009", "mean_income_2009", "median_income_2009", "income_inequality_2009")) %>%
filter(!is.na(NIS_code))
#2016
muni_income16 = readxl::read_xls("data/income/fisc2016_C_NL.xls", sheet = "clean") %>%
df_legalize_names() %>%
select(NIS_code, Gemiddeld_inkomen_per_aangifte, Mediaan_inkomen_per_aangifte, Interkwartiele_co_ffici_nt) %>%
set_colnames(c("NIS_code", "mean_income_2016", "median_income_2016", "income_inequality_2016")) %>%
map_df(as.numeric) %>%
filter(!is.na(NIS_code))
#2005-2016
muni_income = readxl::read_excel("data/income/income-2005-2016.xlsx") %>%
df_legalize_names() %>%
mutate(
income = MS_TOT_NET_INC / MS_TOT_RESIDENTS,
NIS_code = CD_MUNTY_REFNIS %>% as.numeric(),
year = CD_YEAR
) %>%
filter(!is.na(NIS_code))
#single year for use of metadata
muni_income_meta = muni_income %>% filter(year == 2005) %>%
mutate(
region = TX_RGN_DESCR_NL %>% str_replace(" Gewest", ""),
province = TX_PROV_DESCR_NL %>% str_replace("^Provincie ", "") %>% mapvalues(from = NA,
to = "Bruxelles"),
arrondissement = TX_ADM_DSTR_DESCR_NL %>% str_replace("^Arrondissement ", "")
) %>%
select(NIS_code, region, province, arrondissement)
#1989 income, as test
muni_income89 = read_excel("data/income/F01SP08.Y89_Y90.xls", skip = 14, col_names = F) %>%
mutate(
NIS_code = `...2` %>% as.numeric(),
muni_name = `...4` %>% as.character(),
median_income_1989 = `...15` %>% as.numeric()
) %>%
filter(!is.na(NIS_code))
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning in function_list[[k]](value): NAs introduced by coercion
#crime rates 2009-2016
muni_crime = readxl::read_excel("data/All-crimes-mean-2009-2016-per-belgian-commune.xlsx") %>%
df_legalize_names() %>%
mutate(
crimes = select(., x2009:x2016) %>% rowMeans(),
NIS_code = NIS_code %>% as.numeric(),
not_unique = NIS_code %in% duplicated_vals(NIS_code)
) %>%
filter(
!is.na(NIS_code)
)
## Warning in function_list[[k]](value): NAs introduced by coercion
#any not unique?
muni_crime %>% filter(not_unique) %>% select(1:2, NIS_code)
muni_crime_violent = readxl::read_excel("data/violent_crime.xlsx") %>%
df_legalize_names() %>%
mutate(
violent_crimes = select(., x2009:x2016) %>% rowMeans()
)
muni_crime_violent$NIS_code %>% duplicated_vals()
## numeric(0)
setdiff(muni_crime_violent$NIS_code, muni_age$NIS_code)
## numeric(0)
assert_that(all(muni_crime_violent$NIS_code %in% muni_age$NIS_code))
## [1] TRUE
#rename columns
for (v in names(muni_crime)[3:12]) {
#all crimes file
muni_crime[[str_replace(v, "x", "crimes_")]] = muni_crime[[v]]
muni_crime[[v]] = NULL
#violent crimes
muni_crime_violent[[str_replace(v, "x", "violent_crimes_")]] = muni_crime_violent[[v]]
muni_crime_violent[[v]] = NULL
}
#life expectancy
muni_life_exp = read_csv("data/Kaart _ Levensverwachting Belgische gemeenten.csv") %>%
df_legalize_names() %>%
mutate(
life_expectancy = gemiddeld,
NIS_code = COMCOD %>% as.numeric()
)
## Warning: Duplicated column names deduplicated: 'Gemeente' => 'Gemeente_1' [18]
## Parsed with column specification:
## cols(
## NIS = col_character(),
## COMCOD = col_double(),
## NOM = col_character(),
## `NIS kort` = col_double(),
## Gemeente = col_character(),
## man = col_double(),
## vrouw = col_double(),
## gemiddeld = col_double(),
## description = col_character(),
## name = col_character(),
## ICC = col_character(),
## ISN = col_double(),
## NAMN = col_character(),
## DESN = col_character(),
## Shape_Leng = col_double(),
## Shape_Area = col_double(),
## geometry = col_character(),
## Gemeente_1 = col_character()
## )
#education
muni_education = read_excel("data/EDUCATION-WITH-NIS.xlsx")
assert_that(all(muni_education$NIS_code %in% muni_age$NIS_code))
## [1] TRUE
#unemployment
muni_unemployment = read_excel("data/Unemployement.xls", skip = 4) %>%
df_legalize_names() %>%
mutate(NIS_code = INS_code)
muni_unemployment$INS_code %>% duplicated_vals()
## numeric(0)
muni_unemployment$ex_INS %>% duplicated_vals() #has some duplicates
## [1] 12000 44000 45000 70000 72000
sum(muni_unemployment$INS_code %in% muni_age$NIS_code)
## [1] 563
sum(muni_unemployment$ex_INS %in% muni_age$NIS_code) #slightly more matches
## [1] 574
#calculate the mean for 2009-2016
muni_unemployment$unemployment = muni_unemployment %>%
select(x2009:x2016) %>%
rowMeans(na.rm = T)
#assert uniq NIS
assert_that(!any(duplicated(muni_muslims$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_income_meta$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_income09$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_income16$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_income89$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_age$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_density$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_crime$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_crime_violent$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_life_exp$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_education$NIS_code)))
## [1] TRUE
assert_that(!any(duplicated(muni_unemployment$NIS_code)))
## [1] TRUE
# assert_that(!any(duplicated(muni_pop_summary$NIS_code)))
#merge by NIS
muni = full_join(muni_life_exp %>% select(NIS_code, life_expectancy), muni_income09, by = "NIS_code") %>%
full_join(muni_income16, by = "NIS_code") %>%
full_join(muni_income89 %>% select(NIS_code, median_income_1989), by = "NIS_code") %>%
full_join(muni_age %>% select(NIS_code, age_65ao_pct, age_0_19_pct, Tot), by = "NIS_code") %>%
full_join(muni_density, by = "NIS_code") %>%
full_join(muni_income_meta, by = "NIS_code") %>%
full_join(muni_muslims %>% select(NIS_code, Gemeente, Bevolking_2011:pct_moslims_2017), by = "NIS_code") %>%
full_join(muni_crime %>% select(NIS_code:crimes_2017), by = "NIS_code") %>%
left_join(muni_crime_violent %>% select(NIS_code:violent_crimes_2017), by = "NIS_code") %>%
full_join(muni_education %>% select(NIS_code, higher_education_pct), by = "NIS_code") %>%
full_join(muni_unemployment %>% select(NIS_code, unemployment), by = "NIS_code") %>%
#mutate
mutate(
#rename some stuff
Muslim_pct = pct_moslims_2016,
name = Gemeente,
population = Bevolking_2016,
population2 = Tot,
pop_density = Bevolkings_dichtheid_inw_km,
population_growth = (Bevolking_2017/Bevolking_2011) - 1,
#crime variables
crime_rate = crimes / population,
crime_rate_RR = crime_rate / (sum(crimes, na.rm=T) / sum(population, na.rm=T)),
crime_rate_2011 = crimes_2011 / Bevolking_2011,
crime_rate_2017 = crimes_2017 / Bevolking_2017,
violent_crimes_rate = violent_crimes / population,
violent_crimes_rate_RR = violent_crimes_rate / (sum(violent_crimes, na.rm=T) / sum(population, na.rm=T)),
violent_crimes_rate_2011 = violent_crimes_2011 / Bevolking_2011,
violent_crimes_rate_2017 = violent_crimes_2017 / Bevolking_2017
)
#no dup NIS
assert_that(!any(duplicated(muni$NIS_code)))
## [1] TRUE
#subset to communes
muni %<>% filter(NIS_code %in% muni_age$NIS_code)
#no duplicated variables
assert_that(!any(str_detect(muni %>% names(), "\\.x$")))
## [1] TRUE
#find merge errors by looking at population discrepancies
muni %>%
mutate(
pop_ratio = population / population2
) %>%
filter(pop_ratio > 1.1 | pop_ratio < 0.9) %>%
select(Gemeente, population, population2, pop_ratio)
#variable lists
main_vars = muni %>% select(life_expectancy,
# mean_income_2009:income_inequality_2009,
mean_income_2016:income_inequality_2016,
Muslim_pct, crime_rate, crime_rate_RR, violent_crimes_rate, pop_density,
age_65ao_pct:age_0_19_pct, higher_education_pct, unemployment) %>% names()
#no spatial
muni_no_spatial = muni
#read spatial data
# muni_spatial = sf::st_read("data/spatial/BEL_adm/BEL_adm4.shp")
muni_spatial = read_rds("data/spatial/statbel_shapefile_com.rds")
muni_spatial_orig = muni_spatial
#spatial file missing NIS codes
#read manually coded NIS
# muni_spatial_NIS = read_excel("data/spatial/map_nis.xlsx") %>% df_legalize_names()
# muni_spatial_NIS$problem = muni_spatial_NIS$NIS_CODE %in% (muni_spatial_NIS$NIS_CODE %>% duplicated_vals())
# muni_spatial$NIS_code = muni_spatial_NIS$NIS_CODE %>% as.numeric()
# muni_spatial$NIS_CODE_2019 = muni_spatial_NIS$NIS_CODE_2019 %>% as.numeric()
#unique
assert_that(!any(duplicated(muni_spatial$NIS_code)))
## [1] TRUE
#merge
muni_spatial = full_join(muni_spatial %>% select(-province, -region), muni_no_spatial, by = "NIS_code")
#calculate centroids, and they must lie within the unit
#https://stackoverflow.com/questions/52522872/r-sf-package-centroid-within-polygon
# muni_spatial %<>%
# mutate(lon = map_dbl(geometry, ~st_point_on_surface(.x)[[1]]),
# lat = map_dbl(geometry, ~st_point_on_surface(.x)[[2]]))
#add lags
muni_spatial = add_lag(muni_spatial, var = main_vars)
#rename
#these are the regional codings in 2019 update
# muni_spatial %<>% mutate(
# region2 = NAME_1,
# province2 = NAME_2,
# arrondissement2 = NAME_3,
# )
#remove geo data to return to normal
#and drop units that aren't communes or which are problematic
muni = muni_spatial %>% st_drop_geometry() %>% filter(NIS_code %in% muni_age$NIS_code)
#unique
assert_that(!any(duplicated(muni$NIS_code)))
## [1] TRUE
assert_that(!any(str_detect(muni %>% names(), "\\.x$")))
## [1] TRUE
#Brussels
brussels = readxl::read_xlsx("data/Brussels.xlsx") %>%
df_legalize_names()
#legalize names in data frame, keep labels
brussels %<>%
#mutate
mutate(
#as fraction
North_African_proportion_2016_pct = North_African_proportion_2016_pct / 100
)
#merge with demographics
brussels = brussels %>%
left_join(muni_muslims %>% select(NIS_code, pct_moslims_2013), by = "NIS_code")
#timeline data
wealth_timeline = read_excel("data/Belgium regional decline paper Brussels 2020.xlsx", sheet = 1) %>%
df_legalize_names()
Social data usually correlate strongly, meaning that (in latent variable theory) they measure a common underlying construct. This we call S, for general socioeconomic factor. It can be extracted with factor analysis.
#social data
brussels_social = brussels %>%
select(Wealth_index_2013:Blood_donor_pct) %>%
select(-Population_growth_pct) %>%
as_tibble() %>%
#nicer names
rename(
Wealth_index = Wealth_index_2013,
Share_of_households_in_demand_for_social_housing_pct = Share_of_households_in_demand_for_social_housing_2012_pct,
Unemployment_rate_pct = Unemployment_rate_2012_pct,
Median_income_euros = Median_income_2012_euros,
Age_18_64_beneficiaries_of_social_integration_income_pct = x18_64_years_beneficiaries_of_social_integration_income_2012_pct,
Fetal_infantile_mortality = Fetal_infantile_mortality_on_1000_2009_2013,
Mothers_aged_20_pct = Mothers_20_years_pct_2009_2013,
Proportion_with_two_years_late_in_school = Proportion_with_two_years_late_in_school_2013_2014,
Children_with_mental_deficiency_pct = x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct,
Relative_risk_of_mortality_by_cardiovascular_diseases_men = Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013
)
#impute single missing datapoint
brussels_social %<>% miss_impute(method = "irmi")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
#analyze
S_fa = silence(fa(brussels_social))
S_fa_wtd = silence(fa(brussels_social, weight = brussels$Total_population_of_the_municipality_2014 %>% sqrt))
S_fa_rank = silence(fa(brussels_social %>% df_rank()))
#there are often warnings due to the small number of cases. In most cases, they are not problematic for the analysis.
S_fa_methods = fa_all_methods(brussels_social, messages = F)
S_fa_methods$loadings %>% GG_heatmap(reorder_vars = F)
#no variation in loadings by method
S_fa_methods$scores %>% GG_heatmap(reorder_vars = F)
#no variation in scores by method
#loadings
fa_plot_loadings(list(standard = S_fa,
weights = S_fa_wtd,
ranks = S_fa_rank),
reverse_vector = rep(-1, 3))
GG_save("figs/Brussels_loadings.png")
#extreme results!
#save scores
brussels$S = S_fa$scores[, 1] * -1
#Jensen method
fa_Jensens_method(S_fa, brussels_social %>% cbind(Muslim = brussels$pct_moslims_2013), "Muslim") +
scale_y_continuous("r (Muslim % x indicator)") +
scale_x_continuous("S-loading", limits = c(.55, 1))
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Brussels_loadings_Jensen.png")
## `geom_smooth()` using formula 'y ~ x'
#Muslim proportion 2013 ish
wtd_mean(brussels$pct_moslims_2013, brussels$Total_population_of_the_municipality_2014)
## [1] 0.226
#all correlations
combine_upperlower(
wtd.cors(brussels %>% select(pct_moslims_2013, S, Wealth_index_2013:General_humanities_technical_and_professional_humanities_ratio)),
wtd.cors(brussels %>% select(pct_moslims_2013, S, Wealth_index_2013:General_humanities_technical_and_professional_humanities_ratio),
weight = brussels$Total_population_of_the_municipality_2014 %>% sqrt())
)
## pct_moslims_2013
## pct_moslims_2013 NA
## S -0.942
## Wealth_index_2013 -0.944
## Mean_wages_compared_with_the_national_mean_pct -0.944
## Share_of_households_in_demand_for_social_housing_2012_pct 0.840
## Unemployment_rate_2012_pct 0.957
## Median_income_2012_euros -0.898
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.899
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.892
## Delinquency_in_the_total_population_18_25_years_pct 0.906
## Proportion_with_a_university_degree_pct -0.799
## Part_with_low_Birth_weight_pct 0.499
## Life_expectancy_at_birth -0.731
## Children_born_in_a_family_with_no_income_from_work_pct 0.956
## Population_growth_pct 0.792
## Houses_median_prize_euros -0.650
## Fraudulent_declaration_pct 0.684
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.883
## Mothers_20_years_pct_2009_2013 0.827
## Fetal_infantile_mortality_on_1000_2009_2013 0.619
## Proportion_with_two_years_late_in_school_2013_2014 0.927
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.796
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.560
## General_humanities_technical_and_professional_humanities_ratio -0.856
## S
## pct_moslims_2013 -0.944
## S NA
## Wealth_index_2013 0.952
## Mean_wages_compared_with_the_national_mean_pct 0.952
## Share_of_households_in_demand_for_social_housing_2012_pct -0.846
## Unemployment_rate_2012_pct -0.974
## Median_income_2012_euros 0.950
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.870
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.915
## Delinquency_in_the_total_population_18_25_years_pct -0.946
## Proportion_with_a_university_degree_pct 0.896
## Part_with_low_Birth_weight_pct -0.639
## Life_expectancy_at_birth 0.835
## Children_born_in_a_family_with_no_income_from_work_pct -0.949
## Population_growth_pct -0.831
## Houses_median_prize_euros 0.706
## Fraudulent_declaration_pct -0.735
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.965
## Mothers_20_years_pct_2009_2013 -0.802
## Fetal_infantile_mortality_on_1000_2009_2013 -0.605
## Proportion_with_two_years_late_in_school_2013_2014 -0.956
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.872
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.719
## General_humanities_technical_and_professional_humanities_ratio 0.931
## Wealth_index_2013
## pct_moslims_2013 -0.946
## S 0.950
## Wealth_index_2013 NA
## Mean_wages_compared_with_the_national_mean_pct 1.000
## Share_of_households_in_demand_for_social_housing_2012_pct -0.834
## Unemployment_rate_2012_pct -0.960
## Median_income_2012_euros 0.918
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.910
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.892
## Delinquency_in_the_total_population_18_25_years_pct -0.889
## Proportion_with_a_university_degree_pct 0.867
## Part_with_low_Birth_weight_pct -0.565
## Life_expectancy_at_birth 0.750
## Children_born_in_a_family_with_no_income_from_work_pct -0.939
## Population_growth_pct -0.840
## Houses_median_prize_euros 0.644
## Fraudulent_declaration_pct -0.677
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.903
## Mothers_20_years_pct_2009_2013 -0.795
## Fetal_infantile_mortality_on_1000_2009_2013 -0.560
## Proportion_with_two_years_late_in_school_2013_2014 -0.948
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.869
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.623
## General_humanities_technical_and_professional_humanities_ratio 0.937
## Mean_wages_compared_with_the_national_mean_pct
## pct_moslims_2013 -0.946
## S 0.950
## Wealth_index_2013 1.000
## Mean_wages_compared_with_the_national_mean_pct NA
## Share_of_households_in_demand_for_social_housing_2012_pct -0.834
## Unemployment_rate_2012_pct -0.960
## Median_income_2012_euros 0.918
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.910
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.892
## Delinquency_in_the_total_population_18_25_years_pct -0.889
## Proportion_with_a_university_degree_pct 0.867
## Part_with_low_Birth_weight_pct -0.565
## Life_expectancy_at_birth 0.750
## Children_born_in_a_family_with_no_income_from_work_pct -0.939
## Population_growth_pct -0.840
## Houses_median_prize_euros 0.644
## Fraudulent_declaration_pct -0.677
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.903
## Mothers_20_years_pct_2009_2013 -0.795
## Fetal_infantile_mortality_on_1000_2009_2013 -0.560
## Proportion_with_two_years_late_in_school_2013_2014 -0.948
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.869
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.623
## General_humanities_technical_and_professional_humanities_ratio 0.937
## Share_of_households_in_demand_for_social_housing_2012_pct
## pct_moslims_2013 0.855
## S -0.849
## Wealth_index_2013 -0.834
## Mean_wages_compared_with_the_national_mean_pct -0.834
## Share_of_households_in_demand_for_social_housing_2012_pct NA
## Unemployment_rate_2012_pct 0.831
## Median_income_2012_euros -0.796
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.755
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.746
## Delinquency_in_the_total_population_18_25_years_pct 0.895
## Proportion_with_a_university_degree_pct -0.815
## Part_with_low_Birth_weight_pct 0.581
## Life_expectancy_at_birth -0.688
## Children_born_in_a_family_with_no_income_from_work_pct 0.803
## Population_growth_pct 0.768
## Houses_median_prize_euros -0.696
## Fraudulent_declaration_pct 0.715
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.788
## Mothers_20_years_pct_2009_2013 0.663
## Fetal_infantile_mortality_on_1000_2009_2013 0.453
## Proportion_with_two_years_late_in_school_2013_2014 0.798
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.833
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.532
## General_humanities_technical_and_professional_humanities_ratio -0.781
## Unemployment_rate_2012_pct
## pct_moslims_2013 0.963
## S -0.971
## Wealth_index_2013 -0.961
## Mean_wages_compared_with_the_national_mean_pct -0.961
## Share_of_households_in_demand_for_social_housing_2012_pct 0.835
## Unemployment_rate_2012_pct NA
## Median_income_2012_euros -0.964
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.917
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.921
## Delinquency_in_the_total_population_18_25_years_pct 0.915
## Proportion_with_a_university_degree_pct -0.830
## Part_with_low_Birth_weight_pct 0.599
## Life_expectancy_at_birth -0.813
## Children_born_in_a_family_with_no_income_from_work_pct 0.946
## Population_growth_pct 0.770
## Houses_median_prize_euros -0.633
## Fraudulent_declaration_pct 0.718
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.938
## Mothers_20_years_pct_2009_2013 0.827
## Fetal_infantile_mortality_on_1000_2009_2013 0.679
## Proportion_with_two_years_late_in_school_2013_2014 0.967
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.894
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.683
## General_humanities_technical_and_professional_humanities_ratio -0.894
## Median_income_2012_euros
## pct_moslims_2013 -0.908
## S 0.951
## Wealth_index_2013 0.925
## Mean_wages_compared_with_the_national_mean_pct 0.925
## Share_of_households_in_demand_for_social_housing_2012_pct -0.810
## Unemployment_rate_2012_pct -0.968
## Median_income_2012_euros NA
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.836
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.913
## Delinquency_in_the_total_population_18_25_years_pct -0.872
## Proportion_with_a_university_degree_pct 0.772
## Part_with_low_Birth_weight_pct -0.617
## Life_expectancy_at_birth 0.803
## Children_born_in_a_family_with_no_income_from_work_pct -0.887
## Population_growth_pct -0.699
## Houses_median_prize_euros 0.517
## Fraudulent_declaration_pct -0.687
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.946
## Mothers_20_years_pct_2009_2013 -0.754
## Fetal_infantile_mortality_on_1000_2009_2013 -0.717
## Proportion_with_two_years_late_in_school_2013_2014 -0.974
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.890
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.706
## General_humanities_technical_and_professional_humanities_ratio 0.890
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct
## pct_moslims_2013 0.886
## S -0.849
## Wealth_index_2013 -0.895
## Mean_wages_compared_with_the_national_mean_pct -0.895
## Share_of_households_in_demand_for_social_housing_2012_pct 0.735
## Unemployment_rate_2012_pct 0.906
## Median_income_2012_euros -0.834
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct NA
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.810
## Delinquency_in_the_total_population_18_25_years_pct 0.834
## Proportion_with_a_university_degree_pct -0.743
## Part_with_low_Birth_weight_pct 0.472
## Life_expectancy_at_birth -0.761
## Children_born_in_a_family_with_no_income_from_work_pct 0.892
## Population_growth_pct 0.727
## Houses_median_prize_euros -0.595
## Fraudulent_declaration_pct 0.538
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.761
## Mothers_20_years_pct_2009_2013 0.806
## Fetal_infantile_mortality_on_1000_2009_2013 0.572
## Proportion_with_two_years_late_in_school_2013_2014 0.869
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.805
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.507
## General_humanities_technical_and_professional_humanities_ratio -0.789
## Juvenile_delinquency_in_the_total_population_under_18_years_pct
## pct_moslims_2013 0.911
## S -0.932
## Wealth_index_2013 -0.904
## Mean_wages_compared_with_the_national_mean_pct -0.904
## Share_of_households_in_demand_for_social_housing_2012_pct 0.765
## Unemployment_rate_2012_pct 0.930
## Median_income_2012_euros -0.918
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.799
## Juvenile_delinquency_in_the_total_population_under_18_years_pct NA
## Delinquency_in_the_total_population_18_25_years_pct 0.907
## Proportion_with_a_university_degree_pct -0.789
## Part_with_low_Birth_weight_pct 0.602
## Life_expectancy_at_birth -0.803
## Children_born_in_a_family_with_no_income_from_work_pct 0.872
## Population_growth_pct 0.766
## Houses_median_prize_euros -0.621
## Fraudulent_declaration_pct 0.663
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.922
## Mothers_20_years_pct_2009_2013 0.795
## Fetal_infantile_mortality_on_1000_2009_2013 0.564
## Proportion_with_two_years_late_in_school_2013_2014 0.921
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.823
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.716
## General_humanities_technical_and_professional_humanities_ratio -0.858
## Delinquency_in_the_total_population_18_25_years_pct
## pct_moslims_2013 0.910
## S -0.955
## Wealth_index_2013 -0.887
## Mean_wages_compared_with_the_national_mean_pct -0.887
## Share_of_households_in_demand_for_social_housing_2012_pct 0.890
## Unemployment_rate_2012_pct 0.916
## Median_income_2012_euros -0.876
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.813
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.907
## Delinquency_in_the_total_population_18_25_years_pct NA
## Proportion_with_a_university_degree_pct -0.920
## Part_with_low_Birth_weight_pct 0.692
## Life_expectancy_at_birth -0.860
## Children_born_in_a_family_with_no_income_from_work_pct 0.901
## Population_growth_pct 0.847
## Houses_median_prize_euros -0.820
## Fraudulent_declaration_pct 0.721
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.912
## Mothers_20_years_pct_2009_2013 0.785
## Fetal_infantile_mortality_on_1000_2009_2013 0.490
## Proportion_with_two_years_late_in_school_2013_2014 0.893
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.839
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.731
## General_humanities_technical_and_professional_humanities_ratio -0.891
## Proportion_with_a_university_degree_pct
## pct_moslims_2013 -0.788
## S 0.893
## Wealth_index_2013 0.854
## Mean_wages_compared_with_the_national_mean_pct 0.854
## Share_of_households_in_demand_for_social_housing_2012_pct -0.794
## Unemployment_rate_2012_pct -0.815
## Median_income_2012_euros 0.771
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.693
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.795
## Delinquency_in_the_total_population_18_25_years_pct -0.918
## Proportion_with_a_university_degree_pct NA
## Part_with_low_Birth_weight_pct -0.702
## Life_expectancy_at_birth 0.814
## Children_born_in_a_family_with_no_income_from_work_pct -0.844
## Population_growth_pct -0.897
## Houses_median_prize_euros 0.882
## Fraudulent_declaration_pct -0.744
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.874
## Mothers_20_years_pct_2009_2013 -0.685
## Fetal_infantile_mortality_on_1000_2009_2013 -0.345
## Proportion_with_two_years_late_in_school_2013_2014 -0.824
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.774
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.748
## General_humanities_technical_and_professional_humanities_ratio 0.930
## Part_with_low_Birth_weight_pct
## pct_moslims_2013 0.432
## S -0.579
## Wealth_index_2013 -0.494
## Mean_wages_compared_with_the_national_mean_pct -0.494
## Share_of_households_in_demand_for_social_housing_2012_pct 0.513
## Unemployment_rate_2012_pct 0.521
## Median_income_2012_euros -0.555
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.352
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.534
## Delinquency_in_the_total_population_18_25_years_pct 0.636
## Proportion_with_a_university_degree_pct -0.666
## Part_with_low_Birth_weight_pct NA
## Life_expectancy_at_birth -0.765
## Children_born_in_a_family_with_no_income_from_work_pct 0.522
## Population_growth_pct 0.589
## Houses_median_prize_euros -0.618
## Fraudulent_declaration_pct 0.659
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.626
## Mothers_20_years_pct_2009_2013 0.353
## Fetal_infantile_mortality_on_1000_2009_2013 0.211
## Proportion_with_two_years_late_in_school_2013_2014 0.590
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.556
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.569
## General_humanities_technical_and_professional_humanities_ratio -0.657
## Life_expectancy_at_birth
## pct_moslims_2013 -0.749
## S 0.854
## Wealth_index_2013 0.766
## Mean_wages_compared_with_the_national_mean_pct 0.766
## Share_of_households_in_demand_for_social_housing_2012_pct -0.692
## Unemployment_rate_2012_pct -0.822
## Median_income_2012_euros 0.819
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.749
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.828
## Delinquency_in_the_total_population_18_25_years_pct -0.876
## Proportion_with_a_university_degree_pct 0.818
## Part_with_low_Birth_weight_pct -0.715
## Life_expectancy_at_birth NA
## Children_born_in_a_family_with_no_income_from_work_pct -0.775
## Population_growth_pct -0.724
## Houses_median_prize_euros 0.748
## Fraudulent_declaration_pct -0.676
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.812
## Mothers_20_years_pct_2009_2013 -0.658
## Fetal_infantile_mortality_on_1000_2009_2013 -0.509
## Proportion_with_two_years_late_in_school_2013_2014 -0.772
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.709
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.730
## General_humanities_technical_and_professional_humanities_ratio 0.772
## Children_born_in_a_family_with_no_income_from_work_pct
## pct_moslims_2013 0.953
## S -0.944
## Wealth_index_2013 -0.935
## Mean_wages_compared_with_the_national_mean_pct -0.935
## Share_of_households_in_demand_for_social_housing_2012_pct 0.789
## Unemployment_rate_2012_pct 0.951
## Median_income_2012_euros -0.892
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.884
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.888
## Delinquency_in_the_total_population_18_25_years_pct 0.900
## Proportion_with_a_university_degree_pct -0.826
## Part_with_low_Birth_weight_pct 0.460
## Life_expectancy_at_birth -0.784
## Children_born_in_a_family_with_no_income_from_work_pct NA
## Population_growth_pct 0.844
## Houses_median_prize_euros -0.714
## Fraudulent_declaration_pct 0.668
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.919
## Mothers_20_years_pct_2009_2013 0.918
## Fetal_infantile_mortality_on_1000_2009_2013 0.652
## Proportion_with_two_years_late_in_school_2013_2014 0.917
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.771
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.651
## General_humanities_technical_and_professional_humanities_ratio -0.866
## Population_growth_pct
## pct_moslims_2013 0.793
## S -0.840
## Wealth_index_2013 -0.836
## Mean_wages_compared_with_the_national_mean_pct -0.836
## Share_of_households_in_demand_for_social_housing_2012_pct 0.744
## Unemployment_rate_2012_pct 0.771
## Median_income_2012_euros -0.712
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.686
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.783
## Delinquency_in_the_total_population_18_25_years_pct 0.840
## Proportion_with_a_university_degree_pct -0.886
## Part_with_low_Birth_weight_pct 0.544
## Life_expectancy_at_birth -0.728
## Children_born_in_a_family_with_no_income_from_work_pct 0.838
## Population_growth_pct NA
## Houses_median_prize_euros -0.869
## Fraudulent_declaration_pct 0.652
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.791
## Mothers_20_years_pct_2009_2013 0.718
## Fetal_infantile_mortality_on_1000_2009_2013 0.281
## Proportion_with_two_years_late_in_school_2013_2014 0.764
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.698
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.626
## General_humanities_technical_and_professional_humanities_ratio -0.836
## Houses_median_prize_euros
## pct_moslims_2013 -0.645
## S 0.716
## Wealth_index_2013 0.630
## Mean_wages_compared_with_the_national_mean_pct 0.630
## Share_of_households_in_demand_for_social_housing_2012_pct -0.660
## Unemployment_rate_2012_pct -0.627
## Median_income_2012_euros 0.526
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.544
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.640
## Delinquency_in_the_total_population_18_25_years_pct -0.821
## Proportion_with_a_university_degree_pct 0.877
## Part_with_low_Birth_weight_pct -0.601
## Life_expectancy_at_birth 0.746
## Children_born_in_a_family_with_no_income_from_work_pct -0.707
## Population_growth_pct -0.855
## Houses_median_prize_euros NA
## Fraudulent_declaration_pct -0.640
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.674
## Mothers_20_years_pct_2009_2013 -0.638
## Fetal_infantile_mortality_on_1000_2009_2013 -0.124
## Proportion_with_two_years_late_in_school_2013_2014 -0.573
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.542
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.658
## General_humanities_technical_and_professional_humanities_ratio 0.695
## Fraudulent_declaration_pct
## pct_moslims_2013 0.666
## S -0.706
## Wealth_index_2013 -0.654
## Mean_wages_compared_with_the_national_mean_pct -0.654
## Share_of_households_in_demand_for_social_housing_2012_pct 0.699
## Unemployment_rate_2012_pct 0.679
## Median_income_2012_euros -0.673
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.442
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.679
## Delinquency_in_the_total_population_18_25_years_pct 0.698
## Proportion_with_a_university_degree_pct -0.718
## Part_with_low_Birth_weight_pct 0.623
## Life_expectancy_at_birth -0.640
## Children_born_in_a_family_with_no_income_from_work_pct 0.625
## Population_growth_pct 0.623
## Houses_median_prize_euros -0.605
## Fraudulent_declaration_pct NA
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.768
## Mothers_20_years_pct_2009_2013 0.446
## Fetal_infantile_mortality_on_1000_2009_2013 0.445
## Proportion_with_two_years_late_in_school_2013_2014 0.695
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.632
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.466
## General_humanities_technical_and_professional_humanities_ratio -0.684
## Rate_of_borrowers_with_at_least_one_non_regularized_credit
## pct_moslims_2013 0.894
## S -0.967
## Wealth_index_2013 -0.910
## Mean_wages_compared_with_the_national_mean_pct -0.910
## Share_of_households_in_demand_for_social_housing_2012_pct 0.791
## Unemployment_rate_2012_pct 0.940
## Median_income_2012_euros -0.945
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.745
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.922
## Delinquency_in_the_total_population_18_25_years_pct 0.917
## Proportion_with_a_university_degree_pct -0.880
## Part_with_low_Birth_weight_pct 0.587
## Life_expectancy_at_birth -0.824
## Children_born_in_a_family_with_no_income_from_work_pct 0.921
## Population_growth_pct 0.813
## Houses_median_prize_euros -0.695
## Fraudulent_declaration_pct 0.747
## Rate_of_borrowers_with_at_least_one_non_regularized_credit NA
## Mothers_20_years_pct_2009_2013 0.783
## Fetal_infantile_mortality_on_1000_2009_2013 0.666
## Proportion_with_two_years_late_in_school_2013_2014 0.946
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.850
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.791
## General_humanities_technical_and_professional_humanities_ratio -0.929
## Mothers_20_years_pct_2009_2013
## pct_moslims_2013 0.810
## S -0.777
## Wealth_index_2013 -0.766
## Mean_wages_compared_with_the_national_mean_pct -0.766
## Share_of_households_in_demand_for_social_housing_2012_pct 0.608
## Unemployment_rate_2012_pct 0.818
## Median_income_2012_euros -0.738
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.809
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.776
## Delinquency_in_the_total_population_18_25_years_pct 0.752
## Proportion_with_a_university_degree_pct -0.627
## Part_with_low_Birth_weight_pct 0.265
## Life_expectancy_at_birth -0.643
## Children_born_in_a_family_with_no_income_from_work_pct 0.911
## Population_growth_pct 0.677
## Houses_median_prize_euros -0.606
## Fraudulent_declaration_pct 0.372
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.759
## Mothers_20_years_pct_2009_2013 NA
## Fetal_infantile_mortality_on_1000_2009_2013 0.606
## Proportion_with_two_years_late_in_school_2013_2014 0.777
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.648
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.666
## General_humanities_technical_and_professional_humanities_ratio -0.686
## Fetal_infantile_mortality_on_1000_2009_2013
## pct_moslims_2013 0.672
## S -0.640
## Wealth_index_2013 -0.626
## Mean_wages_compared_with_the_national_mean_pct -0.626
## Share_of_households_in_demand_for_social_housing_2012_pct 0.508
## Unemployment_rate_2012_pct 0.725
## Median_income_2012_euros -0.741
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.633
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.600
## Delinquency_in_the_total_population_18_25_years_pct 0.535
## Proportion_with_a_university_degree_pct -0.392
## Part_with_low_Birth_weight_pct 0.171
## Life_expectancy_at_birth -0.523
## Children_born_in_a_family_with_no_income_from_work_pct 0.708
## Population_growth_pct 0.357
## Houses_median_prize_euros -0.169
## Fraudulent_declaration_pct 0.425
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.690
## Mothers_20_years_pct_2009_2013 0.654
## Fetal_infantile_mortality_on_1000_2009_2013 NA
## Proportion_with_two_years_late_in_school_2013_2014 0.686
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.556
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.393
## General_humanities_technical_and_professional_humanities_ratio -0.466
## Proportion_with_two_years_late_in_school_2013_2014
## pct_moslims_2013 0.927
## S -0.956
## Wealth_index_2013 -0.952
## Mean_wages_compared_with_the_national_mean_pct -0.952
## Share_of_households_in_demand_for_social_housing_2012_pct 0.812
## Unemployment_rate_2012_pct 0.967
## Median_income_2012_euros -0.975
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.857
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.914
## Delinquency_in_the_total_population_18_25_years_pct 0.892
## Proportion_with_a_university_degree_pct -0.825
## Part_with_low_Birth_weight_pct 0.527
## Life_expectancy_at_birth -0.784
## Children_born_in_a_family_with_no_income_from_work_pct 0.919
## Population_growth_pct 0.773
## Houses_median_prize_euros -0.578
## Fraudulent_declaration_pct 0.675
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.949
## Mothers_20_years_pct_2009_2013 0.757
## Fetal_infantile_mortality_on_1000_2009_2013 0.728
## Proportion_with_two_years_late_in_school_2013_2014 NA
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.879
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.688
## General_humanities_technical_and_professional_humanities_ratio -0.928
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct
## pct_moslims_2013 0.838
## S -0.885
## Wealth_index_2013 -0.894
## Mean_wages_compared_with_the_national_mean_pct -0.894
## Share_of_households_in_demand_for_social_housing_2012_pct 0.835
## Unemployment_rate_2012_pct 0.910
## Median_income_2012_euros -0.907
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.824
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.831
## Delinquency_in_the_total_population_18_25_years_pct 0.839
## Proportion_with_a_university_degree_pct -0.761
## Part_with_low_Birth_weight_pct 0.455
## Life_expectancy_at_birth -0.720
## Children_born_in_a_family_with_no_income_from_work_pct 0.799
## Population_growth_pct 0.704
## Houses_median_prize_euros -0.522
## Fraudulent_declaration_pct 0.593
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.860
## Mothers_20_years_pct_2009_2013 0.642
## Fetal_infantile_mortality_on_1000_2009_2013 0.629
## Proportion_with_two_years_late_in_school_2013_2014 0.900
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct NA
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 0.694
## General_humanities_technical_and_professional_humanities_ratio -0.855
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013
## pct_moslims_2013 0.616
## S -0.764
## Wealth_index_2013 -0.658
## Mean_wages_compared_with_the_national_mean_pct -0.658
## Share_of_households_in_demand_for_social_housing_2012_pct 0.545
## Unemployment_rate_2012_pct 0.725
## Median_income_2012_euros -0.735
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct 0.557
## Juvenile_delinquency_in_the_total_population_under_18_years_pct 0.736
## Delinquency_in_the_total_population_18_25_years_pct 0.764
## Proportion_with_a_university_degree_pct -0.767
## Part_with_low_Birth_weight_pct 0.539
## Life_expectancy_at_birth -0.763
## Children_born_in_a_family_with_no_income_from_work_pct 0.706
## Population_growth_pct 0.657
## Houses_median_prize_euros -0.696
## Fraudulent_declaration_pct 0.452
## Rate_of_borrowers_with_at_least_one_non_regularized_credit 0.815
## Mothers_20_years_pct_2009_2013 0.693
## Fetal_infantile_mortality_on_1000_2009_2013 0.446
## Proportion_with_two_years_late_in_school_2013_2014 0.723
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct 0.700
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 NA
## General_humanities_technical_and_professional_humanities_ratio -0.745
## General_humanities_technical_and_professional_humanities_ratio
## pct_moslims_2013 -0.845
## S 0.926
## Wealth_index_2013 0.930
## Mean_wages_compared_with_the_national_mean_pct 0.930
## Share_of_households_in_demand_for_social_housing_2012_pct -0.778
## Unemployment_rate_2012_pct -0.883
## Median_income_2012_euros 0.885
## x18_64_years_beneficiaries_of_social_integration_income_2012_pct -0.757
## Juvenile_delinquency_in_the_total_population_under_18_years_pct -0.852
## Delinquency_in_the_total_population_18_25_years_pct -0.887
## Proportion_with_a_university_degree_pct 0.934
## Part_with_low_Birth_weight_pct -0.620
## Life_expectancy_at_birth 0.786
## Children_born_in_a_family_with_no_income_from_work_pct -0.851
## Population_growth_pct -0.837
## Houses_median_prize_euros 0.697
## Fraudulent_declaration_pct -0.676
## Rate_of_borrowers_with_at_least_one_non_regularized_credit -0.930
## Mothers_20_years_pct_2009_2013 -0.634
## Fetal_infantile_mortality_on_1000_2009_2013 -0.511
## Proportion_with_two_years_late_in_school_2013_2014 -0.924
## x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct -0.864
## Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 -0.760
## General_humanities_technical_and_professional_humanities_ratio NA
#specific Muslim % and S indicators
wtd.cors(cbind(brussels$pct_moslims_2013, brussels_social, S = brussels$S),
weight = brussels$Total_population_of_the_municipality_2014 %>% sqrt())[1, -1]
## Wealth_index
## -0.944
## Mean_wages_compared_with_the_national_mean_pct
## -0.944
## Share_of_households_in_demand_for_social_housing_pct
## 0.840
## Unemployment_rate_pct
## 0.957
## Median_income_euros
## -0.898
## Age_18_64_beneficiaries_of_social_integration_income_pct
## 0.899
## Juvenile_delinquency_in_the_total_population_under_18_years_pct
## 0.892
## Delinquency_in_the_total_population_18_25_years_pct
## 0.906
## Proportion_with_a_university_degree_pct
## -0.799
## Part_with_low_Birth_weight_pct
## 0.499
## Life_expectancy_at_birth
## -0.731
## Children_born_in_a_family_with_no_income_from_work_pct
## 0.956
## Houses_median_prize_euros
## -0.650
## Fraudulent_declaration_pct
## 0.684
## Rate_of_borrowers_with_at_least_one_non_regularized_credit
## 0.889
## Mothers_aged_20_pct
## 0.827
## Fetal_infantile_mortality
## 0.619
## Proportion_with_two_years_late_in_school
## 0.927
## Children_with_mental_deficiency_pct
## 0.796
## Relative_risk_of_mortality_by_cardiovascular_diseases_men
## 0.560
## General_humanities_technical_and_professional_humanities_ratio
## -0.856
## Blood_donor_pct
## -0.854
## S
## -0.942
#Muslim % 2013 vs. North African % 2016?
GG_scatter(brussels, "North_African_proportion_2016_pct", "pct_moslims_2013",
case_names = "Name")
## `geom_smooth()` using formula 'y ~ x'
#scatter
GG_scatter(brussels, "pct_moslims_2013", "S", case_names = "Name", repel_names = T, weights = sqrt(brussels$Total_population_of_the_municipality_2014)) +
scale_x_continuous("Muslim %", labels = scales::percent) +
scale_y_continuous("General socioeconomic factor (S)")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Brussels_Muslim_S.png")
## `geom_smooth()` using formula 'y ~ x'
Examine relationship to Muslim demographics.
#specific outcomes
(brussels_income = ols(Median_income_2012_euros ~ pct_moslims_2013, data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014)))
## Linear Regression Model
##
## ols(formula = Median_income_2012_euros ~ pct_moslims_2013, data = brussels,
## weights = sqrt(brussels$Total_population_of_the_municipality_2014))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 19 LR chi2 31.15 R2 0.806
## sigma18519.9237 d.f. 1 R2 adj 0.795
## d.f. 17 Pr(> chi2) 0.0000 g 2757.785
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2290.99 -336.04 -63.88 634.92 2493.49
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 23105.6062 552.5774 41.81 <0.0001
## pct_moslims_2013 -19356.9850 2303.6140 -8.40 <0.0001
##
(brussels_unempl = ols(Unemployment_rate_2012_pct ~ pct_moslims_2013, data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014)))
## Linear Regression Model
##
## ols(formula = Unemployment_rate_2012_pct ~ pct_moslims_2013,
## data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 19 LR chi2 47.16 R2 0.916
## sigma28.4748 d.f. 1 R2 adj 0.912
## d.f. 17 Pr(> chi2) 0.0000 g 6.890
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.3298 -1.0621 0.1465 1.3019 3.2057
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 11.8152 0.8496 13.91 <0.0001
## pct_moslims_2013 48.3626 3.5419 13.65 <0.0001
##
(brussels_uni = ols(Proportion_with_a_university_degree_pct ~ pct_moslims_2013, data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014)))
## Linear Regression Model
##
## ols(formula = Proportion_with_a_university_degree_pct ~ pct_moslims_2013,
## data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 19 LR chi2 19.29 R2 0.638
## sigma42.6414 d.f. 1 R2 adj 0.616
## d.f. 17 Pr(> chi2) 0.0000 g 4.133
##
## Residuals
##
## Min 1Q Median 3Q Max
## -5.246 -1.932 -0.321 1.872 4.701
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 14.5637 1.2723 11.45 <0.0001
## pct_moslims_2013 -29.0102 5.3040 -5.47 <0.0001
##
(brussels_S = ols(S ~ pct_moslims_2013, data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014)))
## Linear Regression Model
##
## ols(formula = S ~ pct_moslims_2013, data = brussels, weights = sqrt(brussels$Total_population_of_the_municipality_2014))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 19 LR chi2 41.51 R2 0.887
## sigma5.2230 d.f. 1 R2 adj 0.881
## d.f. 17 Pr(> chi2) 0.0000 g 1.072
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.47673 -0.27677 -0.02969 0.13085 0.76451
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.4761 0.1558 9.47 <0.0001
## pct_moslims_2013 -7.5221 0.6497 -11.58 <0.0001
##
#summarize
list(
median_income = brussels_income,
unemployment_rate = brussels_unempl,
uni_degree = brussels_uni,
S = brussels_S
) %>%
summarize_models()
#plot wealth index timeline
wealth_timeline %>%
ggplot(aes(Year, Wealth_index, color = Area)) +
geom_path() +
scale_y_continuous("Wealth index (100 = Belgian mean income)", breaks = seq(0, 200, by = 10)) +
scale_color_discrete(labels = c("Brussels", "Brussels\nperiphery"))
GG_save("figs/Brussels_decline.png")
We cannot do S factor analysis because we only have one good outcome (median income).
#social data
muni_social = muni %>%
select(median_income_2016, mean_income_2016, income_inequality_2016,
crime_rate, violent_crimes_rate,
life_expectancy, higher_education_pct, unemployment)
#impute
muni_social %>% miss_amount()
## cases with missing data vars with missing data cells with missing data
## 0.06000 0.50000 0.00806
muni_social %<>% miss_impute(method = "irmi")
muni_social %>% miss_amount()
## cases with missing data vars with missing data cells with missing data
## 0 0 0
#cors
wtd.cors(muni_social)
## median_income_2016 mean_income_2016
## median_income_2016 1.000 0.850
## mean_income_2016 0.850 1.000
## income_inequality_2016 0.261 0.592
## crime_rate -0.433 -0.273
## violent_crimes_rate -0.544 -0.454
## life_expectancy 0.445 0.474
## higher_education_pct 0.501 0.783
## unemployment -0.674 -0.491
## income_inequality_2016 crime_rate violent_crimes_rate
## median_income_2016 0.2611 -0.4331 -0.544
## mean_income_2016 0.5924 -0.2734 -0.454
## income_inequality_2016 1.0000 -0.0379 -0.240
## crime_rate -0.0379 1.0000 0.855
## violent_crimes_rate -0.2399 0.8551 1.000
## life_expectancy 0.2794 -0.2835 -0.468
## higher_education_pct 0.8582 -0.1029 -0.313
## unemployment -0.0577 0.6826 0.736
## life_expectancy higher_education_pct unemployment
## median_income_2016 0.445 0.501 -0.6735
## mean_income_2016 0.474 0.783 -0.4910
## income_inequality_2016 0.279 0.858 -0.0577
## crime_rate -0.283 -0.103 0.6826
## violent_crimes_rate -0.468 -0.313 0.7357
## life_expectancy 1.000 0.351 -0.5471
## higher_education_pct 0.351 1.000 -0.2057
## unemployment -0.547 -0.206 1.0000
#FA object
S_fa_muni = fa(muni_social)
S_fa_muni
## Factor Analysis using method = minres
## Call: fa(r = muni_social)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## median_income_2016 0.83 0.68 0.32 1
## mean_income_2016 0.85 0.72 0.28 1
## income_inequality_2016 0.46 0.22 0.78 1
## crime_rate -0.57 0.33 0.67 1
## violent_crimes_rate -0.76 0.58 0.42 1
## life_expectancy 0.59 0.35 0.65 1
## higher_education_pct 0.62 0.39 0.61 1
## unemployment -0.74 0.54 0.46 1
##
## MR1
## SS loadings 3.80
## Proportion Var 0.47
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 7.58 with Chi Square of 4428
## The degrees of freedom for the model are 20 and the objective function was 4.27
##
## The root mean square of the residuals (RMSR) is 0.2
## The df corrected root mean square of the residuals is 0.24
##
## The harmonic number of observations is 589 with the empirical chi square 1380 with prob < 2e-280
## The total number of observations was 589 with Likelihood Chi Square = 2494 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.212
## RMSEA index = 0.458 and the 90 % confidence intervals are 0.444 0.474
## BIC = 2366
## Fit based upon off diagonal values = 0.84
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.95
## Multiple R square of scores with factors 0.91
## Minimum correlation of possible factor scores 0.82
#weighted
S_fa_muni_wtd = fa(muni_social, weight = muni$population %>% sqrt())
S_fa_muni_wtd
## Factor Analysis using method = minres
## Call: fa(r = muni_social, weight = muni$population %>% sqrt())
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## median_income_2016 0.86 0.74 0.26 1
## mean_income_2016 0.84 0.70 0.30 1
## income_inequality_2016 0.38 0.14 0.86 1
## crime_rate -0.59 0.35 0.65 1
## violent_crimes_rate -0.76 0.58 0.42 1
## life_expectancy 0.63 0.40 0.60 1
## higher_education_pct 0.55 0.31 0.69 1
## unemployment -0.77 0.59 0.41 1
##
## MR1
## SS loadings 3.80
## Proportion Var 0.48
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 8.26 with Chi Square of 4828
## The degrees of freedom for the model are 20 and the objective function was 4.86
##
## The root mean square of the residuals (RMSR) is 0.23
## The df corrected root mean square of the residuals is 0.27
##
## The harmonic number of observations is 589 with the empirical chi square 1677 with prob < 0
## The total number of observations was 589 with Likelihood Chi Square = 2836 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.178
## RMSEA index = 0.489 and the 90 % confidence intervals are 0.474 0.505
## BIC = 2708
## Fit based upon off diagonal values = 0.81
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.96
## Multiple R square of scores with factors 0.92
## Minimum correlation of possible factor scores 0.84
#there are often warnings due to the small number of cases. In most cases, they are not problematic for the analysis.
S_fa_methods = fa_all_methods(muni_social, messages = F)
S_fa_methods_rank = fa_all_methods(muni_social %>% df_rank(), messages = F)
S_fa_methods$loadings %>% GG_heatmap()
S_fa_methods_rank$loadings %>% GG_heatmap()
#loadings plot
S_fa_methods$loadings %>%
cbind(indicator = names(muni_social)) %>%
gather(key = method, value = loading, -indicator) %>%
mutate(indicator = fct_reorder(indicator, loading)) %>%
ggplot(aes(loading, indicator, color = method)) +
geom_point()
GG_save("figs/belgium_S_loadings.png")
S_fa_methods_rank$loadings %>%
cbind(indicator = names(muni_social)) %>%
gather(key = method, value = loading, -indicator) %>%
mutate(indicator = fct_reorder(indicator, loading)) %>%
ggplot(aes(loading, indicator, color = method)) +
geom_point()
GG_save("figs/belgium_S_loadings_rank.png")
#add Z score mean
#reverse S factor
# S_fa_methods$scores = S_fa_methods$scores * -1
# S_fa_methods$scores$zmean = muni_social %>% map_df(standardize) %>% map2_df(c(1, -1, -1, 1), ~.x * .y) %>% rowMeans(na.rm = T)
S_fa_methods$scores %>% cbind(muslim = muni$Muslim_pct) %>% GG_heatmap(reorder_vars = F)
#which has strongest cors to the others?
S_fa_methods$scores %>% wtd.cors() %>%
{
diag(.) = NA
.
} %>% colMeans(na.rm = T) %>%
sort()
## Bartlett_ml regression_ml Thurstone_ml tenBerge_ml
## 0.913 0.913 0.913 0.913
## tenBerge_minrank regression_minrank Thurstone_minrank regression_gls
## 0.975 0.975 0.975 0.976
## Thurstone_gls tenBerge_gls Bartlett_minchi regression_minchi
## 0.976 0.976 0.981 0.981
## Thurstone_minchi tenBerge_minchi regression_pa Thurstone_pa
## 0.981 0.981 0.982 0.982
## tenBerge_pa tenBerge_minres regression_minres Thurstone_minres
## 0.982 0.982 0.982 0.982
## regression_ols Thurstone_ols tenBerge_ols Bartlett_gls
## 0.982 0.982 0.982 0.982
## regression_wls Thurstone_wls tenBerge_wls Bartlett_wls
## 0.983 0.983 0.983 0.983
## Bartlett_minres Bartlett_ols Bartlett_pa Bartlett_minrank
## 0.984 0.984 0.984 0.984
#use best guess
# muni$S = S_fa_methods$scores$zmean %>% standardize()
muni$S = S_fa_methods$scores$tenBerge_wls %>% standardize()
muni$S_rank = S_fa_methods_rank$scores$tenBerge_wls %>% standardize()
muni_spatial$S = S_fa_methods$scores$tenBerge_wls %>% standardize()
#compare with ranks
GG_scatter(muni, "S", "S_rank")
## `geom_smooth()` using formula 'y ~ x'
#lag
muni = add_lag(muni, var = c("S", "S_rank"))
#Jensen method, naive
S_fa_muni_data = muni_social %>% cbind(Muslim = muni$Muslim_pct)
fa_Jensens_method(S_fa_muni, S_fa_muni_data, "Muslim", text_pos = "tr") +
scale_y_continuous("r (Muslim % x indicator)") +
scale_x_continuous("S-loading")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/muni_loadings_Jensen.png")
## `geom_smooth()` using formula 'y ~ x'
#R2 gain from Muslim
S_fa_muni_muslim_r2_gain = map_df(names(muni_social), function(v) {
# browser()
#formula
f0 = str_glue("{v} ~ rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density)")
f0 = as.formula(f0)
f1 = str_glue("{v} ~ Muslim + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density)")
f1 = as.formula(f1)
#fit models
local_d = cbind(S_fa_muni_data, muni %>% select(age_65ao_pct, age_0_19_pct, pop_density, ends_with("_lag")))
mod0 = ols(f0, data = local_d, weights = muni$population %>% sqrt())
mod1 = ols(f1, data = local_d, weights = muni$population %>% sqrt())
mod1b = ols(f1, data = local_d %>% df_standardize(), weights = muni$population %>% sqrt())
mod1c = ols(f1, data = local_d %>% df_standardize(exclude = "Muslim"), weights = muni$population %>% sqrt())
y = tibble(
indicator = v,
reversed = F,
loading = S_fa_muni$loadings[v, 1],
r = wtd.cors(local_d$Muslim, local_d[[v]]) %>% as.numeric(),
r_wt = wtd.cors(local_d$Muslim, local_d[[v]], weight = muni$population %>% sqrt()) %>% as.numeric(),
partial_r2 = plot(anova(mod1, tol=1e-12), what='partial R2', pl = F)[1],
delta_r2 = summary.lm(mod1)$adj.r.squared - summary.lm(mod0)$adj.r.squared,
slope = coef(mod1c)[2]
)
#reverse if needed
if (y$loading < 0) {
y$reversed = T
y$loading = y$loading * -1
y$r = y$r * -1
y$r_wt = y$r_wt * -1
y$slope = y$slope * -1
}
y
})
#correlations
combine_upperlower(
S_fa_muni_muslim_r2_gain[-c(1:2)] %>% wtd.cors(),
S_fa_muni_muslim_r2_gain[-c(1:2)] %>% cor(method = "spearman")
)
## loading r r_wt partial_r2 delta_r2 slope
## loading NA -0.4634 -0.5648 0.510 0.527 -0.500
## r -0.310 NA 0.9880 0.212 0.151 0.184
## r_wt -0.333 0.9762 NA 0.175 0.119 0.178
## partial_r2 0.595 0.2857 0.2619 NA 0.997 -0.868
## delta_r2 0.595 0.2857 0.2619 1.000 NA -0.901
## slope -0.690 0.0476 0.0238 -0.905 -0.905 NA
Instead of including control variables of age and population density in the regressions, we can residualize the S factor indicators for them initially.
#residualize for age and population density
muni_social_resid = muni_social
for (v in names(muni_social_resid)) {
#fit model with age splines
v_form = str_glue("{v} ~ rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density)")
v_form = as.formula(v_form)
v_mod = ols(v_form, data = cbind(muni_social_resid, muni %>% select(age_65ao_pct, age_0_19_pct, pop_density)))
muni_social_resid[[v]] = resid(v_mod)
}
#with resid data
S_fa_muni_resid = fa(muni_social_resid)
S_fa_muni_resid
## Factor Analysis using method = minres
## Call: fa(r = muni_social_resid)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## median_income_2016 0.81 0.65 0.35 1
## mean_income_2016 0.87 0.75 0.25 1
## income_inequality_2016 0.63 0.40 0.60 1
## crime_rate -0.58 0.34 0.66 1
## violent_crimes_rate -0.76 0.57 0.43 1
## life_expectancy 0.60 0.36 0.64 1
## higher_education_pct 0.72 0.52 0.48 1
## unemployment -0.77 0.59 0.41 1
##
## MR1
## SS loadings 4.18
## Proportion Var 0.52
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 7.79 with Chi Square of 4553
## The degrees of freedom for the model are 20 and the objective function was 3.92
##
## The root mean square of the residuals (RMSR) is 0.17
## The df corrected root mean square of the residuals is 0.2
##
## The harmonic number of observations is 589 with the empirical chi square 972 with prob < 3.2e-193
## The total number of observations was 589 with Likelihood Chi Square = 2291 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.297
## RMSEA index = 0.439 and the 90 % confidence intervals are 0.424 0.455
## BIC = 2163
## Fit based upon off diagonal values = 0.9
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.96
## Multiple R square of scores with factors 0.92
## Minimum correlation of possible factor scores 0.85
#save scores
muni$S_resid = S_fa_muni_resid$scores[, 1] %>% standardize()
#compare
GG_scatter(muni, "S", "S_resid", color = "province")
## `geom_smooth()` using formula 'y ~ x'
Same data but limited area.
#subset
muni_social_brussels = muni_social[muni$region == "Brussels Hoofdstedelijk", ]
#factor analyze
S_fa_muni_brussels = fa(muni_social_brussels)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
S_fa_muni_brussels
## Factor Analysis using method = minres
## Call: fa(r = muni_social_brussels)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## median_income_2016 0.96 0.92 0.082 1
## mean_income_2016 0.98 0.97 0.032 1
## income_inequality_2016 0.49 0.24 0.764 1
## crime_rate -0.44 0.19 0.810 1
## violent_crimes_rate -0.61 0.37 0.627 1
## life_expectancy 0.91 0.83 0.166 1
## higher_education_pct 0.86 0.73 0.268 1
## unemployment -0.95 0.90 0.102 1
##
## MR1
## SS loadings 5.15
## Proportion Var 0.64
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 15.8 with Chi Square of 230
## The degrees of freedom for the model are 20 and the objective function was 7.64
##
## The root mean square of the residuals (RMSR) is 0.2
## The df corrected root mean square of the residuals is 0.23
##
## The harmonic number of observations is 19 with the empirical chi square 40.8 with prob < 0.0039
## The total number of observations was 19 with Likelihood Chi Square = 106 with prob < 1.2e-13
##
## Tucker Lewis Index of factoring reliability = 0.372
## RMSEA index = 0.472 and the 90 % confidence intervals are 0.398 0.581
## BIC = 46.8
## Fit based upon off diagonal values = 0.91
#method variance
S_fa_muni_brussels_method_variance = fa_all_methods(muni_social_brussels)
## 1 out of 40 - regression_minres
## Saving results from regression_minres
## 2 out of 40 - Thurstone_minres
## Saving results from Thurstone_minres
## 3 out of 40 - tenBerge_minres
## Skipping tenBerge_minres due to extraction error
## 4 out of 40 - Anderson_minres
## Skipping Anderson_minres due to extraction error
## 5 out of 40 - Bartlett_minres
## Saving results from Bartlett_minres
## 6 out of 40 - regression_ols
## Saving results from regression_ols
## 7 out of 40 - Thurstone_ols
## Saving results from Thurstone_ols
## 8 out of 40 - tenBerge_ols
## Skipping tenBerge_ols due to extraction error
## 9 out of 40 - Anderson_ols
## Skipping Anderson_ols due to extraction error
## 10 out of 40 - Bartlett_ols
## Saving results from Bartlett_ols
## 11 out of 40 - regression_wls
## Saving results from regression_wls
## 12 out of 40 - Thurstone_wls
## Saving results from Thurstone_wls
## 13 out of 40 - tenBerge_wls
## Saving results from tenBerge_wls
## 14 out of 40 - Anderson_wls
## Skipping Anderson_wls due to extraction error
## 15 out of 40 - Bartlett_wls
## Saving results from Bartlett_wls
## 16 out of 40 - regression_gls
## Saving results from regression_gls
## 17 out of 40 - Thurstone_gls
## Saving results from Thurstone_gls
## 18 out of 40 - tenBerge_gls
## Skipping tenBerge_gls due to extraction error
## 19 out of 40 - Anderson_gls
## Skipping Anderson_gls due to extraction error
## 20 out of 40 - Bartlett_gls
## Saving results from Bartlett_gls
## 21 out of 40 - regression_pa
## Saving results from regression_pa
## 22 out of 40 - Thurstone_pa
## Saving results from Thurstone_pa
## 23 out of 40 - tenBerge_pa
## Skipping tenBerge_pa due to extraction error
## 24 out of 40 - Anderson_pa
## Skipping Anderson_pa due to extraction error
## 25 out of 40 - Bartlett_pa
## Saving results from Bartlett_pa
## 26 out of 40 - regression_ml
## Saving results from regression_ml
## 27 out of 40 - Thurstone_ml
## Saving results from Thurstone_ml
## 28 out of 40 - tenBerge_ml
## Saving results from tenBerge_ml
## 29 out of 40 - Anderson_ml
## Skipping Anderson_ml due to extraction error
## 30 out of 40 - Bartlett_ml
## Saving results from Bartlett_ml
## 31 out of 40 - regression_minchi
## Saving results from regression_minchi
## 32 out of 40 - Thurstone_minchi
## Saving results from Thurstone_minchi
## 33 out of 40 - tenBerge_minchi
## Saving results from tenBerge_minchi
## 34 out of 40 - Anderson_minchi
## Skipping Anderson_minchi due to extraction error
## 35 out of 40 - Bartlett_minchi
## Saving results from Bartlett_minchi
## 36 out of 40 - regression_minrank
## Saving results from regression_minrank
## 37 out of 40 - Thurstone_minrank
## Saving results from Thurstone_minrank
## 38 out of 40 - tenBerge_minrank
## Saving results from tenBerge_minrank
## 39 out of 40 - Anderson_minrank
## Skipping Anderson_minrank due to extraction error
## 40 out of 40 - Bartlett_minrank
## Saving results from Bartlett_minrank
S_fa_muni_brussels_method_variance$scores %>% GG_heatmap()
S_fa_muni_brussels_method_variance$scores %>% psych::alpha()
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 1 1 1 0.97 892 0.00042 -7.7e-16 0.99 0.98
##
## lower alpha upper 95% confidence boundaries
## 1 1 1
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N var.r med.r
## regression_minres 1 1 1 0.97 849 0.00066 0.98
## Thurstone_minres 1 1 1 0.97 849 0.00066 0.98
## Bartlett_minres 1 1 1 0.97 840 0.00067 0.98
## regression_ols 1 1 1 0.97 849 0.00066 0.98
## Thurstone_ols 1 1 1 0.97 849 0.00066 0.98
## Bartlett_ols 1 1 1 0.97 840 0.00067 0.98
## regression_wls 1 1 1 0.97 844 0.00068 0.98
## Thurstone_wls 1 1 1 0.97 844 0.00068 0.98
## tenBerge_wls 1 1 1 0.97 844 0.00068 0.98
## Bartlett_wls 1 1 1 0.97 836 0.00068 0.98
## regression_gls 1 1 1 0.97 848 0.00068 0.98
## Thurstone_gls 1 1 1 0.97 848 0.00068 0.98
## Bartlett_gls 1 1 1 0.97 836 0.00068 0.98
## regression_pa 1 1 1 0.97 848 0.00066 0.98
## Thurstone_pa 1 1 1 0.97 848 0.00066 0.98
## Bartlett_pa 1 1 1 0.97 840 0.00067 0.98
## regression_ml 1 1 1 0.97 858 0.00066 0.98
## Thurstone_ml 1 1 1 0.97 858 0.00066 0.98
## tenBerge_ml 1 1 1 0.97 858 0.00066 0.98
## Bartlett_ml 1 1 1 0.97 858 0.00066 0.98
## regression_minchi 1 1 1 0.97 935 0.00058 0.98
## Thurstone_minchi 1 1 1 0.97 935 0.00058 0.98
## tenBerge_minchi 1 1 1 0.97 935 0.00058 0.98
## Bartlett_minchi 1 1 1 0.97 940 0.00057 0.98
## regression_minrank 1 1 1 0.97 866 0.00067 0.98
## Thurstone_minrank 1 1 1 0.97 866 0.00067 0.98
## tenBerge_minrank 1 1 1 0.97 866 0.00067 0.98
## Bartlett_minrank 1 1 1 0.97 840 0.00067 0.98
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## regression_minres 19 0.99 0.99 0.99 0.99 -3.5e-16 1.01
## Thurstone_minres 19 0.99 0.99 0.99 0.99 -3.5e-16 1.01
## Bartlett_minres 19 0.99 0.99 0.99 0.99 -3.7e-16 1.01
## regression_ols 19 0.99 0.99 0.99 0.99 -3.2e-16 1.01
## Thurstone_ols 19 0.99 0.99 0.99 0.99 -3.2e-16 1.01
## Bartlett_ols 19 0.99 0.99 0.99 0.99 -3.4e-16 1.01
## regression_wls 19 0.99 0.99 0.99 0.99 -1.5e-15 0.99
## Thurstone_wls 19 0.99 0.99 0.99 0.99 -1.5e-15 0.99
## tenBerge_wls 19 0.99 0.99 0.99 0.99 -1.4e-15 0.99
## Bartlett_wls 19 1.00 1.00 1.00 1.00 -7.4e-16 1.02
## regression_gls 19 0.99 0.99 0.99 0.99 -1.5e-15 1.01
## Thurstone_gls 19 0.99 0.99 0.99 0.99 -1.5e-15 1.01
## Bartlett_gls 19 1.00 1.00 1.00 1.00 -7.2e-16 1.01
## regression_pa 19 0.99 0.99 0.99 0.99 -2.7e-16 1.01
## Thurstone_pa 19 0.99 0.99 0.99 0.99 -2.7e-16 1.01
## Bartlett_pa 19 0.99 0.99 0.99 0.99 -3.0e-16 1.01
## regression_ml 19 0.99 0.99 0.99 0.99 1.2e-16 1.00
## Thurstone_ml 19 0.99 0.99 0.99 0.99 1.2e-16 1.00
## tenBerge_ml 19 0.99 0.99 0.99 0.99 8.3e-17 1.00
## Bartlett_ml 19 0.99 0.99 0.99 0.99 1.8e-16 1.00
## regression_minchi 19 0.96 0.96 0.96 0.95 -2.4e-15 1.00
## Thurstone_minchi 19 0.96 0.96 0.96 0.95 -2.4e-15 1.00
## tenBerge_minchi 19 0.96 0.96 0.96 0.95 -2.4e-15 1.00
## Bartlett_minchi 19 0.95 0.95 0.95 0.95 -2.4e-15 1.00
## regression_minrank 19 0.98 0.98 0.98 0.98 -1.3e-16 1.00
## Thurstone_minrank 19 0.98 0.98 0.98 0.98 -1.3e-16 1.00
## tenBerge_minrank 19 0.98 0.98 0.98 0.98 -1.4e-16 1.00
## Bartlett_minrank 19 0.99 0.99 0.99 0.99 -4.7e-16 1.01
#compare results
S_fa_muni_brussels_scores = tibble(
name = muni$Gemeente[muni$region == "Brussels Hoofdstedelijk"],
all = S_fa_muni$scores[muni$region == "Brussels Hoofdstedelijk", 1],
all_brussels = S_fa_muni_brussels$scores[, 1],
brussels = brussels$S[order(brussels$NIS_code)]
)
#correlations
S_fa_muni_brussels_scores[-1] %>% wtd.cors()
## all all_brussels brussels
## all 1.000 0.977 0.980
## all_brussels 0.977 1.000 0.976
## brussels 0.980 0.976 1.000
#are the S scores on the same scale?
#compare SD of indicators for full sample and Brussels area only
map_df(names(muni_social_brussels), function(v) {
tibble(
variable = v,
all = wtd_sd(muni[[v]]),
brussels = wtd_sd(muni_social_brussels[[v]]),
ratio = brussels / all
)
}) %>%
print() %>%
.$ratio %>%
describe()
## # A tibble: 8 x 4
## variable all brussels ratio
## <chr> <dbl> <dbl> <dbl>
## 1 median_income_2016 1293. 1809. 1.40
## 2 mean_income_2016 1932. 3172. 1.64
## 3 income_inequality_2016 10.1 11.1 1.10
## 4 crime_rate 0.0284 0.0571 2.01
## 5 violent_crimes_rate 0.00270 0.00313 1.16
## 6 life_expectancy 1.45 1.15 0.790
## 7 higher_education_pct 0.0754 0.112 1.49
## 8 unemployment 3.94 5.07 1.29
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 1.36 0.371 1.34 1.36 0.321 0.79 2.01 1.22 0.227 -1.02 0.131
#cors
combine_upperlower(
wtd.cors(muni %>% select(S, Muslim_pct, !!!names(muni_social))),
wtd.cors(muni %>% select(S, Muslim_pct, !!!names(muni_social)), weight = muni$population %>% sqrt()),
)
## S Muslim_pct median_income_2016 mean_income_2016
## S NA -0.4084 0.880 0.879
## Muslim_pct -0.519 NA -0.370 -0.239
## median_income_2016 0.891 -0.5075 NA 0.850
## mean_income_2016 0.866 -0.3159 0.833 NA
## income_inequality_2016 0.403 0.1318 0.188 0.577
## crime_rate -0.647 0.6300 -0.496 -0.272
## violent_crimes_rate -0.807 0.4867 -0.579 -0.447
## life_expectancy 0.641 -0.1534 0.486 0.527
## higher_education_pct 0.586 -0.0612 0.446 0.773
## unemployment -0.808 0.7186 -0.744 -0.510
## income_inequality_2016 crime_rate violent_crimes_rate
## S 0.4792 -0.6110 -0.794
## Muslim_pct 0.0494 0.5608 0.397
## median_income_2016 0.2611 -0.4317 -0.543
## mean_income_2016 0.5924 -0.2711 -0.451
## income_inequality_2016 NA -0.0330 -0.239
## crime_rate 0.0682 NA 0.853
## violent_crimes_rate -0.1601 0.8811 NA
## life_expectancy 0.3158 -0.2824 -0.473
## higher_education_pct 0.8604 -0.0166 -0.240
## unemployment 0.0428 0.6991 0.722
## life_expectancy higher_education_pct unemployment
## S 0.610 0.6452 -0.7784
## Muslim_pct -0.101 -0.0800 0.6228
## median_income_2016 0.446 0.5010 -0.6660
## mean_income_2016 0.473 0.7829 -0.4844
## income_inequality_2016 0.281 0.8582 -0.0486
## crime_rate -0.279 -0.0996 0.6754
## violent_crimes_rate -0.465 -0.3107 0.7262
## life_expectancy NA 0.3509 -0.5333
## higher_education_pct 0.405 NA -0.1949
## unemployment -0.533 -0.1523 NA
#sample sizes
pairwiseCount(muni %>% select(S, Muslim_pct, !!!names(muni_social)))
## S Muslim_pct median_income_2016 mean_income_2016
## S 589 589 589 589
## Muslim_pct 589 589 589 589
## median_income_2016 589 589 589 589
## mean_income_2016 589 589 589 589
## income_inequality_2016 589 589 589 589
## crime_rate 582 582 582 582
## violent_crimes_rate 585 585 585 585
## life_expectancy 588 588 588 588
## higher_education_pct 589 589 589 589
## unemployment 563 563 563 563
## income_inequality_2016 crime_rate violent_crimes_rate
## S 589 582 585
## Muslim_pct 589 582 585
## median_income_2016 589 582 585
## mean_income_2016 589 582 585
## income_inequality_2016 589 582 585
## crime_rate 582 582 578
## violent_crimes_rate 585 578 585
## life_expectancy 588 581 584
## higher_education_pct 589 582 585
## unemployment 563 561 559
## life_expectancy higher_education_pct unemployment
## S 588 589 563
## Muslim_pct 588 589 563
## median_income_2016 588 589 563
## mean_income_2016 588 589 563
## income_inequality_2016 588 589 563
## crime_rate 581 582 561
## violent_crimes_rate 584 585 559
## life_expectancy 588 588 562
## higher_education_pct 588 589 563
## unemployment 562 563 563
#p values
suppressWarnings(wtd.cor(muni %>% select(S, Muslim_pct, !!!names(muni_social)))$p.value)
## S Muslim_pct median_income_2016 mean_income_2016
## S 0.00e+00 4.40e-25 4.88e-192 4.94e-191
## Muslim_pct 4.40e-25 0.00e+00 1.40e-20 4.22e-09
## median_income_2016 4.88e-192 1.40e-20 0.00e+00 2.84e-165
## mean_income_2016 4.94e-191 4.22e-09 2.84e-165 0.00e+00
## income_inequality_2016 3.83e-35 2.31e-01 1.24e-10 4.47e-57
## crime_rate 7.37e-61 1.61e-49 7.97e-28 2.89e-11
## violent_crimes_rate 3.18e-128 1.52e-23 4.27e-46 1.09e-30
## life_expectancy 2.98e-61 1.40e-02 4.33e-30 4.07e-34
## higher_education_pct 1.19e-70 5.22e-02 9.34e-39 4.29e-123
## unemployment 1.60e-115 8.75e-62 1.98e-73 1.81e-34
## income_inequality_2016 crime_rate violent_crimes_rate
## S 3.83e-35 7.37e-61 3.18e-128
## Muslim_pct 2.31e-01 1.61e-49 1.52e-23
## median_income_2016 1.24e-10 7.97e-28 4.27e-46
## mean_income_2016 4.47e-57 2.89e-11 1.09e-30
## income_inequality_2016 0.00e+00 4.27e-01 5.02e-09
## crime_rate 4.27e-01 0.00e+00 5.17e-165
## violent_crimes_rate 5.02e-09 5.17e-165 0.00e+00
## life_expectancy 4.24e-12 8.03e-12 1.01e-32
## higher_education_pct 3.45e-172 1.63e-02 1.48e-14
## unemployment 2.50e-01 5.58e-76 1.05e-92
## life_expectancy higher_education_pct unemployment
## S 2.98e-61 1.19e-70 1.60e-115
## Muslim_pct 1.40e-02 5.22e-02 8.75e-62
## median_income_2016 4.33e-30 9.34e-39 1.98e-73
## mean_income_2016 4.07e-34 4.29e-123 1.81e-34
## income_inequality_2016 4.24e-12 3.45e-172 2.50e-01
## crime_rate 8.03e-12 1.63e-02 5.58e-76
## violent_crimes_rate 1.01e-32 1.48e-14 1.05e-92
## life_expectancy 0.00e+00 1.75e-18 1.27e-42
## higher_education_pct 1.75e-18 0.00e+00 3.16e-06
## unemployment 1.27e-42 3.16e-06 0.00e+00
#Muslim proportion 2011
#country
wtd_mean(muni$pct_moslims_2011, muni$Bevolking_2011)
## [1] 0.0626
#Brussels
muni %>% filter(province == "Bruxelles") %$% {
c(
wtd_mean(pct_moslims_2011, Bevolking_2011)
)
}
## [1] 0.224
#spatial autocor
SAC_cors(muni)
SAC_cors(muni)[1:14,2] %>% unlist() %>% describe()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 0.697 0.131 0.748 0.705 0.0648 0.475 0.83 0.355 -0.756 -1.2
## se
## X1 0.035
#scatterplots for each
for (v in SAC_cors(muni)$variable) {
GG_scatter(muni, v, v + "_lag")
GG_save(str_glue("figs/sac_{v}.png"))
}
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
#means by region and province
#region
plyr::ddply(muni, "region", function(dd) {
tibble(
var = c(names(muni_social), "S", "Muslim_pct"),
mean = map_dbl(c(names(muni_social), "S", "Muslim_pct"), ~wtd_mean(dd[[.]], dd$population))
)
}) %>%
spread(key = var, value = mean)
#province
plyr::ddply(muni, "province", function(dd) {
tibble(
var = c(names(muni_social), "S", "Muslim_pct"),
mean = map_dbl(c(names(muni_social), "S", "Muslim_pct"), ~wtd_mean(dd[[.]], dd$population))
)
}) %>%
spread(key = var, value = mean)
#income
GG_scatter(muni, "Muslim_pct", "median_income_2016", case_names = "name", alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5))
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_median_income.png")
## `geom_smooth()` using formula 'y ~ x'
#weights
GG_scatter(muni, "Muslim_pct", "median_income_2016", case_names = "name",
weights = muni$population %>% sqrt, alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5))
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_median_income_weights.png")
## `geom_smooth()` using formula 'y ~ x'
#weights + colors
GG_scatter(muni, "Muslim_pct", "median_income_2016", color = "province", case_names = "name",
weights = muni$population %>% sqrt, alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5)) +
scale_color_discrete("Province")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_median_income_weights_prov.png")
## `geom_smooth()` using formula 'y ~ x'
#crime
GG_scatter(muni, "Muslim_pct", "crime_rate_RR", case_names = "name", alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5))
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_crime_rate.png")
## `geom_smooth()` using formula 'y ~ x'
#weights
GG_scatter(muni, "Muslim_pct", "crime_rate_RR", case_names = "name",
weights = muni$population %>% sqrt, alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5))
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_crime_rate_weights.png")
## `geom_smooth()` using formula 'y ~ x'
#weights + colors
GG_scatter(muni, "Muslim_pct", "crime_rate_RR", color = "province", case_names = "name",
weights = muni$population %>% sqrt, alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5)) +
scale_color_discrete("Province")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_crime_rate_weights_prov.png")
## `geom_smooth()` using formula 'y ~ x'
#S
GG_scatter(muni, "Muslim_pct", "S", case_names = "name", alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5)) +
scale_y_continuous("General socioeconomic score")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_S.png")
## `geom_smooth()` using formula 'y ~ x'
#weights
GG_scatter(muni, "Muslim_pct", "S", case_names = "name",
weights = muni$population %>% sqrt, alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5)) +
scale_y_continuous("General socioeconomic score")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_S_weights.png")
## `geom_smooth()` using formula 'y ~ x'
#weights + colors
GG_scatter(muni, "Muslim_pct", "S", color = "province", case_names = "name",
weights = muni$population %>% sqrt(), alpha = .5) +
scale_x_continuous("Muslim %", labels = scales::percent, lim = c(NA, .5)) +
scale_color_discrete("Province") +
scale_y_continuous("General socioeconomic score")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/Belgium_Muslim_S_weights_prov.png")
## `geom_smooth()` using formula 'y ~ x'
#Muslim based on the citizenship data
#country time line
muni_pop_summary_country = muni_pop_summary %>%
group_by(Year) %>%
dplyr::summarise(
Muslim = wtd_mean(Muslim, population)
)
## `summarise()` ungrouping output (override with `.groups` argument)
#Muslims over time
ggplot(muni_pop_summary, aes(Year, Muslim, group = NIS_code)) +
geom_line() +
scale_y_continuous("Muslim %", labels = scales::percent) +
#add country
geom_line(data = muni_pop_summary_country, aes(group = NULL), color = "red")
GG_save("figs/year_muslim_unit.png")
#Muslim z
ggplot(muni_pop_summary, aes(Year, standardize(Muslim), group = NIS_code)) +
geom_line() +
scale_y_continuous("Muslim % (in z-units)") +
theme_bw()
GG_save("figs/year_muslim_unit_z.png")
#based on Jan Hertogen's data
#long form first
muni_muslims_long = muni %>%
select(NIS_code, Bevolking_2011:pct_moslims_2017) %>%
pivot_longer(
cols = Bevolking_2011:pct_moslims_2017
) %>%
mutate(
year = str_match(name, "\\d+") %>% as.numeric(),
name = str_replace(name, "_\\d+", "")
) %>%
pivot_wider(names_from = name, values_from = value)
#country by year
muni_muslims_long_country = muni_muslims_long %>%
group_by(year) %>%
summarise(pct_moslims = wtd_sum(Moslims) / wtd_sum(Bevolking))
## `summarise()` ungrouping output (override with `.groups` argument)
muni_muslims_long_country
#plot
muni_muslims_long %>%
ggplot(aes(year, pct_moslims, group = NIS_code)) +
geom_line() +
scale_x_continuous(breaks = 2000:2020) +
scale_y_continuous("Muslim", labels = scales::percent) +
#add country
geom_line(data = muni_muslims_long_country, aes(group = NULL), color = "red") +
theme_bw()
GG_save("figs/year_muslim_Hertogen.png")
#basic model
(income_1 = rms::ols(median_income_2016 ~ Muslim_pct, data = muni, weights = sqrt(muni$population)))
## Linear Regression Model
##
## rms::ols(formula = median_income_2016 ~ Muslim_pct, data = muni,
## weights = sqrt(muni$population))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 589 LR chi2 175.38 R2 0.258
## sigma13052.8724 d.f. 1 R2 adj 0.256
## d.f. 587 Pr(> chi2) 0.0000 g 365.739
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2946.3 -948.6 -157.3 696.2 3197.3
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 18698.5082 57.8037 323.48 <0.0001
## Muslim_pct -9908.7595 694.4467 -14.27 <0.0001
##
#normal controls
(income_2 = rms::ols(median_income_2016 ~ Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population)))
## Linear Regression Model
##
## rms::ols(formula = median_income_2016 ~ Muslim_pct + rcs(age_65ao_pct) +
## rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 589 LR chi2 299.77 R2 0.399
## sigma11866.7392 d.f. 13 R2 adj 0.385
## d.f. 575 Pr(> chi2) 0.0000 g 771.055
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3121.62 -653.70 -33.98 656.28 2879.29
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 21296.8570 1.94e+03 11.00 <0.0001
## Muslim_pct -13412.8022 1.19e+03 -11.23 <0.0001
## age_65ao_pct -9713.9862 8.15e+03 -1.19 0.2336
## age_65ao_pct' -9309.4486 3.04e+04 -0.31 0.7596
## age_65ao_pct'' 239447.1263 2.53e+05 0.94 0.3451
## age_65ao_pct''' -576890.3646 4.39e+05 -1.31 0.1898
## age_0_19_pct -46063.7297 2.90e+04 -1.59 0.1130
## age_0_19_pct' 119407.1336 5.63e+05 0.21 0.8321
## age_0_19_pct'' 336967.7869 1.92e+06 0.18 0.8607
## age_0_19_pct''' -984417.5949 1.96e+06 -0.50 0.6159
## pop_density 8.4694 2.08e+00 4.07 <0.0001
## pop_density' -157.7893 1.13e+02 -1.40 0.1620
## pop_density'' 242.3116 2.63e+02 0.92 0.3565
## pop_density''' -48.3817 1.71e+02 -0.28 0.7769
##
#outcome lag
(income_3 = rms::ols(median_income_2016 ~ median_income_2016_lag + Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population)))
## Linear Regression Model
##
## rms::ols(formula = median_income_2016 ~ median_income_2016_lag +
## Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density),
## data = muni, weights = sqrt(muni$population))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 589 LR chi2 687.12 R2 0.689
## sigma8548.7837 d.f. 14 R2 adj 0.681
## d.f. 574 Pr(> chi2) 0.0000 g 1143.630
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1824.00 -480.42 -48.51 431.13 4027.34
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 8.36e+03 1.50e+03 5.56 <0.0001
## median_income_2016_lag 7.13e-01 3.09e-02 23.11 <0.0001
## Muslim_pct -9.55e+03 8.77e+02 -10.90 <0.0001
## age_65ao_pct -5.56e+03 5.87e+03 -0.95 0.3438
## age_65ao_pct' -3.33e+04 2.19e+04 -1.52 0.1294
## age_65ao_pct'' 4.10e+05 1.83e+05 2.24 0.0252
## age_65ao_pct''' -7.93e+05 3.17e+05 -2.50 0.0126
## age_0_19_pct -4.27e+04 2.09e+04 -2.04 0.0417
## age_0_19_pct' 4.93e+05 4.06e+05 1.22 0.2247
## age_0_19_pct'' -1.54e+06 1.39e+06 -1.11 0.2676
## age_0_19_pct''' 1.47e+06 1.42e+06 1.04 0.3008
## pop_density 2.95e+00 1.52e+00 1.94 0.0524
## pop_density' -4.62e+01 8.13e+01 -0.57 0.5699
## pop_density'' 6.55e+01 1.89e+02 0.35 0.7294
## pop_density''' -6.42e+00 1.23e+02 -0.05 0.9584
##
#summarize into a table
mget("income_" + 1:3) %>%
summarize_models()
#Brussels alone, using this dataset
(income_1b = rms::ols(median_income_2016 ~ Muslim_pct, data = muni %>% filter(province == "Bruxelles"),
weights = muni %>% filter(province == "Bruxelles") %>% pull(population) %>% sqrt()))
## Linear Regression Model
##
## rms::ols(formula = median_income_2016 ~ Muslim_pct, data = muni %>%
## filter(province == "Bruxelles"), weights = muni %>% filter(province ==
## "Bruxelles") %>% pull(population) %>% sqrt())
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 19 LR chi2 35.82 R2 0.848
## sigma10936.6589 d.f. 1 R2 adj 0.839
## d.f. 17 Pr(> chi2) 0.0000 g 1888.183
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1399.42 -368.86 23.39 398.76 1137.16
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 19294.5151 336.3994 57.36 <0.0001
## Muslim_pct -12834.5575 1316.6556 -9.75 <0.0001
##
plot(anova(income_3, tol=1e-12), what='partial R2')
#basic model
(crime_1 = rms::ols(crime_rate_RR ~ Muslim_pct, data = muni, weights = sqrt(muni$population)))
## Frequencies of Missing Values Due to Each Variable
## crime_rate_RR Muslim_pct (weights)
## 7 0 0
##
## Linear Regression Model
##
## rms::ols(formula = crime_rate_RR ~ Muslim_pct, data = muni, weights = sqrt(muni$population))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 582 LR chi2 294.28 R2 0.397
## sigma3.6340 d.f. 1 R2 adj 0.396
## d.f. 580 Pr(> chi2) 0.0000 g 0.141
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.08985 -0.20510 -0.09331 0.10534 1.44632
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.6470 0.0162 39.89 <0.0001
## Muslim_pct 3.7814 0.1936 19.54 <0.0001
##
#normal controls
(crime_2 = rms::ols(crime_rate_RR ~ Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population)))
## Frequencies of Missing Values Due to Each Variable
## crime_rate_RR Muslim_pct age_65ao_pct age_0_19_pct pop_density
## 7 0 0 0 0
## (weights)
## 0
##
## Linear Regression Model
##
## rms::ols(formula = crime_rate_RR ~ Muslim_pct + rcs(age_65ao_pct) +
## rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 582 LR chi2 405.12 R2 0.501
## sigma3.3387 d.f. 13 R2 adj 0.490
## d.f. 568 Pr(> chi2) 0.0000 g 0.225
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.88284 -0.18689 -0.06557 0.11844 1.30816
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.5589 0.5456 2.86 0.0044
## Muslim_pct 3.1422 0.3385 9.28 <0.0001
## age_65ao_pct -7.6998 2.3007 -3.35 0.0009
## age_65ao_pct' 22.2088 8.6328 2.57 0.0103
## age_65ao_pct'' -163.8466 72.1430 -2.27 0.0235
## age_65ao_pct''' 280.7784 125.2426 2.24 0.0254
## age_0_19_pct 5.0803 8.1801 0.62 0.5348
## age_0_19_pct' -156.1716 158.9336 -0.98 0.3262
## age_0_19_pct'' 686.5509 542.4286 1.27 0.2061
## age_0_19_pct''' -857.9245 554.3930 -1.55 0.1223
## pop_density -0.0003 0.0006 -0.42 0.6710
## pop_density' -0.0161 0.0319 -0.50 0.6141
## pop_density'' 0.0677 0.0744 0.91 0.3627
## pop_density''' -0.0720 0.0483 -1.49 0.1367
##
#outcome lag
(crime_3 = rms::ols(crime_rate_RR ~ crime_rate_RR_lag + Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population)))
## Frequencies of Missing Values Due to Each Variable
## crime_rate_RR crime_rate_RR_lag Muslim_pct age_65ao_pct
## 7 7 0 0
## age_0_19_pct pop_density (weights)
## 0 0 0
##
## Linear Regression Model
##
## rms::ols(formula = crime_rate_RR ~ crime_rate_RR_lag + Muslim_pct +
## rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density),
## data = muni, weights = sqrt(muni$population))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 582 LR chi2 420.13 R2 0.514
## sigma3.2988 d.f. 14 R2 adj 0.502
## d.f. 567 Pr(> chi2) 0.0000 g 0.233
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.92853 -0.16942 -0.06638 0.10302 1.29339
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.0042 0.5580 1.80 0.0724
## crime_rate_RR_lag 0.2432 0.0632 3.85 0.0001
## Muslim_pct 3.1335 0.3344 9.37 <0.0001
## age_65ao_pct -6.8513 2.2839 -3.00 0.0028
## age_65ao_pct' 20.8982 8.5364 2.45 0.0147
## age_65ao_pct'' -149.3285 71.3808 -2.09 0.0369
## age_65ao_pct''' 250.1228 124.0021 2.02 0.0442
## age_0_19_pct 9.3166 8.1569 1.14 0.2539
## age_0_19_pct' -216.2815 157.8091 -1.37 0.1711
## age_0_19_pct'' 873.6776 538.1476 1.62 0.1050
## age_0_19_pct''' -1030.7497 549.6059 -1.88 0.0612
## pop_density -0.0001 0.0006 -0.16 0.8709
## pop_density' -0.0190 0.0315 -0.60 0.5476
## pop_density'' 0.0694 0.0735 0.94 0.3453
## pop_density''' -0.0684 0.0478 -1.43 0.1525
##
#summarize into a table
mget("crime_" + 1:3) %>%
summarize_models()
#basic model
(S_1 = rms::ols(S ~ Muslim_pct, data = muni, weights = sqrt(muni$population)))
## Linear Regression Model
##
## rms::ols(formula = S ~ Muslim_pct, data = muni, weights = sqrt(muni$population))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 589 LR chi2 184.47 R2 0.269
## sigma10.2952 d.f. 1 R2 adj 0.268
## d.f. 587 Pr(> chi2) 0.0000 g 0.297
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.46638 -0.54915 0.07277 0.53565 2.44696
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.2669 0.0456 5.86 <0.0001
## Muslim_pct -8.0479 0.5477 -14.69 <0.0001
##
#normal controls
(S_2 = rms::ols(S ~ Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population)))
## Linear Regression Model
##
## rms::ols(formula = S ~ Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) +
## rcs(pop_density), data = muni, weights = sqrt(muni$population))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 589 LR chi2 298.57 R2 0.398
## sigma9.4417 d.f. 13 R2 adj 0.384
## d.f. 575 Pr(> chi2) 0.0000 g 0.607
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.49200 -0.42750 0.08105 0.55243 2.34460
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 3.1275 1.5400 2.03 0.0427
## Muslim_pct -12.8011 0.9505 -13.47 <0.0001
## age_65ao_pct -11.5845 6.4825 -1.79 0.0745
## age_65ao_pct' 6.3597 24.1990 0.26 0.7928
## age_65ao_pct'' 161.1180 201.6183 0.80 0.4245
## age_65ao_pct''' -483.2003 349.6457 -1.38 0.1675
## age_0_19_pct -38.7062 23.0932 -1.68 0.0943
## age_0_19_pct' 265.5832 447.9045 0.59 0.5535
## age_0_19_pct'' -457.6299 1527.6814 -0.30 0.7646
## age_0_19_pct''' 144.8383 1560.6018 0.09 0.9261
## pop_density 0.0063 0.0017 3.82 0.0001
## pop_density' -0.1332 0.0897 -1.49 0.1380
## pop_density'' 0.2075 0.2089 0.99 0.3210
## pop_density''' -0.0432 0.1358 -0.32 0.7504
##
#outcome lag
(S_3 = rms::ols(S ~ S_lag + Muslim_pct + rcs(age_65ao_pct) + rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population)))
## Linear Regression Model
##
## rms::ols(formula = S ~ S_lag + Muslim_pct + rcs(age_65ao_pct) +
## rcs(age_0_19_pct) + rcs(pop_density), data = muni, weights = sqrt(muni$population))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 589 LR chi2 659.24 R2 0.673
## sigma6.9576 d.f. 14 R2 adj 0.666
## d.f. 574 Pr(> chi2) 0.0000 g 0.889
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.16748 -0.31173 0.03997 0.37229 2.94394
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 3.8914 1.1354 3.43 0.0007
## S_lag 0.7302 0.0332 22.02 <0.0001
## Muslim_pct -8.7716 0.7239 -12.12 <0.0001
## age_65ao_pct -8.6404 4.7788 -1.81 0.0711
## age_65ao_pct' -6.1131 17.8413 -0.34 0.7320
## age_65ao_pct'' 166.0078 148.5731 1.12 0.2643
## age_65ao_pct''' -374.4582 257.7018 -1.45 0.1468
## age_0_19_pct -46.1087 17.0207 -2.71 0.0070
## age_0_19_pct' 448.4179 330.1660 1.36 0.1749
## age_0_19_pct'' -1301.8334 1126.4037 -1.16 0.2483
## age_0_19_pct''' 1200.3810 1151.0088 1.04 0.2974
## pop_density 0.0018 0.0012 1.42 0.1564
## pop_density' -0.0337 0.0662 -0.51 0.6114
## pop_density'' 0.0489 0.1541 0.32 0.7512
## pop_density''' -0.0054 0.1001 -0.05 0.9567
##
#summarize into a table
mget("S_" + 1:3) %>%
summarize_models()
#maps of legal regions
muni_spatial %>%
ggplot() +
geom_sf(aes(fill = province)) +
scale_fill_discrete("Province") +
theme_classic()
GG_save("figs/maps/provinces_map.png")
muni_spatial %>%
ggplot() +
geom_sf(aes(fill = arrondissement)) +
theme_classic() +
scale_fill_discrete("Arrondissement")
GG_save("figs/maps/arrondissements_map.png")
#maps of main variables
nice_names = c(
life_expectancy = "Life expectancy (years)",
mean_income_2016 = "Mean income (EUR, 2016)",
median_income_2016 = "Median income (EUR, 2016)",
income_inequality_2016 = "Income inequality (2016, IQR)",
Muslim_pct = "Muslim % (2016)",
crime_rate = "Crime rate (2009-2016)",
crime_rate_RR = "Relative crime rate, 1 = country average (2009-2016)",
pop_density = "Population density",
age_65ao_pct = "Age 65 and above, %",
age_0_19_pct = "Age 0-19, %",
S = "General socioeconomic factor (S)"
)
#add S
muni_spatial$S = muni$S
#plot map for each main variable
for (v in c(main_vars, "S")) {
v_sym = as.symbol(v)
plot = muni_spatial %>%
ggplot() +
geom_sf(aes(fill = !!v_sym)) +
theme_classic() +
scale_fill_gradient(nice_names[v] %>% kirkegaard::add_newlines(line_length = 10),
low = "black", high = "red")
#percent? fix the scale
if (str_detect(v, "%|pct")) plot = plot + scale_fill_gradient(nice_names[v] %>% kirkegaard::add_newlines(line_length = 10),
low = "black", high = "red", labels = scales::percent)
plot
GG_save(str_glue("figs/maps/Belgium/{v}_map.png"))
}
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
#do Brussels too
#merge data
brussels_spatial = right_join(muni_spatial_orig %>% mutate(NIS_code = as.numeric(NIS_code)), brussels, by = "NIS_code") %>%
mutate(
pct_moslims_2013 = pct_moslims_2013 * 100 #percent
)
#vars
brussels_vars = brussels %>%
select(Wealth_index_2013:General_humanities_technical_and_professional_humanities_ratio, S, pct_moslims_2013) %>%
names()
#make nice names
brussels_vars_name = c(
Wealth_index_2013 = "Wealth index (2013)",
Mean_wages_compared_with_the_national_mean_pct = "Wage index",
Share_of_households_in_demand_for_social_housing_2012_pct = "Share of households in demand for social housing % (2012)",
Unemployment_rate_2012_pct = "Unemployment rate (2012)",
Median_income_2012_euros = "Median income in Euro (2012)",
x18_64_years_beneficiaries_of_social_integration_income_2012_pct = "18-64 year old beneficiaries of social integration income % (2012)",
Juvenile_delinquency_in_the_total_population_under_18_years_pct = "Delinquent population aged <18 years %",
Delinquency_in_the_total_population_18_25_years_pct = "Delinquent population aged 18-25 years %",
Proportion_with_a_university_degree_pct = "University degree %",
Part_with_low_Birth_weight_pct = "Low birth weight %",
Life_expectancy_at_birth = "Life expectancy at birth",
Children_born_in_a_family_with_no_income_from_work_pct = "Children born in a family with no income from work %",
Population_growth_pct = "Population growth %",
Houses_median_prize_euros = "Median house price in Euro",
Fraudulent_declaration_pct = "Fraudulent insurance declarations %",
Rate_of_borrowers_with_at_least_one_non_regularized_credit = "Rate of borrowers with bad credit",
Mothers_20_years_pct_2009_2013 = "Mothers <20 years % (2009-2013)",
Fetal_infantile_mortality_on_1000_2009_2013 = "Infant mortality x 1000 (2009-2013)",
Proportion_with_two_years_late_in_school_2013_2014 = "Percentage two years late in school (2013-2014)",
x2013_2014_Special_humanity_designed_for_young_with_mental_deficiency_pct = "Special education mental deficiency % (2013-2014)",
Relative_risk_of_mortality_by_cardiovascular_diseases_men_2009_2013 = "Relative risk of mortality by cardiovascular diseases, men (2009-2013)",
General_humanities_technical_and_professional_humanities_ratio = "General humanities to technical and professional humanities ratio in education",
S = "General socioeconomic factor (S)",
pct_moslims_2013 = "Muslim % (2013)"
)
#loop
for (v in brussels_vars) {
v_sym = as.symbol(v)
brussels_spatial %>%
ggplot() +
geom_sf(aes(fill = !!v_sym)) +
theme_classic() +
scale_fill_gradient(brussels_vars_name[v == names(brussels_vars_name)] %>% kirkegaard::add_newlines(line_length = 30), low = "black", high = "red") +
geom_label_repel(aes(x = lon, y = lat, label = Name, fill = !!v_sym), size = 3, color = "white")
GG_save(str_glue("figs/maps/Brussels/{v}_map.png"))
}
#fill missing with 0
OECD_aid_filled = OECD_aid %>%
filter(
MEASURE == "PC_GNI",
SUBJECT == "ODAFLOWS"
) %>%
select(-`Flag Codes`) %>%
#fill missing ones to 0
tidyr::complete(
TIME,
nesting(LOCATION, INDICATOR, SUBJECT, MEASURE, FREQUENCY),
fill = list(Value = 0)
)
#timeline
OECD_aid_filled %>%
ggplot(aes(TIME, Value/100, color = LOCATION)) +
geom_line() +
scale_y_continuous("Percent of GNI% in aid", labels = scales::percent) +
scale_color_discrete("ISO-3 code")
#most giving
#all time
OECD_aid_filled %>%
group_by(LOCATION) %>%
summarise(
mean = wtd_mean(Value)
) %>% arrange(-mean)
## `summarise()` ungrouping output (override with `.groups` argument)
#last 10 years
OECD_aid_filled %>%
filter(TIME %in% 2008:2017) %>%
group_by(LOCATION) %>%
summarise(
mean = wtd_mean(Value)
) %>% arrange(-mean)
## `summarise()` ungrouping output (override with `.groups` argument)
#versions
write_sessioninfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggrepel_0.8.2 doFuture_0.9.0 iterators_1.0.12
## [4] foreach_1.5.0 globals_0.12.5 future_1.17.0
## [7] plm_2.2-3 spatialstatstools_0.1.0 ape_5.3
## [10] geosphere_1.5-10 rmngb_0.6-1 fields_10.3
## [13] maps_3.3.0 spam_2.5-1 dotCall64_1.0-0
## [16] ggeffects_0.14.3 googlesheets_0.3.0 sf_0.9-3
## [19] rms_5.1-4 SparseM_1.78 readxl_1.3.1
## [22] kirkegaard_2020-07-02 metafor_2.4-0 Matrix_1.2-18
## [25] psych_1.9.12.31 magrittr_1.5 assertthat_0.2.1
## [28] weights_1.0.1 mice_3.9.0 gdata_2.18.0
## [31] Hmisc_4.4-0 Formula_1.2-3 survival_3.1-12
## [34] lattice_0.20-41 forcats_0.5.0 stringr_1.4.0
## [37] dplyr_0.8.99.9003 purrr_0.3.4 readr_1.3.1
## [40] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.2
## [43] tidyverse_1.3.0 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.8 miscTools_0.6-26 plyr_1.8.6
## [4] sp_1.4-2 splines_4.0.1 listenv_0.8.0
## [7] TH.data_1.0-10 digest_0.6.25 htmltools_0.5.0
## [10] fansi_0.4.1 checkmate_2.0.0 cluster_2.1.0
## [13] openxlsx_4.1.5 modelr_0.1.8 sandwich_2.5-1
## [16] bdsmatrix_1.3-4 jpeg_0.1-8.1 colorspace_1.4-1
## [19] blob_1.2.1 rvest_0.3.5 haven_2.3.1
## [22] xfun_0.15 crayon_1.3.4 jsonlite_1.7.0
## [25] zoo_1.8-8 glue_1.4.1 gtable_0.3.0
## [28] MatrixModels_0.4-1 car_3.0-7 maxLik_1.3-8
## [31] DEoptimR_1.0-8 abind_1.4-5 VIM_6.0.0
## [34] scales_1.1.1 mvtnorm_1.1-0 DBI_1.1.0
## [37] bibtex_0.4.2.2 Rcpp_1.0.4.6 laeken_0.5.1
## [40] htmlTable_2.0.0 tmvnsim_1.0-2 units_0.6-6
## [43] foreign_0.8-79 vcd_1.4-7 htmlwidgets_1.5.1
## [46] httr_1.4.1 RColorBrewer_1.1-2 acepack_1.4.1
## [49] ellipsis_0.3.1 farver_2.0.3 pkgconfig_2.0.3
## [52] nnet_7.3-14 dbplyr_1.4.4 utf8_1.1.4
## [55] labeling_0.3 tidyselect_1.1.0 rlang_0.4.6
## [58] multilevel_2.6 munsell_0.5.0 cellranger_1.1.0
## [61] tools_4.0.1 cli_2.0.2 generics_0.0.2
## [64] ranger_0.12.1 sjlabelled_1.1.4 broom_0.5.6
## [67] evaluate_0.14 yaml_2.2.1 knitr_1.29
## [70] fs_1.4.2 zip_2.0.4 robustbase_0.93-6
## [73] nlme_3.1-147 quantreg_5.55 xml2_1.3.2
## [76] psychometric_2.2 compiler_4.0.1 rstudioapi_0.11
## [79] curl_4.3 png_0.1-7 e1071_1.7-3
## [82] reprex_0.3.0 stringi_1.4.6 classInt_0.4-3
## [85] vctrs_0.3.1 stringdist_0.9.5.5 pillar_1.4.4
## [88] lifecycle_0.2.0 Rdpack_0.11-1 lmtest_0.9-37
## [91] data.table_1.12.8 insight_0.8.4 gbRd_0.4-11
## [94] R6_2.4.1 latticeExtra_0.6-29 rio_0.5.16
## [97] KernSmooth_2.23-17 gridExtra_2.3 codetools_0.2-16
## [100] polspline_1.1.19 boot_1.3-25 MASS_7.3-51.6
## [103] gtools_3.8.2 withr_2.2.0 mnormt_2.0.1
## [106] multcomp_1.4-13 mgcv_1.8-31 hms_0.5.3
## [109] rpart_4.1-15 Rcsdp_0.1.57.1 class_7.3-17
## [112] rmarkdown_2.3 carData_3.0-3 lubridate_1.7.9
## [115] base64enc_0.1-3
#save data
brussels %>% write_rds("data/brussels_out.rds", compress = "xz")
brussels %>% writexl::write_xlsx("data/brussels_out.xlsx")
muni %>% write_rds("data/muni_out.rds", compress = "xz")
muni %>% writexl::write_xlsx("data/muni_out.xlsx")
#upload
if (F) {
library(osfr)
osf_auth(read_lines("~/.config/osf_token"))
osf_proj = osf_retrieve_node("https://osf.io/ja6ce/")
osf_upload(osf_proj, path = c("notebook.Rmd", "notebook.html", "data", "figs", "sessions_info.txt"), conflicts = "overwrite")
}