Init
library(pacman)
p_load(stringr, tidyr, plyr, dplyr, psych, readr, weights, geosphere, scales, dkstat, kirkegaard, magrittr, readxl, purrr, assertthat)
#devtools::install_github("rOpenGov/dkstat")
options(encoding = "UTF-8", digits = 2)
Functions
Functions for this project
# recode age --------------------------------------------------------------
recode_age = function(x) {
x %>% str_replace(" years?", "")
}
recode_age("15 years") == "15"
## [1] TRUE
recode_age("1 year") == "1"
## [1] TRUE
age_startend = function(x) {
m = str_match(x, "(\\d\\d).(\\d\\d)")
c(start = m[1, 2] %>% as.numeric(),
end = m[1, 3] %>% as.numeric()
)
}
age_startend("20-30")
## start end
## 20 30
# german crime data loader ------------------------------------------------
german_crime_loader = function(file) {
# browser()
#file ending
#file_ending = str_match(file, "\\.(\\w+)$")[, 2]
#read sheet
#d = readWorksheetFromFile(file = file, sheet = 1, startRow = 5)
#if (file_ending == "xls") d = xlsx::read.xlsx(file = file, sheetIndex = 1, startRow = 5)
# if (file_ending == "xls") d = readxl::read_excel(file, sheet = 1, skip = 4)
# if (file_ending == "xls") d = read.xls(xls = file, sheet = 1, startRow = 5)
# if (file_ending == "xlsx") d = read.xlsx(file = file, sheetIndex = 1, startRow = 5)
#the CSV versions read reliably, so we use those instead
d = read_csv(file)
#subset metadata
meta = d[, 1:2]
d = d[, -c(1:4)]
#transpose
d = df_t(d)
#add colnames
colnames(d) = "code_" + meta[[1]] #generate names from codes
colnames(d)[1] = "All_crime" #custom name for the first
#add country name as column
d$Country = rownames(d) %>% str_replace_all(pattern = "([\\w\\.])\\.(\\w)", replacement = "\\1 \\2") %>%
str_replace_all(pattern = "(\\w)\\. ", replacement = "\\1, ") #replace "name. " with "name, ".
d
}
# german census data loader -----------------------------------------------
#function to load the census data
#this data is messy!
german_census_loader = function(sheet, filename = "data/census_data2.xlsx") {
# browser()
#read data
# d = XLConnect::readWorksheetFromFile(filename, sheet = sheet, startRow = 4)
d = readxl::read_excel(filename, sheet = sheet, skip = 3)
#fill in origins
d = tidyr::fill(d, Country)
#sex
d$Sex = rep(c("Male", "Female", "Total"), length.out = nrow(d))
#remove non-countries
#these are regional aggregates and the like
noncountries = c("EU-Staaten",
"EU-Kandidatenländer",
"EWR-Staaten/Schweiz",
"Sonstiges Europa",
"Afrika",
"Nordafrika",
"Westafrika",
"Zentralafrika",
"Ostafrika",
"Südliches Afrika",
"Amerika",
"Nordamerika",
"Mittelamerika und Karibik",
"Südamerika",
"Asien",
"Vorderasien",
"Süd- und Südostasien",
"Ost- und Zentralasien",
"Australien und Ozeanien",
"Sonstige Ausprägungen",
"Insgesamt"
)
#remove
d = dplyr::filter(d, !Country %in% noncountries)
#remove totals
# d = dplyr::filter(d, Sex != "Total")
#return the cleaned data
d
}
# growth ------------------------------------------------------------------
growth = function(x) (x[4] / x[1])-1
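#illustrative usage (assumes a length-4 vector ordered by year, e.g. 2012-2015):
growth(c(100, 105, 110, 120)) #0.2, i.e. 20% growth from the first to the fourth year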
# df_block_merger ---------------------------------------------------------
#helper function for df_merge_rows
df_block_merger = function(block, funcs = list(.numeric = sum, .default = dplyr::first)) {
#skip if nrow==1
if (nrow(block) == 1) return(block)
#prepare out block
out = as.data.frame(matrix(ncol=ncol(block), nrow = 1))
colnames(out) = colnames(block)
#apply functions
for (var in colnames(block)) {
#specific func?
if (var %in% names(funcs)) {
out[[var]] = funcs[[var]](block[[var]])
} else if (is.numeric(block[[var]]) && ".numeric" %in% names(funcs)) {
#if var is numeric, apply numeric default func if present
out[[var]] = funcs[[".numeric"]](block[[var]])
} else {
#else, apply default function
out[[var]] = funcs[[".default"]](block[[var]])
}
}
out
}
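#illustrative check with toy data (not from the project datasets): with the default
#settings, numeric columns are summed and other columns keep their first value
df_block_merger(data.frame(Origin = c("X", "X"), pop = c(10, 20), crimes = c(1, 2)))
#expected: one row with Origin = "X", pop = 30, crimes = 3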
German data
#census
de_census_years = as.character(2012:2015) %>% set_names(2012:2015)
de_census = lapply(de_census_years, FUN = german_census_loader) %>%
ldf_to_df(by_name = "Year")
#crime
#http://stackoverflow.com/questions/7963393/out-of-memory-error-java-when-using-r-and-xlconnect-package
de_crime = lapply(c("data/german_crime_2012.csv",
"data/german_crime_2013.csv",
"data/german_crime_2014.csv",
"data/german_crime_2015.csv"),
FUN = german_crime_loader)
## Parsed with column specification:
## cols(
## .default = col_double(),
## Straftat = col_character(),
## `Straftaten-Text` = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_double(),
## Straftat = col_character(),
## `Straftaten-Text` = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Schl.-Zahl der Tat` = col_character(),
## `Straftaten-Text` = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_double(),
## Schlüssel = col_character(),
## Straftat = col_character(),
## `Tatverdächtige insgesamt` = col_character(),
## Deutschland = col_character()
## )
## See spec(...) for full column specifications.
names(de_crime) = 2012:2015
#tidy
### Tidy German data and calculate statistics
# as one df ---------------------------------------------------------------
de_crime %<>% ldf_to_df(by_name = "Year")
## Not all columns were found in each data.frame. This may indicate an error.
#it's not an error, the crime types covered by different datasets are slightly non-overlapping
#move important cols to front
de_crime %<>% df_reorder_columns(vars = c("Country" = 1, "Year" = 2))
# subset to overlap between sources ---------------------------------------
#rename Staatenlos (stateless) to Palästinensische Gebiete (Palestinian territories)
de_crime$Country %<>% mapvalues("Staatenlos", "Palästinensische Gebiete")
de_census$Country %<>% mapvalues("Staatenlos", "Palästinensische Gebiete")
#intersect of uniq countries is the overlap
de_countries = intersect(unique(de_crime$Country), unique(de_census$Country))
de_census_countries = unique(de_census$Country)
#which are missing?
de_census_countries[!de_census_countries %in% de_countries]
## [1] "Ungeklärt und ohne Angabe" "Britische Überseegebiete"
# stopifnot(length(de_census_countries[!de_census_countries %in% de_countries]) == 0)
#subset
de_census %<>% dplyr::filter(Country %in% de_countries)
de_crime %<>% dplyr::filter(Country %in% de_countries)
# long form ---------------------------------------------------------------
#gather the age columns to make the data into long form
de_census %<>% tidyr::gather_(key_col = "age_group", value_col = "pop", gather_cols = str_detect2(names(de_census), pattern = "Age_\\d", value = T))
#this results in duplicates for the mean age data, but we remove it later
#fix age_group content
de_census$age_group %<>% str_replace("Age_", "") %>% str_replace("_", "-")
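#illustrative: assuming the census age columns are named like "Age_15_19", the two
#replacements above turn such a name into "15-19"
"Age_15_19" %>% str_replace("Age_", "") %>% str_replace("_", "-") #"15-19"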
# overall demographic data -----------------------------------------------------------
de_demo = de_census %>% ddply(.variables = c("Country"), .fun = function(d) {
#pop by sex
sex_pop = plyr::ddply(d, "Sex", plyr::summarise, pop = sum(pop))
#divide by years to get mean
sex_pop$pop %<>% divide_by(length(unique(d$Year)))
#y is output
stopifnot(nrow(sex_pop) == 3)
y = dplyr::data_frame(pop = sex_pop %>% dplyr::filter(Sex == "Total") %$% pop,
male_pct = (sex_pop %>% dplyr::filter(Sex == "Male") %$% pop) / pop)
#calculate weighted mean for mean age
mean_age_year = d %>% dplyr::filter(Sex == "Total") %>% plyr::ddply("Year", .fun = plyr::summarise,
pop = sum(pop),
mean_age = wtd_mean(Age_mean))
y$mean_age = mean_age_year %$% wtd_mean(mean_age, w = pop)
#population growth
if (length(unique(d$Year)) < 4) stop("fewer than 4 years of census data for ", d$Country[1])
y$growth = mean_age_year[4, 2] / mean_age_year[1, 2] #ratio of last to first year's population
#out
y
})
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
# age and sex distribution, averaged across years ------------------------------------------------
de_age_sex = ddply(de_census, c("Country", "Sex", "age_group"), function(x) {
data.frame(pop = mean(x$pop))
})
#age x sex crime data
de_de_age_sex = readxl::read_excel("data/data-27.10.2016.xlsx", sheet = 1, skip = 1) %>% df_legalize_names()
names(de_de_age_sex)[15] = "crime_rate"
#divide rates by 100
de_de_age_sex$crime_rate %<>% divide_by(100)
#remove columns we don't care about
de_de_age_sex %<>% select(Age_group, Sex, Sum, crime_rate)
German analysis
# get mean values for 2012-2015 -------------------------------------------
#mean across years 2012-2015
de_crime_mean = df_merge_rows(de_crime, key = "Country", func = wtd_mean, error = F) %>% magrittr::extract(, -c(2))
# calculate per capita ----------------------------------------------------
#same order?
stopifnot(all(de_crime_mean$Country == de_demo$Country))
de_main = de_demo
de_main$crime_rate = de_crime_mean$All_crime / de_demo$pop
#merge to one
# de_main = cbind(de_census_mean[-1], de_crime_per_capita) #skip first col to avoid duplicated lists of german names
# abbreviations -----------------------------------------------------
#abbrev
de_main$abbrev = pu_translate(de_main$Country)
#standard EN names
de_main$Country_EN = pu_translate(de_main$abbrev, reverse = T)
rownames(de_main) = de_main$Country_EN
#assert unique
assert_that(!any(duplicated(de_main$abbrev)))
## [1] TRUE
# Germany age x sex data --------------------------------------------------
#forward fill the age ranges
de_de_age_sex %<>% tidyr::fill(Age_group)
#remove "Both sex" rows
de_de_age_sex %<>% dplyr::filter(Sex != "Both")
#remove the <14 age group
de_de_age_sex %<>% dplyr::filter(Age_group != "<14")
#weighted mean for germans
(de_crime_rate = wtd_mean(de_de_age_sex$crime_rate, w = de_de_age_sex$Sum))
## [1] 0.023
#plausible, similar to neighboring countries
#calculate unadjusted crime rate rr's
de_main$crime_rate_rr = de_main$crime_rate / de_crime_rate
#estimate matching crime rates for age x sex groups seen in the foreigner data
de_crime_rates_age_sex = data.frame(
Sex = rep(c("Female", "Male"), each = 10),
age_group = c("15-19", "20-24", "25-34", "35-44", "45-54",
"55-64", "65-74", "75-84", "85-94", "95-inf")
)
#fill in estimates manually
de_crime_rates_age_sex$crime_rate = NA
#from these
#14<18 18<21 21<25 25<30 30<40 40<50 50<60 60<70 70<80 ≥80
#Female 15-19
#4x 14-17 + 2x 18-20
de_crime_rates_age_sex[1, "crime_rate"] = wtd_mean(c(de_de_age_sex[[2, "crime_rate"]], de_de_age_sex[[4, "crime_rate"]]), c(4, 2))
#Female 20-24
#1x 18<21 + 4x 21<25
de_crime_rates_age_sex[2, "crime_rate"] = wtd_mean(c(de_de_age_sex[[4, "crime_rate"]], de_de_age_sex[[6, "crime_rate"]]), c(1, 4))
#Female 25-34
#5x 25<30 + 5x 30<40
de_crime_rates_age_sex[3, "crime_rate"] = wtd_mean(c(de_de_age_sex[[8, "crime_rate"]], de_de_age_sex[[10, "crime_rate"]]), c(5, 5))
#Female 35-44
#5x 30<40 + 5x 40<50
de_crime_rates_age_sex[4, "crime_rate"] = wtd_mean(c(de_de_age_sex[[10, "crime_rate"]], de_de_age_sex[[12, "crime_rate"]]), c(5, 5))
#Female 45-54
#5x 40<50 + 5x 50<60
de_crime_rates_age_sex[5, "crime_rate"] = wtd_mean(c(de_de_age_sex[[12, "crime_rate"]], de_de_age_sex[[14, "crime_rate"]]), c(5, 5))
#Female 55-64
#5x 50<60 + 5x 60<70
de_crime_rates_age_sex[6, "crime_rate"] = wtd_mean(c(de_de_age_sex[[14, "crime_rate"]], de_de_age_sex[[16, "crime_rate"]]), c(5, 5))
#Female 65-74
#5x 60<70 + 5x 70<80
de_crime_rates_age_sex[7, "crime_rate"] = wtd_mean(c(de_de_age_sex[[16, "crime_rate"]], de_de_age_sex[[18, "crime_rate"]]), c(5, 5))
#Female 75-84
#5x 70<80 + 5x ≥80
de_crime_rates_age_sex[8, "crime_rate"] = wtd_mean(c(de_de_age_sex[[18, "crime_rate"]], de_de_age_sex[[20, "crime_rate"]]), c(5, 5))
#Female 85-94
#Female 95-inf
#10x ≥80
de_crime_rates_age_sex[9:10, "crime_rate"] = de_de_age_sex[[20, "crime_rate"]]
#males
de_crime_rates_age_sex[11, "crime_rate"] = wtd_mean(c(de_de_age_sex[[1, "crime_rate"]], de_de_age_sex[[3, "crime_rate"]]), c(4, 2))
de_crime_rates_age_sex[12, "crime_rate"] = wtd_mean(c(de_de_age_sex[[3, "crime_rate"]], de_de_age_sex[[5, "crime_rate"]]), c(1, 4))
de_crime_rates_age_sex[13, "crime_rate"] = wtd_mean(c(de_de_age_sex[[7, "crime_rate"]], de_de_age_sex[[9, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[14, "crime_rate"] = wtd_mean(c(de_de_age_sex[[9, "crime_rate"]], de_de_age_sex[[11, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[15, "crime_rate"] = wtd_mean(c(de_de_age_sex[[11, "crime_rate"]], de_de_age_sex[[13, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[16, "crime_rate"] = wtd_mean(c(de_de_age_sex[[13, "crime_rate"]], de_de_age_sex[[15, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[17, "crime_rate"] = wtd_mean(c(de_de_age_sex[[15, "crime_rate"]], de_de_age_sex[[17, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[18, "crime_rate"] = wtd_mean(c(de_de_age_sex[[17, "crime_rate"]], de_de_age_sex[[19, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[19:20, "crime_rate"] = de_de_age_sex[[19, "crime_rate"]]
#populations
de_crime_rates_age_sex$pop = NA
#female
de_crime_rates_age_sex[1, "pop"] = wtd_mean(c(de_de_age_sex[[2, "Sum"]], de_de_age_sex[[4, "Sum"]]), c(4, 2))
de_crime_rates_age_sex[2, "pop"] = wtd_mean(c(de_de_age_sex[[4, "Sum"]], de_de_age_sex[[6, "Sum"]]), c(1, 4))
de_crime_rates_age_sex[3, "pop"] = wtd_mean(c(de_de_age_sex[[8, "Sum"]], de_de_age_sex[[10, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[4, "pop"] = wtd_mean(c(de_de_age_sex[[10, "Sum"]], de_de_age_sex[[12, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[5, "pop"] = wtd_mean(c(de_de_age_sex[[12, "Sum"]], de_de_age_sex[[14, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[6, "pop"] = wtd_mean(c(de_de_age_sex[[14, "Sum"]], de_de_age_sex[[16, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[7, "pop"] = wtd_mean(c(de_de_age_sex[[16, "Sum"]], de_de_age_sex[[18, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[8, "pop"] = wtd_mean(c(de_de_age_sex[[18, "Sum"]], de_de_age_sex[[20, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[9:10, "pop"] = de_de_age_sex[[20, "Sum"]]
#males
de_crime_rates_age_sex[11, "pop"] = wtd_mean(c(de_de_age_sex[[1, "Sum"]], de_de_age_sex[[3, "Sum"]]), c(4, 2))
de_crime_rates_age_sex[12, "pop"] = wtd_mean(c(de_de_age_sex[[3, "Sum"]], de_de_age_sex[[5, "Sum"]]), c(1, 4))
de_crime_rates_age_sex[13, "pop"] = wtd_mean(c(de_de_age_sex[[7, "Sum"]], de_de_age_sex[[9, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[14, "pop"] = wtd_mean(c(de_de_age_sex[[9, "Sum"]], de_de_age_sex[[11, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[15, "pop"] = wtd_mean(c(de_de_age_sex[[11, "Sum"]], de_de_age_sex[[13, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[16, "pop"] = wtd_mean(c(de_de_age_sex[[13, "Sum"]], de_de_age_sex[[15, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[17, "pop"] = wtd_mean(c(de_de_age_sex[[15, "Sum"]], de_de_age_sex[[17, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[18, "pop"] = wtd_mean(c(de_de_age_sex[[17, "Sum"]], de_de_age_sex[[19, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[19:20, "pop"] = de_de_age_sex[[19, "crime_rate"]]
#method check: recalculated overall crime rate using new age groups
#must be close to original!
wtd_mean(de_crime_rates_age_sex$crime_rate, de_crime_rates_age_sex$pop)
## [1] 0.02
#calculate RRs (relative rates) to use for adjusting
de_crime_rates_age_sex$crime_rate_rr = de_crime_rates_age_sex$crime_rate / wtd_mean(de_crime_rates_age_sex$crime_rate, de_crime_rates_age_sex$pop)
#method check
#must be 1
assert_that(are_equal(wtd_mean(de_crime_rates_age_sex$crime_rate_rr, w = de_crime_rates_age_sex$pop), 1))
## [1] TRUE
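#the manual reweighting above is repetitive; a minimal sketch of a helper that would
#do the same lookups (hypothetical refactor, not used in the analysis below): combine
#the given rows of de_de_age_sex with year-count weights for both rate and population
combine_bands = function(rows, weights) {
  data.frame(crime_rate = wtd_mean(de_de_age_sex$crime_rate[rows], weights),
             pop = wtd_mean(de_de_age_sex$Sum[rows], weights))
}
#e.g. Female 15-19 uses 4 parts of the 14<18 band (row 2) and 2 parts of 18<21 (row 4)
combine_bands(c(2, 4), c(4, 2))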
# add Germany -------------------------------------------------------------
#add Germany manually
de_main = rbind(de_main,
data.frame("Deutschland",
73661434,
.49,
44.3,
.98,
de_crime_rate,
"DEU",
"Germany",
1) %>% set_colnames(names(de_main)))
rownames(de_main)[nrow(de_main)] = "Germany"
# add a few more variables ------------------------------------------------
#western neighbors
de_main$Western_neighbor = de_main$Country_EN %in% c("Denmark", "Netherlands", "Luxembourg", "Belgium", "Switzerland", "Austria")
#northwest European countries + their offshoots
de_main$NW_European = de_main$Country_EN %in% c("Denmark", "Sweden", "Norway", "Finland", "Iceland", "United Kingdom", "Netherlands", "Luxembourg", "Belgium", "Switzerland", "Liechtenstein", "Austria", "France", "Australia", "New Zealand", "USA", "Canada", "Germany")
#age check
#based on Germany's neighbors
mean(de_main[de_main$Western_neighbor == T, c("mean_age")])
## [1] 45
# adjust crime rates for population structure -----------------------------
#loop over each country, calculate the adjustment factor, then adjust
de_main = map_df(de_main$Country, function(cntry) {
#germany?
if (cntry == "Deutschland") {
return(data.frame("adj_factor" = 1,
"crime_rate_adj" = de_crime_rate,
"crime_rate_adj_rr" = 1)
)
}
#fetch data
d = de_age_sex %>%
#filter by country and age group
dplyr::filter(Country == cntry, !age_group %in% c("0-4", "5-9", "10-14"), Sex != "Total")
#calculate!
tibble(
adj_factor = sum(de_crime_rates_age_sex$crime_rate_rr * (d$pop/sum(d$pop))),
crime_rate_adj = de_main[de_main$Country == cntry, "crime_rate"] / adj_factor,
crime_rate_adj_rr = crime_rate_adj / de_crime_rate
)
}) %>% cbind(de_main, .)
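#toy illustration of the adjustment logic above (made-up numbers): if the German
#relative rates in two age-sex cells are 2 and 0.5, and a given origin group has 80%
#of its population in the first cell and 20% in the second, then
sum(c(2, 0.5) * c(0.8, 0.2)) #adjustment factor = 1.7
#so that group's observed per capita rate is divided by 1.7 before comparison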
# check results -----------------------------------------------------------
GG_scatter(de_main, "crime_rate", "crime_rate_adj") +
geom_abline(slope = 1, linetype = "dashed")

GG_scatter(de_main, "crime_rate_rr", "crime_rate_adj_rr") +
geom_abline(slope = 1, linetype = "dashed") +
scale_x_continuous(breaks = 0:15, name = "Relative rate of per capita arrests (2012-2015, Germany = 1)") +
scale_y_continuous(breaks = 0:15, name = "Relative rate of per capita arrests (2012-2015, Germany = 1)\nAdjusted for population structure (age +sex)")

ggsave("figures/relative_ralative_adj.png")
## Saving 7 x 5 in image
Danish
Data
### load meta-data for DST databases
# population --------------------------------------------------------------
meta_FOLK2 = dkstat::dst_meta("FOLK2", lang = "en")
# crimes ------------------------------------------------------------------
#we now load the crime data from a local CSV file (originally from DST)
dk_sa_crimes = readr::read_csv("data/danish_2000_2015.csv")
## Parsed with column specification:
## cols(
## Origin = col_character(),
## Sex = col_character(),
## `15-19` = col_double(),
## `20-29` = col_double(),
## `30-39` = col_double(),
## `40-49` = col_double(),
## `50-59` = col_double(),
## `60-69` = col_double(),
## `70-79` = col_double()
## )
# overlap -----------------------------------------------------------------
v_countries_to_get = dk_sa_crimes$Origin %>% unique
# years -------------------------------------------------------------------
dk_years = c(2000, 2002, 2004:2015)
dk_num_years = length(dk_years)
# save copy ---------------------------------------------------------------
#but only if it has lots of data; don't overwrite a good copy with nothing!
if (object.size(meta_FOLK2) > 5e4) {
write_rds(meta_FOLK2, "data/dk_backup/FOLK2_meta.rds")
}
Age and sex
#correct for both age and sex distribution
#we need crime and population data broken down by sex and age group
# overlapping countries ---------------------------------------------------
#the two source data tables do not overlap entirely in countries, so we need to find the overlap first and use that
#load data
if (file.exists("data/dk_pop.rds")) {
dk_sa_pop = read_rds("data/dk_pop.rds")
} else {
#download again
dk_sa_pop = dkstat::dst_get_data("FOLK2", lang = "en",
query = list(IELAND = v_countries_to_get,
KØN = meta_FOLK2$values$KØN$text,
TID = dk_years,
ALDER = meta_FOLK2$values$ALDER$text
)
) %>%
df_rename(c("KØN", "TID", "ALDER", "value", "IELAND"), c("Sex", "Year", "Age", "pop", "Origin"))
#save local copy
write_rds(dk_sa_pop, "data/dk_pop.rds")
}
#recode age
dk_sa_pop$Age %<>% recode_age %>% as.numeric()
#calculate mean age
dk_mean_age = dk_sa_pop %>%
#swap the yugoslavia name
mutate(Origin = Origin %>% mapvalues("Yugoslavia, Federal Republic", "Yugoslavia")) %>%
ddply("Origin", function(d) {
#sum across years
d2 = ddply(d, c("Age"), function(x) {
data.frame(pop = sum(x$pop))
})
#weighted mean
data.frame(mean_age = wtd_mean(d2$Age+.5, w = d2$pop))
})
#remove under- and over-aged groups (keep ages 15-79, matching the crime data)
dk_sa_pop %<>% dplyr::filter(is_between(Age, 15, 79))
#collapse across years
#we have to use sums here, otherwise we get into trouble
dk_sa_pop %<>% plyr::ddply(.variables = c("Origin", "Age", "Sex"), .progress = "text", plyr::summarize, pop = sum(pop))
#crime data by sex and age
#this data was already loaded above
#restructure
dk_sa_crimes = dk_sa_crimes %>%
tidyr::gather_(key_col = "Age",
value_col = "crimes",
gather_cols = c("15-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79"))
#crime rates by origin, sex, and age groups
dk_sa_comb = dk_sa_crimes %>% ddply(.variables = c("Origin", "Age", "Sex"), .fun = function(block) {
# browser()
startend = age_startend(block$Age)
#calculate population
pop = dk_sa_pop %>% dplyr::filter(is_between(Age, startend[1], startend[2]) & Sex == block$Sex & Origin == block$Origin) %>%
extract2("pop") %>%
sum()
#out
data.frame(pop = pop, crimes = block$crimes) %>% mutate(crime_rate = crimes / pop)
})
#combine the two Yugoslavia entries
dk_sa_comb$Origin %<>% mapvalues("Yugoslavia, Federal Republic", "Yugoslavia")
dk_sa_comb %<>% ddply(.variables = c("Origin", "Age", "Sex"), .fun = df_block_merger)
#calc rates for DK for comparison
dk_sa_dk = dplyr::filter(dk_sa_comb, Origin == "Denmark")
dk_sa_dk_total = dk_sa_dk %$% wtd_mean(crime_rate, w = pop)
dk_sa_dk %<>% mutate(prop = pop/sum(pop),
crime_rate_rr = crime_rate / dk_sa_dk_total
)
#check, total DK rr == 1
assert_that(are_equal(wtd_mean(dk_sa_dk$crime_rate_rr, w = dk_sa_dk$prop), 1))
## [1] TRUE
#calculate relative rates, corrected rates, and aggregate to single number by country
dk_sa_rates = plyr::ddply(dk_sa_comb, .variables = c("Origin"), .fun = function(block) {
# browser()
#initial calcs
block = dplyr::mutate(block, crime_rate_rr = crime_rate / dk_sa_dk$crime_rate,
prop = pop / sum(pop))
#aggregate
d = tibble::tibble(crime_rate = wtd_mean(block$crime_rate, w = block$prop),
crime_rate_rr = crime_rate / dk_sa_dk_total,
pop_struc_correction_factor = wtd_mean(dk_sa_dk$crime_rate_rr, w = block$prop),
struc_adj_crime_rate = crime_rate / pop_struc_correction_factor,
struc_adj_crime_rate_rr = struc_adj_crime_rate / dk_sa_dk_total,
sg_crime_rate_rr = wtd_mean(block$crime_rate_rr, w = block$prop),
sg_crime_rate = sg_crime_rate_rr * dk_sa_dk_total,
male_pct = sum(block$pop[block$Sex == "Men"]) / sum(block$pop),
pop = sum(block$pop) / dk_num_years
)
d
})
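#toy contrast of the two adjustment approaches above (made-up numbers; for simplicity
#the group's own cell shares are used as weights throughout)
dk_toy  = c(.04, .01) #Danish crime rates in two age-sex cells
grp_toy = c(.08, .04) #an origin group's rates in the same cells
w_toy   = c(.5, .5)   #the group's population shares across the cells
#structure adjustment: group's overall rate vs the rate Danes would have with the
#group's age-sex structure
sum(grp_toy * w_toy) / sum(dk_toy * w_toy) #2.4
#subgroup method: weighted mean of the cell-wise relative rates
sum((grp_toy / dk_toy) * w_toy) #3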
# merge with mean age -----------------------------------------------------
dk_main = full_join(dk_sa_rates, dk_mean_age)
## Joining, by = "Origin"
wtd.cors(dk_main[-1]) %>% write_clipboard()
## Crime rate Crime rate rr
## Crime rate 1.00 1.00
## Crime rate rr 1.00 1.00
## Pop struc correction factor 0.42 0.42
## Struc adj crime rate 0.98 0.98
## Struc adj crime rate rr 0.98 0.98
## Sg crime rate rr 0.95 0.95
## Sg crime rate 0.95 0.95
## Male pct 0.37 0.37
## Pop -0.06 -0.06
## Mean age -0.60 -0.60
## Pop struc correction factor
## Crime rate 0.42
## Crime rate rr 0.42
## Pop struc correction factor 1.00
## Struc adj crime rate 0.24
## Struc adj crime rate rr 0.24
## Sg crime rate rr 0.23
## Sg crime rate 0.23
## Male pct 0.73
## Pop -0.11
## Mean age -0.62
## Struc adj crime rate Struc adj crime rate rr
## Crime rate 0.98 0.98
## Crime rate rr 0.98 0.98
## Pop struc correction factor 0.24 0.24
## Struc adj crime rate 1.00 1.00
## Struc adj crime rate rr 1.00 1.00
## Sg crime rate rr 0.98 0.98
## Sg crime rate 0.98 0.98
## Male pct 0.25 0.25
## Pop -0.03 -0.03
## Mean age -0.50 -0.50
## Sg crime rate rr Sg crime rate Male pct Pop
## Crime rate 0.95 0.95 0.37 -0.06
## Crime rate rr 0.95 0.95 0.37 -0.06
## Pop struc correction factor 0.23 0.23 0.73 -0.11
## Struc adj crime rate 0.98 0.98 0.25 -0.03
## Struc adj crime rate rr 0.98 0.98 0.25 -0.03
## Sg crime rate rr 1.00 1.00 0.26 -0.05
## Sg crime rate 1.00 1.00 0.26 -0.05
## Male pct 0.26 0.26 1.00 0.00
## Pop -0.05 -0.05 0.00 1.00
## Mean age -0.43 -0.43 -0.08 0.09
## Mean age
## Crime rate -0.60
## Crime rate rr -0.60
## Pop struc correction factor -0.62
## Struc adj crime rate -0.50
## Struc adj crime rate rr -0.50
## Sg crime rate rr -0.43
## Sg crime rate -0.43
## Male pct -0.08
## Pop 0.09
## Mean age 1.00
#abbreviate
dk_main$abbrev = pu_translate(dk_main$Origin)
#assert unique
assert_that(!any(duplicated(dk_main$abbrev)))
## [1] TRUE
# plot ---------------------------------------------------------------
#compare adj methods
GG_scatter(dk_main, "struc_adj_crime_rate_rr", "sg_crime_rate_rr", case_names = "Origin") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
xlab("Relative crime rate.\nAdjusted by population structure (age and sex).") +
ylab("Relative crime rate.\nCalculated by subgroup comparisons (age and sex).") +
xlim(0, NA) + ylim(0, NA)

ggsave("figures/dk_adj_compare.png")
## Saving 7 x 5 in image
#compare raw and adj
GG_scatter(dk_main, "crime_rate_rr", "sg_crime_rate_rr", case_names = "Origin") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
xlab("Relative crime rate.") +
ylab("Relative crime rate.\nCalculated by subgroup comparisons (age and sex).") + ylim(0, 3.5) +
xlim(0, NA) + ylim(0, NA)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

ggsave("figures/dk_raw_adj_sg.png")
## Saving 7 x 5 in image
#compare raw and adj2
GG_scatter(dk_main, "crime_rate_rr", "struc_adj_crime_rate_rr", case_names = "Origin") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
xlab("Relative crime rate.") +
ylab("Relative crime rate.\nAdjusted by population structure (age and sex).") +
xlim(0, NA) + ylim(0, NA)

ggsave("figures/dk_raw_adj_ps.png")
## Saving 7 x 5 in image
Merge data
#combine Danish and German data
#merge with international country-level data
#keep only countries that are in either DE or DK datasets
v_included_countries = unique(c(dk_main$abbrev, de_main$abbrev))
#assert no NA
assert_that(!any(is.na(v_included_countries)))
## [1] TRUE
#merge
d_main = dplyr::full_join(de_main %>% df_add_affix(prefix = "de_"),
dk_main %>% df_add_affix(prefix = "dk_"),
by = c("de_abbrev" = "dk_abbrev"))
#standard EN names
d_main$Country_EN = pu_translate(d_main$de_abbrev, reverse = T)
#use as rownames
rownames(d_main) = d_main$Country_EN
#mega
d_mega_all = read.csv("data/Megadataset_v2.0n.csv", row.names = 1, sep = ";")
#subset mega
d_mega = d_mega_all[d_main$de_abbrev, ]
Recode data
#copy wanted variables
d_main$IQ = d_mega$LV2012estimatedIQ
d_main$Muslim = d_mega$IslamPewResearch2010
d_main$lat = d_mega$lat
d_main$lon = d_mega$lon
#fill in some missing values
d_main["Occupied Palestinian Territory", c("lat", "lon")] = c(31.9, 35.2)
#based on Ramallah, https://en.wikipedia.org/wiki/Ramallah
d_main["Czechslovakia", c("IQ", "Muslim", "lat", "lon")] = c(98.6, 0, 50.083333, 14.416667)
#2/3 weight to the Czech IQ because that was their fraction of the population, 1/3 to Slovakia
#0% Muslims
#Prague lat/lon
d_main["USSR", c("IQ", "Muslim", "lat", "lon")] = c(96.6, .05, 55.75, 37.616667)
#Russia IQ
#few Muslims because immigrants probably came from the western part of the USSR
#lat lon from Moscow
d_main["Yugoslavia", c("Muslim", "lat", "lon")] = c(0.06 + .037 * .5879 + .0014, 44.816667, 20.466667)
#Muslim from ethnic guess: 100% of Bosniaks (0.06) + 59% of Albanians (.037) + 100% of Turks (0.014)
#https://en.wikipedia.org/wiki/Demographics_of_the_Socialist_Federal_Republic_of_Yugoslavia
#https://en.wikipedia.org/wiki/Albanians#Religion
#lat lon from Serbia (Belgrade)
d_main["Serbia & Montenegro", c("IQ", "Muslim")] = c(89, .19)
#Muslim from wiki
#https://en.wikipedia.org/wiki/Demographics_of_Serbia_and_Montenegro
#IQ from Serbia + reduce a bit because Albania is 82!
#distance to Germany
v_row_germany = which(d_main$Country_EN == "Germany")
m_dists = distm(x = matrix(c(d_main$lon, d_main$lat), ncol = 2))
d_main$distance_to_Germany = m_dists[v_row_germany, ]
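#quick sanity check (illustrative, not part of the original analysis): Germany's
#distance to itself should be 0 meters
d_main["Germany", "distance_to_Germany"] #0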
Main results
### Main analyses
# Compare DK DE -----------------------------------------------------------
GG_scatter(d_main, "dk_crime_rate_rr", "de_crime_rate_rr") +
xlab("Relative crime rate, Denmark") +
ylab("Relative crime rate, Germany") +
scale_x_continuous(breaks = seq(0, 20, .5), limits = c(0, NA)) +
scale_y_continuous(breaks = 0:20)

ggsave("figures/rr_compare.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "dk_sg_crime_rate_rr", "de_crime_rate_adj_rr") +
xlab("Relative crime rate, Denmark\nAdjusted for age and sex") +
ylab("Relative crime rate, Germany\nAdjusted for age and sex") +
scale_x_continuous(breaks = seq(0, 20, .5), limits = c(0, NA)) +
scale_y_continuous(breaks = 0:20)

ggsave("figures/rr_compare_adj.png")
## Saving 7 x 5 in image
# correlation plots -------------------------------------------------------
#simple correlational analysis
GG_scatter(d_main, "IQ", "de_crime_rate_rr", case_names = d_main$Country_EN) +
ylab("Relative crime rate 2012-2015") +
xlab("National IQ (Lynn and Vanhanen 2012 dataset)") +
scale_x_continuous(breaks = seq(0, 200, by = 5)) +
scale_y_continuous(breaks = 0:20)

ggsave("figures/de_IQ_crime.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "Muslim", "de_crime_rate_rr", case_names = d_main$Country_EN) +
ylab("Relative crime rate 2012-2015") +
xlab("Percentage of Muslims in country of origin (Pew Research 2012 data)") +
scale_x_continuous(labels = scales::percent, breaks = seq(0, 1, by = .1)) +
scale_y_continuous(breaks = 0:20)

ggsave("figures/de_muslim_crime.png")
## Saving 7 x 5 in image
#weights: ratio of largest to smallest population (raw and square-root scaled)
max(d_main$de_pop, na.rm=T) / min(d_main$de_pop, na.rm=T)
## [1] 3e+05
max(d_main$de_pop %>% sqrt, na.rm=T) / min(d_main$de_pop %>% sqrt, na.rm=T)
## [1] 549
#intercors
de_cors_wtd = wtd.cors(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")], weight = d_main$de_pop %>% sqrt)
de_cors = wtd.cors(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")])
combine_upperlower(de_cors, de_cors_wtd) %>% write_clipboard(print = T, clean_names = T)
## De crime rate rr Dk crime rate rr IQ Muslim
## De crime rate rr 0.66 -0.53 0.49
## Dk crime rate rr 0.69 -0.49 0.76
## IQ -0.67 -0.68 -0.44
## Muslim 0.49 0.84 -0.59
## De mean age -0.68 -0.54 0.66 -0.47
## De male pct 0.33 0.44 -0.29 0.39
## Distance to Germany 0.17 0.15 -0.41 0.16
## De mean age De male pct Distance to Germany
## De crime rate rr -0.53 0.39 -0.11
## Dk crime rate rr -0.45 0.55 -0.17
## IQ 0.58 -0.29 -0.29
## Muslim -0.45 0.49 -0.01
## De mean age -0.22 -0.25
## De male pct -0.24 -0.23
## Distance to Germany -0.47 -0.16
# rates of comparison groups ----------------------------------------------------------------
dplyr::filter(d_main, de_Western_neighbor == T)[c("de_crime_rate_rr")] %>% unlist() %>% wtd_mean()
## [1] 1.7
dplyr::filter(d_main, de_NW_European == T)[c("de_crime_rate_rr")] %>% unlist() %>% wtd_mean()
## [1] 1.6
# regressions --------------------------------------------------------------
#simple
fit = lm("de_crime_rate_rr ~ IQ + Muslim", data = d_main) %>% MOD_summary(runs = 200)
fit[[1]] %>% write_clipboard()
## Beta SE CI lower CI upper
## IQ -0.39 0.10 -0.59 -0.19
## Muslim 0.31 0.10 0.11 0.51
#with controls
fit_controls = lm("de_crime_rate_rr ~ IQ + Muslim + de_male_pct + de_mean_age + distance_to_Germany", data = d_main) %>% MOD_summary(runs = 200)
fit_controls[[1]] %>% write_clipboard()
## Beta SE CI lower CI upper
## IQ -0.30 0.11 -0.52 -0.09
## Muslim 0.15 0.11 -0.06 0.36
## De male pct 0.10 0.10 -0.10 0.30
## De mean age -0.32 0.11 -0.53 -0.10
## Distance to Germany -0.23 0.09 -0.42 -0.05
# Danish correlations -----------------------------------------------------
wtd.cors(d_main[c("dk_crime_rate_rr",
"dk_sg_crime_rate_rr",
"dk_struc_adj_crime_rate",
"dk_male_pct",
"dk_mean_age",
"IQ",
"Muslim")]) %>%
write_clipboard()
## Dk crime rate rr Dk sg crime rate rr
## Dk crime rate rr 1.00 0.95
## Dk sg crime rate rr 0.95 1.00
## Dk struc adj crime rate 0.98 0.98
## Dk male pct 0.37 0.26
## Dk mean age -0.60 -0.43
## IQ -0.49 -0.41
## Muslim 0.76 0.67
## Dk struc adj crime rate Dk male pct Dk mean age
## Dk crime rate rr 0.98 0.37 -0.60
## Dk sg crime rate rr 0.98 0.26 -0.43
## Dk struc adj crime rate 1.00 0.25 -0.50
## Dk male pct 0.25 1.00 -0.08
## Dk mean age -0.50 -0.08 1.00
## IQ -0.47 -0.18 0.51
## Muslim 0.73 0.32 -0.46
## IQ Muslim
## Dk crime rate rr -0.49 0.76
## Dk sg crime rate rr -0.41 0.67
## Dk struc adj crime rate -0.47 0.73
## Dk male pct -0.18 0.32
## Dk mean age 0.51 -0.46
## IQ 1.00 -0.44
## Muslim -0.44 1.00
# Adjusted German ---------------------------------------------------------
GG_scatter(d_main, "de_crime_rate_rr", "de_crime_rate_adj_rr") +
scale_x_continuous(breaks = 0:20, name = "Relative crime rate in Germany") +
scale_y_continuous(breaks = 0:20, name = "Age and sex adjusted relative crime rate in Germany")

ggsave("figures/de_rr_compare.png")
## Saving 7 x 5 in image
#compare the spread (SD) of raw vs adjusted relative rates
sd(d_main$de_crime_rate_rr, na.rm=T)
## [1] 1.7
sd(d_main$de_crime_rate_adj_rr, na.rm=T)
## [1] 0.92
sd(d_main$dk_crime_rate_rr, na.rm=T)
## [1] 0.86
sd(d_main$dk_struc_adj_crime_rate_rr, na.rm=T)
## [1] 0.65
# International comparison ------------------------------------------------------
#copy data into mega
d_mega_all = d_main %>%
set_rownames(d_main$de_abbrev) %>%
merge_datasets(d_mega_all)
## Factors were converted to characters.
#international crime correlations
wtd.cors(d_mega_all[c("NorwayViolentCrimeAdjustedOddsRatioSkardhamar2014",
"FinlandViolentCrimeAdjustedOddsRatioSkardhamar2014",
"de_crime_rate_adj_rr",
"dk_struc_adj_crime_rate_rr",
"DutchCrimeAll")]) %>%
MAT_half() %>%
psych::describe() %>%
as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 10 0.72 0.14 0.76 0.75 0.12 0.42 0.85 0.43 -0.9 -0.67
## se
## X1 0.046
pairwiseCount(d_mega_all[c("NorwayViolentCrimeAdjustedOddsRatioSkardhamar2014",
"FinlandViolentCrimeAdjustedOddsRatioSkardhamar2014",
"de_crime_rate_adj_rr",
"dk_struc_adj_crime_rate_rr",
"DutchCrimeAll")]) %>%
MAT_half() %>%
psych::describe() %>%
as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10 28 16 20 26 5.2 16 60 44 0.9 -0.92 5
# predictor correlations with adjusted data Germany -----------------------
#intercors
wtd.cors(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "de_crime_rate_adj_rr", "dk_struc_adj_crime_rate_rr", "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")]) %>%
magrittr::extract(-c(1:4), 1:4) %>%
write_clipboard()
## De crime rate rr Dk crime rate rr De crime rate adj rr
## IQ -0.53 -0.49 -0.46
## Muslim 0.49 0.76 0.35
## De mean age -0.53 -0.45 -0.37
## De male pct 0.39 0.55 0.22
## Distance to Germany -0.11 -0.17 -0.16
## Dk struc adj crime rate rr
## IQ -0.47
## Muslim 0.73
## De mean age -0.40
## De male pct 0.47
## Distance to Germany -0.17
pairwiseCount(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "de_crime_rate_adj_rr", "dk_struc_adj_crime_rate_rr", "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")]) %>%
magrittr::extract(-c(1:4), 1:4)
## de_crime_rate_rr dk_crime_rate_rr de_crime_rate_adj_rr
## IQ 83 70 83
## Muslim 83 70 83
## de_mean_age 83 60 83
## de_male_pct 83 60 83
## distance_to_Germany 83 70 83
## dk_struc_adj_crime_rate_rr
## IQ 70
## Muslim 70
## de_mean_age 60
## de_male_pct 60
## distance_to_Germany 70
#plots
GG_scatter(d_main, "IQ", "de_crime_rate_adj_rr", case_names = d_main$Country_EN) +
ylab("Relative crime rate 2012-2015. Adjusted for age and sex.") +
xlab("National IQ (Lynn and Vanhanen 2012 dataset)") +
scale_x_continuous(breaks = seq(0, 200, by = 5)) +
scale_y_continuous(breaks = 0:20)

ggsave("figures/de_IQ_crime_adj.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "Muslim", "de_crime_rate_adj_rr", case_names = d_main$Country_EN) +
ylab("Relative crime rate 2012-2015. Adjusted for age and sex.") +
xlab("Percentage of Muslims in country of origin (Pew Research 2012 data)") +
scale_x_continuous(labels = scales::percent, breaks = seq(0, 1, by = .1))

ggsave("figures/de_muslim_crime_adj.png")
## Saving 7 x 5 in image
# regressions with adjusted numbers ---------------------------------------
#simple
fit = lm("de_crime_rate_adj_rr ~ IQ + Muslim", data = d_main) %>% MOD_summary(runs = 200)
fit[[1]] %>% write_clipboard()
## Beta SE CI lower CI upper
## IQ -0.37 0.11 -0.59 -0.15
## Muslim 0.18 0.11 -0.04 0.40
#with controls
fit_controls = lm("de_crime_rate_adj_rr ~ IQ + Muslim + distance_to_Germany", data = d_main) %>% MOD_summary(runs = 200)
fit_controls[[1]] %>% write_clipboard()
## Beta SE CI lower CI upper
## IQ -0.45 0.11 -0.67 -0.24
## Muslim 0.14 0.11 -0.08 0.35
## Distance to Germany -0.26 0.10 -0.46 -0.07
# robustness check: no outliers --------------------------------------------------------------
d_no_outliers = d_main %>% filter(!de_abbrev %in% c("GEO", "DZA"))
#cors
wtd.cors(d_no_outliers[c("de_crime_rate_rr", "dk_crime_rate_rr", "de_crime_rate_adj_rr", "dk_struc_adj_crime_rate_rr", "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")]) %>%
magrittr::extract(-c(1:4), 1:4) %>%
write_clipboard()
## De crime rate rr Dk crime rate rr De crime rate adj rr
## IQ -0.62 -0.49 -0.52
## Muslim 0.53 0.76 0.36
## De mean age -0.60 -0.45 -0.38
## De male pct 0.40 0.53 0.20
## Distance to Germany -0.10 -0.16 -0.17
## Dk struc adj crime rate rr
## IQ -0.46
## Muslim 0.73
## De mean age -0.39
## De male pct 0.45
## Distance to Germany -0.16
#regression
#simple
fit = lm("de_crime_rate_adj_rr ~ IQ + Muslim", data = d_no_outliers) %>% MOD_summary(runs = 200)
fit[[1]] %>% write_clipboard()
## Beta SE CI lower CI upper
## IQ -0.45 0.11 -0.66 -0.24
## Muslim 0.16 0.11 -0.05 0.37
#with controls
fit_controls = lm("de_crime_rate_adj_rr ~ IQ + Muslim + distance_to_Germany", data = d_no_outliers) %>% MOD_summary(runs = 200)
fit_controls[[1]] %>% write_clipboard()
## Beta SE CI lower CI upper
## IQ -0.54 0.11 -0.75 -0.33
## Muslim 0.11 0.10 -0.09 0.32
## Distance to Germany -0.29 0.09 -0.48 -0.10
# robustness check: log-transform outcome ---------------------------------
logp1 = function(x) log(x + 1)
#normality
GG_denhist(d_main$de_crime_rate_adj_rr, vline = NULL) +
xlab("Relative crime rate, adjusted for age and sex")
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing non-finite values (stat_density).

ggsave("figures/de_rr_adj_dist.png")
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing non-finite values (stat_density).
#cors
wtd.cors(d_main$de_crime_rate_adj_rr %>% logp1,
d_main$de_crime_rate_adj_rr)
## [,1]
## [1,] 0.97
wtd.cors(d_no_outliers$de_crime_rate_adj_rr %>% logp1,
d_no_outliers$de_crime_rate_adj_rr)
## [,1]
## [1,] 0.99
# growth and crime --------------------------------------------------------
GG_scatter(d_main, "de_growth", "de_crime_rate_adj_rr")

cor.test(d_main$de_growth, d_main$de_crime_rate_adj_rr, method = "spearman", use = "pair")
##
## Spearman's rank correlation rho
##
## data: d_main$de_growth and d_main$de_crime_rate_adj_rr
## S = 72932, p-value = 0.03
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.23