Init

library(pacman)
p_load(stringr, tidyr, plyr, dplyr, psych, readr, weights, geosphere, scales, dkstat, kirkegaard, magrittr, readxl, purrr, assertthat)
#devtools::install_github("rOpenGov/dkstat")

options(encoding = "UTF-8", digits = 2)

Functions

Functions for this project

# recode age --------------------------------------------------------------

recode_age = function(x) {
  x %>% str_replace(" years?", "")
}
recode_age("15 years") == "15"
## [1] TRUE
recode_age("1 year") == "1"
## [1] TRUE
age_startend = function(x) {
  m = str_match(x, "(\\d\\d).(\\d\\d)")
  c(start = m[1, 2] %>% as.numeric(),
    end = m[1, 3] %>% as.numeric()
    )
}
age_startend("20-30")
## start   end 
##    20    30
# german crime data loader ------------------------------------------------
german_crime_loader = function(file) {
  # browser()
  #file ending
  #file_ending = str_match(file, "\\.(\\w+)$")[, 2]
  
  #read sheet
  #d = readWorksheetFromFile(file = file, sheet = 1, startRow = 5)
  #if (file_ending == "xls") d = xlsx::read.xlsx(file = file, sheetIndex = 1, startRow = 5)
  # if (file_ending == "xls") d = readxl::read_excel(file, sheet = 1, skip = 4)
  # if (file_ending == "xls") d = read.xls(xls = file, sheet = 1, startRow = 5)
  # if (file_ending == "xlsx") d = read.xlsx(file = file, sheetIndex = 1, startRow = 5)
  
  #csv fuck yeah!
  d = read_csv(file)
  
  #subset metadata
  meta = d[, 1:2]
  d = d[, -c(1:4)]
  
  #transpose
  d = df_t(d)
  
  #add colnames
  colnames(d) = "code_" + meta[[1]] #generate names from codes
  colnames(d)[1] = "All_crime" #custom name for the first
  
  #add country name as column
  d$Country = rownames(d) %>% str_replace_all(pattern = "([\\w\\.])\\.(\\w)", replacement = "\\1 \\2") %>%
    str_replace_all(pattern = "(\\w)\\. ", replacement = "\\1, ") #replace "name. " with "name, ".
  
  d
}


# german census data loader -----------------------------------------------
#function to load the census data
#this data is messy!
german_census_loader = function(sheet, filename = "data/census_data2.xlsx") {
  # browser()
  #read data
  # d = XLConnect::readWorksheetFromFile(filename, sheet = sheet, startRow = 4)
  d = readxl::read_excel(filename, sheet = sheet, skip = 3)
  
  #fill in origins
  d = tidyr::fill(d, Country)
  
  #sex
  d$Sex = rep(c("Male", "Female", "Total"), length.out = nrow(d))
  
  #remove non-countries
  #these are regional aggregates and the like
  noncountries = c("EU-Staaten",
           "EU-Kandidatenländer",
           "EWR-Staaten/Schweiz",
           "Sonstiges Europa",
           "Afrika",
           "Nordafrika",
           "Westafrika",
           "Zentralafrika",
           "Ostafrika",
           "Südliches Afrika",
           "Amerika",
           "Nordamerika",
           "Mittelamerika und Karibik",
           "Südamerika",
           "Asien",
           "Vorderasien",
           "Süd- und Südostasien",
           "Ost- und Zentralasien",
           "Australien und Ozeanien",
           "Sonstige Ausprägungen",
           "Insgesamt"
           )
  #remove
  d = dplyr::filter(d, !Country %in% noncountries)
  
  #remove totals
  # d = dplyr::filter(d, Sex != "Total")
}


# growth ------------------------------------------------------------------

growth = function(x) (x[4] / x[1])-1

# df_block_merger ---------------------------------------------------------
#helper function for df_merge_rows

df_block_merger = function(block, funcs = list(.numeric = sum, .default = dplyr::first)) {
  #skip if nrow==1
  if (nrow(block) == 1) return(block)
  #prepare out block
  out = as.data.frame(matrix(ncol=ncol(block), nrow = 1))
  colnames(out) = colnames(block)
  #apply functions
  for (var in colnames(block)) {
    #specific func?
    if (var %in% funcs) {
      out[[var]] = funcs[[var]](block[[var]])
    } else if (is.numeric(block[[var]]) && ".numeric" %in% funcs) {
      #if var is numeric, apply numeric default func if present
      out[[var]] = funcs[[".numeric"]](block[[var]])
    } else {
      #else, apply default function
      out[[var]] = funcs[[".default"]](block[[var]])
    }
  }
  
  out
}

German data

#census
de_census_years = as.character(2012:2015) %>% set_names(2012:2015)
de_census = lapply(de_census_years, FUN = german_census_loader) %>% 
  ldf_to_df(by_name = "Year")


#crime
#http://stackoverflow.com/questions/7963393/out-of-memory-error-java-when-using-r-and-xlconnect-package
de_crime = lapply(c("data/german_crime_2012.csv",
                    "data/german_crime_2013.csv",
                    "data/german_crime_2014.csv",
                    "data/german_crime_2015.csv"),
                  FUN = german_crime_loader)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Straftat = col_character(),
##   `Straftaten-Text` = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Straftat = col_character(),
##   `Straftaten-Text` = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Schl.-Zahl der Tat` = col_character(),
##   `Straftaten-Text` = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Schlüssel = col_character(),
##   Straftat = col_character(),
##   `Tatverdächtige insgesamt` = col_character(),
##   Deutschland = col_character()
## )
## See spec(...) for full column specifications.
names(de_crime) = 2012:2015

#tidy
### Tidyr German data, and calculate statistics


# as one df ---------------------------------------------------------------
de_crime %<>% ldf_to_df(by_name = "Year")
## Not all columns were found in each data.frame. This may indicate an error.
#it's not an error, the crime types covered by different datasets are slightly non-overlapping

#move important cols to front
de_crime %<>% df_reorder_columns(vars = c("Country" = 1, "Year" = 2))


# subset to overlap between sources ---------------------------------------
#rename Staatenlos to Palästinensische Gebiete
de_crime$Country %<>% mapvalues("Staatenlos", "Palästinensische Gebiete")
de_census$Country %<>% mapvalues("Staatenlos", "Palästinensische Gebiete")

#intersect of uniq countries is the overlap
de_countries = intersect(unique(de_crime$Country), unique(de_census$Country))
de_census_countries = unique(de_census$Country)

#which are missing?
de_census_countries[!de_census_countries %in% de_countries]
## [1] "Ungeklärt und ohne Angabe" "Britische Überseegebiete"
# stopifnot(length(de_census_countries[!de_census_countries %in% de_countries]) == 0)

#subset
de_census %<>% dplyr::filter(Country %in% de_countries)
de_crime %<>% dplyr::filter(Country %in% de_countries)


# long form ---------------------------------------------------------------
#gather the age columns to make the data into long form
de_census %<>% tidyr::gather_(key_col = "age_group", value_col = "pop", gather_cols = str_detect2(names(de_census), pattern = "Age_\\d", value = T))
#this results in duplicates for the mean age data, but we remove it later

#fix age_group content
de_census$age_group %<>% str_replace("Age_", "") %>% str_replace("_", "-")

# overall demographic data -----------------------------------------------------------
de_demo = de_census %>% ddply(.variables = c("Country"), .fun = function(d) {

  #pop by sex
  sex_pop = plyr::ddply(d, "Sex", plyr::summarise, pop = sum(pop))
  
  #divide by years to get mean
  sex_pop$pop %<>% divide_by(length(unique(d$Year)))
  
  #y is output
  stopifnot(nrow(sex_pop) == 3)
  y = dplyr::data_frame(pop = sex_pop %>% dplyr::filter(Sex == "Total") %$% pop,
                 male_pct = (sex_pop %>% dplyr::filter(Sex == "Male") %$% pop) / pop)
  
  #calculate weighted mean for mean age
  mean_age_year = d %>% dplyr::filter(Sex == "Total") %>% plyr::ddply("Year", .fun = plyr::summarise,
                                                                 pop = sum(pop),
                                                                 mean_age = wtd_mean(Age_mean))
  y$mean_age = mean_age_year %$% wtd_mean(mean_age, w = pop)

  #population growth
  if (length(unique(d$Year)) < 4) browser()
  y$growth = mean_age_year[4, 2] / mean_age_year[1, 2]
  
  #out
  y
})
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
# age and sex distribution, averaged across years ------------------------------------------------
de_age_sex = ddply(de_census, c("Country", "Sex", "age_group"), function(x) {
  data.frame(pop = mean(x$pop))
})


#age x sex crime data
de_de_age_sex = readxl::read_excel("data/data-27.10.2016.xlsx", sheet = 1, skip = 1) %>% df_legalize_names()
names(de_de_age_sex)[15] = "crime_rate"

#divide rates by 100
de_de_age_sex$crime_rate %<>% divide_by(100)

#remove columns we dont care about
de_de_age_sex %<>% select(Age_group, Sex, Sum, crime_rate)

German analysis

# get mean values for 2012-2014 -------------------------------------------

#mean across years 2012-2014
de_crime_mean = df_merge_rows(de_crime, key = "Country", func = wtd_mean, error = F) %>% magrittr::extract(, -c(2))


# calculate per capita ----------------------------------------------------
#same order?
stopifnot(all(de_crime_mean$Country == de_demo$Country))

de_main = de_demo
de_main$crime_rate = de_crime_mean$All_crime / de_demo$pop

#merge to one
# de_main = cbind(de_census_mean[-1], de_crime_per_capita) #skip first col to avoid duplicated lists of german names

# abbreviations -----------------------------------------------------

#abbrev
de_main$abbrev = pu_translate(de_main$Country)

#standard EN names
de_main$Country_EN = pu_translate(de_main$abbrev, reverse = T)
rownames(de_main) = de_main$Country_EN

#assert unique
assert_that(!any(duplicated(de_main$abbrev)))
## [1] TRUE
# Germany age x sex data --------------------------------------------------
#forward fill the age ranges
de_de_age_sex %<>% tidyr::fill(Age_group)

#remove "Both sex" rows
de_de_age_sex %<>% dplyr::filter(Sex != "Both")

#filter <14 group
de_de_age_sex %<>% dplyr::filter(Age_group != "<14")

#weighted mean for germans
(de_crime_rate = wtd_mean(de_de_age_sex$crime_rate, w = de_de_age_sex$Sum))
## [1] 0.023
#plausible, similar to neighbors countries

#calculate unadjusted crime rate rr's
de_main$crime_rate_rr = de_main$crime_rate / de_crime_rate

#estimate matching crime rates for age x sex groups seen in the foreigner data
de_crime_rates_age_sex = data.frame(
  Sex = rep(c("Female", "Male"), each = 10),
  age_group = c("15-19", "20-24", "25-34", "35-44", "45-54", 
                "55-64", "65-74", "75-84", "85-94", "95-inf")
)

#fill in estimates manually
de_crime_rates_age_sex$crime_rate = NA

#from these
#14<18 18<21 21<25 25<30 30<40 40<50 50<60 60<70 70<80 ≥80

#Female     15-19
#4x 14-17 + 2x 18-20
de_crime_rates_age_sex[1, "crime_rate"] = wtd_mean(c(de_de_age_sex[[2, "crime_rate"]], de_de_age_sex[[4, "crime_rate"]]), c(4, 2))
#Female     20-24
#1x 18<21 + 4x 21<25
de_crime_rates_age_sex[2, "crime_rate"] = wtd_mean(c(de_de_age_sex[[4, "crime_rate"]], de_de_age_sex[[6, "crime_rate"]]), c(1, 4))
#Female     25-34
#5x 25<30 + 5x 30<40
de_crime_rates_age_sex[3, "crime_rate"] = wtd_mean(c(de_de_age_sex[[8, "crime_rate"]], de_de_age_sex[[10, "crime_rate"]]), c(5, 5))
#Female     35-44
#5x 30<40 + 5x 40<50
de_crime_rates_age_sex[4, "crime_rate"] = wtd_mean(c(de_de_age_sex[[10, "crime_rate"]], de_de_age_sex[[12, "crime_rate"]]), c(5, 5))
#Female     45-54
#5x 40<50 + 5x 50<60
de_crime_rates_age_sex[5, "crime_rate"] = wtd_mean(c(de_de_age_sex[[12, "crime_rate"]], de_de_age_sex[[14, "crime_rate"]]), c(5, 5))
#Female     55-64
#5x 50<60 + 5x 60<70
de_crime_rates_age_sex[6, "crime_rate"] = wtd_mean(c(de_de_age_sex[[14, "crime_rate"]], de_de_age_sex[[16, "crime_rate"]]), c(5, 5))
#Female     65-74
#5x 60<70 + 5x 70<80
de_crime_rates_age_sex[7, "crime_rate"] = wtd_mean(c(de_de_age_sex[[16, "crime_rate"]], de_de_age_sex[[18, "crime_rate"]]), c(5, 5))
#Female     75-84
#5x 70<80 + 5x ≥80
de_crime_rates_age_sex[8, "crime_rate"] = wtd_mean(c(de_de_age_sex[[18, "crime_rate"]], de_de_age_sex[[20, "crime_rate"]]), c(5, 5))
#Female     85-94
#Female    95-inf
#10x ≥80
de_crime_rates_age_sex[9:10, "crime_rate"] = de_de_age_sex[[20, "crime_rate"]]

#males
de_crime_rates_age_sex[11, "crime_rate"] = wtd_mean(c(de_de_age_sex[[1, "crime_rate"]], de_de_age_sex[[3, "crime_rate"]]), c(4, 2))
de_crime_rates_age_sex[12, "crime_rate"] = wtd_mean(c(de_de_age_sex[[3, "crime_rate"]], de_de_age_sex[[5, "crime_rate"]]), c(1, 4))
de_crime_rates_age_sex[13, "crime_rate"] = wtd_mean(c(de_de_age_sex[[7, "crime_rate"]], de_de_age_sex[[9, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[14, "crime_rate"] = wtd_mean(c(de_de_age_sex[[9, "crime_rate"]], de_de_age_sex[[11, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[15, "crime_rate"] = wtd_mean(c(de_de_age_sex[[11, "crime_rate"]], de_de_age_sex[[13, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[16, "crime_rate"] = wtd_mean(c(de_de_age_sex[[13, "crime_rate"]], de_de_age_sex[[15, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[17, "crime_rate"] = wtd_mean(c(de_de_age_sex[[15, "crime_rate"]], de_de_age_sex[[17, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[18, "crime_rate"] = wtd_mean(c(de_de_age_sex[[17, "crime_rate"]], de_de_age_sex[[19, "crime_rate"]]), c(5, 5))
de_crime_rates_age_sex[19:20, "crime_rate"] = de_de_age_sex[[19, "crime_rate"]]

#populations
de_crime_rates_age_sex$pop = NA

#female
de_crime_rates_age_sex[1, "pop"] = wtd_mean(c(de_de_age_sex[[2, "Sum"]], de_de_age_sex[[4, "Sum"]]), c(4, 2))
de_crime_rates_age_sex[2, "pop"] = wtd_mean(c(de_de_age_sex[[4, "Sum"]], de_de_age_sex[[6, "Sum"]]), c(1, 4))
de_crime_rates_age_sex[3, "pop"] = wtd_mean(c(de_de_age_sex[[8, "Sum"]], de_de_age_sex[[10, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[4, "pop"] = wtd_mean(c(de_de_age_sex[[10, "Sum"]], de_de_age_sex[[12, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[5, "pop"] = wtd_mean(c(de_de_age_sex[[12, "Sum"]], de_de_age_sex[[14, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[6, "pop"] = wtd_mean(c(de_de_age_sex[[14, "Sum"]], de_de_age_sex[[16, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[7, "pop"] = wtd_mean(c(de_de_age_sex[[16, "Sum"]], de_de_age_sex[[18, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[8, "pop"] = wtd_mean(c(de_de_age_sex[[18, "Sum"]], de_de_age_sex[[20, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[9:10, "pop"] = de_de_age_sex[[20, "Sum"]]

#males
de_crime_rates_age_sex[11, "pop"] = wtd_mean(c(de_de_age_sex[[1, "Sum"]], de_de_age_sex[[3, "Sum"]]), c(4, 2))
de_crime_rates_age_sex[12, "pop"] = wtd_mean(c(de_de_age_sex[[3, "Sum"]], de_de_age_sex[[5, "Sum"]]), c(1, 4))
de_crime_rates_age_sex[13, "pop"] = wtd_mean(c(de_de_age_sex[[7, "Sum"]], de_de_age_sex[[9, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[14, "pop"] = wtd_mean(c(de_de_age_sex[[9, "Sum"]], de_de_age_sex[[11, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[15, "pop"] = wtd_mean(c(de_de_age_sex[[11, "Sum"]], de_de_age_sex[[13, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[16, "pop"] = wtd_mean(c(de_de_age_sex[[13, "Sum"]], de_de_age_sex[[15, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[17, "pop"] = wtd_mean(c(de_de_age_sex[[15, "Sum"]], de_de_age_sex[[17, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[18, "pop"] = wtd_mean(c(de_de_age_sex[[17, "Sum"]], de_de_age_sex[[19, "Sum"]]), c(5, 5))
de_crime_rates_age_sex[19:20, "pop"] = de_de_age_sex[[19, "crime_rate"]]

#method check: recalculated overall crime rate using new age groups
#must be close to original!
wtd_mean(de_crime_rates_age_sex$crime_rate, de_crime_rates_age_sex$pop)
## [1] 0.02
#calcualte rr's to use for adjusting
de_crime_rates_age_sex$crime_rate_rr = de_crime_rates_age_sex$crime_rate / wtd_mean(de_crime_rates_age_sex$crime_rate, de_crime_rates_age_sex$pop)

#method check
#must be 1
assert_that(are_equal(wtd_mean(de_crime_rates_age_sex$crime_rate_rr, w = de_crime_rates_age_sex$pop), 1))
## [1] TRUE
# add Germany -------------------------------------------------------------

#add Germany manually
de_main = rbind(de_main,
                data.frame("Deutschland",
                           73661434,
                           .49,
                           44.3,
                           .98,
                           de_crime_rate,
                           "DEU",
                           "Germany",
                           1) %>% set_colnames(names(de_main)))
rownames(de_main)[83] = "Germany"


# add a few more variables ------------------------------------------------

#western neighbors
de_main$Western_neighbor = de_main$Country_EN %in% c("Denmark", "Netherlands", "Luxembourg", "Belgium", "Switzerland", "Austria")

#western european + offshoots
de_main$NW_European = de_main$Country_EN %in% c("Denmark", "Sweden", "Norway", "Finland", "Iceland", "United Kingdom", "Netherlands", "Luxembourg", "Belgium", "Switzerland", "Liechtenstein", "Austria", "France", "Australia", "New Zealand", "USA", "Canada", "Germany")


#age check
#based on Germany's neighbors
mean(de_main[de_main$Western_neighbor == T, c("mean_age")])
## [1] 45
# adjust crime rates for population structure -----------------------------
#loop over each country, calculate the adjustment factor, then adjust

de_main = map_df(de_main$Country, function(cntry) {

  #germany?
  if (cntry == "Deutschland") {
    return(data.frame("adj_factor" = 1,
                      "crime_rate_adj" = de_crime_rate,
                      "crime_rate_adj_rr" = 1)
           )
  }
  
  #fetch data
  d = de_age_sex %>% 
    #filter by country and age group
    dplyr::filter(Country == cntry, !age_group %in% c("0-4", "5-9", "10-14"), Sex != "Total")
  
  #calculate!
  tibble(
    adj_factor = sum(de_crime_rates_age_sex$crime_rate_rr * (d$pop/sum(d$pop))),
    crime_rate_adj = de_main[de_main$Country == cntry, "crime_rate"] / adj_factor,
    crime_rate_adj_rr = crime_rate_adj / de_crime_rate
  )
  
}) %>% cbind(de_main, .)



# check results -----------------------------------------------------------

GG_scatter(de_main, "crime_rate", "crime_rate_adj") +
  geom_abline(slope = 1, linetype = "dashed")

GG_scatter(de_main, "crime_rate_rr", "crime_rate_adj_rr") +
  geom_abline(slope = 1, linetype = "dashed") +
  scale_x_continuous(breaks = 0:15, name = "Relative rate of per capita arrests (2012-2015, Germany = 1)") +
  scale_y_continuous(breaks = 0:15, name = "Relative rate of per capita arrests (2012-2015, Germany = 1)\nAdjusted for population structure (age +sex)")

ggsave("figures/relative_ralative_adj.png")
## Saving 7 x 5 in image

Danish

Data

### load meta-data for DST databases


# population --------------------------------------------------------------
meta_FOLK2 = dkstat::dst_meta("FOLK2", lang = "en")

# crimes ------------------------------------------------------------------
#we load data from file from DST now
dk_sa_crimes = readr::read_csv("data/danish_2000_2015.csv")
## Parsed with column specification:
## cols(
##   Origin = col_character(),
##   Sex = col_character(),
##   `15-19` = col_double(),
##   `20-29` = col_double(),
##   `30-39` = col_double(),
##   `40-49` = col_double(),
##   `50-59` = col_double(),
##   `60-69` = col_double(),
##   `70-79` = col_double()
## )
# overlap -----------------------------------------------------------------

v_countries_to_get = dk_sa_crimes$Origin %>% unique

# years -------------------------------------------------------------------

dk_years = c(2000, 2002, 2004:2015)
dk_num_years = length(dk_years)

# save copy ---------------------------------------------------------------
#but only if it has lots of data. dont overwrite with nothing!

if (object.size(meta_FOLK2) > 5e4) {
  write_rds(meta_FOLK2, "data/dk_backup/FOLK2_meta.rds")
}

Age and sex

#correct for both age and sex distribution
#we need crime and populatin data broken down by sex and age group


# overlapping countries ---------------------------------------------------
#the two source data tables do not overlap entirely in countries, so we need to find the overlap first and use that

#load data
if (file.exists("data/dk_pop.rds")) {
  dk_sa_pop = read_rds("data/dk_pop.rds")
  
} else {
  #download again
  dk_sa_pop = dkstat::dst_get_data("FOLK2", lang = "en",
                                   query = list(IELAND = v_countries_to_get,
                                                KØN = meta_FOLK2$values$KØN$text,
                                                TID = dk_years,
                                                ALDER = meta_FOLK2$values$ALDER$text
                                   )
  ) %>% 
    df_rename(c("KØN", "TID", "ALDER", "value", "IELAND"), c("Sex", "Year", "Age", "pop", "Origin"))
  
  
  #save local copy
  write_rds(dk_sa_pop, "data/dk_pop.rds")
}

#recode age
dk_sa_pop$Age %<>% recode_age %>% as.numeric()

#calculate mean age
dk_mean_age = dk_sa_pop %>%
  #swap the yugoslavia name
  mutate(Origin = Origin %>% mapvalues("Yugoslavia, Federal Republic", "Yugoslavia")) %>% 
  ddply("Origin", function(d) {
    
    #sum across years
    d2 = ddply(d, c("Age"), function(x) {
      data.frame(pop = sum(x$pop))
    })
    
    #weighted mean
    data.frame(mean_age = wtd_mean(d2$Age+.5, w = d2$pop))
  })

#remove under and overaged
dk_sa_pop %<>% dplyr::filter(is_between(Age, 15, 79))

#collapse across years
#yes, have to use sums, otherwise trouble!
dk_sa_pop %<>% plyr::ddply(.variables = c("Origin", "Age", "Sex"), .progress = "text", plyr::summarize, pop = sum(pop))
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |=======                                                          |  12%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |====================                                             |  32%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |======================                                           |  35%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |=================================                                |  52%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |===================================                              |  55%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |=====================================                            |  58%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=======================================                          |  61%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |==============================================                   |  72%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |================================================                 |  75%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |===========================================================      |  92%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |=============================================================    |  95%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |===============================================================  |  98%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================|  99%
  |                                                                       
  |=================================================================| 100%
#crime data by sex and age
#we already loaded this data in the dk_meta.R

#restructure
dk_sa_crimes = dk_sa_crimes %>% 
  tidyr::gather_(key_col = "Age",
                 value_col = "crimes",
                 gather_cols = c("15-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79"))

#crime rates by origin, sex, and age groups
dk_sa_comb = dk_sa_crimes %>% ddply(.variables = c("Origin", "Age", "Sex"), .fun = function(block) {
  # browser()
  startend = age_startend(block$Age)
  
  #calculate population
  pop = dk_sa_pop %>% dplyr::filter(is_between(Age, startend[1], startend[2]) & Sex == block$Sex & Origin == block$Origin) %>% 
    extract2("pop") %>% 
    sum()
  
  #out
  data.frame(pop = pop, crimes = block$crimes) %>% mutate(crime_rate = crimes / pop)
})

#combine yugoslavias
dk_sa_comb$Origin %<>% mapvalues("Yugoslavia, Federal Republic", "Yugoslavia")
dk_sa_comb %<>% ddply(.variables = c("Origin", "Age", "Sex"), .fun = df_block_merger)

#calc rates for DK for comparison
dk_sa_dk = dplyr::filter(dk_sa_comb, Origin == "Denmark")
dk_sa_dk_total = dk_sa_dk %$% wtd_mean(crime_rate, w = pop)
dk_sa_dk %<>% mutate(prop = pop/sum(pop),
                     crime_rate_rr = crime_rate / dk_sa_dk_total
)
#check, total DK rr == 1
assert_that(are_equal(wtd_mean(dk_sa_dk$crime_rate_rr, w = dk_sa_dk$prop), 1))
## [1] TRUE
#calculate relative rates, corrected rates, and aggregate to single number by country
dk_sa_rates = plyr::ddply(dk_sa_comb, .variables = c("Origin"), .fun = function(block) {
  # browser()
  #initial calcs
  block = dplyr::mutate(block, crime_rate_rr = crime_rate / dk_sa_dk$crime_rate,
                        prop = pop / sum(pop))
  #aggregate
  
  d = tibble::tibble(crime_rate = wtd_mean(block$crime_rate, w = block$prop),
                         crime_rate_rr = crime_rate / dk_sa_dk_total,
                         pop_struc_correction_factor = wtd_mean(dk_sa_dk$crime_rate_rr, w = block$prop),
                         struc_adj_crime_rate = crime_rate / pop_struc_correction_factor,
                         struc_adj_crime_rate_rr = struc_adj_crime_rate / dk_sa_dk_total,
                         sg_crime_rate_rr = wtd_mean(block$crime_rate_rr, w = block$prop),
                         sg_crime_rate = sg_crime_rate_rr * dk_sa_dk_total,
                         male_pct = sum(block$pop[block$Sex == "Men"]) / sum(block$pop),
                         pop = sum(block$pop) / dk_num_years
  )
  
  d
})



# merge with mean age -----------------------------------------------------
dk_main = full_join(dk_sa_rates, dk_mean_age)
## Joining, by = "Origin"
wtd.cors(dk_main[-1]) %>% write_clipboard()
##                             Crime rate Crime rate rr
## Crime rate                        1.00          1.00
## Crime rate rr                     1.00          1.00
## Pop struc correction factor       0.42          0.42
## Struc adj crime rate              0.98          0.98
## Struc adj crime rate rr           0.98          0.98
## Sg crime rate rr                  0.95          0.95
## Sg crime rate                     0.95          0.95
## Male pct                          0.37          0.37
## Pop                              -0.06         -0.06
## Mean age                         -0.60         -0.60
##                             Pop struc correction factor
## Crime rate                                         0.42
## Crime rate rr                                      0.42
## Pop struc correction factor                        1.00
## Struc adj crime rate                               0.24
## Struc adj crime rate rr                            0.24
## Sg crime rate rr                                   0.23
## Sg crime rate                                      0.23
## Male pct                                           0.73
## Pop                                               -0.11
## Mean age                                          -0.62
##                             Struc adj crime rate Struc adj crime rate rr
## Crime rate                                  0.98                    0.98
## Crime rate rr                               0.98                    0.98
## Pop struc correction factor                 0.24                    0.24
## Struc adj crime rate                        1.00                    1.00
## Struc adj crime rate rr                     1.00                    1.00
## Sg crime rate rr                            0.98                    0.98
## Sg crime rate                               0.98                    0.98
## Male pct                                    0.25                    0.25
## Pop                                        -0.03                   -0.03
## Mean age                                   -0.50                   -0.50
##                             Sg crime rate rr Sg crime rate Male pct   Pop
## Crime rate                              0.95          0.95     0.37 -0.06
## Crime rate rr                           0.95          0.95     0.37 -0.06
## Pop struc correction factor             0.23          0.23     0.73 -0.11
## Struc adj crime rate                    0.98          0.98     0.25 -0.03
## Struc adj crime rate rr                 0.98          0.98     0.25 -0.03
## Sg crime rate rr                        1.00          1.00     0.26 -0.05
## Sg crime rate                           1.00          1.00     0.26 -0.05
## Male pct                                0.26          0.26     1.00  0.00
## Pop                                    -0.05         -0.05     0.00  1.00
## Mean age                               -0.43         -0.43    -0.08  0.09
##                             Mean age
## Crime rate                     -0.60
## Crime rate rr                  -0.60
## Pop struc correction factor    -0.62
## Struc adj crime rate           -0.50
## Struc adj crime rate rr        -0.50
## Sg crime rate rr               -0.43
## Sg crime rate                  -0.43
## Male pct                       -0.08
## Pop                             0.09
## Mean age                        1.00
#abbreviate
dk_main$abbrev = pu_translate(dk_main$Origin)

#assert unique
assert_that(!any(duplicated(dk_main$abbrev)))
## [1] TRUE
# plot ---------------------------------------------------------------

#compare adj methods
GG_scatter(dk_main, "struc_adj_crime_rate_rr", "sg_crime_rate_rr", case_names = "Origin") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") + 
  xlab("Relative crime rate.\nAdjusted by population structure (age and sex).") +
  ylab("Relative crime rate.\nCalculated by subgroup comparisons (age and sex).") +
  xlim(0, NA) + ylim(0, NA)

ggsave("figures/dk_adj_compare.png")
## Saving 7 x 5 in image
#compare raw and adj
GG_scatter(dk_main, "crime_rate_rr", "sg_crime_rate_rr", case_names = "Origin") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") + 
  xlab("Relative crime rate.") +
  ylab("Relative crime rate.\nCalculated by subgroup comparisons (age and sex).") + ylim(0, 3.5) +
  xlim(0, NA) + ylim(0, NA)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

ggsave("figures/dk_raw_adj_sg.png")
## Saving 7 x 5 in image
#compare raw and adj2
GG_scatter(dk_main, "crime_rate_rr", "struc_adj_crime_rate_rr", case_names = "Origin") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") + 
  xlab("Relative crime rate.") +
  ylab("Relative crime rate.\nAdjusted by population structure (age and sex).") +
  xlim(0, NA) + ylim(0, NA)

ggsave("figures/dk_raw_adj_ps.png")
## Saving 7 x 5 in image

Merge data

#combine danish and german data
#merge with interational country level data

#keep only countries that are in either DE or DK datasets
v_included_countries = unique(c(dk_main$abbrev, de_main$abbrev))

#assert no NA
assert_that(!any(is.na(v_included_countries)))
## [1] TRUE
#merge
d_main = dplyr::full_join(de_main %>% df_add_affix(prefix = "de_"),
                          dk_main %>% df_add_affix(prefix = "dk_"),
                          by = c("de_abbrev" = "dk_abbrev"))

#standard EN names
d_main$Country_EN = pu_translate(d_main$de_abbrev, reverse = T)

#use as rownames
rownames(d_main) = d_main$Country_EN

#mega
d_mega_all = read.csv("data/Megadataset_v2.0n.csv", row.names = 1, sep = ";")

#subset mega
d_mega = d_mega_all[d_main$de_abbrev, ]

Recode data

#copy wanted variables
d_main$IQ = d_mega$LV2012estimatedIQ
d_main$Muslim = d_mega$IslamPewResearch2010
d_main$lat = d_mega$lat
d_main$lon = d_mega$lon

#fill in some missing values
d_main["Occupied Palestinian Territory", c("lat", "lon")] = c(31.9, 35.2)
#based on Ramallah, https://en.wikipedia.org/wiki/Ramallah

d_main["Czechslovakia", c("IQ", "Muslim", "lat", "lon")] = c(98.6, 0, 50.083333, 14.416667)
#2/3 weights to Czech IQ because this was their fraction of pop, 1/3 to slovakia.
#0 muslims
#Prague lat lon

d_main["USSR", c("IQ", "Muslim", "lat", "lon")] = c(96.6, .05, 55.75, 37.616667)
#Russia IQ
#few Muslims because immigrants prob came from western part
#lat lon from Moscow

d_main["Yugoslavia", c("Muslim", "lat", "lon")] = c(0.06 + .037 * .5879 + .0014, 44.816667, 20.466667)
#Muslim from ethnic guess: 100% of Bosniaks (0.06) + 59% of Albanians (.037) + 100% of Turks (0.014)
#https://en.wikipedia.org/wiki/Demographics_of_the_Socialist_Federal_Republic_of_Yugoslavia
#https://en.wikipedia.org/wiki/Albanians#Religion
#lat lon from Serbia (Belgrade)

d_main["Serbia & Montenegro", c("IQ", "Muslim")] = c(89, .19)
#Muslim from wiki
#https://en.wikipedia.org/wiki/Demographics_of_Serbia_and_Montenegro
#IQ from Serbia + reduce a bit because Albania is 82!


#distance to Germany
v_row_germany = which(d_main$Country_EN == "Germany")
m_dists = distm(x = matrix(c(d_main$lon, d_main$lat), ncol = 2))
d_main$distance_to_Germany = m_dists[v_row_germany, ]

Main results

### Main analyses


# Compare DK DE -----------------------------------------------------------

GG_scatter(d_main, "dk_crime_rate_rr", "de_crime_rate_rr") +
  xlab("Relative crime rate, Denmark") +
  ylab("Relative crime rate, Germany") +
  scale_x_continuous(breaks = seq(0, 20 ,.5), limits = c(0, NA)) +
  scale_y_continuous(breaks = 0:20)

ggsave("figures/rr_compare.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "dk_sg_crime_rate_rr", "de_crime_rate_adj_rr") +
  xlab("Relative crime rate, Denmark\nAdjusted for age and sex") +
  ylab("Relative crime rate, Germany\nAdjusted for age and sex") +
  scale_x_continuous(breaks = seq(0, 20 ,.5), limits = c(0, NA)) +
  scale_y_continuous(breaks = 0:20)

ggsave("figures/rr_compare_adj.png")
## Saving 7 x 5 in image
# correlation plots -------------------------------------------------------
#simple correlational analysis

GG_scatter(d_main, "IQ", "de_crime_rate_rr", case_names = d_main$Country_EN) +
  ylab("Relative crime rate 2012-2015") +
  xlab("National IQ (Lynn and Vanhanen 2012 dataset)") +
  scale_x_continuous(breaks = seq(0, 200, by = 5)) +
  scale_y_continuous(breaks = 0:20)

ggsave("figures/de_IQ_crime.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "Muslim", "de_crime_rate_rr", case_names = d_main$Country_EN) +
  ylab("Relative crime rate 2012-2015") +
  xlab("Percentage of Muslims in country of origin (Pew Research 2012 data)") +
  scale_x_continuous(labels = scales::percent, breaks = seq(0, 1, by = .1)) +
  scale_y_continuous(breaks = 0:20)

ggsave("figures/de_muslim_crime.png")
## Saving 7 x 5 in image
#weights
max(d_main$de_pop, na.rm=T) / min(d_main$de_pop, na.rm=T)
## [1] 3e+05
max(d_main$de_pop %>% sqrt, na.rm=T) / min(d_main$de_pop %>% sqrt, na.rm=T)
## [1] 549
#intercors
de_cors_wtd = wtd.cors(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr",  "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")], weight = d_main$de_pop %>% sqrt)
de_cors = wtd.cors(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")])

combine_upperlower(de_cors, de_cors_wtd) %>% write_clipboard(print = T, clean_names = T)
##                     De crime rate rr Dk crime rate rr    IQ Muslim
## De crime rate rr                                 0.66 -0.53   0.49
## Dk crime rate rr                0.69                  -0.49   0.76
## IQ                             -0.67            -0.68        -0.44
## Muslim                          0.49             0.84 -0.59       
## De mean age                    -0.68            -0.54  0.66  -0.47
## De male pct                     0.33             0.44 -0.29   0.39
## Distance to Germany             0.17             0.15 -0.41   0.16
##                     De mean age De male pct Distance to Germany
## De crime rate rr          -0.53        0.39               -0.11
## Dk crime rate rr          -0.45        0.55               -0.17
## IQ                         0.58       -0.29               -0.29
## Muslim                    -0.45        0.49               -0.01
## De mean age                           -0.22               -0.25
## De male pct               -0.24                           -0.23
## Distance to Germany       -0.47       -0.16
# rates of comparatives -------------------------------------------------------------------

dplyr::filter(d_main, de_Western_neighbor == T)[c("de_crime_rate_rr")] %>% unlist() %>% wtd_mean()
## [1] 1.7
dplyr::filter(d_main, de_NW_European == T)[c("de_crime_rate_rr")] %>% unlist() %>% wtd_mean()
## [1] 1.6
# regressions --------------------------------------------------------------

#simple
fit = lm("de_crime_rate_rr ~ IQ + Muslim", data = d_main) %>% MOD_summary(runs = 200)
fit[[1]] %>% write_clipboard()
##         Beta   SE CI lower CI upper
## IQ     -0.39 0.10    -0.59    -0.19
## Muslim  0.31 0.10     0.11     0.51
#with controls
fit_controls = lm("de_crime_rate_rr ~ IQ + Muslim + de_male_pct + de_mean_age + distance_to_Germany", data = d_main) %>% MOD_summary(runs = 200)
fit_controls[[1]] %>% write_clipboard()
##                      Beta   SE CI lower CI upper
## IQ                  -0.30 0.11    -0.52    -0.09
## Muslim               0.15 0.11    -0.06     0.36
## De male pct          0.10 0.10    -0.10     0.30
## De mean age         -0.32 0.11    -0.53    -0.10
## Distance to Germany -0.23 0.09    -0.42    -0.05
# Danish correlations -----------------------------------------------------
wtd.cors(d_main[c("dk_crime_rate_rr",
                  "dk_sg_crime_rate_rr",
                  "dk_struc_adj_crime_rate",
                  "dk_male_pct",
                  "dk_mean_age",
                  "IQ",
                  "Muslim")]) %>% 
  write_clipboard()
##                         Dk crime rate rr Dk sg crime rate rr
## Dk crime rate rr                    1.00                0.95
## Dk sg crime rate rr                 0.95                1.00
## Dk struc adj crime rate             0.98                0.98
## Dk male pct                         0.37                0.26
## Dk mean age                        -0.60               -0.43
## IQ                                 -0.49               -0.41
## Muslim                              0.76                0.67
##                         Dk struc adj crime rate Dk male pct Dk mean age
## Dk crime rate rr                           0.98        0.37       -0.60
## Dk sg crime rate rr                        0.98        0.26       -0.43
## Dk struc adj crime rate                    1.00        0.25       -0.50
## Dk male pct                                0.25        1.00       -0.08
## Dk mean age                               -0.50       -0.08        1.00
## IQ                                        -0.47       -0.18        0.51
## Muslim                                     0.73        0.32       -0.46
##                            IQ Muslim
## Dk crime rate rr        -0.49   0.76
## Dk sg crime rate rr     -0.41   0.67
## Dk struc adj crime rate -0.47   0.73
## Dk male pct             -0.18   0.32
## Dk mean age              0.51  -0.46
## IQ                       1.00  -0.44
## Muslim                  -0.44   1.00
# Adjusted German ---------------------------------------------------------

GG_scatter(d_main, "de_crime_rate_rr", "de_crime_rate_adj_rr") +
  scale_x_continuous(breaks = 0:20, name = "Relative crime rate in Germany") +
  scale_y_continuous(breaks = 0:20, name = "Age and sex adjusted relative crime rate in Germany")

ggsave("figures/de_rr_compare.png")
## Saving 7 x 5 in image
#relative
sd(d_main$de_crime_rate_rr, na.rm=T)
## [1] 1.7
sd(d_main$de_crime_rate_adj_rr, na.rm=T)
## [1] 0.92
sd(d_main$dk_crime_rate_rr, na.rm=T)
## [1] 0.86
sd(d_main$dk_struc_adj_crime_rate_rr, na.rm=T)
## [1] 0.65
# International comparison ------------------------------------------------------
#copy data into mega
d_mega_all = d_main %>% 
  set_rownames(d_main$de_abbrev) %>% 
  merge_datasets(d_mega_all)
## Factors were converted to characters.
#international crime correlations
wtd.cors(d_mega_all[c("NorwayViolentCrimeAdjustedOddsRatioSkardhamar2014",
                      "FinlandViolentCrimeAdjustedOddsRatioSkardhamar2014",
                      "de_crime_rate_adj_rr",
                      "dk_struc_adj_crime_rate_rr",
                      "DutchCrimeAll")]) %>% 
  MAT_half() %>% 
  psych::describe() %>% 
  as.matrix()
##    vars  n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 10 0.72 0.14   0.76    0.75 0.12 0.42 0.85  0.43 -0.9    -0.67
##       se
## X1 0.046
pairwiseCount(d_mega_all[c("NorwayViolentCrimeAdjustedOddsRatioSkardhamar2014",
                      "FinlandViolentCrimeAdjustedOddsRatioSkardhamar2014",
                      "de_crime_rate_adj_rr",
                      "dk_struc_adj_crime_rate_rr",
                      "DutchCrimeAll")]) %>% 
  MAT_half() %>% 
  psych::describe() %>% 
  as.matrix()
##    vars  n mean sd median trimmed mad min max range skew kurtosis se
## X1    1 10   28 16     20      26 5.2  16  60    44  0.9    -0.92  5
# predictor correlations with adjusted data Germany -----------------------

#intercors
wtd.cors(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "de_crime_rate_adj_rr", "dk_struc_adj_crime_rate_rr",  "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")]) %>%
  magrittr::extract(-c(1:4), 1:4) %>% 
  write_clipboard()
##                     De crime rate rr Dk crime rate rr De crime rate adj rr
## IQ                             -0.53            -0.49                -0.46
## Muslim                          0.49             0.76                 0.35
## De mean age                    -0.53            -0.45                -0.37
## De male pct                     0.39             0.55                 0.22
## Distance to Germany            -0.11            -0.17                -0.16
##                     Dk struc adj crime rate rr
## IQ                                       -0.47
## Muslim                                    0.73
## De mean age                              -0.40
## De male pct                               0.47
## Distance to Germany                      -0.17
pairwiseCount(d_main[c("de_crime_rate_rr", "dk_crime_rate_rr", "de_crime_rate_adj_rr", "dk_struc_adj_crime_rate_rr",  "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")]) %>%
  magrittr::extract(-c(1:4), 1:4)
##                     de_crime_rate_rr dk_crime_rate_rr de_crime_rate_adj_rr
## IQ                                83               70                   83
## Muslim                            83               70                   83
## de_mean_age                       83               60                   83
## de_male_pct                       83               60                   83
## distance_to_Germany               83               70                   83
##                     dk_struc_adj_crime_rate_rr
## IQ                                          70
## Muslim                                      70
## de_mean_age                                 60
## de_male_pct                                 60
## distance_to_Germany                         70
#plots
GG_scatter(d_main, "IQ", "de_crime_rate_adj_rr", case_names = d_main$Country_EN) +
  ylab("Relative crime rate 2012-2015. Adjusted for age and sex.") +
  xlab("National IQ (Lynn and Vanhanen 2012 dataset)") +
  scale_x_continuous(breaks = seq(0, 200, by = 5)) +
  scale_y_continuous(breaks = 0:20)

ggsave("figures/de_IQ_crime_adj.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "Muslim", "de_crime_rate_adj_rr", case_names = d_main$Country_EN) +
  ylab("Relative crime rate 2012-2015. Adjusted for age and sex.") +
  xlab("Percentage of Muslims in country of origin (Pew Research 2012 data)") +
  scale_x_continuous(labels = scales::percent, breaks = seq(0, 1, by = .1))

ggsave("figures/de_muslim_crime_adj.png")
## Saving 7 x 5 in image
# regressions with adjusted numbers ---------------------------------------

#simple
fit = lm("de_crime_rate_adj_rr ~ IQ + Muslim", data = d_main) %>% MOD_summary(runs = 200)
fit[[1]] %>% write_clipboard()
##         Beta   SE CI lower CI upper
## IQ     -0.37 0.11    -0.59    -0.15
## Muslim  0.18 0.11    -0.04     0.40
#with controls
fit_controls = lm("de_crime_rate_adj_rr ~ IQ + Muslim + distance_to_Germany", data = d_main) %>% MOD_summary(runs = 200)
fit_controls[[1]] %>% write_clipboard()
##                      Beta   SE CI lower CI upper
## IQ                  -0.45 0.11    -0.67    -0.24
## Muslim               0.14 0.11    -0.08     0.35
## Distance to Germany -0.26 0.10    -0.46    -0.07
# robustness check: no outliers --------------------------------------------------------------
d_no_outliers = d_main %>% filter(!de_abbrev %in% c("GEO", "DZA"))

#cors
wtd.cors(d_no_outliers[c("de_crime_rate_rr", "dk_crime_rate_rr", "de_crime_rate_adj_rr", "dk_struc_adj_crime_rate_rr",  "IQ", "Muslim", "de_mean_age", "de_male_pct", "distance_to_Germany")]) %>%
  magrittr::extract(-c(1:4), 1:4) %>% 
  write_clipboard()
##                     De crime rate rr Dk crime rate rr De crime rate adj rr
## IQ                             -0.62            -0.49                -0.52
## Muslim                          0.53             0.76                 0.36
## De mean age                    -0.60            -0.45                -0.38
## De male pct                     0.40             0.53                 0.20
## Distance to Germany            -0.10            -0.16                -0.17
##                     Dk struc adj crime rate rr
## IQ                                       -0.46
## Muslim                                    0.73
## De mean age                              -0.39
## De male pct                               0.45
## Distance to Germany                      -0.16
#regression
#simple
fit = lm("de_crime_rate_adj_rr ~ IQ + Muslim", data = d_no_outliers) %>% MOD_summary(runs = 200)
fit[[1]] %>% write_clipboard()
##         Beta   SE CI lower CI upper
## IQ     -0.45 0.11    -0.66    -0.24
## Muslim  0.16 0.11    -0.05     0.37
#with controls
fit_controls = lm("de_crime_rate_adj_rr ~ IQ + Muslim + distance_to_Germany", data = d_no_outliers) %>% MOD_summary(runs = 200)
fit_controls[[1]] %>% write_clipboard()
##                      Beta   SE CI lower CI upper
## IQ                  -0.54 0.11    -0.75    -0.33
## Muslim               0.11 0.10    -0.09     0.32
## Distance to Germany -0.29 0.09    -0.48    -0.10
# robustness check: log-transform outcome ---------------------------------
logp1 = function(x) log(x + 1)

#normality
GG_denhist(d_main$de_crime_rate_adj_rr, vline = NULL) +
  xlab("Relative crime rate, adjusted for age and sex")
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 10 rows containing non-finite values (stat_bin).

## Warning: Removed 10 rows containing non-finite values (stat_density).

ggsave("figures/de_rr_adj_dist.png")
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 10 rows containing non-finite values (stat_bin).

## Warning: Removed 10 rows containing non-finite values (stat_density).
#cors
wtd.cors(d_main$de_crime_rate_adj_rr %>% logp1,
         d_main$de_crime_rate_adj_rr)
##      [,1]
## [1,] 0.97
wtd.cors(d_no_outliers$de_crime_rate_adj_rr %>% logp1,
         d_no_outliers$de_crime_rate_adj_rr)
##      [,1]
## [1,] 0.99
# growth and crime --------------------------------------------------------

GG_scatter(d_main, "de_growth", "de_crime_rate_adj_rr")

cor.test(d_main$de_growth, d_main$de_crime_rate_adj_rr, method = "spearman", use = "pair")
## 
##  Spearman's rank correlation rho
## 
## data:  d_main$de_growth and d_main$de_crime_rate_adj_rr
## S = 72932, p-value = 0.03
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##  rho 
## 0.23

Meta

#data out
write_rds(d_main, "data/main.rds")
write_csv(d_main, "data/main.csv")

#versions
write_sessioninfo("session_info.txt")
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.3.1       kirkegaard_2018.05 metafor_2.1-0     
##  [4] Matrix_1.2-17      magrittr_1.5       assertthat_0.2.1  
##  [7] forcats_0.4.0      purrr_0.3.2        tibble_2.1.3      
## [10] tidyverse_1.2.1    dkstat_0.08        scales_1.0.0      
## [13] geosphere_1.5-10   weights_1.0        mice_3.6.0        
## [16] gdata_2.18.0       Hmisc_4.2-0        ggplot2_3.2.1     
## [19] Formula_1.2-3      survival_2.44-1.1  lattice_0.20-38   
## [22] readr_1.3.1        psych_1.8.12       dplyr_0.8.3       
## [25] plyr_1.8.4         tidyr_0.8.3        stringr_1.4.0     
## [28] pacman_0.5.1      
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-141        lubridate_1.7.4     RColorBrewer_1.1-2 
##  [4] httr_1.4.1          tools_3.6.1         backports_1.1.4    
##  [7] R6_2.4.0            rpart_4.1-15        lazyeval_0.2.2     
## [10] colorspace_1.4-1    jomo_2.6-9          nnet_7.3-12        
## [13] withr_2.1.2         sp_1.3-1            tidyselect_0.2.5   
## [16] gridExtra_2.3       mnormt_1.5-5        curl_4.0           
## [19] compiler_3.6.1      lsr_0.5             cli_1.1.0          
## [22] rvest_0.3.4         htmlTable_1.13.1    xml2_1.2.2         
## [25] labeling_0.3        checkmate_1.9.4     multilevel_2.6     
## [28] digest_0.6.20       foreign_0.8-72      minqa_1.2.4        
## [31] rmarkdown_1.14      base64enc_0.1-3     pkgconfig_2.0.2    
## [34] htmltools_0.3.6     lme4_1.1-21         htmlwidgets_1.3    
## [37] rlang_0.4.0         rstudioapi_0.10     generics_0.0.2     
## [40] jsonlite_1.6        gtools_3.8.1        acepack_1.4.1      
## [43] Rcpp_1.0.2          munsell_0.5.0       stringi_1.4.3      
## [46] yaml_2.2.0          MASS_7.3-51.4       psychometric_2.2   
## [49] grid_3.6.1          parallel_3.6.1      mitml_0.3-7        
## [52] crayon_1.3.4        haven_2.1.1         splines_3.6.1      
## [55] hms_0.5.0           zeallot_0.1.0       knitr_1.24         
## [58] pillar_1.4.2        boot_1.3-23         pan_1.6            
## [61] glue_1.3.1          evaluate_0.14       latticeExtra_0.6-28
## [64] data.table_1.12.2   modelr_0.1.5        vctrs_0.2.0        
## [67] nloptr_1.2.1        cellranger_1.1.0    gtable_0.3.0       
## [70] xfun_0.8            broom_0.5.2         cluster_2.1.0