R data analysis for:
options(digits = 2)
library(pacman)
p_load(kirkegaard, readr, dplyr, rms, googlesheets4, tmap, sf, broom, readxl, writexl)
#default theme
theme_set(theme_bw())
#redefine describe
describe = function(...) psych::describe(...) %>% as.matrix()
#read data from gdrive
#suspect counts
#combined data
crime = read_sheet("https://docs.google.com/spreadsheets/d/1GI_XW5Icj-5cvWA0MZNKZW3VxsYWcmUSZuhseYdsFFA/edit#gid=0", sheet = 1) %>% df_legalize_names() %>% {set_colnames(., names(.) %>% str_to_lower())}
## Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## https://gargle.r-lib.org/articles/non-interactive-auth.html
## The googlesheets4 package is using a cached token for the.dfx@gmail.com.
## Reading from 'Public preferences and reality: crime rates among 70 immigrant groups in the Netherlands (data)'
## Range "'Combined_crime_rates_long'"
#by generation
crime_gen = read_sheet("https://docs.google.com/spreadsheets/d/1GI_XW5Icj-5cvWA0MZNKZW3VxsYWcmUSZuhseYdsFFA/edit#gid=0", sheet = 2) %>% df_legalize_names() %>% {set_colnames(., names(.) %>% str_to_lower())}
## Reading from 'Public preferences and reality: crime rates among 70 immigrant groups in the Netherlands (data)'
## Range "'Combined_crime_rates_by_generation_long'"
#Dutch native counts
natives = read_sheet("https://docs.google.com/spreadsheets/d/1GI_XW5Icj-5cvWA0MZNKZW3VxsYWcmUSZuhseYdsFFA/edit#gid=0", sheet = 3) %>% df_legalize_names() %>% {set_colnames(., names(.) %>% str_to_lower())}
## Reading from 'Public preferences and reality: crime rates among 70 immigrant groups in the Netherlands (data)'
## Range "'Dutch_rates'"
#national data from 2017 Kirkegaard
#https://openpsych.net/paper/53
natl = read_csv("data/Megadataset_v2.0p.csv") %>%
rename(ISO = X1)
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## LVnotes = col_character(),
## Names = col_character(),
## Capital = col_character(),
## se_Land = col_character(),
## se_abbrev = col_character(),
## de_Country = col_character(),
## de_abbrev = col_character(),
## de_Country_EN = col_character(),
## de_Western_neighbor = col_logical(),
## de_NW_European = col_logical(),
## dk_Origin = col_character(),
## Country_EN = col_character()
## )
## See spec(...) for full column specifications.
#Rindermann IQs
rindermann = read_excel("data/cognitive_capitalism_appendix.xlsx") %>%
df_legalize_names() %>%
mutate(IQ_Rindermann = CA_totc,
ISO = pu_translate(Country))
## No exact match: Antigua-Barbuda
## No exact match: Benin (Dahomey)
## No exact match: Central Afric R
## No exact match: Congo (Brazz)
## No exact match: Dominican Repub
## No exact match: Equat. Guinea
## No exact match: Korea-North
## No exact match: Korea-South
## No exact match: Nether Antilles
## No exact match: Papua N-Guinea
## No exact match: Sao Tome/Princi
## No exact match: St. Kitts & Nevis
## No exact match: St. Vincent/Gre
## No exact match: Trinidad Tobago
## No exact match: United Arab Emi
## Best fuzzy match found: Antigua-Barbuda -> Antigua & Barbuda with distance 3.00
## Best fuzzy match found: Benin (Dahomey) -> Benin (ex Dahomey) with distance 3.00
## Best fuzzy match found: Central Afric R -> Central Africa with distance 2.00
## Best fuzzy match found: Congo (Brazz) -> Congo (Brazz rep) with distance 4.00
## Best fuzzy match found: Dominican Repub -> Dominican Rep. with distance 2.00
## Best fuzzy match found: Equat. Guinea -> Equ. Guinea with distance 2.00
## Best fuzzy match found: Korea-North -> Korea North with distance 1.00
## Best fuzzy match found: Korea-South -> Korea South with distance 1.00
## Best fuzzy match found: Nether Antilles -> Neth. Antilles with distance 2.00
## Best fuzzy match found: Papua N-Guinea -> Papua New Guinea with distance 3.00
## Best fuzzy match found: Sao Tome/Princi -> Sao Tome & Principe with distance 5.00
## Best fuzzy match found: St. Kitts & Nevis -> St. Kitts en Nevis with distance 2.00
## Best fuzzy match found: St. Vincent/Gre -> St. Vincent with distance 4.00
## Best fuzzy match found: Trinidad Tobago -> Trinidad & Tobago with distance 2.00
## Best fuzzy match found: United Arab Emi -> United Arab Emirates with distance 5.00
#data from German study, 2017, Kirkegaard Becker
#https://openpsych.net/paper/50
natl2 = read_rds("data/main.rds") %>%
mutate(ISO = de_abbrev)
#old dutch data
olddutch = read_rds("data/Dutch crime study 2015.rds") %>%
rownames_to_column("ISO")
#UN regions
chess2019 = read_rds("data/chess_kirkegaard2019.rds")
#preference data
prefs = read_sheet("https://docs.google.com/spreadsheets/d/1BE1GoWvcEojhUNs5r8Dy05cvEk1aFYSfE0dJqybdGhM/edit#gid=508472659") %>% df_legalize_names()
## Reading from 'Enquête immigratiebeleid (Responses)'
##
#exclusions
prefs_excl = read_sheet("https://docs.google.com/spreadsheets/d/1BE1GoWvcEojhUNs5r8Dy05cvEk1aFYSfE0dJqybdGhM/edit#gid=508472659", sheet = 2) %>% df_legalize_names()
## Reading from 'Enquête immigratiebeleid (Responses)'
## Range "'Sheet2'"
#remove rejected rows
prefs_orig = prefs
prefs = prefs %>% filter(Wat_is_uw_Prolific_ID %in% (prefs_excl %>% filter(is.na(rejected)) %>% pull(Wat_is_uw_Prolific_ID)))
prefs_excl$rejected %>% sum(na.rm = T)
## [1] 20
prefs %<>% miss_filter(missing = 1)
#mutate variables for each dataset
#calculate rates, and relative rates to natives
natives %<>% mutate(
suspect_rate = total_registered_suspects / population_size,
arrest_rate = total_arrested_suspects / population_size,
ISO = origin %>% pu_translate(),
year = as.numeric(year)
)
crime %<>% mutate(
suspect_rate = total_registered_suspects / population_size,
arrest_rate = total_arrested_suspects / population_size,
ISO = origin %>% pu_translate(),
year = as.numeric(year)
) %>%
#add natives
bind_rows(natives) %>%
#relative rates
plyr::ddply("year", function(dd) {
dd$suspect_rate_RR = dd$suspect_rate / natives$suspect_rate[natives$year == (dd$year[1])]
dd$arrest_rate_RR = dd$arrest_rate / natives$arrest_rate[natives$year == (dd$year[1])]
dd
})
## No exact match: Former Czechoslovakia
## No exact match: Congo (Democratic Republic)
## No exact match: Tunesia
## Best fuzzy match found: Former Czechoslovakia -> Czechoslovakia with distance 7.00
## Best fuzzy match found: Congo (Democratic Republic) -> Congo (Zaire Democratic Republic) with distance 6.00
## Best fuzzy match found: Tunesia -> Tunisia with distance 1.00
crime_gen %<>% mutate(
suspect_rate = total_registered_suspects / population_size,
arrest_rate = total_arrested_suspects / population_size,
ISO = origin %>% pu_translate(),
year = as.numeric(year)
) %>%
plyr::ddply("year", function(dd) {
dd$suspect_rate_RR = dd$suspect_rate / natives$suspect_rate[natives$year == (dd$year[1])]
dd$arrest_rate_RR = dd$arrest_rate / natives$arrest_rate[natives$year == (dd$year[1])]
dd
})
## No exact match: Congo (Democratic Republic)
## No exact match: Former Czechoslovakia
## No exact match: Tunesia
## Best fuzzy match found: Congo (Democratic Republic) -> Congo (Zaire Democratic Republic) with distance 6.00
## Best fuzzy match found: Former Czechoslovakia -> Czechoslovakia with distance 7.00
## Best fuzzy match found: Tunesia -> Tunisia with distance 1.00
#average across years
#then join with other natl data
crime_avg = crime %>%
group_by(origin) %>%
dplyr::summarise(
pop = wtd_mean(population_size),
suspect_rate = wtd_mean(suspect_rate, population_size),
arrest_rate = wtd_mean(arrest_rate, population_size),
ISO = ISO[1]
)
crime_gen_avg = crime_gen %>%
plyr::ddply(c("origin", "generation"), function(dd) {
tibble(
pop = wtd_mean(dd$population_size),
suspect_rate = wtd_mean(dd$suspect_rate, dd$population_size),
arrest_rate = wtd_mean(dd$arrest_rate, dd$population_size),
)
}) %>%
gather(key = stat, value = value, pop, suspect_rate, arrest_rate) %>%
unite(col = stat, stat, generation) %>%
mutate(stat = stat %>% str_replace("first generation", "1st") %>% str_replace("second generation", "2nd")) %>%
spread(key = stat, value = value) %>%
mutate(
prop_2nd = pop_2nd / (pop_1st + pop_2nd),
ISO = pu_translate(origin)
)
## No exact match: Congo (Democratic Republic)
## No exact match: Former Czechoslovakia
## No exact match: Tunesia
## Best fuzzy match found: Congo (Democratic Republic) -> Congo (Zaire Democratic Republic) with distance 6.00
## Best fuzzy match found: Former Czechoslovakia -> Czechoslovakia with distance 7.00
## Best fuzzy match found: Tunesia -> Tunisia with distance 1.00
#RRs
crime_avg %<>% mutate(
arrest_rate_RR = arrest_rate / crime_avg$arrest_rate[crime_avg$ISO=="NLD"],
suspect_rate_RR = suspect_rate / crime_avg$suspect_rate[crime_avg$ISO=="NLD"]
)
crime_gen_avg %<>% mutate(
arrest_rate_1st_RR = arrest_rate_1st / crime_avg$arrest_rate[crime_avg$ISO=="NLD"],
arrest_rate_2nd_RR = arrest_rate_2nd / crime_avg$arrest_rate[crime_avg$ISO=="NLD"],
suspect_rate_1st_RR = suspect_rate_1st / crime_avg$suspect_rate[crime_avg$ISO=="NLD"],
suspect_rate_2nd_RR = suspect_rate_2nd / crime_avg$suspect_rate[crime_avg$ISO=="NLD"]
)
#join to one main dataset
d = full_join(crime_avg, crime_gen_avg %>% select(-origin), by = "ISO") %>%
#join with national datasets
left_join(natl %>% select(ISO, IslamPewResearch2010, LV2012estimatedIQ), by = "ISO") %>%
left_join(natl2 %>% select(ISO, Muslim, IQ), by = "ISO") %>%
left_join(rindermann %>% select(ISO, IQ_Rindermann), by = "ISO") %>%
#join with old dutch data
left_join(olddutch %>% dplyr::select(`12_17men`:YoungPersonsCrimeFactor, ISO), by = "ISO") %>%
#fill in missing data from the other cols
mutate(IQ = miss_fill(IQ, LV2012estimatedIQ),
Muslim = miss_fill(Muslim, IslamPewResearch2010)) %>%
left_join(chess2019 %>% select(ISO, UN_continent, UN_macroregion, UN_region), by = "ISO")
#overlap?
#data with generations does not have Netherlands
crime_avg$ISO[!crime_avg$ISO %in% crime_gen_avg$ISO]
## [1] "NLD"
crime_gen_avg$ISO[!crime_gen_avg$ISO %in% crime_avg$ISO]
## character(0)
#add some missing values
imputed_muslim = c("YUG", "SUN", "CSK", "HKG")
#Yugoslavia
d$UN_continent[d$ISO == "YUG"] = "Europe"
d$UN_region[d$ISO == "YUG"] = "Southern Europe"
d$UN_macroregion[d$ISO == "YUG"] = "Southern Europe"
#Soviet Union
d$UN_continent[d$ISO == "SUN"] = "Europe"
d$UN_region[d$ISO == "SUN"] = "Eastern Europe"
d$UN_macroregion[d$ISO == "SUN"] = "Eastern Europe"
#czechoslovakia
d$UN_continent[d$ISO == "CSK"] = "Europe"
d$UN_region[d$ISO == "CSK"] = "Eastern Europe"
d$UN_macroregion[d$ISO == "CSK"] = "Eastern Europe"
#Hong Kong
d$UN_continent[d$ISO == "HKG"] = "Asia"
d$UN_region[d$ISO == "HKG"] = "Eastern Asia"
d$UN_macroregion[d$ISO == "HKG"] = "Eastern Asia"
#colonial
d$colonial = F
d$colonial[d$ISO %in% c("ANT", "SUR", "IDN")] = T
#prefs
country_vars = names(prefs) %>% str_subset("specifieke_landen") %>% {.[!str_detect(., "Selecteer")]}
country_prefs = prefs[country_vars]
#fix names, translate
colnames(country_prefs) = str_match(colnames(country_prefs), "landen_(.+)")[, 2] %>% pu_translate()
## No exact match: Argentini
## No exact match: Australi
## No exact match: Belgi
## No exact match: Brazili
## No exact match: Democratische_Republiek_van_Congo
## No exact match: Dominicaanse_Republiek
## No exact match: Ethiopi
## No exact match: Filippijnen
## No exact match: Indi
## No exact match: Indonesi
## No exact match: Isra_l
## No exact match: Itali
## No exact match: Voormalig_Joegoslavi
## No exact match: Kaapverdi
## No exact match: Malasia
## No exact match: Nederlandse_Antillen
## No exact match: Nieuw_Zeeland
## No exact match: Roemeni
## No exact match: Sierra_Leone
## No exact match: Somali
## No exact match: Sri_Lanka
## No exact match: Syri
## No exact match: Tunesi
## No exact match: Verenigde_Staten_van_Amerika
## No exact match: Verenigd_Koningkrijk
## No exact match: Zuid_Afrika
## No exact match: Zuid_Korea
## Best fuzzy match found: Argentini -> Argentina with distance 1.00
## Best fuzzy match found: Australi -> Australia with distance 1.00
## Best fuzzy match found: Belgi -> Belgio with distance 1.00
## Best fuzzy match found: Brazili -> Brazil with distance 1.00
## Best fuzzy match found: Democratische_Republiek_van_Congo -> Democratic Republic of Congo with distance 11.00
## Best fuzzy match found: Dominicaanse_Republiek -> Dominicaanse Republiek with distance 1.00
## Best fuzzy match found: Ethiopi -> Ethiopia with distance 1.00
## Best fuzzy match found: Filippijnen -> Filipijnen with distance 1.00
## Best fuzzy match found: Indi -> India with distance 1.00
## Best fuzzy match found: Indonesi -> Indonesia with distance 1.00
## Best fuzzy match found: Isra_l -> Israel with distance 1.00
## Best fuzzy match found: Itali -> Italy with distance 1.00
## Best fuzzy match found: Voormalig_Joegoslavi -> Joegoslavië with distance 11.00
## Best fuzzy match found: Kaapverdi -> Kapp Verde with distance 4.00
## Best fuzzy match found: Malasia -> Malaysia with distance 1.00
## Best fuzzy match found: Nederlandse_Antillen -> Netherlands Antilles with distance 5.00
## Best fuzzy match found: Nieuw_Zeeland -> Nieuw-Zeeland with distance 1.00
## Best fuzzy match found: Roemeni -> Roemenië with distance 1.00
## Best fuzzy match found: Sierra_Leone -> Sierra Leone with distance 1.00
## Best fuzzy match found: Somali -> Somalia with distance 1.00
## Best fuzzy match found: Sri_Lanka -> Sri Lanka with distance 1.00
## Best fuzzy match found: Syri -> Syria with distance 1.00
## Best fuzzy match found: Tunesi -> Tunesië with distance 1.00
## Best fuzzy match found: Verenigde_Staten_van_Amerika -> Verenigde Staten van Amerika with distance 3.00
## Best fuzzy match found: Verenigd_Koningkrijk -> Verenigd Koninkrijk with distance 2.00
## Best fuzzy match found: Zuid_Afrika -> Zuid-Afrika with distance 1.00
## Best fuzzy match found: Zuid_Korea -> Zuid-Korea with distance 1.00
#recode to -1 and +1's
country_prefs_num = country_prefs
for (v in colnames(country_prefs_num)) {
country_prefs_num[[v]] %<>% mapvalues(from = c("We moeten helemaal geen mensen meer uit dit land toelaten.", "We moeten minder mensen uit dit land toelaten.", "We moeten meer mensen uit dit land toelaten.", "We moeten net zoveel mensen uit dit land toelaten als nu."), to = c(1, 1, -1, -1)) %>% as.numeric()
}
#replace values
prefs$last_vote = prefs$Wat_heeft_u_bij_de_afgelopen_Tweede_Kamerverkiezingen_gestemd
prefs$last_vote %<>% mapvalues(from = c("Forum voor Democratie"), to = c("FvD"))
#weights for representative sample wrt. parties
#https://en.wikipedia.org/wiki/2017_Dutch_general_election#Results
party_votes = c(
"VVD" = 21.3,
"PVV" = 13.1,
"CDA" = 12.4,
"D66" = 12.2,
"GroenLinks" = 9.1,
"SP" = 9.1,
"PvdA" = 5.7,
"ChristenUnie" = 3.4,
"Partij voor de Dieren" = 3.2,
"50Plus" = 3.1,
"SGP" = 2.1,
"DENK" = 2.1,
"Forum voor Democratie" = 1.8
)
#calculate weights
#start with 0, for non-parties
weights = rep(0, nrow(prefs))
for (p in names(party_votes)) {
which_in_p = prefs$last_vote == p
weights[which_in_p] = party_votes[p] / sum(which_in_p)
}
#sums match?
#some parties aren't represented in our data, so we cannot adjust for them
sum(party_votes)
## [1] 99
sum(weights)
## [1] 92
#means by country
country_prefs_avg = tibble(
ISO = colnames(country_prefs),
net_opposition_raw = colMeans(country_prefs_num, na.rm = T),
net_opposition = map_dbl(country_prefs_num, ~wtd_mean(., w = weights)),
frac_none = map_dbl(country_prefs, ~wtd_mean(.=="We moeten helemaal geen mensen meer uit dit land toelaten.", w = weights)),
frac_fewer = map_dbl(country_prefs, ~wtd_mean(.=="We moeten minder mensen uit dit land toelaten.", w = weights)),
frac_same = map_dbl(country_prefs, ~wtd_mean(.=="We moeten net zoveel mensen uit dit land toelaten als nu.", w = weights)),
frac_more = map_dbl(country_prefs, ~wtd_mean(.=="We moeten meer mensen uit dit land toelaten.", w = weights))
)
#correlations?
country_prefs_avg[-1] %>% wtd.cors()
## net_opposition_raw net_opposition frac_none frac_fewer
## net_opposition_raw 1.00 0.96 0.85 0.93
## net_opposition 0.96 1.00 0.90 0.96
## frac_none 0.85 0.90 1.00 0.76
## frac_fewer 0.93 0.96 0.76 1.00
## frac_same -0.87 -0.94 -0.89 -0.88
## frac_more -0.46 -0.40 -0.26 -0.45
## frac_same frac_more
## net_opposition_raw -0.866 -0.462
## net_opposition -0.938 -0.399
## frac_none -0.887 -0.259
## frac_fewer -0.880 -0.448
## frac_same 1.000 0.055
## frac_more 0.055 1.000
#join
d = left_join(d, country_prefs_avg, by = "ISO")
#count
nrow(d)
## [1] 71
#years covered
crime$year %>% range()
## [1] 2005 2018
#data total
d$pop %>% sum()
## [1] 6547215
#highest factor diff
d$suspect_rate_RR %>% range() %>%
{
print(.)
.[2] / .[1]}
## [1] 0.22 3.81
## [1] 17
#prefs
d %>% select(net_opposition:frac_more) %>% wtd.cors()
## net_opposition frac_none frac_fewer frac_same frac_more
## net_opposition 1.00 0.90 0.96 -0.938 -0.399
## frac_none 0.90 1.00 0.76 -0.887 -0.259
## frac_fewer 0.96 0.76 1.00 -0.880 -0.448
## frac_same -0.94 -0.89 -0.88 1.000 0.055
## frac_more -0.40 -0.26 -0.45 0.055 1.000
d %>% select(origin, net_opposition:frac_more)
#prefs demographics
prefs %>% filter(last_vote %in% names(party_votes)) %>% pull(last_vote) %>% table2()
#zero cells
(crime$suspect_rate==0) %>% wtd_mean()
## [1] 0.001
(crime$arrest_rate==0) %>% wtd_mean()
## [1] 0.0028
(crime_gen$suspect_rate==0) %>% wtd_mean()
## [1] 0.031
(crime_gen$arrest_rate==0) %>% wtd_mean()
## [1] 0.052
#Muslim and IQ in this sample and elsewhere
d %>% select(Muslim, IQ) %>% wtd.cor()
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight = weight), :
## essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight = weight), :
## essentially perfect fit: summary may be unreliable
## $correlation
## Muslim IQ
## Muslim 1.00 -0.42
## IQ -0.42 1.00
##
## $std.err
## Muslim IQ
## Muslim 2.6e-17 1.1e-01
## IQ 1.1e-01 7.9e-17
##
## $t.value
## Muslim IQ
## Muslim 3.9e+16 -3.9e+00
## IQ -3.9e+00 1.3e+16
##
## $p.value
## Muslim IQ
## Muslim 0.00000 0.00026
## IQ 0.00026 0.00000
d %>% select(Muslim, IQ) %>% pairwiseCount()
## Muslim IQ
## Muslim 71 71
## IQ 71 71
natl %>% select(IslamPewResearch2010, LV2012estimatedIQ) %>% wtd.cor()
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight = weight), :
## essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight = weight), :
## essentially perfect fit: summary may be unreliable
## $correlation
## IslamPewResearch2010 LV2012estimatedIQ
## IslamPewResearch2010 1.00 -0.27
## LV2012estimatedIQ -0.27 1.00
##
## $std.err
## IslamPewResearch2010 LV2012estimatedIQ
## IslamPewResearch2010 3.5e-17 6.9e-02
## LV2012estimatedIQ 6.9e-02 2.1e-17
##
## $t.value
## IslamPewResearch2010 LV2012estimatedIQ
## IslamPewResearch2010 2.8e+16 -3.9e+00
## LV2012estimatedIQ -3.9e+00 4.8e+16
##
## $p.value
## IslamPewResearch2010 LV2012estimatedIQ
## IslamPewResearch2010 0.00000 0.00015
## LV2012estimatedIQ 0.00015 0.00000
natl %>% select(IslamPewResearch2010, LV2012estimatedIQ) %>% pairwiseCount()
## IslamPewResearch2010 LV2012estimatedIQ
## IslamPewResearch2010 232 198
## LV2012estimatedIQ 198 203
#regional codings
d$UN_continent %>% table2()
d$UN_macroregion %>% table2()
#temporal development
suspects_largest_10 = crime %>%
filter(year == 2018) %>%
arrange(-population_size) %>%
pull(origin) %>% head(10)
crime %>%
#largest 10 in 2018
filter(origin %in% suspects_largest_10) %>%
ggplot(aes(year, suspect_rate, color = origin)) +
geom_line() +
scale_x_continuous("Year", breaks = seq(2005, 2018, by = 3)) +
scale_y_continuous("Crime rate")
GG_save("figs/suspect_rate_timeline.png")
#relative rates
crime %>%
#largest 10 in 2018
filter(origin %in% suspects_largest_10) %>%
ggplot(aes(year, suspect_rate_RR, color = origin)) +
geom_line() +
scale_x_continuous("Year", breaks = seq(2005, 2018, by = 3)) +
scale_y_continuous("Crime rate")
GG_save("figs/suspect_rate_RR_timeline.png")
#By generation
suspects_gen_largest_10 = crime_gen %>%
plyr::ddply(c("origin"), function(dd) {
tibble(
origin = dd$origin[1],
pop = dd$population_size %>% sum()
)
}) %>% arrange(-pop) %>% pull(origin) %>% head(10)
crime_gen %>%
#largest 10 in 2018
filter(origin %in% suspects_gen_largest_10) %>%
ggplot(aes(year, suspect_rate, color = origin, linetype = generation)) +
geom_line() +
scale_x_continuous("Year", breaks = seq(2005, 2018, by = 3)) +
scale_y_continuous("Crime rate")
GG_save("figs/suspect_rate_timeline_gen.png")
#population level
d_years = crime %>%
group_by(year) %>%
summarise(suspects = sum(total_registered_suspects),
arrests = sum(total_arrested_suspects),
population = sum(population_size),
suspect_rate = suspects/population,
arrest_rate = arrests/population
) %>%
mutate(
suspect_rate_RR = suspect_rate/first(suspect_rate),
arrest_rate_RR = arrest_rate/first(arrest_rate)
)
d_years
#within each
d_origins = crime %>%
arrange(year) %>%
group_by(origin) %>%
summarise(
first = first(suspect_rate),
last = last(suspect_rate),
RR = last / first)
d_origins
#descriptives
d_origins$RR[d_origins$origin != "The Netherlands"] %>% describe()
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 70 0.5 0.23 0.48 0.48 0.14 0.14 2 1.9 3.8 22 0.028
d_origins$RR[d_origins$origin == "The Netherlands"]
## [1] 0.45
d_origins[-1] %>% wtd.cors()
## first last RR
## first 1.000 0.93 0.023
## last 0.932 1.00 0.291
## RR 0.023 0.29 1.000
#scattersplots
GG_scatter(d, "IQ", "suspect_rate_RR", case_names = "origin") +
xlab("Origin country national IQ")
GG_save("figs/IQ_crime.png")
GG_scatter(d, "Muslim", "suspect_rate_RR", case_names = "origin", text_pos = "tr") +
xlab("Origin country Muslim% of population")
GG_save("figs/Muslim_crime.png")
#with weights
GG_scatter(d, "IQ", "suspect_rate_RR", case_names = "origin", weights = d$pop %>% sqrt()) +
xlab("Origin country national IQ")
GG_save("figs/IQ_crime_wt.png")
GG_scatter(d, "Muslim", "suspect_rate_RR", case_names = "origin", text_pos = "tr", weights = d$pop %>% sqrt()) +
xlab("Origin country Muslim% of population")
GG_save("figs/Muslim_crime_wt.png")
#correlations
combine_upperlower(
wtd.cors(d %>% select(suspect_rate, suspect_rate_1st, suspect_rate_2nd, Muslim, IQ, net_opposition, prop_2nd)),
wtd.cors(d %>% select(suspect_rate, suspect_rate_1st, suspect_rate_2nd, Muslim, IQ, net_opposition, prop_2nd), weight = d$pop %>% sqrt())
)
## suspect_rate suspect_rate_1st suspect_rate_2nd Muslim IQ
## suspect_rate NA 0.96 0.897 0.44 -0.66
## suspect_rate_1st 0.946 NA 0.797 0.39 -0.66
## suspect_rate_2nd 0.906 0.76 NA 0.43 -0.59
## Muslim 0.452 0.27 0.432 NA -0.42
## IQ -0.635 -0.56 -0.552 -0.52 NA
## net_opposition 0.458 0.41 0.527 0.68 -0.63
## prop_2nd -0.021 -0.19 -0.024 0.10 0.18
## net_opposition prop_2nd
## suspect_rate 0.57 -0.084
## suspect_rate_1st 0.58 -0.239
## suspect_rate_2nd 0.58 -0.089
## Muslim 0.66 -0.070
## IQ -0.71 0.299
## net_opposition NA -0.477
## prop_2nd -0.43 NA
#regression
ols(suspect_rate_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
## Linear Regression Model
##
## ols(formula = suspect_rate_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F),
## weights = d$pop %>% sqrt())
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 71 LR chi2 39.22 R2 0.424
## sigma9.5523 d.f. 2 R2 adj 0.407
## d.f. 68 Pr(> chi2) 0.0000 g 0.789
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.7953 -0.6496 -0.1990 0.1089 2.2776
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1764 0.0965 1.83 0.0719
## IQ -0.6065 0.1190 -5.10 <0.0001
## Muslim 0.1607 0.1020 1.58 0.1198
##
ols(suspect_rate_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F))
## Linear Regression Model
##
## ols(formula = suspect_rate_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 71 LR chi2 44.65 R2 0.467
## sigma0.7409 d.f. 2 R2 adj 0.451
## d.f. 68 Pr(> chi2) 0.0000 g 0.781
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.53838 -0.46878 -0.03552 0.28373 2.48775
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0000 0.0879 0.00 1.0000
## IQ -0.5738 0.0976 -5.88 <0.0001
## Muslim 0.2011 0.0976 2.06 0.0432
##
#compare models
lrtest(
ols(suspect_rate_RR ~ IQ, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(suspect_rate_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
)
##
## Model 1: suspect_rate_RR ~ IQ
## Model 2: suspect_rate_RR ~ IQ + Muslim
##
## L.R. Chisq d.f. P
## 2.55 1.00 0.11
#by generation
ols(suspect_rate_1st_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
## Frequencies of Missing Values Due to Each Variable
## suspect_rate_1st_RR IQ Muslim (weights)
## 1 0 0 0
##
## Linear Regression Model
##
## ols(formula = suspect_rate_1st_RR ~ IQ + Muslim, data = d %>%
## df_standardize(messages = F), weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 70 LR chi2 26.41 R2 0.314
## sigma9.6951 d.f. 2 R2 adj 0.294
## d.f. 67 Pr(> chi2) 0.0000 g 0.733
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.8586 -0.5475 -0.2512 0.1496 2.9419
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1621 0.1063 1.52 0.1320
## IQ -0.6356 0.1314 -4.84 <0.0001
## Muslim 0.0204 0.1037 0.20 0.8445
##
ols(suspect_rate_2nd_RR ~ IQ + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
## Frequencies of Missing Values Due to Each Variable
## suspect_rate_2nd_RR IQ Muslim (weights)
## 1 0 0 0
##
## Linear Regression Model
##
## ols(formula = suspect_rate_2nd_RR ~ IQ + Muslim, data = d %>%
## df_standardize(messages = F), weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 70 LR chi2 29.69 R2 0.346
## sigma10.2171 d.f. 2 R2 adj 0.326
## d.f. 67 Pr(> chi2) 0.0000 g 0.782
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.8801 -0.5380 -0.1417 0.1111 2.2376
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1338 0.1120 1.19 0.2367
## IQ -0.5586 0.1384 -4.04 0.0001
## Muslim 0.2240 0.1092 2.05 0.0442
##
#combined model
#long form the generation data
crime_gen2= crime_gen %>%
left_join(d %>% select(ISO, IQ, Muslim), by = "ISO")
ols(suspect_rate ~ IQ + Muslim + generation, data = crime_gen2 %>% df_standardize(messages = F), weights = crime_gen2$population_size %>% sqrt())
## Linear Regression Model
##
## ols(formula = suspect_rate ~ IQ + Muslim + generation, data = crime_gen2 %>%
## df_standardize(messages = F), weights = crime_gen2$population_size %>%
## sqrt())
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1960 LR chi2 742.19 R2 0.315
## sigma7.8267 d.f. 3 R2 adj 0.314
## d.f. 1956 Pr(> chi2) 0.0000 g 0.713
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.1639 -0.5645 -0.1978 0.2104 3.7282
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1405 0.0260 -5.41 <0.0001
## IQ -0.5053 0.0242 -20.87 <0.0001
## Muslim 0.1161 0.0192 6.05 <0.0001
## generation=second generation 0.5387 0.0388 13.87 <0.0001
##
ols(suspect_rate ~ IQ + Muslim + generation, data = crime_gen2 %>% df_standardize(messages = F))
## Linear Regression Model
##
## ols(formula = suspect_rate ~ IQ + Muslim + generation, data = crime_gen2 %>%
## df_standardize(messages = F))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1960 LR chi2 878.61 R2 0.361
## sigma0.7998 d.f. 3 R2 adj 0.360
## d.f. 1956 Pr(> chi2) 0.0000 g 0.686
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.95544 -0.43459 -0.08021 0.32838 3.82923
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.2851 0.0255 -11.16 <0.0001
## IQ -0.4464 0.0199 -22.45 <0.0001
## Muslim 0.1535 0.0199 7.72 <0.0001
## generation=second generation 0.5701 0.0361 15.78 <0.0001
##
#correlations with old data
wtd.cors(d %>% select(suspect_rate, suspect_rate_1st, suspect_rate_2nd,
arrest_rate, arrest_rate_1st, arrest_rate_2nd,
`12_17men`:YoungPersonsCrimeFactor)) %>%
.[-c(1:6), c(1:6)]
## suspect_rate suspect_rate_1st suspect_rate_2nd
## 12_17men 0.82 0.76 0.87
## 12_17women 0.75 0.70 0.71
## 18_24men 0.89 0.80 0.91
## 18_24women 0.74 0.75 0.68
## 25_44men 0.90 0.89 0.81
## 25_44women 0.78 0.86 0.59
## 45_79men 0.78 0.82 0.66
## 45_79women 0.50 0.57 0.41
## All 0.92 0.93 0.82
## 12_17_1gen 0.89 0.92 0.79
## 18_24_1gen 0.93 0.93 0.85
## 25_44_1gen 0.87 0.96 0.69
## 45_79_1gen 0.72 0.87 0.51
## All_1gen 0.89 0.97 0.72
## 12_17_2gen 0.85 0.74 0.90
## 18_24_2gen 0.83 0.63 0.94
## 25_44_2gen 0.79 0.59 0.91
## 45_79_2gen 0.27 0.30 0.15
## All_2gen 0.81 0.60 0.92
## YoungPersonsCrimeFactor 0.96 0.89 0.95
## arrest_rate arrest_rate_1st arrest_rate_2nd
## 12_17men 0.83 0.77 0.82
## 12_17women 0.74 0.69 0.65
## 18_24men 0.90 0.81 0.88
## 18_24women 0.74 0.75 0.55
## 25_44men 0.92 0.92 0.76
## 25_44women 0.79 0.87 0.47
## 45_79men 0.78 0.82 0.62
## 45_79women 0.51 0.58 0.34
## All 0.93 0.94 0.71
## 12_17_1gen 0.90 0.91 0.77
## 18_24_1gen 0.94 0.93 0.83
## 25_44_1gen 0.89 0.97 0.67
## 45_79_1gen 0.75 0.89 0.49
## All_1gen 0.91 0.98 0.70
## 12_17_2gen 0.83 0.72 0.89
## 18_24_2gen 0.80 0.61 0.94
## 25_44_2gen 0.78 0.56 0.93
## 45_79_2gen 0.28 0.32 0.13
## All_2gen 0.79 0.57 0.94
## YoungPersonsCrimeFactor 0.96 0.88 0.94
#scatterplots
GG_scatter(d, "suspect_rate_RR", "net_opposition", case_names = "origin", text_pos = "tl")
GG_save("figs/suspect_opposition.png")
GG_scatter(d, "suspect_rate_RR", "net_opposition", case_names = "origin", text_pos = "tl", weights = d$pop %>% sqrt())
GG_save("figs/suspect_opposition_wt.png")
#without colonials
d %>% filter(!colonial) %>% select(suspect_rate, net_opposition) %>% wtd.cors()
## suspect_rate net_opposition
## suspect_rate 1.00 0.66
## net_opposition 0.66 1.00
d %>% filter(!colonial) %>% select(suspect_rate, net_opposition, pop) %>% {wtd.cors(., weight = .$pop %>% sqrt())}
## suspect_rate net_opposition pop
## suspect_rate 1.00 0.68 -0.29
## net_opposition 0.68 1.00 0.33
## pop -0.29 0.33 1.00
#regression models
regs = list(
ols(net_opposition ~ suspect_rate, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial + Muslim + UN_continent, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial + Muslim + UN_macroregion, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
)
names(regs) = 1:length(regs)
#residuals for colonials
regs$`1` %>% {
y = resid(.) %>% standardize()
names(y) = d$origin
y[d$colonial]
}
## Indonesia Netherlands Antilles Suriname
## -0.084 -2.172 -1.956
#full output
regs
## $`1`
## Frequencies of Missing Values Due to Each Variable
## net_opposition suspect_rate (weights)
## 3 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition ~ suspect_rate, data = d %>% df_standardize(messages = F),
## weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 15.99 R2 0.210
## sigma10.3005 d.f. 1 R2 adj 0.198
## d.f. 66 Pr(> chi2) 0.0001 g 0.494
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.7445 -0.7475 0.2077 0.6795 1.6717
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0695 0.1144 -0.61 0.5458
## suspect_rate 0.4383 0.1048 4.18 <0.0001
##
##
## $`2`
## Frequencies of Missing Values Due to Each Variable
## net_opposition suspect_rate colonial (weights)
## 3 0 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition ~ suspect_rate + colonial, data = d %>%
## df_standardize(messages = F), weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 32.95 R2 0.384
## sigma9.1623 d.f. 2 R2 adj 0.365
## d.f. 65 Pr(> chi2) 0.0000 g 0.618
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.2878 -0.8297 0.0640 0.5951 1.5217
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0780 0.1074 0.73 0.4705
## suspect_rate 0.5489 0.0967 5.68 <0.0001
## colonial -1.3200 0.3076 -4.29 <0.0001
##
##
## $`3`
## Frequencies of Missing Values Due to Each Variable
## net_opposition suspect_rate colonial Muslim (weights)
## 3 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition ~ suspect_rate + colonial + Muslim,
## data = d %>% df_standardize(messages = F), weights = d$pop %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 70.34 R2 0.645
## sigma7.0146 d.f. 3 R2 adj 0.628
## d.f. 64 Pr(> chi2) 0.0000 g 0.764
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.97919 -0.43661 0.01758 0.61289 1.29841
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0095 0.0832 -0.11 0.9097
## suspect_rate 0.3268 0.0808 4.04 0.0001
## colonial -1.1960 0.2362 -5.06 <0.0001
## Muslim 0.5034 0.0735 6.85 <0.0001
##
##
## $`4`
## Frequencies of Missing Values Due to Each Variable
## net_opposition suspect_rate colonial Muslim UN_continent
## 3 0 0 0 0
## (weights)
## 0
##
## Linear Regression Model
##
## ols(formula = net_opposition ~ suspect_rate + colonial + Muslim +
## UN_continent, data = d %>% df_standardize(messages = F),
## weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 88.75 R2 0.729
## sigma6.3274 d.f. 7 R2 adj 0.697
## d.f. 60 Pr(> chi2) 0.0000 g 0.917
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.88265 -0.40730 -0.06779 0.29813 1.50423
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.3688 0.1360 -2.71 0.0087
## suspect_rate 0.3663 0.0990 3.70 0.0005
## colonial -1.1974 0.2484 -4.82 <0.0001
## Muslim 0.2701 0.0939 2.88 0.0055
## UN_continent=Africa 0.6648 0.2690 2.47 0.0163
## UN_continent=Americas 0.1759 0.2489 0.71 0.4826
## UN_continent=Asia 0.8292 0.2221 3.73 0.0004
## UN_continent=Oceania -0.7103 0.5141 -1.38 0.1722
##
##
## $`5`
## Frequencies of Missing Values Due to Each Variable
## net_opposition suspect_rate colonial Muslim UN_macroregion
## 3 0 0 0 0
## (weights)
## 0
##
## Linear Regression Model
##
## ols(formula = net_opposition ~ suspect_rate + colonial + Muslim +
## UN_macroregion, data = d %>% df_standardize(messages = F),
## weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 138.53 R2 0.870
## sigma4.5828 d.f. 12 R2 adj 0.841
## d.f. 55 Pr(> chi2) 0.0000 g 1.058
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.92875 -0.23372 -0.05334 0.23475 1.17074
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.8761 0.1289 -6.80 <0.0001
## suspect_rate 0.2132 0.0895 2.38 0.0206
## colonial -1.1158 0.2329 -4.79 <0.0001
## Muslim 0.3454 0.0958 3.61 0.0007
## UN_macroregion=Caribbean 0.8884 0.3939 2.26 0.0281
## UN_macroregion=Latin America 0.8521 0.2231 3.82 0.0003
## UN_macroregion=Africa 1.5499 0.2454 6.32 <0.0001
## UN_macroregion=Eastern Asia 0.9529 0.2564 3.72 0.0005
## UN_macroregion=Eastern Europe 1.7101 0.2254 7.59 <0.0001
## UN_macroregion=MENA 1.1775 0.2907 4.05 0.0002
## UN_macroregion=South-Eastern Asia 1.1056 0.2402 4.60 <0.0001
## UN_macroregion=Southern Asia 1.4684 0.2756 5.33 <0.0001
## UN_macroregion=Southern Europe 0.5369 0.2213 2.43 0.0185
##
#summarize
regs %>% summarize_models(asterisks_only = F)
Alternative IQ estimates.
#plots
GG_scatter(d, "IQ_Rindermann", "suspect_rate_RR", case_names = "origin") +
xlab("Origin country national IQ")
GG_scatter(d, "IQ_Rindermann", "suspect_rate_RR", case_names = "origin", weights = d$pop %>% sqrt()) +
xlab("Origin country national IQ")
#correlations
wtd.cors(d %>% select(suspect_rate, Muslim, IQ, IQ_Rindermann))
## suspect_rate Muslim IQ IQ_Rindermann
## suspect_rate 1.00 0.44 -0.66 -0.65
## Muslim 0.44 1.00 -0.42 -0.38
## IQ -0.66 -0.42 1.00 0.98
## IQ_Rindermann -0.65 -0.38 0.98 1.00
wtd.cors(d %>% select(suspect_rate, Muslim, IQ, IQ_Rindermann), weight = d$pop %>% sqrt())
## suspect_rate Muslim IQ IQ_Rindermann
## suspect_rate 1.00 0.45 -0.64 -0.65
## Muslim 0.45 1.00 -0.52 -0.53
## IQ -0.64 -0.52 1.00 0.98
## IQ_Rindermann -0.65 -0.53 0.98 1.00
#regression
ols(suspect_rate ~ IQ_Rindermann + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
## Frequencies of Missing Values Due to Each Variable
## suspect_rate IQ_Rindermann Muslim (weights)
## 0 4 0 0
##
## Linear Regression Model
##
## ols(formula = suspect_rate ~ IQ_Rindermann + Muslim, data = d %>%
## df_standardize(messages = F), weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 67 LR chi2 38.98 R2 0.441
## sigma9.6493 d.f. 2 R2 adj 0.424
## d.f. 64 Pr(> chi2) 0.0000 g 0.786
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.7435 -0.6348 -0.2164 0.1140 2.2975
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1635 0.1001 1.63 0.1072
## IQ_Rindermann -0.6165 0.1196 -5.15 <0.0001
## Muslim 0.1478 0.1050 1.41 0.1642
##
ols(suspect_rate ~ IQ_Rindermann + Muslim, data = d %>% df_standardize(messages = F))
## Frequencies of Missing Values Due to Each Variable
## suspect_rate IQ_Rindermann Muslim
## 0 4 0
##
## Linear Regression Model
##
## ols(formula = suspect_rate ~ IQ_Rindermann + Muslim, data = d %>%
## df_standardize(messages = F))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 67 LR chi2 41.75 R2 0.464
## sigma0.7603 d.f. 2 R2 adj 0.447
## d.f. 64 Pr(> chi2) 0.0000 g 0.794
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.4500 -0.4328 -0.0410 0.3099 2.5330
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0112 0.0929 -0.12 0.9046
## IQ_Rindermann -0.5730 0.1014 -5.65 <0.0001
## Muslim 0.2278 0.0994 2.29 0.0252
##
#compare models
lrtest(
ols(suspect_rate ~ IQ_Rindermann, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(suspect_rate ~ IQ_Rindermann + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
)
##
## Model 1: suspect_rate ~ IQ_Rindermann
## Model 2: suspect_rate ~ IQ_Rindermann + Muslim
##
## L.R. Chisq d.f. P
## 2.04 1.00 0.15
Don’t weigh the Prolific results by demopgrahics (voting choice), but accept the sample skew.
#scatterplots
GG_scatter(d, "suspect_rate_RR", "net_opposition_raw", case_names = "origin", text_pos = "tl")
GG_scatter(d, "suspect_rate_RR", "net_opposition_raw", case_names = "origin", text_pos = "tl", weights = d$pop %>% sqrt())
#correlations
wtd.cors(d %>% select(suspect_rate, Muslim, IQ, net_opposition, net_opposition_raw))
## suspect_rate Muslim IQ net_opposition net_opposition_raw
## suspect_rate 1.00 0.44 -0.66 0.57 0.54
## Muslim 0.44 1.00 -0.42 0.66 0.63
## IQ -0.66 -0.42 1.00 -0.71 -0.68
## net_opposition 0.57 0.66 -0.71 1.00 0.96
## net_opposition_raw 0.54 0.63 -0.68 0.96 1.00
wtd.cors(d %>% select(suspect_rate, Muslim, IQ, net_opposition, net_opposition_raw), weight = d$pop %>% sqrt())
## suspect_rate Muslim IQ net_opposition net_opposition_raw
## suspect_rate 1.00 0.45 -0.64 0.46 0.46
## Muslim 0.45 1.00 -0.52 0.68 0.69
## IQ -0.64 -0.52 1.00 -0.63 -0.60
## net_opposition 0.46 0.68 -0.63 1.00 0.96
## net_opposition_raw 0.46 0.69 -0.60 0.96 1.00
#without colonials
d %>% filter(!colonial) %>% select(suspect_rate, net_opposition, net_opposition_raw) %>% wtd.cors()
## suspect_rate net_opposition net_opposition_raw
## suspect_rate 1.00 0.66 0.63
## net_opposition 0.66 1.00 0.96
## net_opposition_raw 0.63 0.96 1.00
d %>% filter(!colonial) %>% select(suspect_rate, net_opposition, net_opposition_raw, pop) %>% {wtd.cors(., weight = .$pop %>% sqrt())}
## suspect_rate net_opposition net_opposition_raw pop
## suspect_rate 1.00 0.68 0.67 -0.29
## net_opposition 0.68 1.00 0.96 0.33
## net_opposition_raw 0.67 0.96 1.00 0.43
## pop -0.29 0.33 0.43 1.00
#regression models
regs = list(
ols(net_opposition_raw ~ suspect_rate, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition_raw ~ suspect_rate + colonial, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition_raw ~ suspect_rate + colonial + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition_raw ~ suspect_rate + colonial + Muslim + UN_continent, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition_raw ~ suspect_rate + colonial + Muslim + UN_macroregion, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
)
names(regs) = "model" + 1:length(regs)
#residuals for colonials
regs$model1 %>% {
y = resid(.) %>% standardize()
names(y) = d$origin
y[d$colonial]
}
## Indonesia Netherlands Antilles Suriname
## 0.12 -2.49 -1.52
#full output
regs
## $model1
## Frequencies of Missing Values Due to Each Variable
## net_opposition_raw suspect_rate (weights)
## 3 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition_raw ~ suspect_rate, data = d %>%
## df_standardize(messages = F), weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 15.96 R2 0.209
## sigma10.7751 d.f. 1 R2 adj 0.197
## d.f. 66 Pr(> chi2) 0.0001 g 0.516
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.0798 -0.8557 0.2572 0.7076 1.4717
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0344 0.1197 -0.29 0.7747
## suspect_rate 0.4579 0.1096 4.18 <0.0001
##
##
## $model2
## Frequencies of Missing Values Due to Each Variable
## net_opposition_raw suspect_rate colonial (weights)
## 3 0 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition_raw ~ suspect_rate + colonial, data = d %>%
## df_standardize(messages = F), weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 28.85 R2 0.346
## sigma9.8753 d.f. 2 R2 adj 0.326
## d.f. 65 Pr(> chi2) 0.0000 g 0.627
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.37032 -0.85693 0.04163 0.56805 1.28321
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1021 0.1158 0.88 0.3813
## suspect_rate 0.5602 0.1042 5.38 <0.0001
## colonial -1.2215 0.3315 -3.68 0.0005
##
##
## $model3
## Frequencies of Missing Values Due to Each Variable
## net_opposition_raw suspect_rate colonial Muslim
## 3 0 0 0
## (weights)
## 0
##
## Linear Regression Model
##
## ols(formula = net_opposition_raw ~ suspect_rate + colonial +
## Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 65.96 R2 0.621
## sigma7.5758 d.f. 3 R2 adj 0.603
## d.f. 64 Pr(> chi2) 0.0000 g 0.786
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.0517 -0.5740 0.1009 0.5149 1.3823
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0081 0.0899 0.09 0.9288
## suspect_rate 0.3216 0.0873 3.68 0.0005
## colonial -1.0882 0.2551 -4.27 <0.0001
## Muslim 0.5411 0.0794 6.82 <0.0001
##
##
## $model4
## Frequencies of Missing Values Due to Each Variable
## net_opposition_raw suspect_rate colonial Muslim
## 3 0 0 0
## UN_continent (weights)
## 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition_raw ~ suspect_rate + colonial +
## Muslim + UN_continent, data = d %>% df_standardize(messages = F),
## weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 84.52 R2 0.711
## sigma6.8262 d.f. 7 R2 adj 0.678
## d.f. 60 Pr(> chi2) 0.0000 g 0.956
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.8989 -0.4305 -0.1110 0.2779 1.6667
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.4217 0.1467 -2.88 0.0056
## suspect_rate 0.3376 0.1068 3.16 0.0025
## colonial -1.1345 0.2680 -4.23 <0.0001
## Muslim 0.3065 0.1012 3.03 0.0036
## UN_continent=Africa 0.7877 0.2902 2.71 0.0087
## UN_continent=Americas 0.3434 0.2686 1.28 0.2060
## UN_continent=Asia 0.9132 0.2396 3.81 0.0003
## UN_continent=Oceania -0.6258 0.5546 -1.13 0.2636
##
##
## $model5
## Frequencies of Missing Values Due to Each Variable
## net_opposition_raw suspect_rate colonial Muslim
## 3 0 0 0
## UN_macroregion (weights)
## 0 0
##
## Linear Regression Model
##
## ols(formula = net_opposition_raw ~ suspect_rate + colonial +
## Muslim + UN_macroregion, data = d %>% df_standardize(messages = F),
## weights = d$pop %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 68 LR chi2 172.55 R2 0.921
## sigma3.7321 d.f. 12 R2 adj 0.904
## d.f. 55 Pr(> chi2) 0.0000 g 1.133
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.63423 -0.22078 -0.04751 0.11800 0.94697
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -1.0705 0.1050 -10.20 <0.0001
## suspect_rate 0.2092 0.0729 2.87 0.0058
## colonial -1.0685 0.1897 -5.63 <0.0001
## Muslim 0.2757 0.0780 3.54 0.0008
## UN_macroregion=Caribbean 0.8185 0.3207 2.55 0.0135
## UN_macroregion=Latin America 1.3221 0.1817 7.28 <0.0001
## UN_macroregion=Africa 1.6713 0.1999 8.36 <0.0001
## UN_macroregion=Eastern Asia 1.1008 0.2088 5.27 <0.0001
## UN_macroregion=Eastern Europe 2.0958 0.1836 11.42 <0.0001
## UN_macroregion=MENA 1.7515 0.2368 7.40 <0.0001
## UN_macroregion=South-Eastern Asia 1.5170 0.1956 7.76 <0.0001
## UN_macroregion=Southern Asia 1.4856 0.2244 6.62 <0.0001
## UN_macroregion=Southern Europe 0.5444 0.1802 3.02 0.0038
##
#summary
regs %>% summarize_models()
Add a smoothing line (LOESS, default ggplot2 settings).
#scattersplots
GG_scatter(d, "IQ", "suspect_rate_RR", case_names = "origin") +
xlab("Origin country national IQ") +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_scatter(d, "Muslim", "suspect_rate_RR", case_names = "origin", text_pos = "tr") +
xlab("Origin country Muslim% of population") +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#with weights
GG_scatter(d, "IQ", "suspect_rate_RR", case_names = "origin", weights = d$pop %>% sqrt()) +
xlab("Origin country national IQ") +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_scatter(d, "Muslim", "suspect_rate_RR", case_names = "origin", text_pos = "tr", weights = d$pop %>% sqrt()) +
xlab("Origin country Muslim% of population") +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Colonial status as an interaction.
#regression models
regs = list(
ols(net_opposition ~ suspect_rate, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate * colonial, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate * colonial + Muslim, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate * colonial + Muslim + UN_continent, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate * colonial + Muslim + UN_macroregion, data = d %>% df_standardize(messages = F), weights = d$pop %>% sqrt())
)
#summarize
regs %>% summarize_models(asterisks_only = F)
#subset data
d_no_imp = d %>% filter(!ISO %in% imputed_muslim)
#models
regs = list(
ols(net_opposition ~ suspect_rate, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial + Muslim, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial + Muslim + UN_continent, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ suspect_rate + colonial + Muslim + UN_macroregion, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt())
)
#summarize
regs %>% summarize_models()
#correlations
d %>% select(suspect_rate, suspect_rate_1st, suspect_rate_2nd,
arrest_rate, arrest_rate_1st, arrest_rate_2nd,
IQ, Muslim, net_opposition) %>%
wtd.cors()
## suspect_rate suspect_rate_1st suspect_rate_2nd arrest_rate
## suspect_rate 1.00 0.96 0.90 1.00
## suspect_rate_1st 0.96 1.00 0.80 0.96
## suspect_rate_2nd 0.90 0.80 1.00 0.89
## arrest_rate 1.00 0.96 0.89 1.00
## arrest_rate_1st 0.96 1.00 0.80 0.97
## arrest_rate_2nd 0.79 0.67 0.94 0.79
## IQ -0.66 -0.66 -0.59 -0.64
## Muslim 0.44 0.39 0.43 0.44
## net_opposition 0.57 0.58 0.58 0.55
## arrest_rate_1st arrest_rate_2nd IQ Muslim net_opposition
## suspect_rate 0.96 0.79 -0.66 0.44 0.57
## suspect_rate_1st 1.00 0.67 -0.66 0.39 0.58
## suspect_rate_2nd 0.80 0.94 -0.59 0.43 0.58
## arrest_rate 0.97 0.79 -0.64 0.44 0.55
## arrest_rate_1st 1.00 0.68 -0.65 0.38 0.56
## arrest_rate_2nd 0.68 1.00 -0.43 0.34 0.47
## IQ -0.65 -0.43 1.00 -0.42 -0.71
## Muslim 0.38 0.34 -0.42 1.00 0.66
## net_opposition 0.56 0.47 -0.71 0.66 1.00
#more decimals
d %>% select(suspect_rate, arrest_rate) %>% wtd.cor() %>% print(digits = 4)
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight = weight), :
## essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight = weight), :
## essentially perfect fit: summary may be unreliable
## $correlation
## suspect_rate arrest_rate
## suspect_rate 1.0000 0.9952
## arrest_rate 0.9952 1.0000
##
## $std.err
## suspect_rate arrest_rate
## suspect_rate 9.827e-17 1.172e-02
## arrest_rate 1.172e-02 2.804e-17
##
## $t.value
## suspect_rate arrest_rate
## suspect_rate 1.018e+16 8.490e+01
## arrest_rate 8.490e+01 3.566e+16
##
## $p.value
## suspect_rate arrest_rate
## suspect_rate 0.000e+00 1.532e-71
## arrest_rate 1.532e-71 0.000e+00
#models
regs = list(
ols(net_opposition ~ arrest_rate, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ arrest_rate + colonial, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ arrest_rate + colonial + Muslim, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ arrest_rate + colonial + Muslim + UN_continent, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt()),
ols(net_opposition ~ arrest_rate + colonial + Muslim + UN_macroregion, data = d_no_imp %>% df_standardize(messages = F), weights = d_no_imp$pop %>% sqrt())
)
#summarize
regs %>% summarize_models()
#this one is missing too many units
# world = spData::world %>% mutate(ISO = pu_translate(name_long))
#http://thematicmapping.org/downloads/world_borders.php
world = sf::read_sf("data/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp")
#regions coding
world2 = left_join(world, chess2019, by = c("ISO3" = "ISO"))
#continents
map_continents = tm_shape(world2) +
tm_fill("UN_continent", title = "Continents") +
tm_borders()
map_continents
## Warning: The shape world2 is invalid. See sf::st_is_valid
tmap_save(map_continents, "figs/continents map.png")
## Warning: The shape world2 is invalid. See sf::st_is_valid
## Warning: The shape world2 is invalid. See sf::st_is_valid
## Map saved to /media/truecrypt1/Projects/Dutch immigrant crime 2019/figs/continents map.png
## Resolution: 2992 by 1474 pixels
## Size: 10 by 4.9 inches (300 dpi)
map_macroregions = tm_shape(world2) +
tm_fill("UN_macroregion", title = "Macroregions") +
tm_borders()
map_macroregions
## Warning: The shape world2 is invalid. See sf::st_is_valid
tmap_save(map_macroregions, "figs/macroregions map.png")
## Warning: The shape world2 is invalid. See sf::st_is_valid
## Warning: The shape world2 is invalid. See sf::st_is_valid
## Map saved to /media/truecrypt1/Projects/Dutch immigrant crime 2019/figs/macroregions map.png
## Resolution: 2992 by 1474 pixels
## Size: 10 by 4.9 inches (300 dpi)
map_regions = tm_shape(world2) +
tm_fill("UN_region", title = "Regions") +
tm_borders()
map_regions
## Warning: The shape world2 is invalid. See sf::st_is_valid
tmap_save(map_regions, "figs/regions map.png")
## Warning: The shape world2 is invalid. See sf::st_is_valid
## Warning: The shape world2 is invalid. See sf::st_is_valid
## Map saved to /media/truecrypt1/Projects/Dutch immigrant crime 2019/figs/regions map.png
## Resolution: 2992 by 1474 pixels
## Size: 10 by 4.9 inches (300 dpi)
#merge some ex-countries for plotting
world %<>% mutate(
ISO = case_when((ISO3 %in% c("RUS") ~ "SUN"), #soviet is Russia
(ISO3 %in% c("CZE", "SVK")) ~ "CSK", #merge czech slovakia
(ISO3 %in% c("HRV", "SRB", "SVN", "MKD", "BIH", "MNE")) ~ "YUG", #merge yugoslavia
TRUE ~ ISO3
)
)
#merge table
d_spatial = full_join(world, d, by = "ISO")
#crime map
crime_map = tm_shape(d_spatial) +
tm_fill("suspect_rate_RR", title = "Relative crime rate") +
tm_borders()
crime_map
## Warning: The shape d_spatial is invalid. See sf::st_is_valid
tmap_save(crime_map, "figs/suspect_rate_map.png")
## Warning: The shape d_spatial is invalid. See sf::st_is_valid
## Warning: The shape d_spatial is invalid. See sf::st_is_valid
## Map saved to /media/truecrypt1/Projects/Dutch immigrant crime 2019/figs/suspect_rate_map.png
## Resolution: 2992 by 1474 pixels
## Size: 10 by 4.9 inches (300 dpi)
#print main variables
d %>%
select(origin, UN_continent, UN_macroregion, pop, suspect_rate, suspect_rate_RR, suspect_rate_1st_RR, suspect_rate_2nd_RR, Muslim, IQ, net_opposition)
write_sessioninfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] writexl_1.2 readxl_1.3.1 broom_0.5.3
## [4] sf_0.8-0 tmap_2.3-1 googlesheets4_0.1.0
## [7] rms_5.1-4 SparseM_1.78 kirkegaard_2018.05
## [10] metafor_2.1-0 Matrix_1.2-18 psych_1.9.12
## [13] magrittr_1.5 assertthat_0.2.1 weights_1.0
## [16] mice_3.7.0 gdata_2.18.0 Hmisc_4.3-0
## [19] Formula_1.2-3 survival_3.1-8 lattice_0.20-38
## [22] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
## [25] purrr_0.3.3 readr_1.3.1 tidyr_1.0.0
## [28] tibble_2.1.3 ggplot2_3.2.1 tidyverse_1.3.0
## [31] pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.5 lwgeom_0.1-7 plyr_1.8.5
## [4] lazyeval_0.2.2 sp_1.3-2 splines_3.6.2
## [7] crosstalk_1.0.0 leaflet_2.0.3 TH.data_1.0-10
## [10] digest_0.6.23 htmltools_0.4.0 fansi_0.4.0
## [13] checkmate_1.9.4 cluster_2.1.0 modelr_0.1.5
## [16] sandwich_2.5-1 askpass_1.1 jpeg_0.1-8.1
## [19] colorspace_1.4-1 rvest_0.3.5 haven_2.2.0
## [22] pan_1.6 xfun_0.11 rgdal_1.4-8
## [25] crayon_1.3.4 jsonlite_1.6 lme4_1.1-21
## [28] zeallot_0.1.0 zoo_1.8-6 glue_1.3.1
## [31] gargle_0.4.0 gtable_0.3.0 MatrixModels_0.4-1
## [34] jomo_2.6-10 scales_1.1.0 mvtnorm_1.0-11
## [37] DBI_1.1.0 Rcpp_1.0.3 viridisLite_0.3.0
## [40] xtable_1.8-4 htmlTable_1.13.3 units_0.6-5
## [43] foreign_0.8-74 htmlwidgets_1.5.1 httr_1.4.1
## [46] RColorBrewer_1.1-2 ellipsis_0.3.0 acepack_1.4.1
## [49] farver_2.0.1 pkgconfig_2.0.3 XML_3.98-1.20
## [52] nnet_7.3-12 dbplyr_1.4.2 labeling_0.3
## [55] tidyselect_0.2.5 rlang_0.4.2 later_1.0.0
## [58] tmaptools_2.0-2 multilevel_2.6 munsell_0.5.0
## [61] cellranger_1.1.0 tools_3.6.2 cli_2.0.0
## [64] generics_0.0.2 evaluate_0.14 fastmap_1.0.1
## [67] yaml_2.2.0 leafsync_0.1.0 knitr_1.26
## [70] fs_1.3.1 mitml_0.3-7 nlme_3.1-143
## [73] mime_0.8 quantreg_5.54 xml2_1.2.2
## [76] psychometric_2.2 compiler_3.6.2 rstudioapi_0.10
## [79] curl_4.3 png_0.1-7 e1071_1.7-3
## [82] reprex_0.3.0 stringi_1.4.3 rgeos_0.5-2
## [85] classInt_0.4-2 nloptr_1.2.1 vctrs_0.2.1
## [88] stringdist_0.9.5.5 pillar_1.4.3 lifecycle_0.1.0
## [91] data.table_1.12.8 raster_3.0-7 httpuv_1.5.2
## [94] R6_2.4.1 latticeExtra_0.6-29 promises_1.1.0
## [97] KernSmooth_2.23-16 gridExtra_2.3 codetools_0.2-16
## [100] polspline_1.1.17 dichromat_2.0-0 boot_1.3-24
## [103] MASS_7.3-51.5 gtools_3.8.1 openssl_1.4.1
## [106] withr_2.1.2 mnormt_1.5-5 multcomp_1.4-11
## [109] parallel_3.6.2 hms_0.5.2 grid_3.6.2
## [112] rpart_4.1-15 class_7.3-15 minqa_1.2.4
## [115] rmarkdown_2.0 shiny_1.4.0 lubridate_1.7.4
## [118] base64enc_0.1-3
#final dataset out
write_rds(d, "data/data_out.rds")
write_csv(d, "data/data_out.csv")
#local copies of main input data
write_xlsx(crime, "data/crime_combined.xlsx")
write_xlsx(crime_gen, "data/crime_combined_generation.xlsx")
write_xlsx(prefs_orig, "data/dutch_survey.xlsx")