Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:psych':
## 
##     rescale
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## The following object is masked from 'package:base':
## 
##     +
load_packages(
    readxl,
    rms,
    ggeffects,
    sf, tmap
)
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3; sf_use_s2() is TRUE
theme_set(theme_bw())

options(
    digits = 3
)

tmap_options(check.and.fix = TRUE)

Data

#prison data
#https://www.prisonstudies.org/highest-to-lowest/prison_population_rate?field_region_taxonomy_tid=All
#from https://en.wikipedia.org/wiki/List_of_countries_by_incarceration_rate
prison = read_excel("data/prison rates.xlsx") %>% 
  df_legalize_names() %>% 
  mutate(
    ISO = pu_translate(Title, ad_level = 0)
  )
## No exact match: American Samoa (USA)
## No exact match: Virgin Islands (USA)
## No exact match: Northern Mariana Islands (USA)
## No exact match: Cape Verde (Cabo Verde)
## No exact match: French Guiana/Guyane (France)
## No exact match: eSwatini/Swaziland
## No exact match: Sao Tome e Principe
## No exact match: Republic of (South) Korea
## No exact match: Kosovo/Kosova
## No exact match: Bosnia and Herzegovina: Federation
## No exact match: Timor-Leste (formerly East Timor)
## No exact match: Bosnia and Herzegovina: Republika Srpska
## No exact match: Congo (Republic of)
## Best fuzzy match found: American Samoa (USA) -> American Samoa with distance 6.00
## Warning: There were multiple equally good matches for Virgin Islands (USA):
## Jarvis Island (USA) | Midway Islands (USA) | U.S. Virgin Islands (USA). All with
## distance 5.00
## Best fuzzy match found: Northern Mariana Islands (USA) -> Northern Mariana Islands with distance 6.00
## Best fuzzy match found: Cape Verde (Cabo Verde) -> Cape Verde with distance 13.00
## Best fuzzy match found: French Guiana/Guyane (France) -> French Guiana (France) (France) with distance 6.00
## Best fuzzy match found: eSwatini/Swaziland -> Swaziland with distance 9.00
## Best fuzzy match found: Sao Tome e Principe -> Sao Tome & Principe with distance 1.00
## Best fuzzy match found: Republic of (South) Korea -> Republic of Korea with distance 8.00
## Best fuzzy match found: Kosovo/Kosova -> Kosovo with distance 7.00
## Best fuzzy match found: Bosnia and Herzegovina: Federation -> Bosnia and Herzegovina with distance 12.00
## Best fuzzy match found: Timor-Leste (formerly East Timor) -> Myanmar (formerly Burma) with distance 20.00
## Best fuzzy match found: Bosnia and Herzegovina: Republika Srpska -> Bosnia and Herzegovina with distance 18.00
## Best fuzzy match found: Congo (Republic of) -> Congo Republic of with distance 2.00
#we have some subnational units, we need to average within them
#first convert to the same national ISO
prison$ISO %<>% plyr::revalue(replace = c(
  "SCO" = "GBR",
  "ENGWAL" = "GBR",
  "NIR" = "GBR"
))

#average within ISO
prison = plyr::ddply(prison, "ISO", function(dd) {
  tibble(
    Prison_Population_Rate = mean(dd$Prison_Population_Rate)
  )
})

#becker data
becker = read_excel("data/NIQ-DATASET-V1.3.3/NIQ-DATA (V1.3.3).xlsx", sheet = 2, range = "A2:N205") %>% 
  df_legalize_names()

#smart fraction data for UN regions
sf = read_rds("data/smart fraction data_out.rds")

#African population shares by migration matrix
putterman = read_excel("data/putterman matrix version 1.1.xls")

#calculate European
euro_homelands = c("Albania", "Austria", "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Macedonia", "Malta", "Moldova", "Netherlands", "Norway", "Poland", "Portugal", "Romania", "Russia", "Serbia and Montenegro", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "Ukraine", "United Kingdom")
euro_homelands_abbr = putterman %>% filter(wbname %in% euro_homelands) %>% pull(wbcode)

#African
african_homelands = c("Angola", "Belize", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cameroon", "Central African Republic", "Chad", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire", "Equatorial Guinea", "Eritrea", "Ethiopia", "Gabon", "Gambia", "Ghana", "Guinea", "Guinea-Bissau", "Lesotho", "Liberia", "Malawi", "Mali", "Mauritania", "Mozambique", "Namibia", "Niger", "Nigeria", "Rwanda", "Senegal", "Sierra Leone", "Somalia", "South Africa", "Sudan", "Swaziland", "Tanzania", "Togo", "Uganda", "Zambia", "Zimbabwe")
african_homelands_abbr = putterman %>% filter(wbname %in% african_homelands) %>% pull(wbcode)

#calculate European fraction
putterman$european = putterman %>% select(!!str_to_lower(euro_homelands_abbr)) %>% rowSums()
putterman$african = putterman %>% select(!!str_to_lower(african_homelands_abbr)) %>% rowSums()

#make new ISO, some of existing are wrong
putterman$wbname[76] = "Israel" #matching is incorrectly done to Palestine otherwise
putterman$ISO = pu_translate(putterman$wbname)
## No exact match: Hong Kong, China
## No exact match: Korea, Dem. Rep. (North)
## No exact match: Korea, Rep. (South)
## Best fuzzy match found: Hong Kong, China -> Hong Kong-China with distance 2.00
## Best fuzzy match found: Korea, Dem. Rep. (North) -> Korea, Dem. Rep. with distance 8.00
## Best fuzzy match found: Korea, Rep. (South) -> Korea, South with distance 7.00
#african filled data from wikipedia, CIA etc.
african_filled = read_excel("data/prison rates.xlsx", sheet = 2)

#population size UN
unpop = readxl::read_excel("data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx", range = "A17:BZ306") %>% 
  df_legalize_names() %>% 
  #filter to countries
  filter(Type == "Country/Area") %>% 
  #ISO
  mutate(ISO = pu_translate(Region_subregion_country_or_area))
## No exact match: China, Taiwan Province of China
## No exact match: Dem. People's Republic of Korea
## No exact match: Micronesia (Fed. States of)
## Best fuzzy match found: China, Taiwan Province of China -> Taiwan, Province of China with distance 8.00
## Best fuzzy match found: Dem. People's Republic of Korea -> Democratic People's Republic of Korea with distance 7.00
## Best fuzzy match found: Micronesia (Fed. States of) -> Micronesia (Federated States of) with distance 6.00
#homicide rate
homicide = read_csv("data/homicide-rate.csv") %>% filter(Year == 2019) %>% rename(homicide_rate = `Deaths - Interpersonal violence - Sex: Both - Age: All Ages (Rate)`)
## Rows: 6840 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Deaths - Interpersonal violence - Sex: Both - Age: All Ages (...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#join
d = full_join(
  prison,
  becker,
  by = "ISO"
) %>% 
  full_join(
    sf %>% select(ISO, UN_macroregion),
    by = "ISO"
  ) %>% 
  full_join(
    putterman %>% select(ISO, european, african),
    by = "ISO"
  ) %>% 
  full_join(
    unpop %>% select(x2020, ISO),
    by = "ISO"
  ) %>% 
  full_join(
    african_filled %>% select(ISO, african_estimate),
    by = "ISO"
  ) %>% 
  full_join(
    homicide %>% select(homicide_rate, Code) %>% rename(ISO = Code) %>% filter(!is.na(ISO)),
    by = "ISO"
  )

#no dups
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
#derive vars
d %<>% mutate(
  sqrt_pop = sqrt(as.numeric(x2020)),
  R = as.numeric(R),
  lynn2012 = as.numeric(L_and_V12plusGEO),
  country = pu_translate(ISO, reverse = T),
  african_filled = miss_fill(african, african_estimate/100)
)
## No match: KNA.
## No match: OWID_WRL
#fill in missing african %
d %>% filter(!is.na(Prison_Population_Rate), is.na(african)) %>% select(ISO, country)
#worldmap
worldmap = st_read("data/World_Countries/World_Countries.shp")
## Reading layer `World_Countries' from data source 
##   `/media/8tb_ssd_3/projects/Prison rate/data/World_Countries/World_Countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 252 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.6
## CRS:           4326
worldmap$ISO = pu_translate(worldmap$COUNTRY)
## No exact match: American Samoa (US)
## No exact match: Anguilla (UK)
## No exact match: Bermuda (UK)
## No exact match: Cayman Islands (UK)
## No exact match: Cocos (Keeling) Islands (Australia)
## No exact match: Northern Mariana Islands (US)
## No exact match: Jarvis Island (US)
## No exact match: Falkland Islands (UK)
## No exact match: Baker Island (US)
## No exact match: French Southern & Antarctic Lands (France)
## No exact match: Gibraltar (UK)
## No exact match: Guernsey (UK)
## No exact match: Guam (US)
## No exact match: Heard Island & McDonald Islands (Australia)
## No exact match: Howland Island (US)
## No exact match: Isle of Man (UK)
## No exact match: British Indian Ocean Territory (UK)
## No exact match: Jersey (UK)
## No exact match: Jan Mayen (Norway)
## No exact match: Johnston Atoll (US)
## No exact match: Christmas Island (Australia)
## No exact match: Montserrat (UK)
## No exact match: Midway Islands (US)
## No exact match: Norfolk Island (Australia)
## No exact match: Pitcairn Islands (UK)
## No exact match: Spratly Islands (Disputed)
## No exact match: Puerto Rico (US)
## No exact match: St. Pierre and Miquelon (France)
## No exact match: St. Helena (UK)
## No exact match: South Georgia and the South Sandwich Is (UK)
## No exact match: Turks and Caicos Islands (UK)
## No exact match: British Virgin Islands(UK)
## No exact match: American Virgin Islands (US)
## No exact match: Wallis and Futuna (France)
## No exact match: Wake Island (US)
## No exact match: Curacao (Netherlands)
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## Best fuzzy match found: Cocos (Keeling) Islands (Australia) -> Cocos (Keeling) Islands with distance 12.00
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## Best fuzzy match found: Jarvis Island (US) -> Jarvis Island (USA) with distance 1.00
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00
## Best fuzzy match found: Baker Island (US) -> Baker Island (USA) with distance 1.00
## Best fuzzy match found: French Southern & Antarctic Lands (France) -> French Southern Territories (France) with distance 14.00
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## Best fuzzy match found: Heard Island & McDonald Islands (Australia) -> Heard Island and McDonald Islands with distance 15.00
## Best fuzzy match found: Howland Island (US) -> Howland Island (USA) with distance 1.00
## Best fuzzy match found: Isle of Man (UK) -> Isle of Man (GBR) with distance 3.00
## Best fuzzy match found: British Indian Ocean Territory (UK) -> British Indian Ocean Territory (GBR) with distance 3.00
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## Best fuzzy match found: Jan Mayen (Norway) -> Svalbard (Norway) with distance 8.00
## Best fuzzy match found: Johnston Atoll (US) -> Johnston Atoll (USA) with distance 1.00
## Best fuzzy match found: Christmas Island (Australia) -> Christmas Island with distance 12.00
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## Best fuzzy match found: Midway Islands (US) -> Midway Islands (USA) with distance 1.00
## Best fuzzy match found: Norfolk Island (Australia) -> Norfolk Island with distance 12.00
## Best fuzzy match found: Pitcairn Islands (UK) -> Pitcairn Islands with distance 5.00
## Best fuzzy match found: Spratly Islands (Disputed) -> Spratly Islands with distance 11.00
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## Best fuzzy match found: St. Pierre and Miquelon (France) -> St. Pierre and Miquelon with distance 9.00
## Best fuzzy match found: St. Helena (UK) -> St. Helena with distance 5.00
## Best fuzzy match found: South Georgia and the South Sandwich Is (UK) -> South Georgia and the South Sandwich Islands with distance 5.00
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## Best fuzzy match found: British Virgin Islands(UK) -> British Virgin Islands with distance 4.00
## Best fuzzy match found: American Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 9.00
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00
## Best fuzzy match found: Wake Island (US) -> Wake Island (USA) with distance 1.00
## Best fuzzy match found: Curacao (Netherlands) -> Curaçao (Netherlands) with distance 1.00

Worldwide

GG_scatter(d, "homicide_rate", "Prison_Population_Rate", weights = "sqrt_pop", case_names = "ISO") +
  scale_x_continuous() +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GG_save("figs/homicide_rate_prison_rate.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_scatter(d, "african", "Prison_Population_Rate", weights = "sqrt_pop") +
  scale_x_continuous(labels = scales::percent) +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GG_scatter(d, "african_filled", "Prison_Population_Rate", weights = "sqrt_pop", case_names = "ISO") +
  scale_x_continuous("African population share", labels = scales::percent) +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GG_save("figs/african_prison_rate.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_scatter(d, "european", "Prison_Population_Rate", weights = "sqrt_pop") +
  scale_x_continuous(labels = scales::percent) +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GG_scatter(d, "lynn2012", "Prison_Population_Rate", weights = "sqrt_pop", case_names = "ISO") +
  geom_smooth() +
  scale_x_continuous("National IQ (Lynn 2012)")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GG_save("figs/IQ_prison_rate.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#sample sizes by variable
d %>% 
  select(african, african_filled, lynn2012, R, Prison_Population_Rate, sqrt_pop) %>% 
  pairwiseCount()
##                        african african_filled lynn2012   R
## african                    165            165      162 162
## african_filled             165            224      193 193
## lynn2012                   162            193      199 199
## R                          162            193      199 199
## Prison_Population_Rate     160            218      190 190
## sqrt_pop                   163            218      195 195
##                        Prison_Population_Rate sqrt_pop
## african                                   160      163
## african_filled                            218      218
## lynn2012                                  190      195
## R                                         190      195
## Prison_Population_Rate                    219      214
## sqrt_pop                                  214      235
d$lynn2012_z = d$lynn2012 %>% standardize()

#models
model_fits = list(
  ols(Prison_Population_Rate ~ african_filled + lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ rcs(african_filled, 3) + lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ african_filled + rcs(lynn2012_z, 3), data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ african_filled * lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ rcs(african_filled, 3) * lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ rcs(african_filled, 3) * rcs(lynn2012_z, 3), data = d, weights = d$sqrt_pop)
)

model_fits2 = list(
  ols(Prison_Population_Rate ~ african + lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ rcs(african, 3) + lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ african + rcs(lynn2012_z, 3), data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ african * lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ rcs(african, 3) * lynn2012_z, data = d, weights = d$sqrt_pop),
  ols(Prison_Population_Rate ~ rcs(african, 3) * rcs(lynn2012_z, 3), data = d, weights = d$sqrt_pop)
)

#summarise
model_fits %>% summarize_models(asterisks_only = F)
model_fits2 %>% summarize_models(asterisks_only = F)
lrtest(model_fits[[1]], model_fits[[6]])
## 
## Model 1: Prison_Population_Rate ~ african_filled + lynn2012_z
## Model 2: Prison_Population_Rate ~ rcs(african_filled, 3) * rcs(lynn2012_z, 
##     3)
## 
## L.R. Chisq       d.f.          P 
##        108          6          0
lrtest(model_fits[[2]], model_fits[[6]])
## 
## Model 1: Prison_Population_Rate ~ rcs(african_filled, 3) + lynn2012_z
## Model 2: Prison_Population_Rate ~ rcs(african_filled, 3) * rcs(lynn2012_z, 
##     3)
## 
## L.R. Chisq       d.f.          P 
##   7.58e+01   5.00e+00   6.33e-15
lrtest(model_fits[[3]], model_fits[[6]])
## 
## Model 1: Prison_Population_Rate ~ african_filled + rcs(lynn2012_z, 3)
## Model 2: Prison_Population_Rate ~ rcs(african_filled, 3) * rcs(lynn2012_z, 
##     3)
## 
## L.R. Chisq       d.f.          P 
##         92          5          0
lrtest(model_fits[[4]], model_fits[[6]])
## 
## Model 1: Prison_Population_Rate ~ african_filled * lynn2012_z
## Model 2: Prison_Population_Rate ~ rcs(african_filled, 3) * rcs(lynn2012_z, 
##     3)
## 
## L.R. Chisq       d.f.          P 
##       89.3        5.0        0.0
lrtest(model_fits[[5]], model_fits[[6]])
## 
## Model 1: Prison_Population_Rate ~ rcs(african_filled, 3) * lynn2012_z
## Model 2: Prison_Population_Rate ~ rcs(african_filled, 3) * rcs(lynn2012_z, 
##     3)
## 
## L.R. Chisq       d.f.          P 
##   3.73e+01   3.00e+00   3.97e-08

Plots

#merge
d_spatial = left_join(worldmap,
                      d %>% select(ISO, Prison_Population_Rate),
                      by = "ISO")

#get rid of Jan Mayan island, same as Svalbard
d_spatial %<>% filter(!COUNTRY %in% c("Jan Mayen (Norway)"))

#no dups
assert_that(!anyDuplicated(d_spatial$ISO))
## [1] TRUE
#prison map
prison_map = d_spatial %>% 
  tm_shape() +
  tm_fill("Prison_Population_Rate", title = "Prison rate") +
  tm_borders()

prison_map

tmap_save(prison_map, "figs/prison_map.png")
## Map saved to /media/8tb_ssd_3/projects/Prison rate/figs/prison_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10.2 by 4.81 inches (300 dpi)
#understand model by predictions
#try suggested model
ols(Prison_Population_Rate ~ rcs(lynn2012, 3) * rcs(african_filled, 3), data = d, weights = d$sqrt_pop) %>% 
  ggpredict(terms = c("african_filled", "lynn2012")) %>% 
  plot(add.data = T, jitter = NULL) +
  scale_x_continuous("African population share", labels = scales::percent) +
  geom_text(data = d, aes(x = african_filled, y = Prison_Population_Rate, label = ISO, color = NULL, fill = NULL), check_overlap = T, size = 3, vjust = -1)
## Warning: Removed 38 rows containing missing values (geom_text).

GG_save("figs/model6_preds.png")
## Warning: Removed 38 rows containing missing values (geom_text).

US states

us_africans = read_csv("data/us states african.csv") %>% 
  df_legalize_names() %>% 
  mutate(
    african = str_match(pct_Black_or_African_American_alone_2, "(\\d+\\.\\d+)")[, 2] %>% as.numeric() %>% divide_by(100),
    name = State_or_territory %>% str_replace(" http.*", "") %>% str_trim()
  )
## Rows: 57 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): % Black or African-American alone[2], Rank, State or territory, Bla...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
us_prison = read_csv("data/us states prison.csv") %>% 
  df_legalize_names() %>% 
  mutate(
    name = Jurisdiction %>% str_replace(" \\*.+", "") %>% str_replace("http.+", "") %>% str_trim()
  )
## Rows: 54 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Jurisdiction
## dbl (1): 2018 rate per 100,000 of all ages
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
us_pol = read_csv("data/us states popvotes.csv") %>% 
  df_legalize_names() %>% 
  filter(!str_detect(State, "District")) %>% 
  mutate(
    biden = str_match(Biden_Share, "(\\d+\\.\\d+)")[, 2] %>% as.numeric() %>% divide_by(100),
    name = State
    )
## Rows: 61 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): State, Certified?, Biden Share, Trump Share, Other Share, Dem '20 M...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
us = full_join(
  us_africans %>% select(african, name),
  us_prison %>% select(name, x2018_rate_per_100_000_adults),
  by = "name"
) %>% full_join(
  us_pol %>% select(biden, name),
  by = "name"
)

GG_scatter(us, "african", "x2018_rate_per_100_000_adults", case_names = "name") +
  scale_x_continuous("African population share", labels = scales::percent) +
  scale_y_continuous("Prison population per 100k adults") +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GG_save("figs/us_african_prison_rate.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
list(
  ols(x2018_rate_per_100_000_adults ~ african, data = us),
  ols(x2018_rate_per_100_000_adults ~ african, data = us[-2, ]),
  ols(x2018_rate_per_100_000_adults ~ african + biden, data = us),
  ols(x2018_rate_per_100_000_adults ~ african * biden, data = us)
) %>% summarize_models(asterisks_only = F)