Overview

Note: This file makes use of synthetic data. Synthetic data is artificially generated data that mimics the statistical properties and patterns of real-world data without containing any actual real-world information.

This file is provided as a preliminary resource until official data is added to the critstats package. You may also use this code to gather data related to your class project, thesis, or other academic tasks beyond what is provided below. Content in this file comes from a host of different sources, which you should be familiar with prior to accessing and analyzing any data.

Set up your work environment

Open up a new .Rmd file.

Use {r setup, include=FALSE} in your first code chunk.

# Set the default chunk option so every chunk's source code is echoed in the
# knitted document.
knitr::opts_chunk$set(echo = TRUE)

# Load necessary libraries
library(knitr)
library(kableExtra)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr) 
library(dplyr)
library(tidyr)

Data

I provide the code below to exemplify how to reproduce a data frame as a last option when scraping data from the internet.

State data (2021)

We pull data on states in 2021 from Prison Policy Initiative here.

# Reproduce the state-level data frame by hand with tribble(), one row per
# location. Rows cover U.S. states, the District of Columbia, several
# territories (e.g. Guam, Puerto Rico), and a final "U.S. Total" aggregate row.
# NA marks a value that is not available for that location.
# NOTE(review): Incarceration_rate is presumably per 100,000 population (the
# plot later in this file labels it that way); the units of Violent_crime_rate
# are not stated here -- confirm against the source.
# NOTE(review): the "U.S. Total" row is an aggregate, so it is included in any
# column sums or means computed on this table unless it is filtered out first.
incarceration_data <- tribble(
  ~State, ~Combined_prison_and_local_jail, ~Youth, ~Indian_Country_jails, ~Civil_commitment, ~Pre_trial_evaluation, ~Not_guilty_by_reason_of_insanity, ~Incompetent_to_stand_trial, ~Federal, ~Immigration, ~Military, ~Total_incarceration, ~Federal_origin_percentage, ~State_population, ~Incarceration_rate, ~Violent_crime_rate,
  "Alabama", 42200, 915, NA, NA, NA, 171, NA, 3855, NA, NA, 47141, 0.0197, 5024279, 938, 511,
  "Alaska", 4500, 243, 1, NA, 1, 1, 8, 509, NA, NA, 5263, 0.0026, 733391, 718, 867,
  "American Samoa", 196, NA, NA, NA, NA, NA, NA, 1, NA, NA, 197, 0.0000, 55191, 358, NA,
  "Arizona", 56000, 786, 734, 82, NA, 82, 6, 4364, NA, NA, 62054, 0.0223, 7151502, 868, 455,
  "Arkansas", 25500, 459, NA, NA, 9, 123, 55, 2231, NA, NA, 28377, 0.0114, 3011524, 942, 585,
  "California", 196100, 4239, NA, 937, NA, 1369, 1256, 13015, NA, NA, 216916, 0.0665, 39538223, 549, 441,
  "Colorado", 32500, 837, 65, NA, 29, 121, 164, 1761, NA, NA, 35477, 0.0090, 5773714, 614, 381,
  "Connecticut", 12800, 81, NA, NA, NA, 118, 52, 1174, NA, NA, 14225, 0.0060, 3605944, 394, 184,
  "Delaware", 5700, 105, NA, NA, 6, 7, 14, 411, NA, NA, 6243, 0.0021, 989948, 631, 423,
  "District of Columbia", 1800, 87, NA, NA, 57, 102, 25, 4129, NA, NA, 6200, 0.0211, 689545, 899, 1049,
  "Florida", 151600, 2301, NA, 543, NA, 462, 1062, 15324, NA, NA, 171292, 0.0783, 21538187, 795, 378,
  "Georgia (State)", 94800, 1317, NA, NA, 27, 163, 213, 7202, NA, NA, 103722, 0.0368, 10711908, 968, 341,
  "Guam", 559, NA, NA, NA, NA, NA, NA, 78, NA, NA, 637, 0.0004, 159385, 400, NA,
  "Hawaii", 5300, 39, NA, NA, 20, 38, 75, 920, NA, NA, 6392, 0.0047, 1455271, 439, 286,
  "Idaho", 12700, 438, 48, NA, NA, 1, 14, 802, NA, NA, 14003, 0.0041, 1839106, 761, 224,
  "Illinois", 54700, 912, NA, 553, NA, 359, 301, 6909, NA, NA, 63734, 0.0353, 12812508, 497, 407,
  "Indiana", 47300, 1329, NA, NA, NA, 9, 140, 3151, NA, NA, 51929, 0.0161, 6785528, 765, 371,
  "Iowa", 14300, 627, NA, 150, NA, 0, NA, 3503, NA, NA, 18580, 0.0179, 3190369, 582, 267,
  "Kansas", 18000, 402, NA, 266, 19, 25, 46, 1742, NA, NA, 20500, 0.0089, 2937880, 698, 411,
  "Kentucky", 38400, 453, NA, NA, 52, NA, NA, 3014, NA, NA, 41919, 0.0154, 4505836, 930, 217,
  "Louisiana", 47600, 711, NA, NA, NA, NA, NA, 2623, NA, NA, 50934, 0.0134, 4657757, 1094, 549,
  "Maine", 3800, 48, NA, NA, 5, 38, 10, 568, NA, NA, 4469, 0.0029, 1362359, 328, 115,
  "Maryland", 27100, 510, NA, NA, 259, 232, 224, 4482, NA, NA, 32807, 0.0229, 6177224, 531, 454,
  "Massachusetts", 17200, 288, NA, 161, 72, 17, 54, 1546, NA, NA, 19338, 0.0079, 7029917, 275, 328,
  "Michigan", 53900, 1473, 9, NA, NA, 124, 85, 4756, NA, NA, 60347, 0.0243, 10077331, 599, 437,
  "Minnesota", 15900, 531, 64, 742, NA, 39, 98, 2153, NA, NA, 19527, 0.0110, 5706494, 342, 236,
  "Mississippi", 28300, 204, 62, NA, 15, 12, 3, 1938, NA, NA, 30534, 0.0099, 2961279, 1031, 278,
  "Missouri", 37600, 798, NA, 265, 4, 217, 184, 6165, NA, NA, 45233, 0.0315, 6154913, 735, 495,
  "Montana", 6900, 117, 242, NA, 5, 49, 5, 1233, NA, NA, 8551, 0.0063, 1084225, 789, 405,
  "Nebraska", 9800, 435, 14, 158, 1, 33, 10, 1331, NA, NA, 11782, 0.0068, 1961504, 601, 301,
  "Nevada", 20000, 540, 19, NA, 12, 2, 44, 1507, NA, NA, 22124, 0.0077, 3104614, 713, 494,
  "New Hampshire", 4000, 42, NA, 1, NA, NA, NA, 470, NA, NA, 4513, 0.0024, 1377529, 328, 153,
  "New Jersey", 27900, 507, NA, 458, 29, 249, 100, 2407, NA, NA, 31650, 0.0123, 9288994, 341, 207,
  "New Mexico", 13300, 288, 165, NA, 2, NA, 35, 1742, NA, NA, 15532, 0.0089, 2117522, 733, 832,
  "New York", 64000, 891, NA, 375, NA, 503, 290, 9962, NA, NA, 76021, 0.0509, 20201249, 376, 359,
  "North Carolina", 54400, 474, NA, NA, 6, 50, 151, 9296, NA, NA, 64377, 0.0475, 10439388, 617, 372,
  "North Dakota", 3300, 90, 179, 51, NA, NA, NA, 920, NA, NA, 4540, 0.0047, 779094, 583, 285,
  "Northern Mariana Islands", 175, NA, NA, NA, NA, NA, NA, 20, NA, NA, 195, 0.0001, 53883, 361, NA,
  "Ohio", 70900, 1815, NA, NA, 24, 266, 182, 4540, NA, NA, 77727, 0.0232, 11799448, 659, 293,
  "Oklahoma", 36300, 441, 7, NA, 13, 66, 91, 2407, NA, NA, 39325, 0.0123, 3959353, 993, 432,
  "Oregon", 21000, 696, 37, NA, NA, 290, 134, 1350, NA, NA, 23507, 0.0069, 4237256, 555, 284,
  "Pennsylvania", 77000, 2307, NA, 55, 121, 15, 87, 6126, NA, NA, 85711, 0.0313, 13002700, 659, 306,
  "Puerto Rico", 5610, NA, NA, NA, NA, NA, NA, 4403, NA, NA, 10013, 0.0225, 3285874, 305, 203,
  "Rhode Island", 2700, 138, NA, NA, NA, NA, NA, 333, NA, NA, 3171, 0.0017, 1097379, 289, 221,
  "South Carolina", 29700, 501, NA, 208, 8, 54, 166, 4071, NA, NA, 34708, 0.0208, 5118425, 678, 511,
  "South Dakota", 5800, 150, 212, NA, NA, NA, 6, 1135, NA, NA, 7303, 0.0058, 886667, 824, 399,
  "Tennessee", 50500, 423, NA, NA, 29, 42, 36, 6869, NA, NA, 57899, 0.0351, 6910840, 838, 595,
  "Texas", 215100, 4194, NA, 368, 1, 222, 889, 24111, NA, NA, 244885, 0.1232, 29145505, 840, 419,
  "Utah", 12500, 351, NA, NA, NA, 11, 86, 1272, NA, NA, 14220, NA, 3271616, 435, 236,
  "Vermont", 1600, 15, NA, NA, NA, NA, NA, 235, NA, NA, 1850, 0.0065, 643077, 288, 202,
  "Virgin Islands", 371, NA, NA, NA, NA, NA, NA, 176, NA, NA, 547, 0.0012, 106405, 514, NA,
  "Virginia", 57700, 951, NA, 431, 13, 267, 129, 5167, NA, NA, 64658, 0.0009, 8631393, 749, 208,
  "Washington", 30800, 690, 428, 175, 15, 232, 156, 2525, NA, NA, 35021, 0.0264, 7705281, 455, 294,
  "West Virginia", 11000, 576, NA, NA, NA, 52, 80, 1409, NA, NA, 13117, 0.0129, 1793716, 731, 317,
  "Wisconsin", 36200, 588, 51, 271, 1, 194, 43, 1742, NA, NA, 39090, 0.0072, 5893718, 663, 293,
  "Wyoming", 4000, 183, 48, NA, 15, 11, NA, 646, NA, NA, 4903, 0.0089, 576851, 850, 217,
  "U.S. Total", 1918911, 37529, 2790, 6250, 870, 6438, 6809, 217407, 27292, 1214, 2225510, NA, 335110019, 664, 379
)

A nicer view of the data:

# Render the state-level table as a styled HTML table inside a fixed-height
# scroll box so the wide, long table stays readable in the knitted document.
incarceration_data %>%
  kable() %>%
  kable_styling(
    font_size = 12,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "500px", width = "100%")
State Combined_prison_and_local_jail Youth Indian_Country_jails Civil_commitment Pre_trial_evaluation Not_guilty_by_reason_of_insanity Incompetent_to_stand_trial Federal Immigration Military Total_incarceration Federal_origin_percentage State_population Incarceration_rate Violent_crime_rate
Alabama 42200 915 NA NA NA 171 NA 3855 NA NA 47141 0.0197 5024279 938 511
Alaska 4500 243 1 NA 1 1 8 509 NA NA 5263 0.0026 733391 718 867
American Samoa 196 NA NA NA NA NA NA 1 NA NA 197 0.0000 55191 358 NA
Arizona 56000 786 734 82 NA 82 6 4364 NA NA 62054 0.0223 7151502 868 455
Arkansas 25500 459 NA NA 9 123 55 2231 NA NA 28377 0.0114 3011524 942 585
California 196100 4239 NA 937 NA 1369 1256 13015 NA NA 216916 0.0665 39538223 549 441
Colorado 32500 837 65 NA 29 121 164 1761 NA NA 35477 0.0090 5773714 614 381
Connecticut 12800 81 NA NA NA 118 52 1174 NA NA 14225 0.0060 3605944 394 184
Delaware 5700 105 NA NA 6 7 14 411 NA NA 6243 0.0021 989948 631 423
District of Columbia 1800 87 NA NA 57 102 25 4129 NA NA 6200 0.0211 689545 899 1049
Florida 151600 2301 NA 543 NA 462 1062 15324 NA NA 171292 0.0783 21538187 795 378
Georgia (State) 94800 1317 NA NA 27 163 213 7202 NA NA 103722 0.0368 10711908 968 341
Guam 559 NA NA NA NA NA NA 78 NA NA 637 0.0004 159385 400 NA
Hawaii 5300 39 NA NA 20 38 75 920 NA NA 6392 0.0047 1455271 439 286
Idaho 12700 438 48 NA NA 1 14 802 NA NA 14003 0.0041 1839106 761 224
Illinois 54700 912 NA 553 NA 359 301 6909 NA NA 63734 0.0353 12812508 497 407
Indiana 47300 1329 NA NA NA 9 140 3151 NA NA 51929 0.0161 6785528 765 371
Iowa 14300 627 NA 150 NA 0 NA 3503 NA NA 18580 0.0179 3190369 582 267
Kansas 18000 402 NA 266 19 25 46 1742 NA NA 20500 0.0089 2937880 698 411
Kentucky 38400 453 NA NA 52 NA NA 3014 NA NA 41919 0.0154 4505836 930 217
Louisiana 47600 711 NA NA NA NA NA 2623 NA NA 50934 0.0134 4657757 1094 549
Maine 3800 48 NA NA 5 38 10 568 NA NA 4469 0.0029 1362359 328 115
Maryland 27100 510 NA NA 259 232 224 4482 NA NA 32807 0.0229 6177224 531 454
Massachusetts 17200 288 NA 161 72 17 54 1546 NA NA 19338 0.0079 7029917 275 328
Michigan 53900 1473 9 NA NA 124 85 4756 NA NA 60347 0.0243 10077331 599 437
Minnesota 15900 531 64 742 NA 39 98 2153 NA NA 19527 0.0110 5706494 342 236
Mississippi 28300 204 62 NA 15 12 3 1938 NA NA 30534 0.0099 2961279 1031 278
Missouri 37600 798 NA 265 4 217 184 6165 NA NA 45233 0.0315 6154913 735 495
Montana 6900 117 242 NA 5 49 5 1233 NA NA 8551 0.0063 1084225 789 405
Nebraska 9800 435 14 158 1 33 10 1331 NA NA 11782 0.0068 1961504 601 301
Nevada 20000 540 19 NA 12 2 44 1507 NA NA 22124 0.0077 3104614 713 494
New Hampshire 4000 42 NA 1 NA NA NA 470 NA NA 4513 0.0024 1377529 328 153
New Jersey 27900 507 NA 458 29 249 100 2407 NA NA 31650 0.0123 9288994 341 207
New Mexico 13300 288 165 NA 2 NA 35 1742 NA NA 15532 0.0089 2117522 733 832
New York 64000 891 NA 375 NA 503 290 9962 NA NA 76021 0.0509 20201249 376 359
North Carolina 54400 474 NA NA 6 50 151 9296 NA NA 64377 0.0475 10439388 617 372
North Dakota 3300 90 179 51 NA NA NA 920 NA NA 4540 0.0047 779094 583 285
Northern Mariana Islands 175 NA NA NA NA NA NA 20 NA NA 195 0.0001 53883 361 NA
Ohio 70900 1815 NA NA 24 266 182 4540 NA NA 77727 0.0232 11799448 659 293
Oklahoma 36300 441 7 NA 13 66 91 2407 NA NA 39325 0.0123 3959353 993 432
Oregon 21000 696 37 NA NA 290 134 1350 NA NA 23507 0.0069 4237256 555 284
Pennsylvania 77000 2307 NA 55 121 15 87 6126 NA NA 85711 0.0313 13002700 659 306
Puerto Rico 5610 NA NA NA NA NA NA 4403 NA NA 10013 0.0225 3285874 305 203
Rhode Island 2700 138 NA NA NA NA NA 333 NA NA 3171 0.0017 1097379 289 221
South Carolina 29700 501 NA 208 8 54 166 4071 NA NA 34708 0.0208 5118425 678 511
South Dakota 5800 150 212 NA NA NA 6 1135 NA NA 7303 0.0058 886667 824 399
Tennessee 50500 423 NA NA 29 42 36 6869 NA NA 57899 0.0351 6910840 838 595
Texas 215100 4194 NA 368 1 222 889 24111 NA NA 244885 0.1232 29145505 840 419
Utah 12500 351 NA NA NA 11 86 1272 NA NA 14220 NA 3271616 435 236
Vermont 1600 15 NA NA NA NA NA 235 NA NA 1850 0.0065 643077 288 202
Virgin Islands 371 NA NA NA NA NA NA 176 NA NA 547 0.0012 106405 514 NA
Virginia 57700 951 NA 431 13 267 129 5167 NA NA 64658 0.0009 8631393 749 208
Washington 30800 690 428 175 15 232 156 2525 NA NA 35021 0.0264 7705281 455 294
West Virginia 11000 576 NA NA NA 52 80 1409 NA NA 13117 0.0129 1793716 731 317
Wisconsin 36200 588 51 271 1 194 43 1742 NA NA 39090 0.0072 5893718 663 293
Wyoming 4000 183 48 NA 15 11 NA 646 NA NA 4903 0.0089 576851 850 217
U.S. Total 1918911 37529 2790 6250 870 6438 6809 217407 27292 1214 2225510 NA 335110019 664 379

Global data (2021)

# Reproduce the country-level data frame by hand with tribble(): one row per
# country or sub-national jurisdiction (e.g. the three "UK: ..." rows), giving
# its incarceration rate and, where available, its violent crime rate.
# NA marks a violent crime rate that is not available.
# The "United States" row carries the same two figures (664, 379) as the
# "U.S. Total" row of incarceration_data.
# NOTE(review): rates are presumably per 100,000 population -- confirm against
# the source.
global_data <- tribble(
  ~Country, ~Incarceration_rate, ~Violent_crime_rate,
  "Afghanistan", 87, NA,
  "Albania", 164, 17,
  "Algeria", 153, 199,
  "Angola", 89, NA,
  "Argentina", 230, 1284,
  "Armenia", 74, 19,
  "Australia", 160, 438,
  "Austria", 95, 124,
  "Azerbaijan", 208, 11,
  "Bahrain", 234, NA,
  "Bangladesh", 48, NA,
  "Belarus", 345, 42,
  "Belgium", 93, 838,
  "Benin", 73, NA,
  "Bhutan", 145, 145,
  "Bolivia", 154, 265,
  "Bosnia and Herzegovina: Federation", 83, NA,
  "Bosnia and Herzegovina: Republika Srpska", 46, NA,
  "Botswana", 162, 1035,
  "Brazil", 357, 1137,
  "Bulgaria", 109, 72,
  "Burkina Faso", 37, NA,
  "Burundi", 82, 70,
  "Cambodia", 233, NA,
  "Cameroon", 85, 41,
  "Canada", 104, 310,
  "Cape Verde (Cabo Verde)", 296, 1892,
  "Central African Republic", 16, NA,
  "Chad", 59, NA,
  "Chile", 211, 767,
  "China", 121, NA,
  "Colombia", 192, 583,
  "Comoros", 37, NA,
  "Congo (Republic of)", 27, NA,
  "Costa Rica", 374, 1945,
  "Cote d'Ivoire", 82, NA,
  "Croatia", 84, 52,
  "Cuba", 510, NA,
  "Cyprus (Republic of)", 93, 26,
  "Czech Republic", 177, 74,
  "Democratic Republic of Congo", 29, NA,
  "Denmark", 72, 154,
  "Djibouti", 71, NA,
  "Dominican Republic", 239, 170,
  "Ecuador", 224, 582,
  "Egypt", 118, 6,
  "El Salvador", 562, 239,
  "Equatorial Guinea", 63, NA,
  "Estonia", 173, 43,
  "Eswatini", 277, NA,
  "Ethiopia", 99, NA,
  "Fiji", 274, NA,
  "Finland", 53, 115,
  "France", 93, 596,
  "Gabon", 241, NA,
  "Gambia", 31, NA,
  "Georgia (Country)", 247, NA,
  "Germany", 69, 256,
  "Ghana", 42, NA,
  "Greece", 107, 60,
  "Guatemala", 139, 121,
  "Guinea (Republic of)", 28, NA,
  "Guinea Bissau", 10, NA,
  "Guyana", 239, 479,
  "Haiti", 100, NA,
  "Honduras", 234, 197,
  "Hong Kong (China)", 95, 126,
  "Hungary", 171, 145,
  "India", 35, 41,
  "Indonesia", 100, 11,
  "Iran", 228, NA,
  "Iraq", 126, NA,
  "Ireland, Republic of", 77, 186,
  "Israel", 234, 674,
  "Italy", 89, 169,
  "Jamaica", 137, 241,
  "Japan", 38, 27,
  "Jordan", 198, NA,
  "Kazakhstan", 157, 80,
  "Kenya", 157, 53,
  "Kosovo/Kosova", 81, NA,
  "Kuwait", 93, NA,
  "Kyrgyzstan", 111, NA,
  "Laos", 161, NA,
  "Latvia", 130, 82,
  "Lebanon", 175, 163,
  "Lesotho", 118, NA,
  "Liberia", 92, NA,
  "Libya", 51, NA,
  "Lithuania", 139, 57,
  "Luxembourg", 86, 237,
  "Macau (China)", 232, 308,
  "Madagascar", 99, NA,
  "Malawi", 71, NA,
  "Malaysia", 212, NA,
  "Mali", 33, NA,
  "Mauritania", 53, NA,
  "Mauritius", 203, 124,
  "Mexico", 166, 282,
  "Moldova (Republic of)", 182, NA,
  "Mongolia", 154, 71,
  "Montenegro", 172, 48,
  "Morocco", 238, 479,
  "Mozambique", 63, NA,
  "Myanmar (Burma)", 171, 12,
  "Namibia", 295, NA,
  "Nepal", 86, NA,
  "Netherlands", 63, 104,
  "New Zealand", 188, 727,
  "Nicaragua", 332, 873,
  "Niger", 47, NA,
  "Nigeria", 31, 46,
  "North Macedonia", 105, NA,
  "Norway", 54, 162,
  "Oman", 45, 5,
  "Pakistan", 40, 25,
  "Panama", 420, 448,
  "Papua New Guinea", 62, NA,
  "Paraguay", 241, 111,
  "Peru", 269, 345,
  "Philippines", 200, NA,
  "Poland", 188, 50,
  "Portugal", 111, 147,
  "Qatar", 53, NA,
  "Republic of (South) Korea", 105, 137,
  "Reunion (France)", 118, NA,
  "Romania", 118, 27,
  "Russian Federation", 329, 42,
  "Rwanda", 515, 75,
  "Saudi Arabia", 207, 16,
  "Senegal", 68, 21,
  "Serbia", 152, 40,
  "Sierra Leone", 47, NA,
  "Singapore", 185, 39,
  "Slovakia", 189, 53,
  "Slovenia", 56, 100,
  "Solomon Islands", 77, NA,
  "South Africa", 248, 750,
  "South Sudan", 50, NA,
  "Spain", 122, 208,
  "Sri Lanka", 135, 63,
  "Sudan", 52, NA,
  "Suriname", 183, NA,
  "Sweden", 68, 326,
  "Switzerland", 73, 59,
  "Syria", 60, NA,
  "Taiwan", 243, NA,
  "Tajikistan", 83, 57,
  "Tanzania", 59, 30,
  "Thailand", 445, 30,
  "Timor-Leste (East Timor)", 54, NA,
  "Togo", 50, NA,
  "Trinidad and Tobago", 276, 305,
  "Tunisia", 194, NA,
  "Turkey", 335, 167,
  "Turkmenistan", 552, NA,
  "Uganda", 142, 90,
  "UK: England & Wales", 130, NA,
  "UK: Northern Ireland", 72, NA,
  "UK: Scotland", 136, NA,
  "Ukraine", 129, 59,
  "United Arab Emirates", 144, NA,
  "United States", 664, 379,
  "Uruguay", 372, 608,
  "Uzbekistan", 68, NA,
  "Venezuela", 134, NA,
  "Vietnam", 128, NA,
  "Yemen", 53, NA,
  "Zambia", 123, NA,
  "Zimbabwe", 127, NA
)

A nicer view of the data:

# Render the country-level table as a styled HTML table inside a fixed-height
# scroll box.
global_data %>%
  kable() %>%
  kable_styling(
    font_size = 12,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "500px", width = "100%")
Country Incarceration_rate Violent_crime_rate
Afghanistan 87 NA
Albania 164 17
Algeria 153 199
Angola 89 NA
Argentina 230 1284
Armenia 74 19
Australia 160 438
Austria 95 124
Azerbaijan 208 11
Bahrain 234 NA
Bangladesh 48 NA
Belarus 345 42
Belgium 93 838
Benin 73 NA
Bhutan 145 145
Bolivia 154 265
Bosnia and Herzegovina: Federation 83 NA
Bosnia and Herzegovina: Republika Srpska 46 NA
Botswana 162 1035
Brazil 357 1137
Bulgaria 109 72
Burkina Faso 37 NA
Burundi 82 70
Cambodia 233 NA
Cameroon 85 41
Canada 104 310
Cape Verde (Cabo Verde) 296 1892
Central African Republic 16 NA
Chad 59 NA
Chile 211 767
China 121 NA
Colombia 192 583
Comoros 37 NA
Congo (Republic of) 27 NA
Costa Rica 374 1945
Cote d’Ivoire 82 NA
Croatia 84 52
Cuba 510 NA
Cyprus (Republic of) 93 26
Czech Republic 177 74
Democratic Republic of Congo 29 NA
Denmark 72 154
Djibouti 71 NA
Dominican Republic 239 170
Ecuador 224 582
Egypt 118 6
El Salvador 562 239
Equatorial Guinea 63 NA
Estonia 173 43
Eswatini 277 NA
Ethiopia 99 NA
Fiji 274 NA
Finland 53 115
France 93 596
Gabon 241 NA
Gambia 31 NA
Georgia (Country) 247 NA
Germany 69 256
Ghana 42 NA
Greece 107 60
Guatemala 139 121
Guinea (Republic of) 28 NA
Guinea Bissau 10 NA
Guyana 239 479
Haiti 100 NA
Honduras 234 197
Hong Kong (China) 95 126
Hungary 171 145
India 35 41
Indonesia 100 11
Iran 228 NA
Iraq 126 NA
Ireland, Republic of 77 186
Israel 234 674
Italy 89 169
Jamaica 137 241
Japan 38 27
Jordan 198 NA
Kazakhstan 157 80
Kenya 157 53
Kosovo/Kosova 81 NA
Kuwait 93 NA
Kyrgyzstan 111 NA
Laos 161 NA
Latvia 130 82
Lebanon 175 163
Lesotho 118 NA
Liberia 92 NA
Libya 51 NA
Lithuania 139 57
Luxembourg 86 237
Macau (China) 232 308
Madagascar 99 NA
Malawi 71 NA
Malaysia 212 NA
Mali 33 NA
Mauritania 53 NA
Mauritius 203 124
Mexico 166 282
Moldova (Republic of) 182 NA
Mongolia 154 71
Montenegro 172 48
Morocco 238 479
Mozambique 63 NA
Myanmar (Burma) 171 12
Namibia 295 NA
Nepal 86 NA
Netherlands 63 104
New Zealand 188 727
Nicaragua 332 873
Niger 47 NA
Nigeria 31 46
North Macedonia 105 NA
Norway 54 162
Oman 45 5
Pakistan 40 25
Panama 420 448
Papua New Guinea 62 NA
Paraguay 241 111
Peru 269 345
Philippines 200 NA
Poland 188 50
Portugal 111 147
Qatar 53 NA
Republic of (South) Korea 105 137
Reunion (France) 118 NA
Romania 118 27
Russian Federation 329 42
Rwanda 515 75
Saudi Arabia 207 16
Senegal 68 21
Serbia 152 40
Sierra Leone 47 NA
Singapore 185 39
Slovakia 189 53
Slovenia 56 100
Solomon Islands 77 NA
South Africa 248 750
South Sudan 50 NA
Spain 122 208
Sri Lanka 135 63
Sudan 52 NA
Suriname 183 NA
Sweden 68 326
Switzerland 73 59
Syria 60 NA
Taiwan 243 NA
Tajikistan 83 57
Tanzania 59 30
Thailand 445 30
Timor-Leste (East Timor) 54 NA
Togo 50 NA
Trinidad and Tobago 276 305
Tunisia 194 NA
Turkey 335 167
Turkmenistan 552 NA
Uganda 142 90
UK: England & Wales 130 NA
UK: Northern Ireland 72 NA
UK: Scotland 136 NA
Ukraine 129 59
United Arab Emirates 144 NA
United States 664 379
Uruguay 372 608
Uzbekistan 68 NA
Venezuela 134 NA
Vietnam 128 NA
Yemen 53 NA
Zambia 123 NA
Zimbabwe 127 NA

Merging data

# Give both tables a shared `Location` key (a copy of Country / State); the
# original name columns are kept alongside it.
global_data <- mutate(global_data, Location = Country)
incarceration_data <- mutate(incarceration_data, Location = State)

# Full-join on (Incarceration_rate, Location), then order from the highest to
# the lowest incarceration rate. No Location value occurs in both tables, so
# every row of each table is kept; Incarceration_rate stays a single column
# while Violent_crime_rate is split into `.x` (from global_data) and `.y`
# (from incarceration_data).
combined_data <- full_join(
  global_data,
  incarceration_data,
  by = c("Incarceration_rate", "Location")
) %>%
  arrange(desc(Incarceration_rate))

# One-row overview of the merged table: row count, how many rows have an
# incarceration rate, how many have a violent crime rate from either source,
# and the mean / median incarceration rate (missing values ignored).
summary_combined <- summarise(
  combined_data,
  Total_Locations = n(),
  Locations_with_Incarceration_Rate = sum(!is.na(Incarceration_rate)),
  Locations_with_Violent_Crime_Rate = sum(
    !(is.na(Violent_crime_rate.x) & is.na(Violent_crime_rate.y))
  ),
  Avg_Incarceration_Rate = mean(Incarceration_rate, na.rm = TRUE),
  Median_Incarceration_Rate = median(Incarceration_rate, na.rm = TRUE)
)

# Show the overview
print(summary_combined)
## # A tibble: 1 × 5
##   Total_Locations Locations_with_Incarceration_Rate Locations_with_Violent_Cri…¹
##             <int>                             <int>                        <int>
## 1             227                               227                          148
## # ℹ abbreviated name: ¹​Locations_with_Violent_Crime_Rate
## # ℹ 2 more variables: Avg_Incarceration_Rate <dbl>,
## #   Median_Incarceration_Rate <dbl>

A nicer view of the data:

# Create the kable table
combined_table <- combined_data %>%
  # Select and rename columns for better readability
  select(
    Location, 
    Incarceration_rate, 
    Violent_crime_rate_Global = Violent_crime_rate.x,
    Violent_crime_rate_US = Violent_crime_rate.y
  ) %>%
  # Create the kable table
  kable(
    format = "html", 
    caption = "Combined Incarceration and Violent Crime Rates",
    col.names = c("Location", "Incarceration Rate", 
                   "Global Violent Crime Rate", 
                   "US Violent Crime Rate")
  ) %>%
  # Add styling
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  # Add a scroll box if the table is large
  scroll_box(width = "100%", height = "500px")

# Display the table
combined_table
Combined Incarceration and Violent Crime Rates
Location Incarceration Rate Global Violent Crime Rate US Violent Crime Rate
Louisiana 1094 NA 549
Mississippi 1031 NA 278
Oklahoma 993 NA 432
Georgia (State) 968 NA 341
Arkansas 942 NA 585
Alabama 938 NA 511
Kentucky 930 NA 217
District of Columbia 899 NA 1049
Arizona 868 NA 455
Wyoming 850 NA 217
Texas 840 NA 419
Tennessee 838 NA 595
South Dakota 824 NA 399
Florida 795 NA 378
Montana 789 NA 405
Indiana 765 NA 371
Idaho 761 NA 224
Virginia 749 NA 208
Missouri 735 NA 495
New Mexico 733 NA 832
West Virginia 731 NA 317
Alaska 718 NA 867
Nevada 713 NA 494
Kansas 698 NA 411
South Carolina 678 NA 511
United States 664 379 NA
U.S. Total 664 NA 379
Wisconsin 663 NA 293
Ohio 659 NA 293
Pennsylvania 659 NA 306
Delaware 631 NA 423
North Carolina 617 NA 372
Colorado 614 NA 381
Nebraska 601 NA 301
Michigan 599 NA 437
North Dakota 583 NA 285
Iowa 582 NA 267
El Salvador 562 239 NA
Oregon 555 NA 284
Turkmenistan 552 NA NA
California 549 NA 441
Maryland 531 NA 454
Rwanda 515 75 NA
Virgin Islands 514 NA NA
Cuba 510 NA NA
Illinois 497 NA 407
Washington 455 NA 294
Thailand 445 30 NA
Hawaii 439 NA 286
Utah 435 NA 236
Panama 420 448 NA
Guam 400 NA NA
Connecticut 394 NA 184
New York 376 NA 359
Costa Rica 374 1945 NA
Uruguay 372 608 NA
Northern Mariana Islands 361 NA NA
American Samoa 358 NA NA
Brazil 357 1137 NA
Belarus 345 42 NA
Minnesota 342 NA 236
New Jersey 341 NA 207
Turkey 335 167 NA
Nicaragua 332 873 NA
Russian Federation 329 42 NA
Maine 328 NA 115
New Hampshire 328 NA 153
Puerto Rico 305 NA 203
Cape Verde (Cabo Verde) 296 1892 NA
Namibia 295 NA NA
Rhode Island 289 NA 221
Vermont 288 NA 202
Eswatini 277 NA NA
Trinidad and Tobago 276 305 NA
Massachusetts 275 NA 328
Fiji 274 NA NA
Peru 269 345 NA
South Africa 248 750 NA
Georgia (Country) 247 NA NA
Taiwan 243 NA NA
Gabon 241 NA NA
Paraguay 241 111 NA
Dominican Republic 239 170 NA
Guyana 239 479 NA
Morocco 238 479 NA
Bahrain 234 NA NA
Honduras 234 197 NA
Israel 234 674 NA
Cambodia 233 NA NA
Macau (China) 232 308 NA
Argentina 230 1284 NA
Iran 228 NA NA
Ecuador 224 582 NA
Malaysia 212 NA NA
Chile 211 767 NA
Azerbaijan 208 11 NA
Saudi Arabia 207 16 NA
Mauritius 203 124 NA
Philippines 200 NA NA
Jordan 198 NA NA
Tunisia 194 NA NA
Colombia 192 583 NA
Slovakia 189 53 NA
New Zealand 188 727 NA
Poland 188 50 NA
Singapore 185 39 NA
Suriname 183 NA NA
Moldova (Republic of) 182 NA NA
Czech Republic 177 74 NA
Lebanon 175 163 NA
Estonia 173 43 NA
Montenegro 172 48 NA
Hungary 171 145 NA
Myanmar (Burma) 171 12 NA
Mexico 166 282 NA
Albania 164 17 NA
Botswana 162 1035 NA
Laos 161 NA NA
Australia 160 438 NA
Kazakhstan 157 80 NA
Kenya 157 53 NA
Bolivia 154 265 NA
Mongolia 154 71 NA
Algeria 153 199 NA
Serbia 152 40 NA
Bhutan 145 145 NA
United Arab Emirates 144 NA NA
Uganda 142 90 NA
Guatemala 139 121 NA
Lithuania 139 57 NA
Jamaica 137 241 NA
UK: Scotland 136 NA NA
Sri Lanka 135 63 NA
Venezuela 134 NA NA
Latvia 130 82 NA
UK: England & Wales 130 NA NA
Ukraine 129 59 NA
Vietnam 128 NA NA
Zimbabwe 127 NA NA
Iraq 126 NA NA
Zambia 123 NA NA
Spain 122 208 NA
China 121 NA NA
Egypt 118 6 NA
Lesotho 118 NA NA
Reunion (France) 118 NA NA
Romania 118 27 NA
Kyrgyzstan 111 NA NA
Portugal 111 147 NA
Bulgaria 109 72 NA
Greece 107 60 NA
North Macedonia 105 NA NA
Republic of (South) Korea 105 137 NA
Canada 104 310 NA
Haiti 100 NA NA
Indonesia 100 11 NA
Ethiopia 99 NA NA
Madagascar 99 NA NA
Austria 95 124 NA
Hong Kong (China) 95 126 NA
Belgium 93 838 NA
Cyprus (Republic of) 93 26 NA
France 93 596 NA
Kuwait 93 NA NA
Liberia 92 NA NA
Angola 89 NA NA
Italy 89 169 NA
Afghanistan 87 NA NA
Luxembourg 86 237 NA
Nepal 86 NA NA
Cameroon 85 41 NA
Croatia 84 52 NA
Bosnia and Herzegovina: Federation 83 NA NA
Tajikistan 83 57 NA
Burundi 82 70 NA
Cote d’Ivoire 82 NA NA
Kosovo/Kosova 81 NA NA
Ireland, Republic of 77 186 NA
Solomon Islands 77 NA NA
Armenia 74 19 NA
Benin 73 NA NA
Switzerland 73 59 NA
Denmark 72 154 NA
UK: Northern Ireland 72 NA NA
Djibouti 71 NA NA
Malawi 71 NA NA
Germany 69 256 NA
Senegal 68 21 NA
Sweden 68 326 NA
Uzbekistan 68 NA NA
Equatorial Guinea 63 NA NA
Mozambique 63 NA NA
Netherlands 63 104 NA
Papua New Guinea 62 NA NA
Syria 60 NA NA
Chad 59 NA NA
Tanzania 59 30 NA
Slovenia 56 100 NA
Norway 54 162 NA
Timor-Leste (East Timor) 54 NA NA
Finland 53 115 NA
Mauritania 53 NA NA
Qatar 53 NA NA
Yemen 53 NA NA
Sudan 52 NA NA
Libya 51 NA NA
South Sudan 50 NA NA
Togo 50 NA NA
Bangladesh 48 NA NA
Niger 47 NA NA
Sierra Leone 47 NA NA
Bosnia and Herzegovina: Republika Srpska 46 NA NA
Oman 45 5 NA
Ghana 42 NA NA
Pakistan 40 25 NA
Japan 38 27 NA
Burkina Faso 37 NA NA
Comoros 37 NA NA
India 35 41 NA
Mali 33 NA NA
Gambia 31 NA NA
Nigeria 31 46 NA
Democratic Republic of Congo 29 NA NA
Guinea (Republic of) 28 NA NA
Congo (Republic of) 27 NA NA
Central African Republic 16 NA NA
Guinea Bissau 10 NA NA
# Optional: Save the combined data
# write_csv(combined_data, "combined_incarceration_data.csv")

Replication plots

We create a quick replication of the plots from Prison Policy Initiative.

library(tidyverse)
library(ggplot2)

# Plot the locations with the highest incarceration rates (the top 20% of all
# rows in combined_data) as a horizontal bar chart, highlighting the U.S.
# national figure in dark red.

# Number of locations to include: 20% of the total, rounded up
num_locations <- nrow(combined_data)
top_20_percent <- ceiling(num_locations * 0.2)

# Sort by incarceration rate (descending) and keep the top 20%.
# slice_head(n = ) replaces slice(1:n) so the selection does not depend on the
# 1:0 sequence when combined_data has no rows.
top_incarceration_data <- combined_data %>%
  arrange(desc(Incarceration_rate)) %>%
  slice_head(n = top_20_percent)

# Create the plot.
# After the merge the national figure appears under two labels -- "U.S. Total"
# (from the state table) and "United States" (from the global table) -- so
# both are highlighted rather than only the first.
ggplot(
  top_incarceration_data,
  aes(
    x = reorder(Location, Incarceration_rate),
    y = Incarceration_rate,
    fill = ifelse(
      Location %in% c("U.S. Total", "United States"),
      "darkred",
      "steelblue"
    )
  )
) +
  geom_col() +
  # Value labels just past the end of each bar
  geom_text(
    aes(label = round(Incarceration_rate, 1)),
    hjust = -0.1,
    size = 3.5
  ) +
  coord_flip() +
  # The fill aesthetic already holds literal colour names
  scale_fill_identity() +
  # Extra room at the upper end of the axis so the value labels are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Top 20% Locations by Incarceration Rate",
    subtitle = "Incarceration rate per 100,000 population",
    x = "",
    y = "Incarceration Rate"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    panel.grid.major.y = element_blank()
  )

Save the plot.

# save the plot
# ggsave("top_20_percent_incarceration_rates_with_us_highlighted.png", width = 10, height = 6, dpi = 300)