Setting Paths

pckgs <- c("tidyverse", "sf", "dplyr", "tigris","lubridate","purrr","ggplot2","RColorBrewer","hrbrthemes","tidycensus")
lapply(pckgs, library, character.only = T)

# set paths
OS <- .Platform$OS.type
if (OS == "unix"){
  input_dir <- "~/Dropbox/chyn_campos_bruhn/Gangs/data"
} else {
  input_dir <- "C:/Users/at3981/Dropbox/chyn_campos_bruhn/Gangs/data"
}

First, similar to the time series figures, I collapsed the monthly crime data to the yearly level. Then, I merged in the dataset of census block gang presence and filled in any missing values if necessary.

#load LAPD crime data
lapd_crime <- read_csv(str_interp("${input_dir}/crime/LAPD/output/panel_data_crimela_1019.csv"))

#collapse monthly data to yearly data.
lapd_crime_yearly <- lapd_crime %>% group_by(census_block, year) %>% 
  summarize(robbery = sum(robbery), homicide = sum(homicide), burglary = sum(burglary),
            shots_fired = sum(shots_fired), assault = sum(assault), rape = sum(rape), 
            theft = sum(theft), reckless_driving = sum(reckless_driving),
            vandalism = sum(vandalism), kidnapping = sum(kidnapping), order_violation = sum(order_violation),
            lynching = sum(lynching), counterfeit = sum(counterfeit), vandalism = sum(vandalism)) %>% 
  ungroup() %>% arrange(census_block, year)


#fill in the missing values with the closest earlier year
#change 2009 rto 2010 so that the 2009 gang presence data can be merged in with the 2010 LAPD crime data
blocks_gang_presence <- read_csv(str_interp("${input_dir}/block_gang_presence.csv")) %>% 
  rename(census_block = block.id) %>% select(census_block, year, no_gang, gang_presence)
blocks_gang_presence2 <- blocks_gang_presence %>% mutate(year = case_when(year == 2009 ~ 2010, TRUE ~ year))

lapd_crime_gang <- lapd_crime_yearly %>% 
  left_join(blocks_gang_presence2, by = c("census_block","year"))  %>% group_by(census_block) %>% 
  fill(c(gang_presence, no_gang), .direction = "down") %>% ungroup() %>% drop_na(gang_presence)

2015 LAPD Crime Data Spatial Distribution

In this part, I kept only data of the year 2015. To visualize maps, I collapsed census blocks into census tracts (by taking the first 11 digits of the 15-digit census block id). I also caluclated the number of blocks with gang presence within each of these census tracts. Then, I merged in the shapefile for LA so that I could have the geographical information to draw the maps.

lapd_crime_gang_15 <- lapd_crime_gang %>% filter(year == 2015) %>% mutate(censustractid = substr(census_block,1,11))
lapd_crime_gang_tract15 <- lapd_crime_gang_15 %>% group_by(censustractid) %>% 
  summarise(count_robbery = sum(robbery), 
            count_homicide = sum(homicide), 
            count_burglary = sum(burglary), 
            count_shots_fired = sum(shots_fired), 
            count_assault = sum(assault),
            count_rape = sum(rape), 
            count_theft = sum(theft),  
            count_reckless_driving = sum(reckless_driving), 
            count_lynching = sum(lynching),
            count_vandalism = sum(vandalism), 
            count_order_violation = sum(order_violation), 
            count_kidnapping = sum(kidnapping),
            count_gang = sum(gang_presence)) %>% ungroup()

map_la <- read_sf(str_interp("~/Dropbox/chyn_campos_bruhn/Gangs/rawData/Census_Tracts_2020/Census_Tracts_2020.shp")) %>% 
  mutate(censustractid = paste("06037", CT20, sep=""))  

map_and_data15 <- st_as_sf(inner_join(map_la, lapd_crime_gang_tract15, by = "censustractid"))

To draw the maps, I first inspected the quantiles of a certain crime type distribution. I then relied on these quantiles to classify each observation into a certain group of color levels for the map visualization.

#quantile(map_and_data15$count_gang, probs = seq(0, 1, 0.1))
cuts_gang15 <- c(0,2,6,9,12,15,21,45)
map_and_data15["cuts_gang15"] <- cut(map_and_data15$count_gang, breaks=cuts_gang15, include.lowest=TRUE)

fig_gang15 <- ggplot(map_and_data15) + geom_sf(aes(fill = as.factor(cuts_gang15))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-2", "2-6","6-9", 
                                                 "9-12", "12-15", "15-21",
                                                 "21+")) +
  guides(fill = guide_legend(title = "Number of Census blocks with gangs in a Census Tract 2015"))

#quantile(map_and_data15$count_robbery, probs = seq(0, 1, 0.1))
cuts_robbery15 <- c(0,2,4,5,7,11,16,94)
map_and_data15["cuts_robbery15"] <- cut(map_and_data15$count_robbery, breaks=cuts_robbery15, include.lowest=TRUE)

fig_robbery15 <- ggplot(map_and_data15) + geom_sf(aes(fill = as.factor(cuts_robbery15))) + 
  scale_fill_brewer(palette = "Oranges", labels = c("0-2", "2-4","4-5", 
                                                    "5-7", "7-11", "11-16",
                                                    "16+")) +
  guides(fill = guide_legend(title = "Number of Robbery cases in a Census Tract 2015"))
fig_gang15
fig_robbery15

#quantile(map_and_data15$count_burglary, probs = seq(0, 1, 0.1))
cuts_burglary15 <- c(0,17,21,25,29,36,46,125)
map_and_data15["cuts_burglary15"] <- cut(map_and_data15$count_burglary, breaks=cuts_burglary15, include.lowest=TRUE)

fig_burglary15 <- ggplot(map_and_data15) + geom_sf(aes(fill = as.factor(cuts_burglary15))) + 
  scale_fill_brewer(palette = "Oranges", labels = c("0-17", "17-21","21-25", "25-29", "29-36", "36-46", "46+")) +
  guides(fill = guide_legend(title = "Number of Burglary cases in a Census Tract 2015"))
fig_gang15
fig_burglary15

#quantile(map_and_data15$count_assault, probs = seq(0, 1, 0.1))
cuts_assault15 <- c(0,17,22,27,33,40,51,218)
map_and_data15["cuts_assault15"] <- cut(map_and_data15$count_assault, breaks=cuts_assault15, include.lowest=TRUE)

fig_assault15 <- ggplot(map_and_data15) + geom_sf(aes(fill = as.factor(cuts_assault15))) + 
  scale_fill_brewer(palette = "Oranges", labels = c("0-17","17-22","22-27",
                                                    "27-33","33-40","40-51","51+")) +
  guides(fill = guide_legend(title = "Number of Assault cases in a Census Tract 2015"))
fig_gang15
fig_assault15

2019 LAPD Crime Data Spatial Distribution

This part repeats all of the previous steps, but with 2019 crime data.

lapd_crime_gang_19 <- lapd_crime_gang %>% filter(year == 2019) %>% mutate(censustractid = substr(census_block,1,11))
lapd_crime_gang_tract19 <- lapd_crime_gang_19 %>% group_by(censustractid) %>% 
  summarise(count_robbery = sum(robbery), 
            count_homicide = sum(homicide), 
            count_burglary = sum(burglary), 
            count_shots_fired = sum(shots_fired), 
            count_assault = sum(assault),
            count_rape = sum(rape), 
            count_theft = sum(theft),  
            count_reckless_driving = sum(reckless_driving), 
            count_lynching = sum(lynching),
            count_vandalism = sum(vandalism), 
            count_order_violation = sum(order_violation), 
            count_kidnapping = sum(kidnapping),
            count_gang = sum(gang_presence)) %>% ungroup()


map_and_data19 <- st_as_sf(inner_join(map_la, lapd_crime_gang_tract19, by = "censustractid")) 
#quantile(map_and_data19$count_gang, probs = seq(0, 1, 0.1))
cuts_gang19 <- c(0,4,7,10,12,16,22,50)
map_and_data19["cuts_gang19"] <- cut(map_and_data19$count_gang, breaks=cuts_gang19, include.lowest=TRUE)

fig_gang19 <- ggplot(map_and_data19) + geom_sf(aes(fill = as.factor(cuts_gang19))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-4", "4-7","7-10", 
                                                  "10-12", "12-16", "16-22",
                                                  "22+")) +
  guides(fill = guide_legend(title = "Number of Census blocks with gangs in a Census Tract 2019"))

#quantile(map_and_data19$count_robbery, probs = seq(0, 1, 0.1))
cuts_robbery19 <- c(0,2,4,6,8,12,20,83)
map_and_data19["cuts_robbery19"] <- cut(map_and_data19$count_robbery, breaks=cuts_robbery19, include.lowest=TRUE)

fig_robbery19 <- ggplot(map_and_data19) + geom_sf(aes(fill = as.factor(cuts_robbery19))) + 
  scale_fill_brewer(palette = "Oranges", labels = c("0-2", "2-4","4-6", 
                                                    "6-8", "8-12", "12-20",
                                                    "20+")) +
  guides(fill = guide_legend(title = "Number of Robbery cases in a Census Tract 2019"))

fig_gang19 
fig_robbery19

#quantile(map_and_data19$count_burglary, probs = seq(0, 1, 0.1))
cuts_burglary19 <- c(0,15,19,23,28,35,48,213)
map_and_data19["cuts_burglary19"] <- cut(map_and_data19$count_burglary, breaks=cuts_burglary19, include.lowest=TRUE)

fig_burglary19 <- ggplot(map_and_data19) + geom_sf(aes(fill = as.factor(cuts_burglary19))) + 
  scale_fill_brewer(palette = "Oranges", labels = c("0-15", "15-19", "19-23", "23-28",
                                                    "28-35", "35-48", "48+")) +
  guides(fill = guide_legend(title = "Number of Burglary cases in a Census Tract 2019"))

fig_gang19 
fig_burglary19

#quantile(map_and_data19$count_assault, probs = seq(0, 1, 0.1))
cuts_assault19 <- c(0,22,28,35,44,56,79,246)
map_and_data19["cuts_assault19"] <- cut(map_and_data19$count_assault, breaks=cuts_assault19, include.lowest=TRUE)

fig_assault19 <- ggplot(map_and_data19) + geom_sf(aes(fill = as.factor(cuts_assault19))) + 
  scale_fill_brewer(palette = "Oranges", labels = c("0-22", "22-28","28-35","35-44",
                                                    "44-56","56-79","79+")) +
  guides(fill = guide_legend(title = "Number of Assault cases in a Census Tract 2019"))

fig_gang19
fig_assault19

Census Tract Characteristics

To get tract-level characteristics, I used the tidycensus package to pull data from the 5-year American Community Survey data. For example, for 2010 data, the package will pull the endyear estimate from the 2006-2010 5-year ACS.

In particular, I will pull and construct variables of poverty, median income, education, unemployment, and population. For poverty rate, I divided variable “B06012_002” (Below 100 percent of the poverty level) by variable “B01003_001” (total population). For median income, I used variable “B06011_001” (Median income in the past 12 months). For education, I constructed the share of population who holds at least a college degree by summing variables “B06009_005” (Bachelor’s degree) and “B06009_006” (Graduate or professional degree), divided by the sample size of this particular eductaion variable group (variable “B06009_001”). For unemployment rate, I divided variable “C18120_006” (In the labor force - Unemployed) by variable “C18120_002” (In the labor force). Note that this employment data is only available since 2012.

######### Merge in block and tract charactertiscs from Census Data #############

census_api_key("84cbb145c91d0dec6c6d24844d6debafdcf8f911", install = TRUE, overwrite = T)
## [1] "84cbb145c91d0dec6c6d24844d6debafdcf8f911"
readRenviron("~/.Renviron")

#get cenesus tract characteristics 2010-2019

for (x in 2010:2019) {
  assign(paste("la_characteristics",x,sep = ""), get_acs(state = "CA", county = "Los Angeles", geography = "tract", 
                                                         variables = c("B01003_001","B06012_002","B06011_001","B99233_005",
                                                                       "B99233_001","B06009_001","B06009_005","B06009_006"), year = x) %>% 
           select(GEOID, variable, estimate) %>% spread(key = variable, value = estimate) %>% 
           rename(total_population = B01003_001, poverty = B06012_002, median_income = B06011_001,
                  total_education = B06009_001, college = B06009_005, college_more = B06009_006,
                  censustractid = GEOID) %>% 
           mutate(poverty_rate = poverty/total_population,
                  college_or_more = (college + college_more)/total_education, year = x) %>% 
           select(poverty_rate, median_income, college_or_more, 
                  total_population, censustractid, year))
}

#only have unemployment variables since 2012
for (x in 2012:2019) {
  assign(paste("la_unemployed",x,sep = ""), get_acs(state = "CA", county = "Los Angeles", geography = "tract", 
                                                         variables = c("C18120_002","C18120_006"), year = x) %>% 
           select(GEOID, variable, estimate) %>% spread(key = variable, value = estimate) %>% 
           rename(unemployed = C18120_006, labor_force = C18120_002, censustractid = GEOID) %>% 
           mutate(unemployed_rate = unemployed/labor_force, year = x) %>% 
           select(unemployed_rate, censustractid, year))
}

#append all the characteristics data and merge with the crime data on tract and year
la_characteristics <- rbind(la_characteristics2010, la_characteristics2011, la_characteristics2012,
                            la_characteristics2013, la_characteristics2014, la_characteristics2015,
                            la_characteristics2016, la_characteristics2017, la_characteristics2018,
                            la_characteristics2019)

la_unemployed <- rbind(la_unemployed2012, la_unemployed2013, la_unemployed2014, la_unemployed2015,
                       la_unemployed2016, la_unemployed2017, la_unemployed2018, la_unemployed2019)

lapd_crime_gang_tract <- lapd_crime_gang %>% 
  mutate(censustractid = substr(census_block,1,11)) %>% group_by(censustractid, year) %>% 
  summarise(count_robbery = sum(robbery), 
            count_homicide = sum(homicide), 
            count_burglary = sum(burglary), 
            count_shots_fired = sum(shots_fired), 
            count_assault = sum(assault),
            count_rape = sum(rape), 
            count_theft = sum(theft),  
            count_reckless_driving = sum(reckless_driving), 
            count_lynching = sum(lynching),
            count_vandalism = sum(vandalism), 
            count_order_violation = sum(order_violation), 
            count_kidnapping = sum(kidnapping),
            count_gang = sum(gang_presence)) %>% ungroup()


lapd_crime_gang_tract_geo <- st_as_sf(inner_join(lapd_crime_gang_tract, map_la, by = "censustractid")) 

lapd_crime_characteristics <- lapd_crime_gang_tract_geo %>% 
  left_join(la_characteristics, by = c("censustractid", "year")) %>% 
  left_join(la_unemployed, by = c("censustractid", "year"))

#convert back to dataframe and save the data
lapd_crime_gang_characteristics <- st_set_geometry(lapd_crime_characteristics, NULL)
write.csv(lapd_crime_gang_characteristics, file = str_interp("${input_dir}/lapd_crime_gang_characteristics.csv"))

2015 Characteristics Visualization

I extracted the 2015 data and visualized the characteristics data that I just collected.

###### 2015 characteristics ##########
#visualize 2015 data 
lapd_crime_characteristics2015 <- lapd_crime_characteristics %>% filter(year == 2015) 


#poverty
cuts_poverty15 <- quantile(lapd_crime_characteristics2015$poverty_rate , 
                           probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2015["cuts_poverty15"] <- cut(lapd_crime_characteristics2015$poverty_rate, 
                                                         breaks=cuts_poverty15, include.lowest=TRUE)
fig_poverty15 <- ggplot(lapd_crime_characteristics2015) + geom_sf(aes(fill = as.factor(cuts_poverty15))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-5.9%", "5.9-9.0%", "9.0-12.2%", "12.2-16%",
                                                  "16.0-20.1%", "20.1-24.2%", "24.2-28.3%","28.3-33.2%",
                                                  "33.2% +")) +
  guides(fill = guide_legend(title = "Poverty Rate by Census Tract - 2015"))
fig_poverty15

#median income
cuts_income15 <- quantile(lapd_crime_characteristics2015$median_income, 
                           probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2015["cuts_income15"] <- cut(lapd_crime_characteristics2015$median_income, 
                                                        breaks=cuts_income15, include.lowest=TRUE)
fig_income15 <- ggplot(lapd_crime_characteristics2015) + geom_sf(aes(fill = as.factor(cuts_income15))) + 
  scale_fill_brewer(palette = "Blues", labels = c("$4,050-$15,196", "$15,196-$16,367", "$16,367-$17,671", "$17,671-$19,300",
                                                  "$19,300-$21,167", "$21,167-$23,750", "$23,750-$28,858","$28,858-$36,220",
                                                  "$36,220 +")) +
  guides(fill = guide_legend(title = "Median Income by Census Tract - 2015"))
fig_income15

#unemployment
cuts_unemployed15 <- quantile(lapd_crime_characteristics2015$unemployed_rate, 
                          probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2015["cuts_unemployed15"] <- cut(lapd_crime_characteristics2015$unemployed_rate, 
                                                       breaks=cuts_unemployed15, include.lowest=TRUE)
fig_unemployed15 <- ggplot(lapd_crime_characteristics2015) + geom_sf(aes(fill = as.factor(cuts_unemployed15))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-5.5%", "5.5-7.1%", "7.1-8.1%", "8.1-9.0%",
                                                  "9.0-10.0%", "10-11.0%", "11.0-12.2%","12.2-13.7%",
                                                  "13.7% +")) +
  guides(fill = guide_legend(title = "Unemployment Rate by Census Tract - 2015"))
fig_unemployed15

#education
cuts_edu15 <- quantile(lapd_crime_characteristics2015$college_or_more, 
                              probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2015["cuts_edu15"] <- cut(lapd_crime_characteristics2015$college_or_more, 
                                                           breaks=cuts_edu15, include.lowest=TRUE)
fig_edu15 <- ggplot(lapd_crime_characteristics2015) + geom_sf(aes(fill = as.factor(cuts_edu15))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-5.4%", "5.4-9.0%", "9.0-13.9%", "13.9-17.9%",
                                                  "17.9-23.5%", "23.5-31.1%", "31.1-39.4%","39.4-52.7%",
                                                  "52.7% +")) +
  guides(fill = guide_legend(title = "%College deg or more by Census Tract - 2015"))
fig_edu15

#population
cuts_population15 <- quantile(lapd_crime_characteristics2015$total_population, 
                       probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2015["cuts_population15"] <- cut(lapd_crime_characteristics2015$total_population, 
                                                    breaks=cuts_population15, include.lowest=TRUE)
fig_pop15 <- ggplot(lapd_crime_characteristics2015) + geom_sf(aes(fill = as.factor(cuts_population15))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-2,445", "2,445-2,846", "2,846-3,100", "3,100-3,391",
                                                  "3,391-3,692", "3,692-3,942", "3,942-4,291", "4,291-4,691","4,691+")) +
  guides(fill = guide_legend(title = "Total population by Census Tract - 2015"))
fig_pop15

2019 Characteristics

Repeat the previous steps for the 2019 data extract.

###### 2019 characteristics ##########

#visualize 2019 data 
lapd_crime_characteristics2019 <- lapd_crime_characteristics %>% filter(year == 2019) 


#poverty
cuts_poverty19 <- quantile(lapd_crime_characteristics2019$poverty_rate , 
                           probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2019["cuts_poverty19"] <- cut(lapd_crime_characteristics2019$poverty_rate, 
                                                        breaks=cuts_poverty19, include.lowest=TRUE)
fig_poverty19 <- ggplot(lapd_crime_characteristics2019) + geom_sf(aes(fill = as.factor(cuts_poverty19))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-5.9%", "5.9-9.0%", "9.0-12.2%", "12.2-16%",
                                                  "16.0-20.1%", "20.1-24.2%", "24.2-28.3%","28.3-33.2%",
                                                  "33.2% +")) +
  guides(fill = guide_legend(title = "Poverty Rate by Census Tract - 2019"))
fig_poverty19

#median income
cuts_income19 <- quantile(lapd_crime_characteristics2019$median_income, 
                          probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2019["cuts_income19"] <- cut(lapd_crime_characteristics2019$median_income, 
                                                       breaks=cuts_income19, include.lowest=TRUE)
fig_income19 <- ggplot(lapd_crime_characteristics2019) + geom_sf(aes(fill = as.factor(cuts_income19))) + 
  scale_fill_brewer(palette = "Blues", labels = c("$4,050-$15,196", "$15,196-$16,367", "$16,367-$17,671", "$17,671-$19,300",
                                                  "$19,300-$21,167", "$21,167-$23,750", "$23,750-$28,858","$28,858-$36,220",
                                                  "$36,220 +")) +
  guides(fill = guide_legend(title = "Median Income by Census Tract - 2019"))
fig_income19

#unemployment
cuts_unemployed19 <- quantile(lapd_crime_characteristics2019$unemployed_rate, 
                              probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2019["cuts_unemployed19"] <- cut(lapd_crime_characteristics2019$unemployed_rate, 
                                                           breaks=cuts_unemployed19, include.lowest=TRUE)
fig_unemployed19 <- ggplot(lapd_crime_characteristics2019) + geom_sf(aes(fill = as.factor(cuts_unemployed19))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-5.5%", "5.5-7.1%", "7.1-8.1%", "8.1-9.0%",
                                                  "9.0-10.0%", "10-11.0%", "11.0-12.2%","12.2-13.7%",
                                                  "13.7% +")) +
  guides(fill = guide_legend(title = "Unemployment Rate by Census Tract - 2019"))
fig_unemployed19

#education
cuts_edu19 <- quantile(lapd_crime_characteristics2019$college_or_more, 
                       probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2019["cuts_edu19"] <- cut(lapd_crime_characteristics2019$college_or_more, breaks=cuts_edu19, include.lowest=TRUE)
fig_edu19 <- ggplot(lapd_crime_characteristics2019) + geom_sf(aes(fill = as.factor(cuts_edu19))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-5.4%", "5.4-9.0%", "9.0-13.9%", "13.9-17.9%",
                                                  "17.9-23.5%", "23.5-31.1%", "31.1-39.4%","39.4-52.7%",
                                                  "52.7% +")) +
  guides(fill = guide_legend(title = "%College de or more by Census Tract - 2019"))
fig_edu19

#population
cuts_population19 <- quantile(lapd_crime_characteristics2019$total_population, 
                              probs = seq(0, 1, 0.1), na.rm = T)[-10]
lapd_crime_characteristics2019["cuts_population19"] <- cut(lapd_crime_characteristics2019$total_population, 
                                                           breaks=cuts_population19, include.lowest=TRUE)
fig_pop19 <- ggplot(lapd_crime_characteristics2019) + geom_sf(aes(fill = as.factor(cuts_population19))) + 
  scale_fill_brewer(palette = "Blues", labels = c("0-2,445", "2,445-2,846", "2,846-3,100", "3,100-3,391",
                                                  "3,391-3,692", "3,692-3,942", "3,942-4,291", "4,291-4,691","4,691+")) +
  guides(fill = guide_legend(title = "Total population by Census Tract - 2019"))
fig_pop19