LAPD Crime Data Spatial Distribution

Setting Paths

pckgs <- c("tidyverse", "sf", "dplyr", "tigris","lubridate","purrr","ggplot2","RColorBrewer","hrbrthemes","tidycensus","data.table","haven")
lapply(pckgs, library, character.only = T)

# Pick the project data directory that matches the current operating system:
# the home-relative Dropbox folder on unix-alikes, the absolute C: path otherwise.
OS <- .Platform$OS.type
input_dir <- if (OS == "unix") {
  "~/Dropbox/chyn_campos_bruhn/Gangs/data"
} else {
  "C:/Users/at3981/Dropbox/chyn_campos_bruhn/Gangs/data"
}

# Load the block-level and tract-level gang presence files created in the
# previous script, keeping only the columns used below.
blocks_gang_presence <- read_csv(
  str_interp("${input_dir}/block_gang_presence_with_names.csv")
) %>%
  rename(census_block = block.id) %>%
  select(census_block, year, no_gang, gang_presence)

tract_gang_presence <- read_csv(
  str_interp("${input_dir}/tract_gang_presence_with_names.csv")
) %>%
  select(
    censustractid, year, no_gang,
    gang_list_block, blocks_with_gang, gang_presence
  )

First, similar to the time series figures, I collapsed the monthly crime data to the yearly level. Then, I merged in the dataset of census block gang presence and filled in any missing values if necessary.

To make sure that the panel data is balanced, I first created a blank “template” data frame that pairs every unique census block with each year from 2010 to 2019. Merging this template with the unbalanced crime data and filling the resulting missing values with 0 then yields a balanced panel.

# Load the LAPD crime panel (monthly records per census block).
lapd_crime <- read_csv(
  str_interp("${input_dir}/crime/LAPD/output/panel_data_crimela_1019.csv")
)

# Collapse the monthly data to one row per census block and year by summing
# each crime count. Every crime type is summed exactly once.
lapd_crime_yearly <- lapd_crime %>%
  group_by(census_block, year) %>%
  summarize(
    robbery = sum(robbery),
    homicide = sum(homicide),
    burglary = sum(burglary),
    shots_fired = sum(shots_fired),
    assault = sum(assault),
    rape = sum(rape),
    theft = sum(theft),
    reckless_driving = sum(reckless_driving),
    vandalism = sum(vandalism),
    kidnapping = sum(kidnapping),
    order_violation = sum(order_violation),
    lynching = sum(lynching),
    counterfeit = sum(counterfeit)
  ) %>%
  ungroup() %>%
  arrange(census_block, year)


# Build the balanced-panel template: every census block that appears in the
# yearly crime data, repeated once for each year in year_vec.
# The column is named directly so nothing depends on an auto-generated name.
unique_block <- data.frame(
  census_block = unique(lapd_crime_yearly$census_block)
)
year_vec <- as.numeric(2010:2019)

# Repeat each block row once per year (seq_len() is safe for zero rows),
# then attach the cycling vector of years.
unique_block10 <- unique_block %>%
  slice(rep(seq_len(n()), each = length(year_vec)))
year_vec10 <- rep(year_vec, nrow(unique_block))
unique_block10$year <- year_vec10

# Join the yearly crime counts onto the template; block-years with no match
# come back as NA and are set to 0, which gives the balanced panel.
lapd_crime_yearly_balanced <- left_join(
  unique_block10,
  lapd_crime_yearly,
  by = c("census_block", "year")
)
lapd_crime_yearly_balanced[is.na(lapd_crime_yearly_balanced)] <- 0

# Attach block-level gang presence. Within each block, missing gang values are
# carried down from the preceding rows; rows still missing gang_presence
# afterwards are dropped.
lapd_crime_gang <- lapd_crime_yearly_balanced %>%
  left_join(blocks_gang_presence, by = c("census_block", "year")) %>%
  group_by(census_block) %>%
  fill(gang_presence, no_gang, .direction = "down") %>%
  ungroup() %>%
  drop_na(gang_presence)

2016 LAPD Crime Data Spatial Distribution

In this part, I kept only data of the year 2016. To visualize maps, I collapsed census blocks into census tracts (by taking the first 11 digits of the 15-digit census block id). Then, I merged in the data on gangs within tract created in the last script. Lastly, I merged in the shapefile for LA so that I could have the geographical information to draw the maps.

# Keep 2016 only and derive the tract id from the first 11 characters of the
# census block id.
lapd_crime_gang_16 <- lapd_crime_gang %>%
  filter(year == 2016) %>%
  mutate(censustractid = substr(census_block, 1, 11))

# Sum block-level counts to the tract level; year is re-attached as a constant
# so the result can be joined to the tract gang data on (tract, year).
lapd_crime_gang_tract16 <- lapd_crime_gang_16 %>%
  group_by(censustractid) %>%
  summarise(
    count_robbery = sum(robbery),
    count_homicide = sum(homicide),
    count_burglary = sum(burglary),
    count_shots_fired = sum(shots_fired),
    count_assault = sum(assault),
    count_rape = sum(rape),
    count_theft = sum(theft),
    count_reckless_driving = sum(reckless_driving),
    count_lynching = sum(lynching),
    count_vandalism = sum(vandalism),
    count_order_violation = sum(order_violation),
    count_kidnapping = sum(kidnapping),
    year = 2016
  ) %>%
  ungroup()
lapd_crime_gang_tract16 <- lapd_crime_gang_tract16 %>%
  left_join(tract_gang_presence, by = c("censustractid", "year"))

# Locate the tract shapefile relative to input_dir (its sibling "rawData"
# folder) so the OS-specific path chosen at the top of the script is honoured
# instead of a hard-coded "~/Dropbox" location.
shapefile_path <- file.path(
  dirname(input_dir), "rawData",
  "Census_Tracts_2020", "Census_Tracts_2020.shp"
)

# Tract ids in the crime data are 11 characters, so the state+county prefix
# "06037" is prepended to the shapefile's CT20 code.
# NOTE(review): this is the 2020 tract layer while the panel covers 2010-2019;
# confirm the tract ids line up, because the inner_join below silently drops
# tracts without a match.
map_la <- read_sf(shapefile_path) %>%
  mutate(censustractid = paste0("06037", CT20))

map_and_data16 <- st_as_sf(
  inner_join(map_la, lapd_crime_gang_tract16, by = "censustractid")
)

To draw the maps, I first inspected the quantiles of a certain crime type distribution. I then relied on these quantiles to classify each observation into a certain group of color levels for the map visualization.

# Number of gangs per tract, 2016. The deciles can be inspected with:
#quantile(map_and_data16$no_gang, probs = seq(0, 1, 0.1))
# With include.lowest the bins are [-1,0], (0,1], (1,2], (2,3], (3,8],
# i.e. 0, 1, 2, 3 and 4-8 gangs.
cuts_gang16 <- c(-1, 0, 1, 2, 3, 8)
map_and_data16["cuts_gang16"] <- cut(
  map_and_data16[["no_gang"]],
  breaks = cuts_gang16,
  include.lowest = TRUE
)

fig_gang16 <- ggplot(data = map_and_data16) +
  geom_sf(mapping = aes(fill = as.factor(cuts_gang16))) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c("0", "1", "2", "3", "4-8")
  ) +
  guides(fill = guide_legend(title = "Number of Gangs in a tract"))


# Robbery counts per tract, 2016. The deciles can be inspected with:
#quantile(map_and_data16$count_robbery, probs = seq(0, 1, 0.1))
# NOTE(review): 103 is a fixed top break; cut() returns NA for any count above
# it, so confirm it still covers the data.
cuts_robbery16 <- c(0, 2, 4, 6, 8, 12, 19, 103)
map_and_data16["cuts_robbery16"] <- cut(
  map_and_data16[["count_robbery"]],
  breaks = cuts_robbery16,
  include.lowest = TRUE
)

fig_robbery16 <- ggplot(data = map_and_data16) +
  geom_sf(mapping = aes(fill = as.factor(cuts_robbery16))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c("0-2", "2-4", "4-6", "6-8", "8-12", "12-19", "19+")
  ) +
  guides(
    fill = guide_legend(title = "Number of Robbery cases in a Census Tract")
  )

# Show the gang map and the robbery map together for comparison.
fig_gang16
fig_robbery16

# Burglary counts per tract, 2016. The deciles can be inspected with:
#quantile(map_and_data16$count_burglary, probs = seq(0, 1, 0.1))
# NOTE(review): 176 is a fixed top break; cut() returns NA for any count above
# it, so confirm it still covers the data.
cuts_burglary16 <- c(0, 17, 21, 25, 32, 38, 49, 176)
map_and_data16["cuts_burglary16"] <- cut(
  map_and_data16[["count_burglary"]],
  breaks = cuts_burglary16,
  include.lowest = TRUE
)

fig_burglary16 <- ggplot(data = map_and_data16) +
  geom_sf(mapping = aes(fill = as.factor(cuts_burglary16))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c("0-17", "17-21", "21-25", "25-32", "32-38", "38-49", "49+")
  ) +
  guides(
    fill = guide_legend(title = "Number of Burglary cases in a Census Tract")
  )

# Show the gang map and the burglary map together for comparison.
fig_gang16
fig_burglary16

# Assault counts per tract, 2016. The deciles can be inspected with:
#quantile(map_and_data16$count_assault, probs = seq(0, 1, 0.1))
# NOTE(review): 203 is a fixed top break; cut() returns NA for any count above
# it, so confirm it still covers the data.
cuts_assault16 <- c(0, 9, 14, 21, 26, 32, 42, 53, 75, 203)
map_and_data16["cuts_assault16"] <- cut(
  map_and_data16[["count_assault"]],
  breaks = cuts_assault16,
  include.lowest = TRUE
)

fig_assault16 <- ggplot(data = map_and_data16) +
  geom_sf(mapping = aes(fill = as.factor(cuts_assault16))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "0-9", "9-14", "14-21", "21-26", "26-32",
      "32-42", "42-53", "53-75", "75+"
    )
  ) +
  guides(
    fill = guide_legend(title = "Number of Assault cases in a Census Tract")
  )

# Show the gang map and the assault map together for comparison.
fig_gang16
fig_assault16

2019 LAPD Crime Data Spatial Distribution

This part repeats all of the previous steps, but with 2019 crime data.

# Keep 2019 only and derive the tract id from the first 11 characters of the
# census block id.
lapd_crime_gang_19 <- lapd_crime_gang %>%
  filter(year == 2019) %>%
  mutate(censustractid = substr(census_block, 1, 11))

# Sum block-level counts to the tract level; year is re-attached as a constant
# so the result can be joined to the tract gang data on (tract, year).
lapd_crime_gang_tract19 <- lapd_crime_gang_19 %>%
  group_by(censustractid) %>%
  summarise(
    count_robbery = sum(robbery),
    count_homicide = sum(homicide),
    count_burglary = sum(burglary),
    count_shots_fired = sum(shots_fired),
    count_assault = sum(assault),
    count_rape = sum(rape),
    count_theft = sum(theft),
    count_reckless_driving = sum(reckless_driving),
    count_lynching = sum(lynching),
    count_vandalism = sum(vandalism),
    count_order_violation = sum(order_violation),
    count_kidnapping = sum(kidnapping),
    year = 2019
  ) %>%
  ungroup()

lapd_crime_gang_tract19 <- left_join(
  lapd_crime_gang_tract19,
  tract_gang_presence,
  by = c("censustractid", "year")
)

# Attach tract geometries; tracts absent from either side are dropped.
map_and_data19 <- st_as_sf(
  inner_join(map_la, lapd_crime_gang_tract19, by = "censustractid")
)

# Number of gangs per tract, 2019. The deciles can be inspected with:
#quantile(map_and_data19$no_gang, probs = seq(0, 1, 0.1))
# The breaks must produce exactly the four bins named in the legend:
# [-1,0] -> "0", (0,1] -> "1", (1,2] -> "2", (2,8] -> "3-8".
# Starting the breaks at -1 keeps tracts with zero gangs in a bin of their own
# instead of turning them into NA.
cuts_gang19 <- c(-1, 0, 1, 2, 8)
map_and_data19["cuts_gang19"] <- cut(
  map_and_data19$no_gang,
  breaks = cuts_gang19,
  include.lowest = TRUE
)

fig_gang19 <- ggplot(map_and_data19) +
  geom_sf(aes(fill = as.factor(cuts_gang19))) +
  scale_fill_brewer(palette = "Blues", labels = c("0", "1", "2", "3-8")) +
  guides(fill = guide_legend(title = "Number of Gangs in a tract"))

# Robbery counts per tract, 2019. The deciles can be inspected with:
#quantile(map_and_data19$count_robbery, probs = seq(0, 1, 0.1))
# NOTE(review): 83 is a fixed top break; cut() returns NA for any count above
# it, so confirm it still covers the data.
cuts_robbery19 <- c(0, 2, 4, 6, 8, 12, 20, 83)
map_and_data19["cuts_robbery19"] <- cut(
  map_and_data19[["count_robbery"]],
  breaks = cuts_robbery19,
  include.lowest = TRUE
)

fig_robbery19 <- ggplot(data = map_and_data19) +
  geom_sf(mapping = aes(fill = as.factor(cuts_robbery19))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c("0-2", "2-4", "4-6", "6-8", "8-12", "12-20", "20+")
  ) +
  guides(
    fill = guide_legend(
      title = "Number of Robbery cases in a Census Tract 2019"
    )
  )

# Show the gang map and the robbery map together for comparison.
fig_gang19
fig_robbery19

# Burglary counts per tract, 2019. The deciles can be inspected with:
#quantile(map_and_data19$count_burglary, probs = seq(0, 1, 0.1))
# NOTE(review): 214 is a fixed top break; cut() returns NA for any count above
# it, so confirm it still covers the data.
cuts_burglary19 <- c(0, 15, 19, 22, 27, 34, 47, 214)
map_and_data19["cuts_burglary19"] <- cut(
  map_and_data19$count_burglary,
  breaks = cuts_burglary19,
  include.lowest = TRUE
)

# Each legend label restates the corresponding pair of breaks above
# (the sixth bin is (34,47], hence "34-47").
fig_burglary19 <- ggplot(map_and_data19) +
  geom_sf(aes(fill = as.factor(cuts_burglary19))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c("0-15", "15-19", "19-22", "22-27", "27-34", "34-47", "47+")
  ) +
  guides(
    fill = guide_legend(
      title = "Number of Burglary cases in a Census Tract 2019"
    )
  )

# Show the gang map and the burglary map together for comparison.
fig_gang19
fig_burglary19

# Assault counts per tract, 2019: print the deciles used to pick the breaks.
quantile(map_and_data19$count_assault, probs = seq(0, 1, 0.1))

##    0%   10%   20%   30%   40%   50%   60%   70%   80%   90%  100% 
##   0.0   1.6   9.0  15.0  21.0  26.0  33.0  43.0  54.8  76.4 246.0

# NOTE(review): 247 is a fixed top break; cut() returns NA for any count above
# it, so confirm it still covers the data.
cuts_assault19 <- c(0, 21, 26, 33, 43, 55, 79, 247)
map_and_data19["cuts_assault19"] <- cut(
  map_and_data19[["count_assault"]],
  breaks = cuts_assault19,
  include.lowest = TRUE
)

fig_assault19 <- ggplot(data = map_and_data19) +
  geom_sf(mapping = aes(fill = as.factor(cuts_assault19))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c("0-21", "21-26", "26-33", "33-43", "43-55", "55-79", "79+")
  ) +
  guides(
    fill = guide_legend(
      title = "Number of Assault cases in a Census Tract 2019"
    )
  )

# Show the gang map and the assault map together for comparison.
fig_gang19
fig_assault19

Census Tract Characteristics

To get tract-level characteristics, I used the tidycensus package to pull data from the 5-year American Community Survey data. For example, for 2010 data, the package will pull the endyear estimate from the 2006-2010 5-year ACS.

In particular, I will pull and construct variables of poverty, median income, education, unemployment, and population. For poverty rate, I divided variable “B06012_002” (Below 100 percent of the poverty level) by variable “B01003_001” (total population). For median income, I used variable “B06011_001” (Median income in the past 12 months). For education, I constructed the share of population who holds at least a college degree by summing variables “B06009_005” (Bachelor’s degree) and “B06009_006” (Graduate or professional degree), divided by the sample size of this particular education variable group (variable “B06009_001”). For unemployment rate, I divided variable “C18120_006” (In the labor force - Unemployed) by variable “C18120_002” (In the labor force). For racial shares, I divided variables “B02001_002” (White alone), “B02001_003” (Black or African American alone), and “B03001_003” (Hispanic or Latino) by variable “B01003_001” (total population). Note that the unemployment variables are only available from 2012 onward.

# Register the Census API key for tidycensus without storing the secret in the
# script or echoing it in rendered output. The key is read from the
# CENSUS_API_KEY environment variable, which tidycensus's one-time
# `census_api_key(key, install = TRUE)` writes to ~/.Renviron.
# NOTE(review): a key was previously committed in this file; it should be
# treated as exposed and replaced with a newly issued one.
renviron_path <- "~/.Renviron"
if (file.exists(renviron_path)) {
  readRenviron(renviron_path)
}

census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "CENSUS_API_KEY is not set. Run ",
    "tidycensus::census_api_key(\"<your key>\", install = TRUE) once, ",
    "or add CENSUS_API_KEY to ~/.Renviron.",
    call. = FALSE
  )
}
census_api_key(census_key)

# Get census tract characteristics 2010-2019.

# Download the requested ACS tract estimates for Los Angeles County for one
# end year and return them wide: one row per tract (GEOID), one column per
# ACS variable code.
get_la_acs_wide <- function(acs_year, acs_variables) {
  get_acs(
    state = "CA", county = "Los Angeles", geography = "tract",
    variables = acs_variables, year = acs_year
  ) %>%
    select(GEOID, variable, estimate) %>%
    pivot_wider(names_from = variable, values_from = estimate)
}

# Only the variables that feed the constructed measures are requested.
characteristic_vars <- c(
  "B01003_001", "B06012_002", "B06011_001",
  "B06009_001", "B06009_005", "B06009_006",
  "B02001_002", "B02001_003", "B03001_003"
)

# One tibble per year, stacked once at the end (no per-year global objects).
la_characteristics <- lapply(2010:2019, function(acs_year) {
  get_la_acs_wide(acs_year, characteristic_vars) %>%
    rename(
      total_population = B01003_001, poverty = B06012_002,
      median_income = B06011_001, total_education = B06009_001,
      college = B06009_005, college_more = B06009_006,
      white = B02001_002, black = B02001_003, hispanic = B03001_003,
      censustractid = GEOID
    ) %>%
    mutate(
      poverty_rate = poverty / total_population,
      college_or_more = (college + college_more) / total_education,
      white_share = white / total_population,
      black_share = black / total_population,
      hispanic_share = hispanic / total_population,
      year = acs_year
    ) %>%
    select(
      poverty_rate, median_income, college_or_more,
      total_population, censustractid, white_share, black_share,
      hispanic_share, year
    )
}) %>%
  bind_rows()

# The unemployment variables are only pulled from 2012 onward.
unemployment_vars <- c("C18120_002", "C18120_006")

la_unemployed <- lapply(2012:2019, function(acs_year) {
  get_la_acs_wide(acs_year, unemployment_vars) %>%
    rename(
      unemployed = C18120_006, labor_force = C18120_002,
      censustractid = GEOID
    ) %>%
    mutate(unemployed_rate = unemployed / labor_force, year = acs_year) %>%
    select(unemployed_rate, censustractid, year)
}) %>%
  bind_rows()



# Aggregate the block-level panel to tract-by-year totals for all years.
# The tract id is the first 11 characters of the census block id.
lapd_crime_gang_tract <- lapd_crime_gang %>%
  mutate(censustractid = substr(census_block, 1, 11)) %>%
  group_by(censustractid, year) %>%
  summarise(
    count_robbery = sum(robbery),
    count_homicide = sum(homicide),
    count_burglary = sum(burglary),
    count_shots_fired = sum(shots_fired),
    count_assault = sum(assault),
    count_rape = sum(rape),
    count_theft = sum(theft),
    count_reckless_driving = sum(reckless_driving),
    count_lynching = sum(lynching),
    count_vandalism = sum(vandalism),
    count_order_violation = sum(order_violation),
    count_kidnapping = sum(kidnapping)
  ) %>%
  ungroup()

# Filled version: attach ACS characteristics, unemployment and tract gang data,
# carry missing gang fields down within each tract, and drop rows that still
# have no gang_presence.
lapd_crime_characteristics <- lapd_crime_gang_tract %>%
  left_join(la_characteristics, by = c("censustractid", "year")) %>%
  left_join(la_unemployed, by = c("censustractid", "year")) %>%
  left_join(tract_gang_presence, by = c("censustractid", "year")) %>%
  group_by(censustractid) %>%
  fill(
    gang_presence, no_gang, gang_list_block, blocks_with_gang,
    .direction = "down"
  ) %>%
  ungroup() %>%
  drop_na(gang_presence)

# write.csv keeps its default row names, so the file gains a leading index
# column.
write.csv(
  lapd_crime_characteristics,
  file = str_interp("${input_dir}/lapd_crime_gang_characteristics.csv")
)

# Unfilled version: the same three joins, but missing gang data is left as NA
# and no rows are dropped.

lapd_crime_characteristics_unfilled <- lapd_crime_gang_tract %>%
  left_join(la_characteristics, by = c("censustractid", "year")) %>%
  left_join(la_unemployed, by = c("censustractid", "year")) %>%
  left_join(tract_gang_presence, by = c("censustractid", "year"))
write.csv(
  lapd_crime_characteristics_unfilled,
  file = str_interp(
    "${input_dir}/lapd_crime_gang_characteristics_unfilled.csv"
  )
)

# Attach tract geometries and convert to an sf object for mapping.
# This overwrites the plain data frame with its spatial version; tracts without
# a shapefile match are dropped by the inner join.
lapd_crime_characteristics <- st_as_sf(
  inner_join(lapd_crime_characteristics, map_la, by = "censustractid")
)

2015 Characteristics Visualization

I extracted the 2015 data and visualized the characteristics data that I just collected.

###### 2015 characteristics ##########
# Extract the 2015 rows for the maps below.
lapd_crime_characteristics2015 <- filter(lapd_crime_characteristics, year == 2015)


# Poverty rate: decile breaks with the 90% cut removed, leaving nine bins
# (the last one runs from the 80th percentile to the maximum).
cuts_poverty15 <- quantile(
  lapd_crime_characteristics2015[["poverty_rate"]],
  probs = seq(0, 1, 0.1),
  na.rm = TRUE
)[-10]
lapd_crime_characteristics2015["cuts_poverty15"] <- cut(
  lapd_crime_characteristics2015[["poverty_rate"]],
  breaks = cuts_poverty15,
  include.lowest = TRUE
)
# NOTE(review): the legend labels are typed by hand; confirm they match the
# computed breaks.
fig_poverty15 <- ggplot(data = lapd_crime_characteristics2015) +
  geom_sf(mapping = aes(fill = as.factor(cuts_poverty15))) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c(
      "0-5.9%", "5.9-9.0%", "9.0-12.2%", "12.2-16%",
      "16.0-20.1%", "20.1-24.2%", "24.2-28.3%", "28.3-33.2%",
      "33.2% +"
    )
  ) +
  guides(fill = guide_legend(title = "Poverty Rate by Census Tract - 2015"))
fig_poverty15

# Median income: decile breaks with the 90% cut removed, leaving nine bins.
cuts_income15 <- quantile(
  lapd_crime_characteristics2015[["median_income"]],
  probs = seq(0, 1, 0.1),
  na.rm = TRUE
)[-10]
lapd_crime_characteristics2015["cuts_income15"] <- cut(
  lapd_crime_characteristics2015[["median_income"]],
  breaks = cuts_income15,
  include.lowest = TRUE
)
# NOTE(review): the legend labels are typed by hand; confirm they match the
# computed breaks.
fig_income15 <- ggplot(data = lapd_crime_characteristics2015) +
  geom_sf(mapping = aes(fill = as.factor(cuts_income15))) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c(
      "$4,050-$15,196", "$15,196-$16,367", "$16,367-$17,671",
      "$17,671-$19,300", "$19,300-$21,167", "$21,167-$23,750",
      "$23,750-$28,858", "$28,858-$36,220",
      "$36,220 +"
    )
  ) +
  guides(fill = guide_legend(title = "Median Income by Census Tract - 2015"))
fig_income15

# Unemployment rate: decile breaks with the 90% cut removed, leaving nine bins.
cuts_unemployed15 <- quantile(
  lapd_crime_characteristics2015[["unemployed_rate"]],
  probs = seq(0, 1, 0.1),
  na.rm = TRUE
)[-10]
lapd_crime_characteristics2015["cuts_unemployed15"] <- cut(
  lapd_crime_characteristics2015[["unemployed_rate"]],
  breaks = cuts_unemployed15,
  include.lowest = TRUE
)
# NOTE(review): the legend labels are typed by hand; confirm they match the
# computed breaks.
fig_unemployed15 <- ggplot(data = lapd_crime_characteristics2015) +
  geom_sf(mapping = aes(fill = as.factor(cuts_unemployed15))) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c(
      "0-5.5%", "5.5-7.1%", "7.1-8.1%", "8.1-9.0%",
      "9.0-10.0%", "10-11.0%", "11.0-12.2%", "12.2-13.7%",
      "13.7% +"
    )
  ) +
  guides(
    fill = guide_legend(title = "Unemployment Rate by Census Tract - 2015")
  )
fig_unemployed15

# Share with a college degree or more: decile breaks with the 90% cut removed,
# leaving nine bins.
cuts_edu15 <- quantile(
  lapd_crime_characteristics2015[["college_or_more"]],
  probs = seq(0, 1, 0.1),
  na.rm = TRUE
)[-10]
lapd_crime_characteristics2015["cuts_edu15"] <- cut(
  lapd_crime_characteristics2015[["college_or_more"]],
  breaks = cuts_edu15,
  include.lowest = TRUE
)
# NOTE(review): the legend labels are typed by hand; confirm they match the
# computed breaks.
fig_edu15 <- ggplot(data = lapd_crime_characteristics2015) +
  geom_sf(mapping = aes(fill = as.factor(cuts_edu15))) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c(
      "0-5.4%", "5.4-9.0%", "9.0-13.9%", "13.9-17.9%",
      "17.9-23.5%", "23.5-31.1%", "31.1-39.4%", "39.4-52.7%",
      "52.7% +"
    )
  ) +
  guides(
    fill = guide_legend(title = "%College deg or more by Census Tract - 2015")
  )
fig_edu15

# Total population: decile breaks with the 90% cut removed, leaving nine bins.
cuts_population15 <- quantile(
  lapd_crime_characteristics2015[["total_population"]],
  probs = seq(0, 1, 0.1),
  na.rm = TRUE
)[-10]
lapd_crime_characteristics2015["cuts_population15"] <- cut(
  lapd_crime_characteristics2015[["total_population"]],
  breaks = cuts_population15,
  include.lowest = TRUE
)
# NOTE(review): the legend labels are typed by hand; confirm they match the
# computed breaks.
fig_pop15 <- ggplot(data = lapd_crime_characteristics2015) +
  geom_sf(mapping = aes(fill = as.factor(cuts_population15))) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c(
      "0-2,445", "2,445-2,846", "2,846-3,100", "3,100-3,391",
      "3,391-3,692", "3,692-3,942", "3,942-4,291", "4,291-4,691",
      "4,691+"
    )
  ) +
  guides(
    fill = guide_legend(title = "Total population by Census Tract - 2015")
  )
fig_pop15

2019 Characteristics

Repeat the previous steps for the 2019 data extract.

###### 2019 characteristics ##########

# The bins below are recomputed from the 2019 data, so the legend text is
# generated from those same breaks rather than typed by hand.

# Decile break points of x with the 90% cut removed. That leaves ten breaks,
# i.e. nine bins, the last spanning the 80th percentile to the maximum.
decile_breaks <- function(x) {
  quantile(x, probs = seq(0, 1, 0.1), na.rm = TRUE)[-10]
}

# Turn a vector of break points into "lower-upper" legend labels, one per bin.
# `fmt` formats a numeric vector as character; the last bin is labelled
# "<lower> +".
bin_labels <- function(breaks, fmt) {
  breaks <- unname(breaks)
  n_bins <- length(breaks) - 1L
  lower <- fmt(breaks[seq_len(n_bins)])
  upper <- fmt(breaks[seq_len(n_bins) + 1L])
  labels <- paste0(lower, "-", upper)
  labels[n_bins] <- paste0(lower[n_bins], " +")
  labels
}

# Formatters for shares (0-1 scale), dollar amounts and plain counts.
fmt_percent <- function(x) sprintf("%.1f%%", 100 * x)
fmt_dollar <- function(x) paste0("$", formatC(x, format = "d", big.mark = ","))
fmt_count <- function(x) formatC(x, format = "d", big.mark = ",")

#visualize 2019 data
lapd_crime_characteristics2019 <- lapd_crime_characteristics %>%
  filter(year == 2019)


#poverty
cuts_poverty19 <- decile_breaks(lapd_crime_characteristics2019$poverty_rate)
lapd_crime_characteristics2019["cuts_poverty19"] <- cut(
  lapd_crime_characteristics2019$poverty_rate,
  breaks = cuts_poverty19,
  labels = bin_labels(cuts_poverty19, fmt_percent),
  include.lowest = TRUE
)
fig_poverty19 <- ggplot(lapd_crime_characteristics2019) +
  geom_sf(aes(fill = as.factor(cuts_poverty19))) +
  scale_fill_brewer(palette = "Blues") +
  guides(fill = guide_legend(title = "Poverty Rate by Census Tract - 2019"))
fig_poverty19

#median income
cuts_income19 <- decile_breaks(lapd_crime_characteristics2019$median_income)
lapd_crime_characteristics2019["cuts_income19"] <- cut(
  lapd_crime_characteristics2019$median_income,
  breaks = cuts_income19,
  labels = bin_labels(cuts_income19, fmt_dollar),
  include.lowest = TRUE
)
fig_income19 <- ggplot(lapd_crime_characteristics2019) +
  geom_sf(aes(fill = as.factor(cuts_income19))) +
  scale_fill_brewer(palette = "Blues") +
  guides(fill = guide_legend(title = "Median Income by Census Tract - 2019"))
fig_income19

#unemployment
cuts_unemployed19 <- decile_breaks(
  lapd_crime_characteristics2019$unemployed_rate
)
lapd_crime_characteristics2019["cuts_unemployed19"] <- cut(
  lapd_crime_characteristics2019$unemployed_rate,
  breaks = cuts_unemployed19,
  labels = bin_labels(cuts_unemployed19, fmt_percent),
  include.lowest = TRUE
)
fig_unemployed19 <- ggplot(lapd_crime_characteristics2019) +
  geom_sf(aes(fill = as.factor(cuts_unemployed19))) +
  scale_fill_brewer(palette = "Blues") +
  guides(
    fill = guide_legend(title = "Unemployment Rate by Census Tract - 2019")
  )
fig_unemployed19

#education
cuts_edu19 <- decile_breaks(lapd_crime_characteristics2019$college_or_more)
lapd_crime_characteristics2019["cuts_edu19"] <- cut(
  lapd_crime_characteristics2019$college_or_more,
  breaks = cuts_edu19,
  labels = bin_labels(cuts_edu19, fmt_percent),
  include.lowest = TRUE
)
fig_edu19 <- ggplot(lapd_crime_characteristics2019) +
  geom_sf(aes(fill = as.factor(cuts_edu19))) +
  scale_fill_brewer(palette = "Blues") +
  guides(
    fill = guide_legend(title = "%College deg or more by Census Tract - 2019")
  )
fig_edu19

#population
cuts_population19 <- decile_breaks(
  lapd_crime_characteristics2019$total_population
)
lapd_crime_characteristics2019["cuts_population19"] <- cut(
  lapd_crime_characteristics2019$total_population,
  breaks = cuts_population19,
  labels = bin_labels(cuts_population19, fmt_count),
  include.lowest = TRUE
)
fig_pop19 <- ggplot(lapd_crime_characteristics2019) +
  geom_sf(aes(fill = as.factor(cuts_population19))) +
  scale_fill_brewer(palette = "Blues") +
  guides(
    fill = guide_legend(title = "Total population by Census Tract - 2019")
  )
fig_pop19

Student Characteristics

Here, I collapsed the LAUSD student characteristics data (overall and school-level-specific Math and ELA test-score achievement) from the block level to the tract level and merged it with the above crime-gang data. I also counted the number of students (in total and at each school level) within each tract. Then, I constructed summary statistics for the years 2010 and 2015 and for all years combined. For Math and English (ELA), I computed both the unweighted and the weighted (by the number of students in the tract) versions.

###### merge in student data #######

# Read only the needed LAUSD columns, keeping the census block id as text,
# and restrict to end years 2010 and later.
student_data <- fread(
  str_interp("${input_dir}/lausd2002_2019.csv"),
  colClasses = c(censusblockid = "character"),
  select = c(
    "endyear", "gradecode", "z_math_all", "z_ela_all", "poverty",
    "female", "sped", "migrant", "ethnicity", "censusblockid"
  )
)[endyear >= 2010]

# Classify each record by school level (grades 9-12 high, 6-8 middle,
# everything else elementary), split the test scores by level, add per-level
# 0/1 student indicators, and drop records with an empty census block id.
student_data <- student_data %>%
  mutate(
    pop = 1,
    school_level = ifelse(
      gradecode >= 9 & gradecode <= 12,
      "High School",
      ifelse(
        gradecode >= 6 & gradecode <= 8,
        "Middle School",
        "Elementary School"
      )
    )
  ) %>%
  mutate(
    z_math_elementary = ifelse(school_level == "Elementary School", z_math_all, NA),
    z_ela_elementary = ifelse(school_level == "Elementary School", z_ela_all, NA),
    z_math_middle = ifelse(school_level == "Middle School", z_math_all, NA),
    z_ela_middle = ifelse(school_level == "Middle School", z_ela_all, NA),
    z_math_high = ifelse(school_level == "High School", z_math_all, NA),
    z_ela_high = ifelse(school_level == "High School", z_ela_all, NA)
  ) %>%
  mutate(
    pop_student_ele = as.numeric(school_level == "Elementary School"),
    pop_student_middle = as.numeric(school_level == "Middle School"),
    pop_student_high = as.numeric(school_level == "High School")
  ) %>%
  filter(censusblockid != "")

# Collapse the student records to tract-by-year: mean standardized Math/ELA
# scores overall and by school level (ignoring missing scores), plus student
# head counts. The tract id is the first 11 characters of the block id.
tract_student_data <- student_data %>%
  mutate(censustractid = substr(censusblockid, 1, 11)) %>%
  group_by(censustractid, endyear) %>%
  summarise(
    math_achievement_all = mean(z_math_all, na.rm = TRUE),
    ela_achievement_all = mean(z_ela_all, na.rm = TRUE),
    math_achievement_ele = mean(z_math_elementary, na.rm = TRUE),
    ela_achievement_ele = mean(z_ela_elementary, na.rm = TRUE),
    math_achievement_middle = mean(z_math_middle, na.rm = TRUE),
    ela_achievement_middle = mean(z_ela_middle, na.rm = TRUE),
    math_achievement_high = mean(z_math_high, na.rm = TRUE),
    ela_achievement_high = mean(z_ela_high, na.rm = TRUE),
    no_student = sum(pop, na.rm = TRUE),
    no_ele_student = sum(pop_student_ele, na.rm = TRUE),
    no_mid_student = sum(pop_student_middle, na.rm = TRUE),
    no_high_student = sum(pop_student_high, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  rename(year = endyear)

# Re-read the tract-level crime/gang/characteristics file written earlier; this
# replaces the sf version of lapd_crime_characteristics with a plain table.
lapd_crime_characteristics <- read_csv(
  str_interp("${input_dir}/lapd_crime_gang_characteristics.csv")
)

# Attach the tract-by-year student measures and save the merged data.
lapd_lausd_gang_data <- left_join(
  lapd_crime_characteristics,
  tract_student_data,
  by = c("censustractid", "year")
)

write.csv(
  lapd_lausd_gang_data,
  file = str_interp("${input_dir}/merged_lapd_lausd_gang_data.csv")
)

2010 Data

# Summary statistics for a set of tracts, one row per level of `group_var`.
#
# tract_data: tract-level rows carrying the crime counts, ACS characteristics,
#   student achievement means, student head counts, and a `pop` column equal
#   to 1 for every tract (so that sum(pop) counts tracts).
# group_var: unquoted name of the grouping column.
# Returns a tibble with crime totals, total population, unweighted means of
#   the tract characteristics, unweighted and student-weighted means of each
#   achievement measure, and tract/student counts. The column order matters:
#   the table built further down is a transpose of this result.
summarise_tract_stats <- function(tract_data, group_var) {
  tract_data %>%
    group_by({{ group_var }}) %>%
    summarise(
      robbery = sum(count_robbery),
      homicide = sum(count_homicide),
      shots_fired = sum(count_shots_fired),
      assault = sum(count_assault),
      theft = sum(count_theft),
      population = sum(total_population),
      poverty = mean(poverty_rate, na.rm = TRUE),
      median_income = mean(median_income, na.rm = TRUE),
      college = mean(college_or_more, na.rm = TRUE),
      white = mean(white_share, na.rm = TRUE),
      black = mean(black_share, na.rm = TRUE),
      hispanic = mean(hispanic_share, na.rm = TRUE),
      unweighed_math_achievement_all = mean(math_achievement_all, na.rm = TRUE),
      weighed_math_achievement_all = weighted.mean(math_achievement_all, w = no_student, na.rm = TRUE),
      unweighed_ela_achievement_all = mean(ela_achievement_all, na.rm = TRUE),
      weighed_ela_achievement_all = weighted.mean(ela_achievement_all, w = no_student, na.rm = TRUE),
      unweighed_math_achievement_ele = mean(math_achievement_ele, na.rm = TRUE),
      weighed_math_achievement_ele = weighted.mean(math_achievement_ele, w = no_ele_student, na.rm = TRUE),
      unweighed_ela_achievement_ele = mean(ela_achievement_ele, na.rm = TRUE),
      weighed_ela_achievement_ele = weighted.mean(ela_achievement_ele, w = no_ele_student, na.rm = TRUE),
      unweighed_math_achievement_middle = mean(math_achievement_middle, na.rm = TRUE),
      weighed_math_achievement_middle = weighted.mean(math_achievement_middle, w = no_mid_student, na.rm = TRUE),
      unweighed_ela_achievement_middle = mean(ela_achievement_middle, na.rm = TRUE),
      weighed_ela_achievement_middle = weighted.mean(ela_achievement_middle, w = no_mid_student, na.rm = TRUE),
      unweighed_math_achievement_high = mean(math_achievement_high, na.rm = TRUE),
      weighed_math_achievement_high = weighted.mean(math_achievement_high, w = no_high_student, na.rm = TRUE),
      unweighed_ela_achievement_high = mean(ela_achievement_high, na.rm = TRUE),
      weighed_ela_achievement_high = weighted.mean(ela_achievement_high, w = no_high_student, na.rm = TRUE),
      no_tract = sum(pop),
      no_student_all = sum(no_student, na.rm = TRUE),
      no_student_ele = sum(no_ele_student, na.rm = TRUE),
      no_student_mid = sum(no_mid_student, na.rm = TRUE),
      no_student_high = sum(no_high_student, na.rm = TRUE)
    )
}

# 2010 student-crime-gang data, split by whether the tract has any gang.
lapd_lausd_gang_data2010 <- lapd_lausd_gang_data %>%
  filter(year == 2010) %>%
  mutate(
    pop = 1,
    Gang = ifelse(gang_presence == 1, "With Gangs", "Without Gangs")
  )
lapd_lausd_gang_data2010_stats <- summarise_tract_stats(
  lapd_lausd_gang_data2010, Gang
)

# get stats for > 30% poverty tracts in 2010
# Tracts with a missing poverty rate get a missing label and are dropped.
very_poor_tract2010 <- lapd_lausd_gang_data %>%
  filter(year == 2010) %>%
  mutate(
    pop = 1,
    very_poor = ifelse(poverty_rate > 0.3, "Poverty >30%", "Poverty <30%")
  ) %>%
  drop_na(very_poor)

very_poor_tract2010_stats <- summarise_tract_stats(
  very_poor_tract2010, very_poor
)



# Reshape the two 2010 summary tables (one row per group, one column per
# statistic) into a single display table with one row per statistic.
#
# `transpose` is exported by both purrr and data.table. The data-frame
# transpose needed here is data.table's, so it is namespace-qualified instead
# of depending on which package happened to be attached last. `[-1, ]` drops
# the first transposed row, which holds the grouping column rather than a
# statistic.
t_lapd_lausd_gang_data2010_stats <- data.table::transpose(lapd_lausd_gang_data2010_stats)[-1, ]
t_very_poor_tract2010_stats <- data.table::transpose(very_poor_tract2010_stats)[-1, ]

# Name the two group columns and recover the statistic names from the
# untransposed tables (every column except the first).
# NOTE(review): the labels are assigned by position, so they assume each
# summary table has exactly two rows in this order -- confirm against the
# grouping used to build the summaries.
colnames(t_lapd_lausd_gang_data2010_stats) <- c("With Gangs", "Without Gangs")
t_lapd_lausd_gang_data2010_stats$Variables <- colnames(lapd_lausd_gang_data2010_stats[, -1])

colnames(t_very_poor_tract2010_stats) <- c("<30% Poverty Rate", ">30% Poverty Rate")
t_very_poor_tract2010_stats$Variables <- colnames(very_poor_tract2010_stats[, -1])

# Put `Variables` first; the poverty table then drops its own `Variables`
# column so the name column is not repeated after the cbind.
lapd_lausd_gang_data2010_stats <- t_lapd_lausd_gang_data2010_stats[, c(3, 1, 2)]
very_poor_tract2010_stats <- t_very_poor_tract2010_stats[, c(3, 1, 2)][-1]

# Side-by-side table: gang-presence columns followed by poverty columns.
lapd_lausd_gang_data2010_stats <- cbind(lapd_lausd_gang_data2010_stats, very_poor_tract2010_stats)
knitr::kable(lapd_lausd_gang_data2010_stats)

	Variables	With Gangs	Without Gangs	<30% Poverty Rate	>30% Poverty Rate
2	robbery	7131	1218	5355	2978
3	homicide	221	16	138	99
4	shots_fired	448	41	312	176
5	assault	25451	5531	21378	9470
6	theft	411	119	364	49
7	population	2340325	1299247	2952700	686872
8	poverty	0.220794839797363	0.124892892603239	0.139486349581635	0.390917917901767
9	median_income	21867.0788643533	37032.6724637681	29869.7758186398	15802.1513513514
10	college	0.197901478647782	0.419321763925195	0.314254454591899	0.111445994977935
11	white	0.449759764922115	0.630700863224438	0.546823065840959	0.37060526754233
12	black	0.111985460293496	0.0638212452763817	0.0863903105433759	0.132017323648582
13	hispanic	0.593360643439221	0.29303405988788	0.434246388226982	0.71619333161925
14	unweighed_math_achievement_all	-0.0145190422844822	0.366770497683638	0.186305039676753	-0.15918227040647
15	weighed_math_achievement_all	-0.0793518169432366	0.211880156720122	0.0633293618987939	-0.198201929166062
16	unweighed_ela_achievement_all	-0.00156627899422523	0.436601173640254	0.235341968671011	-0.190210754179738
17	weighed_ela_achievement_all	-0.0804831194220967	0.225840514026649	0.0800888322068634	-0.235006463528962
18	unweighed_math_achievement_ele	0.00595079434214071	0.355771938310209	0.190254255587851	-0.127987238260774
19	weighed_math_achievement_ele	-0.053210560017799	0.230768413579823	0.0860147385573127	-0.161179538324569
20	unweighed_ela_achievement_ele	0.00968490410704421	0.445896269488743	0.241536957779245	-0.174675780484129
21	weighed_ela_achievement_ele	-0.0619146654528577	0.269359045679102	0.108369152405011	-0.210280707841058
22	unweighed_math_achievement_middle	-0.00900949288810491	0.390999868494762	0.206161565879648	-0.186809151328017
23	weighed_math_achievement_middle	-0.0839083676337016	0.22969382890302	0.0810035699119967	-0.240685484342755
24	unweighed_ela_achievement_middle	-0.000423067724411366	0.471454154477253	0.251209839630023	-0.201499479881298
25	weighed_ela_achievement_middle	-0.0903869144365273	0.246399161004221	0.0912403737279437	-0.271593458387382
26	unweighed_math_achievement_high	-0.0161643395493612	0.382469448313817	0.182551116920891	-0.137941886485808
27	weighed_math_achievement_high	-0.0747334445326596	0.205602549349857	0.0487014734361135	-0.166262901574997
28	unweighed_ela_achievement_high	0.0144275930197672	0.392996010889527	0.214886852346515	-0.151020788659859
29	weighed_ela_achievement_high	-0.055749507329863	0.170565252954683	0.0647266477857104	-0.187148767746847
30	no_tract	638	349	794	185
31	no_student_all	310255	105259	305948	109467
32	no_student_ele	133812	47684	134032	47420
33	no_student_mid	77659	26629	76787	27482
34	no_student_high	94939	29302	91013	33192

# 2010 spatial distribution of achievement.
# Keep the 2010 rows and attach tract geometries from `map_la`; the inner join
# keeps only tracts present in both tables.
lapd_lausd_gang_data2010_map <- lapd_lausd_gang_data %>%
  filter(year == 2010) %>%
  inner_join(map_la, by = "censustractid") %>%
  st_as_sf()

# Math 2010
# The bin edges below are hard-coded.
# NOTE(review): the commented-out call looks like the source of the edges, but
# it references `math_achievement` while the binned column is
# `math_achievement_all` -- confirm.
#quantile(lapd_lausd_gang_data2010_map$math_achievement, probs = seq(0, 1, 0.1), na.rm = T)
cuts_math_achievement10 <- c(
  -1.272, -0.283, -0.190, -0.104, -0.046,
  0.021, 0.126, 0.268, 0.430, 1.8
)
lapd_lausd_gang_data2010_map["cuts_math_achievement10"] <- cut(
  lapd_lausd_gang_data2010_map$math_achievement_all,
  breaks = cuts_math_achievement10,
  include.lowest = TRUE
)

# Inside aes(), `cuts_math_achievement10` resolves to the binned column just
# added to the map data, not to the numeric vector of edges.
fig_math10 <- ggplot(lapd_lausd_gang_data2010_map) +
  geom_sf(aes(fill = as.factor(cuts_math_achievement10))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-1.272 - -0.283", "-0.283 - -0.190", "-0.190 - -0.104",
      "-0.104 - -0.046", "-0.046 - 0.021", "0.021 - 0.126",
      "0.126 - 0.268", "0.268 - 0.430", "0.430+"
    )
  ) +
  guides(fill = guide_legend(title = "Mean Math Achievement 2010"))

# English 2010
# Same construction as the math map, with its own hard-coded edges.
#quantile(lapd_lausd_gang_data2010_map$ela_achievement, probs = seq(0, 1, 0.1), na.rm = T)
cuts_ela_achievement10 <- c(
  -2.010, -0.289, -0.197, -0.112, -0.035,
  0.044, 0.148, 0.303, 0.506, 1.808
)
lapd_lausd_gang_data2010_map["cuts_ela_achievement10"] <- cut(
  lapd_lausd_gang_data2010_map$ela_achievement_all,
  breaks = cuts_ela_achievement10,
  include.lowest = TRUE
)

fig_ela10 <- ggplot(lapd_lausd_gang_data2010_map) +
  geom_sf(aes(fill = as.factor(cuts_ela_achievement10))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-2.010 - -0.289", "-0.289 - -0.197", "-0.197 - -0.112",
      "-0.112 - -0.035", "-0.035 - 0.044", "0.044 - 0.148",
      "0.148 - 0.303", "0.303 - 0.506", "0.506+"
    )
  ) +
  guides(fill = guide_legend(title = "Mean English Achievement 2010"))

# Render both maps.
fig_math10
fig_ela10

2015 Data

# 2015 student-crime-gang data.
# `pop` is a constant 1 so that summing it counts rows; `Gang` is the display
# label derived from `gang_presence`.
lapd_lausd_gang_data2015 <- lapd_lausd_gang_data %>%
  filter(year == 2015) %>%
  mutate(
    pop = 1,
    Gang = ifelse(gang_presence == 1, "With Gangs", "Without Gangs")
  )

# One row per `Gang` group. Crime counts and population are totals,
# demographics are plain means across rows, and each achievement measure is
# reported both as a plain mean ("unweighed") and as a mean weighted by the
# matching student count ("weighed"). The column order here is the row order
# of the table printed later.
lapd_lausd_gang_data2015_stats <- lapd_lausd_gang_data2015 %>%
  group_by(Gang) %>%
  summarise(
    # crime totals and population
    robbery = sum(count_robbery),
    homicide = sum(count_homicide),
    shots_fired = sum(count_shots_fired),
    assault = sum(count_assault),
    theft = sum(count_theft),
    population = sum(total_population),
    # demographics
    poverty = mean(poverty_rate, na.rm = TRUE),
    median_income = mean(median_income, na.rm = TRUE),
    college = mean(college_or_more, na.rm = TRUE),
    white = mean(white_share, na.rm = TRUE),
    black = mean(black_share, na.rm = TRUE),
    hispanic = mean(hispanic_share, na.rm = TRUE),
    # achievement, all grades
    unweighed_math_achievement_all = mean(math_achievement_all, na.rm = TRUE),
    weighed_math_achievement_all = weighted.mean(math_achievement_all, w = no_student, na.rm = TRUE),
    unweighed_ela_achievement_all = mean(ela_achievement_all, na.rm = TRUE),
    weighed_ela_achievement_all = weighted.mean(ela_achievement_all, w = no_student, na.rm = TRUE),
    # achievement, elementary
    unweighed_math_achievement_ele = mean(math_achievement_ele, na.rm = TRUE),
    weighed_math_achievement_ele = weighted.mean(math_achievement_ele, w = no_ele_student, na.rm = TRUE),
    unweighed_ela_achievement_ele = mean(ela_achievement_ele, na.rm = TRUE),
    weighed_ela_achievement_ele = weighted.mean(ela_achievement_ele, w = no_ele_student, na.rm = TRUE),
    # achievement, middle
    unweighed_math_achievement_middle = mean(math_achievement_middle, na.rm = TRUE),
    weighed_math_achievement_middle = weighted.mean(math_achievement_middle, w = no_mid_student, na.rm = TRUE),
    unweighed_ela_achievement_middle = mean(ela_achievement_middle, na.rm = TRUE),
    weighed_ela_achievement_middle = weighted.mean(ela_achievement_middle, w = no_mid_student, na.rm = TRUE),
    # achievement, high
    unweighed_math_achievement_high = mean(math_achievement_high, na.rm = TRUE),
    weighed_math_achievement_high = weighted.mean(math_achievement_high, w = no_high_student, na.rm = TRUE),
    unweighed_ela_achievement_high = mean(ela_achievement_high, na.rm = TRUE),
    weighed_ela_achievement_high = weighted.mean(ela_achievement_high, w = no_high_student, na.rm = TRUE),
    # row and student counts
    no_tract = sum(pop),
    no_student_all = sum(no_student, na.rm = TRUE),
    no_student_ele = sum(no_ele_student, na.rm = TRUE),
    no_student_mid = sum(no_mid_student, na.rm = TRUE),
    no_student_high = sum(no_high_student, na.rm = TRUE)
  )

# Same statistics for 2015, split at a 30% poverty rate instead of by gang
# presence. Rows with a missing poverty rate get an NA label and are dropped;
# a rate of exactly 0.3 falls in the "<30%" group.
very_poor_tract2015 <- lapd_lausd_gang_data %>%
  filter(year == 2015) %>%
  mutate(
    pop = 1,
    very_poor = ifelse(poverty_rate > 0.3, "Poverty >30%", "Poverty <30%")
  ) %>%
  drop_na(very_poor)

# One row per poverty group; the column order is the row order of the table
# printed later.
very_poor_tract2015_stats <- very_poor_tract2015 %>%
  group_by(very_poor) %>%
  summarise(
    # crime totals and population
    robbery = sum(count_robbery),
    homicide = sum(count_homicide),
    shots_fired = sum(count_shots_fired),
    assault = sum(count_assault),
    theft = sum(count_theft),
    population = sum(total_population),
    # demographics
    poverty = mean(poverty_rate, na.rm = TRUE),
    median_income = mean(median_income, na.rm = TRUE),
    college = mean(college_or_more, na.rm = TRUE),
    white = mean(white_share, na.rm = TRUE),
    black = mean(black_share, na.rm = TRUE),
    hispanic = mean(hispanic_share, na.rm = TRUE),
    # achievement, all grades
    unweighed_math_achievement_all = mean(math_achievement_all, na.rm = TRUE),
    weighed_math_achievement_all = weighted.mean(math_achievement_all, w = no_student, na.rm = TRUE),
    unweighed_ela_achievement_all = mean(ela_achievement_all, na.rm = TRUE),
    weighed_ela_achievement_all = weighted.mean(ela_achievement_all, w = no_student, na.rm = TRUE),
    # achievement, elementary
    unweighed_math_achievement_ele = mean(math_achievement_ele, na.rm = TRUE),
    weighed_math_achievement_ele = weighted.mean(math_achievement_ele, w = no_ele_student, na.rm = TRUE),
    unweighed_ela_achievement_ele = mean(ela_achievement_ele, na.rm = TRUE),
    weighed_ela_achievement_ele = weighted.mean(ela_achievement_ele, w = no_ele_student, na.rm = TRUE),
    # achievement, middle
    unweighed_math_achievement_middle = mean(math_achievement_middle, na.rm = TRUE),
    weighed_math_achievement_middle = weighted.mean(math_achievement_middle, w = no_mid_student, na.rm = TRUE),
    unweighed_ela_achievement_middle = mean(ela_achievement_middle, na.rm = TRUE),
    weighed_ela_achievement_middle = weighted.mean(ela_achievement_middle, w = no_mid_student, na.rm = TRUE),
    # achievement, high
    unweighed_math_achievement_high = mean(math_achievement_high, na.rm = TRUE),
    weighed_math_achievement_high = weighted.mean(math_achievement_high, w = no_high_student, na.rm = TRUE),
    unweighed_ela_achievement_high = mean(ela_achievement_high, na.rm = TRUE),
    weighed_ela_achievement_high = weighted.mean(ela_achievement_high, w = no_high_student, na.rm = TRUE),
    # row and student counts
    no_tract = sum(pop),
    no_student_all = sum(no_student, na.rm = TRUE),
    no_student_ele = sum(no_ele_student, na.rm = TRUE),
    no_student_mid = sum(no_mid_student, na.rm = TRUE),
    no_student_high = sum(no_high_student, na.rm = TRUE)
  )



# Reshape the two 2015 summary tables (one row per group, one column per
# statistic) into a single display table with one row per statistic.
#
# `transpose` is exported by both purrr and data.table. The data-frame
# transpose needed here is data.table's, so it is namespace-qualified instead
# of depending on which package happened to be attached last. The grouping
# column is text, so transposing turns every value into text; `[-1, ]` then
# drops the row that held the group labels.
t_lapd_lausd_gang_data2015_stats <- data.table::transpose(lapd_lausd_gang_data2015_stats)[-1, ]
t_very_poor_tract2015_stats <- data.table::transpose(very_poor_tract2015_stats)[-1, ]

# Name the two group columns and recover the statistic names from the
# untransposed tables (every column except the grouping column).
# NOTE(review): the labels are assigned by position, so they assume exactly
# two groups sorted as "With Gangs"/"Without Gangs" and
# "Poverty <30%"/"Poverty >30%" -- confirm no NA group can appear.
colnames(t_lapd_lausd_gang_data2015_stats) <- c("With Gangs", "Without Gangs")
t_lapd_lausd_gang_data2015_stats$Variables <- colnames(lapd_lausd_gang_data2015_stats[, -1])

colnames(t_very_poor_tract2015_stats) <- c("<30% Poverty Rate", ">30% Poverty Rate")
t_very_poor_tract2015_stats$Variables <- colnames(very_poor_tract2015_stats[, -1])

# Put `Variables` first; the poverty table then drops its own `Variables`
# column so the name column is not repeated after the cbind.
lapd_lausd_gang_data2015_stats <- t_lapd_lausd_gang_data2015_stats[, c(3, 1, 2)]
very_poor_tract2015_stats <- t_very_poor_tract2015_stats[, c(3, 1, 2)][-1]

# Side-by-side table: gang-presence columns followed by poverty columns.
lapd_lausd_gang_data2015_stats <- cbind(lapd_lausd_gang_data2015_stats, very_poor_tract2015_stats)
knitr::kable(lapd_lausd_gang_data2015_stats)

	Variables	With Gangs	Without Gangs	<30% Poverty Rate	>30% Poverty Rate
2	robbery	5672	916	3494	3078
3	homicide	209	13	111	111
4	shots_fired	456	24	227	253
5	assault	27756	5122	19223	13515
6	theft	590	133	472	108
7	population	2535788	1201358	2759238	977908
8	poverty	0.251594977339482	0.132158894729316	0.1514598410116	0.389693481283327
9	median_income	21653.6794055202	37402.4672897196	30411.8682634731	16123.1793478261
10	college	0.217988854851954	0.460204230106058	0.357054735465697	0.118558596576175
11	white	0.475511768606868	0.638487463949522	0.578132573810799	0.383277308778427
12	black	0.108600075801385	0.0521127941631387	0.0816715432832599	0.116134110776494
13	hispanic	0.593251359575677	0.278922417486645	0.409364849336374	0.731444845605233
14	unweighed_math_achievement_all	-0.0220291652209619	0.467646978163494	0.251041320819037	-0.200908284592837
15	weighed_math_achievement_all	-0.0923891138402328	0.288623132301707	0.120890984624168	-0.233968738525996
16	unweighed_ela_achievement_all	-0.0224438482074312	0.470084849004233	0.257937944651691	-0.217846035568475
17	weighed_ela_achievement_all	-0.0971809256079177	0.281741063844618	0.122666229487516	-0.251237122090604
18	unweighed_math_achievement_ele	-0.0343576546969366	0.489128000029645	0.254298847127729	-0.223561537804814
19	weighed_math_achievement_ele	-0.109109976056381	0.355130141405955	0.148054166576716	-0.257390757522957
20	unweighed_ela_achievement_ele	-0.0271508972423455	0.498965937811714	0.270586271260224	-0.240545494018559
21	weighed_ela_achievement_ele	-0.111516796230094	0.351699506721311	0.155186311560139	-0.276630877780357
22	unweighed_math_achievement_middle	-0.0111251587053072	0.442796774684217	0.248987944701187	-0.194451597342575
23	weighed_math_achievement_middle	-0.0754341208390569	0.274104377942721	0.134868205702003	-0.234546269216806
24	unweighed_ela_achievement_middle	-0.0181453344492909	0.450980499627942	0.252282298571175	-0.21058996702221
25	weighed_ela_achievement_middle	-0.0829700950949225	0.266216581732338	0.132056739718637	-0.250730512744574
26	unweighed_math_achievement_high	-0.00259378245152888	0.313537789636602	0.154248160029949	-0.088903564342496
27	weighed_math_achievement_high	-0.0508319599590855	0.178180196190144	0.0564957514120305	-0.115140348294293
28	unweighed_ela_achievement_high	-0.0183867848270321	0.256911283723875	0.124109690685684	-0.109649075496224
29	weighed_ela_achievement_high	-0.0578515841837923	0.159756295012111	0.0471227493231721	-0.123881233194971
30	no_tract	672	315	724	256
31	no_student_all	298277	81827	239399	140597
32	no_student_ele	133803	39580	108668	64664
33	no_student_mid	68754	19662	56675	31710
34	no_student_high	93239	21787	71948	43052

# 2015 spatial distribution of achievement.
# Keep the 2015 rows and attach tract geometries from `map_la`; the inner join
# keeps only tracts present in both tables.
lapd_lausd_gang_data2015_map <- lapd_lausd_gang_data %>%
  filter(year == 2015) %>%
  inner_join(map_la, by = "censustractid") %>%
  st_as_sf()


# Math 2015
# The bin edges below are hard-coded.
# NOTE(review): the commented-out call looks like the source of the edges, but
# it references `math_achievement` while the binned column is
# `math_achievement_all` -- confirm.
#quantile(lapd_lausd_gang_data2015_map$math_achievement, probs = seq(0, 1, 0.1), na.rm = T)
cuts_math_achievement15 <- c(
  -1.454, -0.327, -0.221, -0.132, -0.045,
  0.031, 0.127, 0.278, 0.494, 2.04
)
lapd_lausd_gang_data2015_map["cuts_math_achievement15"] <- cut(
  lapd_lausd_gang_data2015_map$math_achievement_all,
  breaks = cuts_math_achievement15,
  include.lowest = TRUE
)

# Inside aes(), `cuts_math_achievement15` resolves to the binned column just
# added to the map data, not to the numeric vector of edges.
fig_math15 <- ggplot(lapd_lausd_gang_data2015_map) +
  geom_sf(aes(fill = as.factor(cuts_math_achievement15))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-1.454 - -0.327", "-0.327 - -0.221", "-0.221 - -0.132",
      "-0.132 - -0.045", "-0.045 - 0.031", "0.031 - 0.127",
      "0.127 - 0.278", "0.278 - 0.494", "0.494+"
    )
  ) +
  guides(fill = guide_legend(title = "Mean Math Achievement 2015"))

# English 2015
# Same construction as the math map, with its own hard-coded edges.
#quantile(lapd_lausd_gang_data2015_map$ela_achievement, probs = seq(0, 1, 0.1), na.rm = T)
cuts_ela_achievement15 <- c(
  -1.744, -0.337, -0.232, -0.125, -0.046,
  0.034, 0.130, 0.301, 0.503, 1.655
)
lapd_lausd_gang_data2015_map["cuts_ela_achievement15"] <- cut(
  lapd_lausd_gang_data2015_map$ela_achievement_all,
  breaks = cuts_ela_achievement15,
  include.lowest = TRUE
)

fig_ela15 <- ggplot(lapd_lausd_gang_data2015_map) +
  geom_sf(aes(fill = as.factor(cuts_ela_achievement15))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-1.744 - -0.337", "-0.337 - -0.232", "-0.232 - -0.125",
      "-0.125 - -0.046", "-0.046 - 0.034", "0.034 - 0.130",
      "0.130 - 0.301", "0.301 - 0.503", "0.503+"
    )
  ) +
  guides(fill = guide_legend(title = "Mean English Achievement 2015"))

# Render both maps.
fig_math15
fig_ela15

All years Data

# All years pooled.
# This overwrites `lapd_lausd_gang_data` itself, so the added `pop` (constant 1,
# used to count rows) and `Gang` (display label) columns stay on the data
# frame for everything that uses it afterwards.
lapd_lausd_gang_data <- lapd_lausd_gang_data %>%
  mutate(
    pop = 1,
    Gang = ifelse(gang_presence == 1, "With Gangs", "Without Gangs")
  )

# One row per `Gang` group over all rows (every tract-year). Crime counts and
# population are totals, demographics are plain means, and each achievement
# measure is reported both as a plain mean ("unweighed") and as a mean weighted
# by the matching student count ("weighed"). The column order here is the row
# order of the table printed later.
lapd_lausd_gang_data_stats <- lapd_lausd_gang_data %>%
  group_by(Gang) %>%
  summarise(
    # crime totals and population
    robbery = sum(count_robbery),
    homicide = sum(count_homicide),
    shots_fired = sum(count_shots_fired),
    assault = sum(count_assault),
    theft = sum(count_theft),
    population = sum(total_population),
    # demographics
    poverty = mean(poverty_rate, na.rm = TRUE),
    median_income = mean(median_income, na.rm = TRUE),
    college = mean(college_or_more, na.rm = TRUE),
    white = mean(white_share, na.rm = TRUE),
    black = mean(black_share, na.rm = TRUE),
    hispanic = mean(hispanic_share, na.rm = TRUE),
    # achievement, all grades
    unweighed_math_achievement_all = mean(math_achievement_all, na.rm = TRUE),
    weighed_math_achievement_all = weighted.mean(math_achievement_all, w = no_student, na.rm = TRUE),
    unweighed_ela_achievement_all = mean(ela_achievement_all, na.rm = TRUE),
    weighed_ela_achievement_all = weighted.mean(ela_achievement_all, w = no_student, na.rm = TRUE),
    # achievement, elementary
    unweighed_math_achievement_ele = mean(math_achievement_ele, na.rm = TRUE),
    weighed_math_achievement_ele = weighted.mean(math_achievement_ele, w = no_ele_student, na.rm = TRUE),
    unweighed_ela_achievement_ele = mean(ela_achievement_ele, na.rm = TRUE),
    weighed_ela_achievement_ele = weighted.mean(ela_achievement_ele, w = no_ele_student, na.rm = TRUE),
    # achievement, middle
    unweighed_math_achievement_middle = mean(math_achievement_middle, na.rm = TRUE),
    weighed_math_achievement_middle = weighted.mean(math_achievement_middle, w = no_mid_student, na.rm = TRUE),
    unweighed_ela_achievement_middle = mean(ela_achievement_middle, na.rm = TRUE),
    weighed_ela_achievement_middle = weighted.mean(ela_achievement_middle, w = no_mid_student, na.rm = TRUE),
    # achievement, high
    unweighed_math_achievement_high = mean(math_achievement_high, na.rm = TRUE),
    weighed_math_achievement_high = weighted.mean(math_achievement_high, w = no_high_student, na.rm = TRUE),
    unweighed_ela_achievement_high = mean(ela_achievement_high, na.rm = TRUE),
    weighed_ela_achievement_high = weighted.mean(ela_achievement_high, w = no_high_student, na.rm = TRUE),
    # row and student counts
    no_tract = sum(pop),
    no_student_all = sum(no_student, na.rm = TRUE),
    no_student_ele = sum(no_ele_student, na.rm = TRUE),
    no_student_mid = sum(no_mid_student, na.rm = TRUE),
    no_student_high = sum(no_high_student, na.rm = TRUE)
  )

# Same statistics over all years, split at a 30% poverty rate instead of by
# gang presence. Rows with a missing poverty rate get an NA label and are
# dropped; a rate of exactly 0.3 falls in the "<30%" group.
very_poor_tract <- lapd_lausd_gang_data %>%
  mutate(
    pop = 1,
    very_poor = ifelse(poverty_rate > 0.3, "Poverty >30%", "Poverty <30%")
  ) %>%
  drop_na(very_poor)

# One row per poverty group; the column order is the row order of the table
# printed later.
very_poor_tract_stats <- very_poor_tract %>%
  group_by(very_poor) %>%
  summarise(
    # crime totals and population
    robbery = sum(count_robbery),
    homicide = sum(count_homicide),
    shots_fired = sum(count_shots_fired),
    assault = sum(count_assault),
    theft = sum(count_theft),
    population = sum(total_population),
    # demographics
    poverty = mean(poverty_rate, na.rm = TRUE),
    median_income = mean(median_income, na.rm = TRUE),
    college = mean(college_or_more, na.rm = TRUE),
    white = mean(white_share, na.rm = TRUE),
    black = mean(black_share, na.rm = TRUE),
    hispanic = mean(hispanic_share, na.rm = TRUE),
    # achievement, all grades
    unweighed_math_achievement_all = mean(math_achievement_all, na.rm = TRUE),
    weighed_math_achievement_all = weighted.mean(math_achievement_all, w = no_student, na.rm = TRUE),
    unweighed_ela_achievement_all = mean(ela_achievement_all, na.rm = TRUE),
    weighed_ela_achievement_all = weighted.mean(ela_achievement_all, w = no_student, na.rm = TRUE),
    # achievement, elementary
    unweighed_math_achievement_ele = mean(math_achievement_ele, na.rm = TRUE),
    weighed_math_achievement_ele = weighted.mean(math_achievement_ele, w = no_ele_student, na.rm = TRUE),
    unweighed_ela_achievement_ele = mean(ela_achievement_ele, na.rm = TRUE),
    weighed_ela_achievement_ele = weighted.mean(ela_achievement_ele, w = no_ele_student, na.rm = TRUE),
    # achievement, middle
    unweighed_math_achievement_middle = mean(math_achievement_middle, na.rm = TRUE),
    weighed_math_achievement_middle = weighted.mean(math_achievement_middle, w = no_mid_student, na.rm = TRUE),
    unweighed_ela_achievement_middle = mean(ela_achievement_middle, na.rm = TRUE),
    weighed_ela_achievement_middle = weighted.mean(ela_achievement_middle, w = no_mid_student, na.rm = TRUE),
    # achievement, high
    unweighed_math_achievement_high = mean(math_achievement_high, na.rm = TRUE),
    weighed_math_achievement_high = weighted.mean(math_achievement_high, w = no_high_student, na.rm = TRUE),
    unweighed_ela_achievement_high = mean(ela_achievement_high, na.rm = TRUE),
    weighed_ela_achievement_high = weighted.mean(ela_achievement_high, w = no_high_student, na.rm = TRUE),
    # row and student counts
    no_tract = sum(pop),
    no_student_all = sum(no_student, na.rm = TRUE),
    no_student_ele = sum(no_ele_student, na.rm = TRUE),
    no_student_mid = sum(no_mid_student, na.rm = TRUE),
    no_student_high = sum(no_high_student, na.rm = TRUE)
  )



# Reshape the two all-years summary tables (one row per group, one column per
# statistic) into a single display table with one row per statistic.
#
# `transpose` is exported by both purrr and data.table. The data-frame
# transpose needed here is data.table's, so it is namespace-qualified instead
# of depending on which package happened to be attached last. The grouping
# column is text, so transposing turns every value into text; `[-1, ]` then
# drops the row that held the group labels.
t_lapd_lausd_gang_data_stats <- data.table::transpose(lapd_lausd_gang_data_stats)[-1, ]
t_very_poor_tract_stats <- data.table::transpose(very_poor_tract_stats)[-1, ]

# Name the two group columns and recover the statistic names from the
# untransposed tables (every column except the grouping column).
# NOTE(review): the labels are assigned by position, so they assume exactly
# two groups sorted as "With Gangs"/"Without Gangs" and
# "Poverty <30%"/"Poverty >30%" -- confirm no NA group can appear.
colnames(t_lapd_lausd_gang_data_stats) <- c("With Gangs", "Without Gangs")
t_lapd_lausd_gang_data_stats$Variables <- colnames(lapd_lausd_gang_data_stats[, -1])

colnames(t_very_poor_tract_stats) <- c("<30% Poverty Rate", ">30% Poverty Rate")
t_very_poor_tract_stats$Variables <- colnames(very_poor_tract_stats[, -1])

# Put `Variables` first; the poverty table then drops its own `Variables`
# column so the name column is not repeated after the cbind.
lapd_lausd_gang_data_stats <- t_lapd_lausd_gang_data_stats[, c(3, 1, 2)]
very_poor_tract_stats <- t_very_poor_tract_stats[, c(3, 1, 2)][-1]

# Side-by-side table: gang-presence columns followed by poverty columns.
lapd_lausd_gang_data_stats <- cbind(lapd_lausd_gang_data_stats, very_poor_tract_stats)
knitr::kable(lapd_lausd_gang_data_stats)

	Variables	With Gangs	Without Gangs	<30% Poverty Rate	>30% Poverty Rate
2	robbery	61011	10444	43289	27987
3	homicide	1967	188	1247	907
4	shots_fired	4558	446	3026	1968
5	assault	267149	53039	209026	109539
6	theft	5216	1346	4746	782
7	population	24557886	12572627	29263664	7866849
8	poverty	0.236709122362706	0.122093921439358	0.147079829614415	0.388147052815215
9	median_income	22816.2631411783	39556.829566855	31722.9396736223	16213.9367214748
10	college	0.212890024446208	0.458160785491031	0.343106789127943	0.11723915343834
11	white	0.465562254878482	0.640986480252262	0.562986931778359	0.381334559757829
12	black	0.10836399066827	0.0576209251536508	0.0840964541411011	0.118203267001642
13	hispanic	0.598533122554496	0.281931160403067	0.428921710842274	0.727465784016621
14	unweighed_math_achievement_all	-0.0315112043330816	0.429260756751917	0.20340548591838	-0.181769554904671
15	weighed_math_achievement_all	-0.0954345130814528	0.273370171651975	0.076968810262683	-0.222968255107251
16	unweighed_ela_achievement_all	-0.0274163127455864	0.459408186990786	0.226840785144416	-0.207360205403583
17	weighed_ela_achievement_all	-0.101056540070446	0.283929745228184	0.0868418472466828	-0.253197373873381
18	unweighed_math_achievement_ele	-0.031720666922643	0.420242507760667	0.19735840962189	-0.178732830645037
19	weighed_math_achievement_ele	-0.0940791260110698	0.300705922424017	0.0941978688498094	-0.220262959542384
20	unweighed_ela_achievement_ele	-0.031557960405058	0.465762625045197	0.226097531517139	-0.214226878122576
21	weighed_ela_achievement_ele	-0.105271459332247	0.3288974447862	0.108312166607732	-0.259535489125013
22	unweighed_math_achievement_middle	-0.0290241667709656	0.450869361509118	0.21719609097088	-0.197187353213473
23	weighed_math_achievement_middle	-0.0920127275570377	0.308983026706654	0.0991136829003123	-0.239654501910071
24	unweighed_ela_achievement_middle	-0.0240325136358336	0.4771027732175	0.238330076259357	-0.220084679306053
25	weighed_ela_achievement_middle	-0.0967631253327562	0.307052769345193	0.105215939104509	-0.269457167169797
26	unweighed_math_achievement_high	-0.0142996926855945	0.354542458079512	0.166259801520808	-0.126199913977464
27	weighed_math_achievement_high	-0.0665727582009499	0.194590789150377	0.0478024663092685	-0.15851987517236
28	unweighed_ela_achievement_high	-0.00923011345702495	0.349349902251514	0.170949218461721	-0.134557324003242
29	weighed_ela_achievement_high	-0.063785116782333	0.184551056471947	0.0517869287777955	-0.167149997957285
30	no_tract	6543	3327	7716	2079
31	no_student_all	2876445	841363	2595236	1121576
32	no_student_ele	1247174	392463	1141597	497584
33	no_student_mid	692137	211957	640122	263718
34	no_student_high	915528	229098	793474	350871

Atlas Opportunity Data

I merged the tract-level Atlas Opportunity data into the gang-crime-student data built above and mapped some key mobility and incarceration variables. Because these Atlas Opportunity variables are time-invariant, this is an m:1 merge with the census tract id as the only key.

# Load the tract-level Atlas outcomes and build the tract id used as the join
# key: 2-character state + 3-character county + 6-character tract, each
# left-padded with zeros.
#
# The codes go through as.integer() before becoming text because
# as.character() on a double can return scientific notation (100000 becomes
# "1e+05"), which would corrupt the padded id. str_pad() replaces the nested
# ifelse() padding and also covers tract codes shorter than three characters,
# which the previous version left unpadded.
atlas_data <- read_dta(str_interp("${input_dir}/Opportunity_Atlas/tract_outcomes_simple.dta")) %>%
  mutate(
    state = str_pad(as.character(as.integer(state)), width = 2, pad = "0"),
    county = str_pad(as.character(as.integer(county)), width = 3, pad = "0"),
    tract = str_pad(as.character(as.integer(tract)), width = 6, pad = "0"),
    censustractid = paste0(state, county, tract)
  )

# Attach the Atlas columns to every row of the panel; rows whose tract id has
# no Atlas match are kept with NA in the Atlas columns.
lapd_lausd_gang_atlas_data <- lapd_lausd_gang_data %>% left_join(atlas_data, by = "censustractid")

# Save the merged panel. write.csv() keeps its default row names, so the file
# starts with an unnamed index column.
write.csv(lapd_lausd_gang_atlas_data, file = str_interp("${input_dir}/merged_lapd_lausd_gang_atlas_data.csv"))

# Atlas Opportunity maps.
# Only the 2010 rows are used, giving one set of rows per year slice of the
# panel; tract geometries come from `map_la`, and the inner join keeps only
# tracts present in both tables.
lapd_lausd_gang_atlas_data_map <- lapd_lausd_gang_atlas_data %>%
  filter(year == 2010) %>%
  inner_join(map_la, by = "censustractid") %>%
  st_as_sf()

# mobility variables
# Each map bins one `kfr_*_pooled_p25` column with hard-coded edges (the
# commented-out quantile() calls show where edges of this kind would come
# from) and fills tracts by bin. Inside aes(), the `cuts_*` name resolves to
# the binned column on the map data, not to the numeric vector of edges.

# Pooled
#quantile(lapd_lausd_gang_atlas_data_map$kfr_pooled_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_kfr_pooled <- c(
  -0.032, 0.365, 0.390, 0.405, 0.418,
  0.431, 0.444, 0.460, 0.480, 0.692
)
lapd_lausd_gang_atlas_data_map["cuts_kfr_pooled"] <- cut(
  lapd_lausd_gang_atlas_data_map$kfr_pooled_pooled_p25,
  breaks = cuts_kfr_pooled,
  include.lowest = TRUE
)

fig_kfr_pooled <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_kfr_pooled))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-0.032-0.365", "0.365-0.390", "0.390-0.405",
      "0.405-0.418", "0.418-0.431", "0.431-0.444",
      "0.444-0.460", "0.460-0.480", "0.480-0.692"
    )
  ) +
  guides(fill = guide_legend(title = "KFR Pooled p25"))
fig_kfr_pooled


# Black
#quantile(lapd_lausd_gang_atlas_data_map$kfr_black_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_kfr_black <- c(
  0.137, 0.258, 0.277, 0.292, 0.305,
  0.316, 0.328, 0.342, 0.360, 0.570
)
lapd_lausd_gang_atlas_data_map["cuts_kfr_black"] <- cut(
  lapd_lausd_gang_atlas_data_map$kfr_black_pooled_p25,
  breaks = cuts_kfr_black,
  include.lowest = TRUE
)

fig_kfr_black <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_kfr_black))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "0.137-0.258", "0.258-0.277", "0.277-0.292",
      "0.292-0.305", "0.305-0.316", "0.316-0.328",
      "0.328-0.342", "0.342-0.360", "0.360-0.570"
    )
  ) +
  guides(fill = guide_legend(title = "KFR Black p25"))
fig_kfr_black


# Hispanic
#quantile(lapd_lausd_gang_atlas_data_map$kfr_hisp_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_kfr_hisp <- c(
  0.171, 0.400, 0.415, 0.424, 0.432,
  0.439, 0.447, 0.457, 0.468, 0.734
)
lapd_lausd_gang_atlas_data_map["cuts_kfr_hisp"] <- cut(
  lapd_lausd_gang_atlas_data_map$kfr_hisp_pooled_p25,
  breaks = cuts_kfr_hisp,
  include.lowest = TRUE
)

fig_kfr_hisp <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_kfr_hisp))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "0.171-0.400", "0.400-0.415", "0.415-0.424",
      "0.424-0.432", "0.432-0.439", "0.439-0.447",
      "0.447-0.457", "0.457-0.468", "0.468-0.734"
    )
  ) +
  guides(fill = guide_legend(title = "KFR Hispanic p25"))
fig_kfr_hisp

# White
#quantile(lapd_lausd_gang_atlas_data_map$kfr_white_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_kfr_white <- c(
  0.252, 0.388, 0.429, 0.455, 0.480,
  0.502, 0.517, 0.536, 0.559, 0.815
)
lapd_lausd_gang_atlas_data_map["cuts_kfr_white"] <- cut(
  lapd_lausd_gang_atlas_data_map$kfr_white_pooled_p25,
  breaks = cuts_kfr_white,
  include.lowest = TRUE
)

fig_kfr_white <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_kfr_white))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "0.252-0.388", "0.388-0.429", "0.429-0.455",
      "0.455-0.480", "0.480-0.502", "0.502-0.517",
      "0.517-0.536", "0.536-0.559", "0.559-0.815"
    )
  ) +
  guides(fill = guide_legend(title = "KFR White p25"))
fig_kfr_white

# incarceration variables
# Each map bins one `jail_*_pooled_p25` column of the Atlas map data with
# hard-coded edges (the commented-out quantile() calls show where edges of
# this kind would come from) and fills tracts by bin. Inside aes(), the
# `cuts_*` name resolves to the binned column on the map data, not to the
# numeric vector of edges.

# Pooled
#quantile(lapd_lausd_gang_atlas_data_map$jail_pooled_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_jail_pooled <- c(
  -0.039, -0.003, 0.001, 0.005, 0.008,
  0.011, 0.014, 0.018, 0.022, 0.124
)
lapd_lausd_gang_atlas_data_map["cuts_jail_pooled"] <- cut(
  lapd_lausd_gang_atlas_data_map$jail_pooled_pooled_p25,
  breaks = cuts_jail_pooled,
  include.lowest = TRUE
)

fig_jail_pooled <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_jail_pooled))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-0.039 - -0.003", "-0.003 - 0.001", "0.001 - 0.005",
      "0.005 - 0.008", "0.008 - 0.011", "0.011 - 0.014",
      "0.014 - 0.018", "0.018 - 0.022", "0.022 - 0.124"
    )
  ) +
  guides(fill = guide_legend(title = "Jail Pooled p25"))
fig_jail_pooled


# Black
#quantile(lapd_lausd_gang_atlas_data_map$jail_black_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_jail_black <- c(
  -0.158, -0.010, 0.011, 0.020, 0.030,
  0.041, 0.051, 0.061, 0.076, 0.348
)
lapd_lausd_gang_atlas_data_map["cuts_jail_black"] <- cut(
  lapd_lausd_gang_atlas_data_map$jail_black_pooled_p25,
  breaks = cuts_jail_black,
  include.lowest = TRUE
)

fig_jail_black <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_jail_black))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-0.158 - -0.010", "-0.010 - 0.011", "0.011 - 0.020",
      "0.020 - 0.030", "0.030 - 0.041", "0.041 - 0.051",
      "0.051 - 0.061", "0.061 - 0.076", "0.076-0.348"
    )
  ) +
  guides(fill = guide_legend(title = "Jail Black p25"))
fig_jail_black


# Hispanic
#quantile(lapd_lausd_gang_atlas_data_map$jail_hisp_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_jail_hisp <- c(
  -0.099, -0.007, 0.000, 0.004, 0.007,
  0.010, 0.013, 0.017, 0.022, 0.221
)
lapd_lausd_gang_atlas_data_map["cuts_jail_hisp"] <- cut(
  lapd_lausd_gang_atlas_data_map$jail_hisp_pooled_p25,
  breaks = cuts_jail_hisp,
  include.lowest = TRUE
)

fig_jail_hisp <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_jail_hisp))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-0.099 - -0.007", "-0.007 - 0.000", "0.000 - 0.004",
      "0.004 - 0.007", "0.007 - 0.010", "0.010 - 0.013",
      "0.013 - 0.017", "0.017 - 0.022", "0.022 - 0.221"
    )
  ) +
  guides(fill = guide_legend(title = "Jail Hispanic p25"))
fig_jail_hisp

# White
#quantile(lapd_lausd_gang_atlas_data_map$jail_white_pooled_p25, probs = seq(0, 1, 0.1), na.rm = T)
cuts_jail_white <- c(
  -0.257, -0.047, -0.021, -0.009, -0.002,
  0.003, 0.010, 0.019, 0.030, 0.445
)
lapd_lausd_gang_atlas_data_map["cuts_jail_white"] <- cut(
  lapd_lausd_gang_atlas_data_map$jail_white_pooled_p25,
  breaks = cuts_jail_white,
  include.lowest = TRUE
)

fig_jail_white <- ggplot(lapd_lausd_gang_atlas_data_map) +
  geom_sf(aes(fill = as.factor(cuts_jail_white))) +
  scale_fill_brewer(
    palette = "Oranges",
    labels = c(
      "-0.257 - -0.047", "-0.047 - -0.021", "-0.021 - -0.009",
      "-0.009 - -0.002", "-0.002 - 0.003", "0.003 - 0.010",
      "0.010 - 0.019", "0.019 - 0.030", "0.030 - 0.445"
    )
  ) +
  guides(fill = guide_legend(title = "Jail White p25"))
fig_jail_white