# JSON endpoint on data.cityofnewyork.us that serves the shooting records.
endpoint <- "https://data.cityofnewyork.us/resource/833y-fsy8.json"

# Ask for at most 30,000 rows, ordered by occur_date with the newest first.
resp <- httr::GET(
  endpoint,
  query = list("$limit" = 30000, "$order" = "occur_date DESC")
)

# Stop with an informative error on a 4xx/5xx response instead of trying to
# parse an error body as if it were data.
httr::stop_for_status(resp)

# Decode the body as UTF-8 text explicitly (avoids httr guessing the encoding),
# then parse the JSON; flatten = TRUE turns nested fields into plain columns.
shooting_data <- jsonlite::fromJSON(
  httr::content(resp, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)
In this code chunk, I stored the URL of the shooting data in the endpoint variable. I then used httr::GET() with a query list to request up to 30,000 data points, ordered from the most recent occurrence date, and jsonlite::fromJSON() to turn the response into a data frame.

# Cleaning Data
# Put every location-classification label in lower case.
shooting_data[["loc_classfctn_desc"]] <- str_to_lower(
  shooting_data[["loc_classfctn_desc"]]
)

# Preview the first 30 rows of the data.
head(shooting_data, n = 30)
## incident_key occur_date occur_time boro loc_of_occur_desc
## 1 298699604 2024-12-31T00:00:00.000 19:16:00 BROOKLYN OUTSIDE
## 2 298699604 2024-12-31T00:00:00.000 19:16:00 BROOKLYN OUTSIDE
## 3 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## 4 298672094 2024-12-30T00:00:00.000 12:15:00 BRONX OUTSIDE
## 5 298672097 2024-12-30T00:00:00.000 18:48:00 BROOKLYN OUTSIDE
## 6 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## 7 298672095 2024-12-30T00:00:00.000 20:32:00 BRONX INSIDE
## 8 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## 9 298628070 2024-12-29T00:00:00.000 16:21:00 BRONX OUTSIDE
## 10 298628071 2024-12-29T00:00:00.000 17:24:00 MANHATTAN OUTSIDE
## 11 298609064 2024-12-28T00:00:00.000 15:30:00 BROOKLYN OUTSIDE
## 12 298609064 2024-12-28T00:00:00.000 15:30:00 BROOKLYN OUTSIDE
## 13 298607139 2024-12-28T00:00:00.000 2:53:00 MANHATTAN INSIDE
## 14 298607139 2024-12-28T00:00:00.000 2:53:00 MANHATTAN INSIDE
## 15 298569109 2024-12-27T00:00:00.000 20:40:00 BROOKLYN OUTSIDE
## 16 298566035 2024-12-27T00:00:00.000 20:42:00 BRONX OUTSIDE
## 17 298566035 2024-12-27T00:00:00.000 20:42:00 BRONX OUTSIDE
## 18 298515632 2024-12-26T00:00:00.000 18:39:00 BRONX INSIDE
## 19 298508438 2024-12-26T00:00:00.000 20:37:00 BRONX OUTSIDE
## 20 298609065 2024-12-25T00:00:00.000 1:18:00 BRONX INSIDE
## 21 298461889 2024-12-25T00:00:00.000 5:55:00 MANHATTAN OUTSIDE
## 22 298444104 2024-12-24T00:00:00.000 17:46:00 QUEENS OUTSIDE
## 23 298411322 2024-12-23T00:00:00.000 22:30:00 MANHATTAN OUTSIDE
## 24 298357806 2024-12-22T00:00:00.000 3:20:00 BRONX OUTSIDE
## 25 298357805 2024-12-21T00:00:00.000 18:15:00 BRONX OUTSIDE
## 26 298357804 2024-12-21T00:00:00.000 15:22:00 BROOKLYN OUTSIDE
## 27 298301433 2024-12-20T00:00:00.000 19:40:00 MANHATTAN OUTSIDE
## 28 298301433 2024-12-20T00:00:00.000 19:40:00 MANHATTAN OUTSIDE
## 29 298316135 2024-12-20T00:00:00.000 20:55:00 QUEENS OUTSIDE
## 30 298301433 2024-12-20T00:00:00.000 19:40:00 MANHATTAN OUTSIDE
## precinct jurisdiction_code loc_classfctn_desc location_desc
## 1 69 0 street (null)
## 2 69 0 street (null)
## 3 47 0 street (null)
## 4 52 0 street (null)
## 5 60 2 housing MULTI DWELL - PUBLIC HOUS
## 6 47 0 street (null)
## 7 41 0 dwelling MULTI DWELL - APT BUILD
## 8 47 0 street (null)
## 9 43 0 street (null)
## 10 23 0 street (null)
## 11 73 0 street (null)
## 12 73 0 street (null)
## 13 18 0 dwelling MULTI DWELL - APT BUILD
## 14 18 0 dwelling MULTI DWELL - APT BUILD
## 15 61 2 housing MULTI DWELL - PUBLIC HOUS
## 16 40 0 street (null)
## 17 40 0 street (null)
## 18 52 0 other HOSPITAL
## 19 40 2 housing MULTI DWELL - PUBLIC HOUS
## 20 44 0 dwelling MULTI DWELL - APT BUILD
## 21 23 2 housing MULTI DWELL - PUBLIC HOUS
## 22 106 0 street (null)
## 23 23 0 street (null)
## 24 43 0 street (null)
## 25 48 0 street (null)
## 26 61 1 transit (null)
## 27 34 0 street (null)
## 28 34 0 street (null)
## 29 109 0 street (null)
## 30 34 0 street (null)
## statistical_murder_flag perp_age_group perp_sex perp_race
## 1 FALSE 25-44 M BLACK
## 2 FALSE 25-44 M BLACK
## 3 FALSE (null) (null) (null)
## 4 FALSE 45-64 M BLACK
## 5 FALSE 25-44 M BLACK
## 6 FALSE (null) (null) (null)
## 7 TRUE 18-24 M BLACK
## 8 FALSE (null) (null) (null)
## 9 FALSE 18-24 M BLACK
## 10 FALSE (null) (null) (null)
## 11 FALSE (null) (null) (null)
## 12 FALSE (null) (null) (null)
## 13 TRUE 25-44 M BLACK
## 14 TRUE 18-24 F BLACK
## 15 FALSE (null) (null) (null)
## 16 FALSE <18 M BLACK HISPANIC
## 17 FALSE <18 M BLACK HISPANIC
## 18 FALSE (null) (null) (null)
## 19 FALSE 18-24 M WHITE HISPANIC
## 20 FALSE 45-64 M WHITE HISPANIC
## 21 FALSE 25-44 M BLACK
## 22 FALSE 25-44 M ASIAN / PACIFIC ISLANDER
## 23 FALSE (null) (null) (null)
## 24 FALSE 25-44 M BLACK
## 25 TRUE 25-44 M BLACK
## 26 FALSE <18 M BLACK
## 27 FALSE 18-24 M WHITE HISPANIC
## 28 FALSE <18 M WHITE HISPANIC
## 29 FALSE (null) (null) (null)
## 30 FALSE <18 M BLACK
## vic_age_group vic_sex vic_race x_coord_cd y_coord_cd
## 1 18-24 M BLACK 1,015,120 173,870
## 2 25-44 M BLACK 1,015,120 173,870
## 3 18-24 M BLACK 1,021,316 259,277
## 4 25-44 M WHITE 1,017,719 260,875
## 5 45-64 M BLACK 989,372 155,205
## 6 25-44 F WHITE HISPANIC 1,021,316 259,277
## 7 25-44 M BLACK 1,012,201 240,878
## 8 <18 F WHITE HISPANIC 1,021,316 259,277
## 9 <18 M BLACK 1,020,219 239,110
## 10 25-44 M BLACK 999,007 229,814
## 11 25-44 F BLACK 1,007,941 179,784
## 12 18-24 M BLACK 1,007,941 179,784
## 13 25-44 M BLACK 987,578 216,989
## 14 25-44 M BLACK 987,578 216,989
## 15 18-24 M BLACK 1,001,807 156,562
## 16 25-44 M BLACK HISPANIC 1,006,789 237,559
## 17 25-44 M WHITE HISPANIC 1,006,789 237,559
## 18 18-24 M BLACK HISPANIC 1,017,782 260,028
## 19 18-24 M BLACK 1,006,844 232,758
## 20 25-44 M BLACK HISPANIC 1,006,693 240,835
## 21 25-44 M BLACK 999,602 230,361
## 22 18-24 M ASIAN / PACIFIC ISLANDER 1,035,389 182,933
## 23 25-44 M BLACK 1,000,776 227,613
## 24 18-24 M BLACK 1,021,853 242,624
## 25 25-44 M BLACK 1,017,012 244,931
## 26 18-24 M BLACK 996,614 157,489
## 27 <18 M BLACK HISPANIC 1,004,175 253,200
## 28 <18 M BLACK HISPANIC 1,004,175 253,200
## 29 25-44 M BLACK 1,023,819 224,864
## 30 <18 M BLACK HISPANIC 1,004,175 253,200
## latitude longitude geocoded_column.type geocoded_column.coordinates
## 1 40.643866 -73.888761 Point -73.88876, 40.64387
## 2 40.643866 -73.888761 Point -73.88876, 40.64387
## 3 40.878261 -73.865964 Point -73.86596, 40.87826
## 4 40.882661 -73.878964 Point -73.87896, 40.88266
## 5 40.592685 -73.981557 Point -73.98156, 40.59269
## 6 40.878261 -73.865964 Point -73.86596, 40.87826
## 7 40.827795 -73.899003 Point -73.8990, 40.8278
## 8 40.878261 -73.865964 Point -73.86596, 40.87826
## 9 <NA> <NA> <NA> NULL
## 10 40.79745 -73.946702 Point -73.94670, 40.79745
## 11 40.66012 -73.91461 Point -73.91461, 40.66012
## 12 40.66012 -73.91461 Point -73.91461, 40.66012
## 13 40.762269 -73.987986 Point -73.98799, 40.76227
## 14 40.762269 -73.987986 Point -73.98799, 40.76227
## 15 40.596395 -73.93678 Point -73.93678, 40.59640
## 16 40.818691 -73.918569 Point -73.91857, 40.81869
## 17 40.818691 -73.918569 Point -73.91857, 40.81869
## 18 40.880337 -73.87874 Point -73.87874, 40.88034
## 19 40.805523 -73.918387 Point -73.91839, 40.80552
## 20 40.827694 -73.918903 Point -73.91890, 40.82769
## 21 40.798959 -73.944552 Point -73.94455, 40.79896
## 22 40.668647 -73.815655 Point -73.81566, 40.66865
## 23 40.791414 -73.940319 Point -73.94032, 40.79141
## 24 40.832551 -73.864117 Point -73.86412, 40.83255
## 25 40.838904 -73.881597 Point -73.8816, 40.8389
## 26 40.598939 -73.955477 Point -73.95548, 40.59894
## 27 40.861639 -73.927966 Point -73.92797, 40.86164
## 28 40.861639 -73.927966 Point -73.92797, 40.86164
## 29 40.783797 -73.857115 Point -73.85711, 40.78380
## 30 40.861639 -73.927966 Point -73.92797, 40.86164
# Derive the hour of each shooting and bucket it into a time-of-day label.
#   hour         : hour of day (0-23) taken from occur_time
#   time_of_day2 : "Morning" (5-11), "Afternoon" (12-17), "Night" (otherwise),
#                  or NA when the time could not be parsed
shooting_data <- shooting_data %>%
  mutate(
    # Parse in a fixed time zone so the result does not depend on the
    # machine's local zone or daylight-saving rules.
    hour = as.numeric(
      format(strptime(occur_time, format = "%H:%M", tz = "UTC"), "%H")
    ),
    time_of_day2 = case_when(
      # Keep missing/unparseable times as NA; without this guard they would
      # fall through to the catch-all branch and be counted as "Night".
      is.na(hour) ~ NA_character_,
      hour >= 5 & hour < 12 ~ "Morning",
      hour >= 12 & hour < 18 ~ "Afternoon",
      TRUE ~ "Night"
    )
  )

# Preview the first 30 labels.
shooting_data$time_of_day2 %>% head(30)
## [1] "Night" "Night" "Afternoon" "Afternoon" "Night" "Afternoon"
## [7] "Night" "Afternoon" "Afternoon" "Afternoon" "Afternoon" "Afternoon"
## [13] "Night" "Night" "Night" "Night" "Night" "Night"
## [19] "Night" "Night" "Morning" "Afternoon" "Night" "Night"
## [25] "Night" "Afternoon" "Night" "Night" "Night" "Night"
In this code chunk, I used the str_to_lower() function to convert the words in the loc_classfctn_desc column to lowercase. In the second part of the chunk, I used the mutate() and case_when() functions to add a new column that tells us whether each shooting happened in the morning, afternoon, or night.

## Insights
# Number of shootings per time-of-day label, most frequent first.
# The column is referenced by its bare name so the result column is called
# `time_of_day2` rather than the literal expression `shooting_data$time_of_day2`.
shooting_data %>%
  count(time_of_day2) %>%
  arrange(desc(n))
## shooting_data$time_of_day2 n
## 1 Night 21580
## 2 Afternoon 5439
## 3 Morning 2725
In this code chunk, I used the count() and arrange() functions to count the number of shootings that happened at each time of day, sorted from the most to the least frequent.
# Bar chart of the number of shootings in each time-of-day category.
ggplot(shooting_data, aes(x = time_of_day2)) +
  geom_bar(color = "steelblue", fill = "white") +
  labs(
    title = "Time of Day",
    # The axis shows the time-of-day category, not the day.
    x = "Time of Day",
    y = "Count"
  ) +
  theme(
    plot.title = element_text(size = 18, family = "mono", face = "bold")
  )
# Bar chart of shooting counts per borough, drawn as one panel for each
# value of loc_of_occur_desc.
shooting_data %>%
  ggplot(mapping = aes(x = boro)) +
  geom_bar(fill = "grey", color = "purple") +
  facet_wrap(facets = ~ loc_of_occur_desc) +
  ggtitle("Different Boroughs") +
  xlab("Borough") +
  ylab("Count") +
  theme(plot.title = element_text(size = 18, face = "bold", family = "serif"))
# Table of the time-of-day label for the first 30 shootings.
# Take the first 30 rows BEFORE calling kable(): piping kable()'s output into
# head() truncates the rendered table's text lines (header included) and
# prints them as a raw character vector instead of a table.
shooting_data %>%
  select(time_of_day2) %>%
  head(30) %>%
  kable()
## [1] "|x |" "|:---------|" "|Night |" "|Night |" "|Afternoon |"
## [6] "|Afternoon |" "|Night |" "|Afternoon |" "|Night |" "|Afternoon |"
## [11] "|Afternoon |" "|Afternoon |" "|Afternoon |" "|Afternoon |" "|Night |"
## [16] "|Night |" "|Night |" "|Night |" "|Night |" "|Night |"
## [21] "|Night |" "|Night |" "|Morning |" "|Afternoon |" "|Night |"
## [26] "|Night |" "|Night |" "|Afternoon |" "|Night |" "|Night |"
In this code chunk, for the first part I used ggplot and the geom_bar function to create a bar graph showing the time of day. The x-axis was the time of day (morning, afternoon, and night), and the y-axis was the number of shootings that happened at each of those 3 times. For the second part of the chunk I created another bar graph using ggplot and geom_bar again, but this time to see which boroughs the shootings happened in. The x-axis was the 5 boroughs in NYC, and the y-axis was the number of times a shooting occurred in each borough. I also used the facet_wrap function to incorporate the location of the shootings in the same graph. There were 3 options: outside, inside, or N/A. For the last part of the chunk I created a table for the time of day column using kable. I also used the head function to only get the first 30 data points.
This will help me with my thesis research because I will be able to upload my data and run code to create tables and graphs (visuals). I will be able to write explanations and analyze the code I ran for a better understanding, while keeping it all in one document so it’s easy to read. It will also allow me to come back and add or change things easily.