# Download the NYC shooting records as JSON and load them into a data frame.
endpoint <- "https://data.cityofnewyork.us/resource/833y-fsy8.json"
resp <- httr::GET(
  endpoint,
  query = list("$limit" = 30000, "$order" = "occur_date DESC")
)
# Stop with a clear error on a failed request instead of trying to parse an
# error response as if it were data.
httr::stop_for_status(resp)
# State the encoding explicitly so httr does not have to guess it.
# flatten = TRUE turns nested JSON fields into ordinary columns
# (e.g. geocoded_column.type, geocoded_column.coordinates).
shooting_data <- jsonlite::fromJSON(
  httr::content(resp, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)
head(shooting_data)
## incident_key occur_date occur_time boro loc_of_occur_desc
## 1 298699604 2024-12-31T00:00:00.000 19:16:00 BROOKLYN OUTSIDE
## 2 298699604 2024-12-31T00:00:00.000 19:16:00 BROOKLYN OUTSIDE
## 3 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## 4 298672094 2024-12-30T00:00:00.000 12:15:00 BRONX OUTSIDE
## 5 298672097 2024-12-30T00:00:00.000 18:48:00 BROOKLYN OUTSIDE
## 6 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## precinct jurisdiction_code loc_classfctn_desc location_desc
## 1 69 0 STREET (null)
## 2 69 0 STREET (null)
## 3 47 0 STREET (null)
## 4 52 0 STREET (null)
## 5 60 2 HOUSING MULTI DWELL - PUBLIC HOUS
## 6 47 0 STREET (null)
## statistical_murder_flag perp_age_group perp_sex perp_race vic_age_group
## 1 FALSE 25-44 M BLACK 18-24
## 2 FALSE 25-44 M BLACK 25-44
## 3 FALSE (null) (null) (null) 18-24
## 4 FALSE 45-64 M BLACK 25-44
## 5 FALSE 25-44 M BLACK 45-64
## 6 FALSE (null) (null) (null) 25-44
## vic_sex vic_race x_coord_cd y_coord_cd latitude longitude
## 1 M BLACK 1,015,120 173,870 40.643866 -73.888761
## 2 M BLACK 1,015,120 173,870 40.643866 -73.888761
## 3 M BLACK 1,021,316 259,277 40.878261 -73.865964
## 4 M WHITE 1,017,719 260,875 40.882661 -73.878964
## 5 M BLACK 989,372 155,205 40.592685 -73.981557
## 6 F WHITE HISPANIC 1,021,316 259,277 40.878261 -73.865964
## geocoded_column.type geocoded_column.coordinates
## 1 Point -73.88876, 40.64387
## 2 Point -73.88876, 40.64387
## 3 Point -73.86596, 40.87826
## 4 Point -73.87896, 40.88266
## 5 Point -73.98156, 40.59269
## 6 Point -73.86596, 40.87826
Here I gathered the NYC shooting data by requesting it from the NYC Open Data website and loading the result into R.
# Keep only the rows that have a geocoded point type recorded.
shooting_data <- filter(shooting_data, !is.na(geocoded_column.type))
With this code I removed NAs from geocoded_column.type.
# Lower-case the perpetrator race labels.
shooting_data[["perp_race"]] <- tolower(shooting_data[["perp_race"]])
I converted the values in the perp_race column from uppercase to lowercase.
# Parse occur_time as a time value and label each shooting by hour of day:
#   hours 0-11  -> "morning"
#   hours 12-19 -> "afternoon"
#   everything else (hours 20-23, or a missing time) -> "night"
shooting_data <- shooting_data %>%
  mutate(
    occur_time = as_hms(occur_time),
    time_of_day = case_when(
      hour(occur_time) >= 0 & hour(occur_time) < 12 ~ "morning",
      # ">=" (not ">") so that the 12:00-12:59 hour counts as afternoon
      # instead of falling through to "night".
      hour(occur_time) >= 12 & hour(occur_time) < 20 ~ "afternoon",
      TRUE ~ "night"
    )
  )
I tried re-running the code based on your suggestions, and it worked. I also changed the values for the hours just to see if it made a difference.
# Number of shootings in each time-of-day bucket, largest count first.
time_of_day <- count(shooting_data, time_of_day, sort = TRUE)
time_of_day
## time_of_day n
## 1 morning 12437
## 2 night 9340
## 3 afternoon 7870
# Number of victims for each recorded sex value, largest count first.
vic_sex_counts <- count(shooting_data, vic_sex, sort = TRUE)
vic_sex_counts
## vic_sex n
## 1 M 26753
## 2 F 2882
## 3 U 12
I looked into the sex of the victims to see how crime is distributed across genders.
# Store the six-row preview in its own object. Assigning head() back to
# shooting_data would throw away the rest of the cleaned dataset, so every
# later plot and row count would only see six rows.
shooting_data_preview <- head(shooting_data)
shooting_data_preview
## incident_key occur_date occur_time boro loc_of_occur_desc
## 1 298699604 2024-12-31T00:00:00.000 19:16:00 BROOKLYN OUTSIDE
## 2 298699604 2024-12-31T00:00:00.000 19:16:00 BROOKLYN OUTSIDE
## 3 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## 4 298672094 2024-12-30T00:00:00.000 12:15:00 BRONX OUTSIDE
## 5 298672097 2024-12-30T00:00:00.000 18:48:00 BROOKLYN OUTSIDE
## 6 298672096 2024-12-30T00:00:00.000 16:45:00 BRONX OUTSIDE
## precinct jurisdiction_code loc_classfctn_desc location_desc
## 1 69 0 STREET (null)
## 2 69 0 STREET (null)
## 3 47 0 STREET (null)
## 4 52 0 STREET (null)
## 5 60 2 HOUSING MULTI DWELL - PUBLIC HOUS
## 6 47 0 STREET (null)
## statistical_murder_flag perp_age_group perp_sex perp_race vic_age_group
## 1 FALSE 25-44 M black 18-24
## 2 FALSE 25-44 M black 25-44
## 3 FALSE (null) (null) (null) 18-24
## 4 FALSE 45-64 M black 25-44
## 5 FALSE 25-44 M black 45-64
## 6 FALSE (null) (null) (null) 25-44
## vic_sex vic_race x_coord_cd y_coord_cd latitude longitude
## 1 M BLACK 1,015,120 173,870 40.643866 -73.888761
## 2 M BLACK 1,015,120 173,870 40.643866 -73.888761
## 3 M BLACK 1,021,316 259,277 40.878261 -73.865964
## 4 M WHITE 1,017,719 260,875 40.882661 -73.878964
## 5 M BLACK 989,372 155,205 40.592685 -73.981557
## 6 F WHITE HISPANIC 1,021,316 259,277 40.878261 -73.865964
## geocoded_column.type geocoded_column.coordinates time_of_day
## 1 Point -73.88876, 40.64387 afternoon
## 2 Point -73.88876, 40.64387 afternoon
## 3 Point -73.86596, 40.87826 afternoon
## 4 Point -73.87896, 40.88266 night
## 5 Point -73.98156, 40.59269 afternoon
## 6 Point -73.86596, 40.87826 afternoon
# Render the preview as a formatted table. Calling kable through knitr::
# works even when library(knitr) has not been run.
knitr::kable(shooting_data_preview)
incident_key | occur_date | occur_time | boro | loc_of_occur_desc | precinct | jurisdiction_code | loc_classfctn_desc | location_desc | statistical_murder_flag | perp_age_group | perp_sex | perp_race | vic_age_group | vic_sex | vic_race | x_coord_cd | y_coord_cd | latitude | longitude | geocoded_column.type | geocoded_column.coordinates | time_of_day |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
298699604 | 2024-12-31T00:00:00.000 | 19:16:00 | BROOKLYN | OUTSIDE | 69 | 0 | STREET | (null) | FALSE | 25-44 | M | black | 18-24 | M | BLACK | 1,015,120 | 173,870 | 40.643866 | -73.888761 | Point | -73.88876, 40.64387 | afternoon |
298699604 | 2024-12-31T00:00:00.000 | 19:16:00 | BROOKLYN | OUTSIDE | 69 | 0 | STREET | (null) | FALSE | 25-44 | M | black | 25-44 | M | BLACK | 1,015,120 | 173,870 | 40.643866 | -73.888761 | Point | -73.88876, 40.64387 | afternoon |
298672096 | 2024-12-30T00:00:00.000 | 16:45:00 | BRONX | OUTSIDE | 47 | 0 | STREET | (null) | FALSE | (null) | (null) | (null) | 18-24 | M | BLACK | 1,021,316 | 259,277 | 40.878261 | -73.865964 | Point | -73.86596, 40.87826 | afternoon |
298672094 | 2024-12-30T00:00:00.000 | 12:15:00 | BRONX | OUTSIDE | 52 | 0 | STREET | (null) | FALSE | 45-64 | M | black | 25-44 | M | WHITE | 1,017,719 | 260,875 | 40.882661 | -73.878964 | Point | -73.87896, 40.88266 | night |
298672097 | 2024-12-30T00:00:00.000 | 18:48:00 | BROOKLYN | OUTSIDE | 60 | 2 | HOUSING | MULTI DWELL - PUBLIC HOUS | FALSE | 25-44 | M | black | 45-64 | M | BLACK | 989,372 | 155,205 | 40.592685 | -73.981557 | Point | -73.98156, 40.59269 | afternoon |
298672096 | 2024-12-30T00:00:00.000 | 16:45:00 | BRONX | OUTSIDE | 47 | 0 | STREET | (null) | FALSE | (null) | (null) | (null) | 25-44 | F | WHITE HISPANIC | 1,021,316 | 259,277 | 40.878261 | -73.865964 | Point | -73.86596, 40.87826 | afternoon |
I still don’t really get what kable does… I kept getting errors for it at first, but somehow it’s working now.
# Bar chart with one bar per time_of_day value; bar height is the number of
# rows of shooting_data in that bucket.
ggplot(
  data = shooting_data,
  mapping = aes(x = time_of_day, fill = time_of_day)
) +
  geom_bar() +
  scale_fill_brewer(palette = "Set2") +
  ggtitle("Shootings by Time of Day") +
  xlab("Time of Day") +
  ylab("Number of Shootings") +
  theme_minimal(base_size = 14) +
  # The x axis already names each bar, so the fill legend is hidden.
  theme(legend.position = "none")
Ahhhhhh, time_of_day is finally working!!!!!
# Bar chart of the pre-computed counts: one bar per vic_sex value, with the
# bar height taken from column n.
ggplot(
  data = vic_sex_counts,
  mapping = aes(x = vic_sex, y = n, fill = vic_sex)
) +
  geom_col() +
  scale_fill_brewer(palette = "Pastel1") +
  ggtitle("Victim Sex Count") +
  xlab("Victim Sex") +
  ylab("Count") +
  theme_minimal() +
  # The x axis already names each bar, so the fill legend is hidden.
  theme(legend.position = "none")
# The same counts rendered as a formatted table.
knitr::kable(vic_sex_counts)
vic_sex | n |
---|---|
M | 26753 |
F | 2882 |
U | 12 |
This code gives me a visual representation of how the victims are split between males and females.
# Number of rows currently held in shooting_data.
dim(shooting_data)[1L]
## [1] 6
This code tells me the number of rows that remain after I cleaned up the data.
This is my second time doing this assignment. I got the chance to use the feedback and go over my lines of code to see where I messed up. It was very helpful to do that, as I now have a better understanding of working with RMarkdown while still using R code. I did get a few errors here and there, but this time around it was way easier to figure out where they occurred in the script.