# install.packages("tidyverse")
library(readxl)
library(tidyr)
library(dplyr)
library(stringr)
library(ggplot2)
Data Posted by Brian Liles
NYC COMPSTAT Data
http://www1.nyc.gov/site/nypd/stats/crime-statistics/historical.page
Downloaded XLS file for “Citywide Non-Seven Major Felony Offenses 2000-2017”
Here’s how the data looks initially: image:
Read XLS file, rows 5 through 13
# Get path [in platform independent way using file.path() function]
path <- file.path("~", "R", "Project 2", "non-seven-major-felony-offenses-2000-2017.xls")
df <- read_xls(path, sheet = 1, cell_rows(5:13)) %>% tbl_df()
df
## # A tibble: 8 x 19
## OFFENSE `2000` `2001` `2002` `2003` `2004` `2005` `2006` `2007` `2008`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FELONY P~ 2194. 2114. 1976. 1545. 1358. 1250. 1175. 1073. 1016.
## 2 FORGERY/~ 5714. 5594. 5859. 8112. 12984. 12896. 12579. 12075. 12216.
## 3 ARSON 3189. 2681. 1902. 1492. 1789. 1831. 1679. 1527. 1896.
## 4 FELONY S~ 1839. 1831. 1513. 1295. 1263. 1162. 1096. 1031. 909.
## 5 FELONY D~ 38088. 30743. 29692. 24076. 25824. 27391. 29516. 31459. 28936.
## 6 FELONY D~ 2958. 3030. 2804. 2511. 2987. 3748. 4315. 5004. 5263.
## 7 FEL. CRI~ 12934. 16026. 13291. 8400. 8074. 7776. 7605. 7090. 7003.
## 8 OTHER FE~ 13945. 13059. 13956. 13786. 12454. 11800. 11063. 11699. 11719.
## # ... with 9 more variables: `2009` <dbl>, `2010` <dbl>, `2011` <dbl>,
## # `2012` <dbl>, `2013` <dbl>, `2014` <dbl>, `2015` <dbl>, `2016` <dbl>,
## # `2017` <dbl>
Perform data transformation, suggested by Brian: “At first glance I would use the gather function and condense the table using the following headers: offense, year, and count instead of the wide format.”
dFelon <- gather(df, "YEAR", "COUNT", 2:(2+(2017-2000)), convert = T) %>% tbl_df()
head(dFelon)
## # A tibble: 6 x 3
## OFFENSE YEAR COUNT
## <chr> <int> <dbl>
## 1 FELONY POSSESSION OF STOLEN PROPERTY 2000 2194.
## 2 FORGERY/THEFT_FRAUD/IDENTITY THEFT 2000 5714.
## 3 ARSON 2000 3189.
## 4 FELONY SEX CRIMES (3) 2000 1839.
## 5 FELONY DANGEROUS DRUGS (1) 2000 38088.
## 6 FELONY DANGEROUS WEAPONS (2) 2000 2958.
ggplot(data = dFelon, mapping = aes(x = YEAR, y = COUNT, label = OFFENSE, color = OFFENSE, shape = OFFENSE)) +
labs(x = "Years", y ="Counts") +
geom_point() +
geom_line()
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 8.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 36 rows containing missing values (geom_point).
#geom_smooth()
#facet_wrap(~ OFFENSE)
#geom_line(mapping = aes(color = OFFENSE))
#facet_grid(OFFENSE ~ .)
#geom_text(aes(label = COUNT), vjust = 0, nudge_y = 1, color = "red") +
dFelon %>% group_by(OFFENSE) %>% summarize(MEAN = mean(COUNT)) %>% arrange(desc(MEAN)) %>% mutate(RANK = row_number())
## # A tibble: 8 x 3
## OFFENSE MEAN RANK
## <chr> <dbl> <int>
## 1 FELONY DANGEROUS DRUGS (1) 24204. 1
## 2 OTHER FELONIES (4) 12703. 2
## 3 FORGERY/THEFT_FRAUD/IDENTITY THEFT 9943. 3
## 4 FEL. CRIMINAL MISCHIEF & RELATED OFFENSES 8830. 4
## 5 FELONY DANGEROUS WEAPONS (2) 4384. 5
## 6 ARSON 1575. 6
## 7 FELONY SEX CRIMES (3) 1243. 7
## 8 FELONY POSSESSION OF STOLEN PROPERTY 1188. 8
dFelon <- left_join(dFelon, dFelon %>% group_by(OFFENSE) %>% summarize(MEAN = mean(COUNT)) %>% arrange(desc(MEAN)) %>% mutate(RANK = row_number()), by = "OFFENSE") %>% arrange(YEAR, RANK)
head(dFelon, 10)
## # A tibble: 10 x 5
## OFFENSE YEAR COUNT MEAN RANK
## <chr> <int> <dbl> <dbl> <int>
## 1 FELONY DANGEROUS DRUGS (1) 2000 38088. 24204. 1
## 2 OTHER FELONIES (4) 2000 13945. 12703. 2
## 3 FORGERY/THEFT_FRAUD/IDENTITY THEFT 2000 5714. 9943. 3
## 4 FEL. CRIMINAL MISCHIEF & RELATED OFFENSES 2000 12934. 8830. 4
## 5 FELONY DANGEROUS WEAPONS (2) 2000 2958. 4384. 5
## 6 ARSON 2000 3189. 1575. 6
## 7 FELONY SEX CRIMES (3) 2000 1839. 1243. 7
## 8 FELONY POSSESSION OF STOLEN PROPERTY 2000 2194. 1188. 8
## 9 FELONY DANGEROUS DRUGS (1) 2001 30743. 24204. 1
## 10 OTHER FELONIES (4) 2001 13059. 12703. 2
ggplot(data = filter(dFelon, between(RANK, 1, 2)), mapping = aes(x = YEAR, y = COUNT, label = OFFENSE, color = OFFENSE, shape = OFFENSE)) +
labs(x = "Years", y ="Counts") +
geom_point() +
#geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'loess'
ggplot(data = filter(dFelon, between(RANK, 3, 5)), mapping = aes(x = YEAR, y = COUNT, label = OFFENSE, color = OFFENSE, shape = OFFENSE)) +
labs(x = "Years", y ="Counts") +
geom_point() +
#geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'loess'
ggplot(data = filter(dFelon, between(RANK, 6, 8)), mapping = aes(x = YEAR, y = COUNT, label = OFFENSE, color = OFFENSE, shape = OFFENSE)) +
labs(x = "Years", y ="Counts") +
geom_point() +
#geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'loess'