Assignment 7
Packages Used
library('RSocrata')
library('readr')
library("purrr")
library('dplyr')
library('tibble')
library('stringr')
library('lubridate')
library('ggplot2')
File Import
# One-time download from NYC Open Data, kept commented out; the cached RDS
# copy is read instead.
##url <- 'https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59'
##RestaurantInspection <- read.socrata(url = url)
##write_rds(RestaurantInspection, 'Desktop/Course/R/Data Wrangling with R (BANA 8090)/data/')
# NOTE(review): the commented write_rds() call above targets ".../data/", while
# the read below expects ".../data/nyc" -- confirm the file name used when
# the cache was written.
RestaurantInspection <-
  "Desktop/Course/R/Data Wrangling with R (BANA 8090)/data/nyc" %>%
  read_rds()
Q1
# Q1: report the class vector of every column in the inspection data.
RestaurantInspection %>%
  map(class)
## $CAMIS
## [1] "integer"
##
## $DBA
## [1] "factor"
##
## $BORO
## [1] "factor"
##
## $BUILDING
## [1] "character"
##
## $STREET
## [1] "factor"
##
## $ZIPCODE
## [1] "integer"
##
## $PHONE
## [1] "character"
##
## $CUISINE.DESCRIPTION
## [1] "factor"
##
## $INSPECTION.DATE
## [1] "Date"
##
## $ACTION
## [1] "factor"
##
## $VIOLATION.CODE
## [1] "factor"
##
## $VIOLATION.DESCRIPTION
## [1] "factor"
##
## $CRITICAL.FLAG
## [1] "factor"
##
## $SCORE
## [1] "integer"
##
## $GRADE
## [1] "factor"
##
## $GRADE.DATE
## [1] "Date"
##
## $RECORD.DATE
## [1] "Date"
##
## $INSPECTION.TYPE
## [1] "factor"
Q2
#' Test whether an object is a POSIXlt date-time
#'
#' @param x Any R object.
#' @return A single `TRUE` if `x` has class "POSIXlt", otherwise `FALSE`.
checkfunction <- function(x) {
  # inherits() checks class membership directly and always returns a
  # length-one logical, so no scalar ifelse() or T/F shorthand is needed.
  inherits(x, "POSIXlt")
}
#' Convert a POSIXlt vector to Date, leaving anything else untouched
#'
#' @param x Any R object; typically one column of a data frame.
#' @return `as.Date(x)` when `x` has class "POSIXlt"; otherwise `x` unchanged.
POSIXltToDate <- function(x) {
  # The class test is done inline with inherits() so the function does not
  # depend on a separate helper being defined first.
  if (inherits(x, "POSIXlt")) {
    as.Date(x)
  } else {
    x
  }
}
# Q2: apply POSIXltToDate() to every column, then rebuild a tibble from the
# resulting list of columns.
RestaurantInspection <- as_tibble(
  map(RestaurantInspection, POSIXltToDate)
)
RestaurantInspection
## # A tibble: 436,584 × 18
## CAMIS DBA BORO BUILDING STREET ZIPCODE
## <int> <fctr> <fctr> <chr> <fctr> <int>
## 1 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 2 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 3 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 4 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 5 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 6 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 7 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 8 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 9 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## 10 30075445 MORRIS PARK BAKE SHOP BRONX 1007 MORRIS PARK AVE 10462
## # ... with 436,574 more rows, and 12 more variables: PHONE <chr>,
## # CUISINE.DESCRIPTION <fctr>, INSPECTION.DATE <date>, ACTION <fctr>,
## # VIOLATION.CODE <fctr>, VIOLATION.DESCRIPTION <fctr>,
## # CRITICAL.FLAG <fctr>, SCORE <int>, GRADE <fctr>, GRADE.DATE <date>,
## # RECORD.DATE <date>, INSPECTION.TYPE <fctr>
Q3
# Q3: count 2016 inspection rows whose violation description mentions mice,
# hair or sewage.
# NOTE(review): grepl() is case-sensitive here, whereas top_violations()
# lower-cases the description before matching -- confirm this is intended.
RestaurantInspection %>%
  filter(year(INSPECTION.DATE) == 2016) %>%
  # Keep only rows that mention at least one of the three keywords.
  filter(
    grepl("mice", VIOLATION.DESCRIPTION) |
      grepl("hair", VIOLATION.DESCRIPTION) |
      grepl("sewage", VIOLATION.DESCRIPTION)
  ) %>%
  mutate(
    Year = year(INSPECTION.DATE),
    # A row matching several keywords gets exactly one label, with priority
    # mice, then hair, then sewage.
    Violation = ifelse(
      grepl("mice", VIOLATION.DESCRIPTION),
      "mice",
      ifelse(grepl("hair", VIOLATION.DESCRIPTION), "hair", "sewage")
    )
  ) %>%
  group_by(Violation, Year) %>%
  summarise(NumberOfRest = n())
## Source: local data frame [3 x 3]
## Groups: Violation [?]
##
## Violation Year NumberOfRest
## <chr> <dbl> <int>
## 1 hair 2016 2132
## 2 mice 2016 8281
## 3 sewage 2016 13640
Q4
#' Bar chart of the restaurants with the most matching violations in a year
#'
#' @param findpattern Regular expression matched against
#'   `VIOLATION.DESCRIPTION`. Matching ignores case.
#' @param findyear Year compared with `year(INSPECTION.DATE)`.
#' @param data Data frame with `INSPECTION.DATE`, `VIOLATION.DESCRIPTION` and
#'   `DBA` columns. Defaults to the global `RestaurantInspection`, so existing
#'   two-argument calls behave as before.
#' @return A ggplot object: one bar per `DBA`, ordered by violation count.
top_violations <- function(findpattern, findyear,
                           data = RestaurantInspection) {
  # Previously only the description was lower-cased, so a pattern containing
  # an upper-case letter (e.g. "Mice") silently matched nothing. Making the
  # regex itself case-insensitive handles both sides.
  matcher <- regex(findpattern, ignore_case = TRUE)

  data %>%
    filter(
      year(INSPECTION.DATE) == findyear,
      str_detect(as.character(VIOLATION.DESCRIPTION), matcher)
    ) %>%
    count(DBA) %>%
    arrange(desc(n)) %>%
    # top_n() keeps ties, so more than 20 bars can appear when several
    # restaurants share the 20th-highest count.
    top_n(20, n) %>%
    ggplot() +
    geom_bar(
      mapping = aes(x = reorder(DBA, n), y = n),
      stat = "identity", color = "cyan", fill = "yellow"
    ) +
    labs(x = "Restaurant", y = "Violations") +
    theme(
      text = element_text(size = 7),
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
    )
}
# Q4: plot the restaurants with the most mice-related violations in 2016.
top_violations(findpattern = "mice", findyear = 2016)
