#nyc_data <- read.socrata("https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59")
#write_rds(nyc_data, path="NYC_Restaurant_Data.rds")
nyc_data <- read_rds("NYC_Restaurant_Data.rds")
names(nyc_data)
## [1] "CAMIS" "DBA"
## [3] "BORO" "BUILDING"
## [5] "STREET" "ZIPCODE"
## [7] "PHONE" "CUISINE.DESCRIPTION"
## [9] "INSPECTION.DATE" "ACTION"
## [11] "VIOLATION.CODE" "VIOLATION.DESCRIPTION"
## [13] "CRITICAL.FLAG" "SCORE"
## [15] "GRADE" "GRADE.DATE"
## [17] "RECORD.DATE" "INSPECTION.TYPE"
dim(nyc_data)
## [1] 436612 18
kable(str(nyc_data))
## 'data.frame': 436612 obs. of 18 variables:
## $ CAMIS : int 41606387 50007091 50012185 40750062 50034621 50007874 40552965 41650546 41524468 41435999 ...
## $ DBA : chr "LA CUARTA RESTAURANT" "NEW PEKING RESTAURANT" "CASTILLO RESTAURNAT" "PANEANTICO BAKERY" ...
## $ BORO : chr "BROOKLYN" "BROOKLYN" "BROOKLYN" "BROOKLYN" ...
## $ BUILDING : chr "782" "1581" "709" "9124" ...
## $ STREET : chr "4 AVENUE" "FLATBUSH AVE" "5TH AVE" "3 AVENUE" ...
## $ ZIPCODE : int 11232 11210 11215 11209 11355 10465 10302 11432 10029 11209 ...
## $ PHONE : chr "7183694964" "7188591554" "7184994910" "7186802347" ...
## $ CUISINE.DESCRIPTION : chr "Peruvian" "Chinese" "Spanish" "Bakery" ...
## $ INSPECTION.DATE : POSIXct, format: "2016-02-16" "2014-11-13" ...
## $ ACTION : chr "Violations were cited in the following area(s)." "Violations were cited in the following area(s)." "Violations were cited in the following area(s)." "Violations were cited in the following area(s)." ...
## $ VIOLATION.CODE : chr "04N" "02G" "04N" "06C" ...
## $ VIOLATION.DESCRIPTION: chr "Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies incl"| __truncated__ "Cold food item held above 41º F (smoked fish and reduced oxygen packaged foods above 38 ºF) except during necessary preparati"| __truncated__ "Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies incl"| __truncated__ "Food not protected from potential source of contamination during storage, preparation, transportation, display or service." ...
## $ CRITICAL.FLAG : chr "Critical" "Critical" "Critical" "Critical" ...
## $ SCORE : int 17 7 20 31 17 28 9 27 NA 12 ...
## $ GRADE : chr "" "A" "" "" ...
## $ GRADE.DATE : POSIXct, format: NA "2014-11-13" ...
## $ RECORD.DATE : POSIXct, format: "2016-12-02" "2016-12-02" ...
## $ INSPECTION.TYPE : chr "Cycle Inspection / Initial Inspection" "Cycle Inspection / Re-inspection" "Cycle Inspection / Initial Inspection" "Cycle Inspection / Initial Inspection" ...
kable(map(nyc_data,class))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
format_date <- function(x)
{
if (class(x) == "POSIXct" || class(x) == "POSIXt")
x <- as.Date(x)
return(x)
}
nyc_data_formatted <- as_tibble(map(nyc_data,format_date))
nyc_data_formatted
## # A tibble: 436,612 × 18
## CAMIS DBA BORO BUILDING
## <int> <chr> <chr> <chr>
## 1 41606387 LA CUARTA RESTAURANT BROOKLYN 782
## 2 50007091 NEW PEKING RESTAURANT BROOKLYN 1581
## 3 50012185 CASTILLO RESTAURNAT BROOKLYN 709
## 4 40750062 PANEANTICO BAKERY BROOKLYN 9124
## 5 50034621 SHI LI XIANG QUEENS 13358
## 6 50007874 vapor lounge BRONX 3758
## 7 40552965 GROUND LEVEL PUB & GRUB STATEN ISLAND 958
## 8 41650546 KING KABAB QUEENS 16709
## 9 41524468 STARBUCKS MANHATTAN 1491
## 10 41435999 YOUR HOUSE CAFE BROOKLYN 6916
## # ... with 436,602 more rows, and 14 more variables: STREET <chr>,
## # ZIPCODE <int>, PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## # INSPECTION.DATE <date>, ACTION <chr>, VIOLATION.CODE <chr>,
## # VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## # GRADE <chr>, GRADE.DATE <date>, RECORD.DATE <date>,
## # INSPECTION.TYPE <chr>
kable(str(nyc_data_formatted))
## Classes 'tbl_df', 'tbl' and 'data.frame': 436612 obs. of 18 variables:
## $ CAMIS : int 41606387 50007091 50012185 40750062 50034621 50007874 40552965 41650546 41524468 41435999 ...
## $ DBA : chr "LA CUARTA RESTAURANT" "NEW PEKING RESTAURANT" "CASTILLO RESTAURNAT" "PANEANTICO BAKERY" ...
## $ BORO : chr "BROOKLYN" "BROOKLYN" "BROOKLYN" "BROOKLYN" ...
## $ BUILDING : chr "782" "1581" "709" "9124" ...
## $ STREET : chr "4 AVENUE" "FLATBUSH AVE" "5TH AVE" "3 AVENUE" ...
## $ ZIPCODE : int 11232 11210 11215 11209 11355 10465 10302 11432 10029 11209 ...
## $ PHONE : chr "7183694964" "7188591554" "7184994910" "7186802347" ...
## $ CUISINE.DESCRIPTION : chr "Peruvian" "Chinese" "Spanish" "Bakery" ...
## $ INSPECTION.DATE : Date, format: "2016-02-16" "2014-11-13" ...
## $ ACTION : chr "Violations were cited in the following area(s)." "Violations were cited in the following area(s)." "Violations were cited in the following area(s)." "Violations were cited in the following area(s)." ...
## $ VIOLATION.CODE : chr "04N" "02G" "04N" "06C" ...
## $ VIOLATION.DESCRIPTION: chr "Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies incl"| __truncated__ "Cold food item held above 41º F (smoked fish and reduced oxygen packaged foods above 38 ºF) except during necessary preparati"| __truncated__ "Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies incl"| __truncated__ "Food not protected from potential source of contamination during storage, preparation, transportation, display or service." ...
## $ CRITICAL.FLAG : chr "Critical" "Critical" "Critical" "Critical" ...
## $ SCORE : int 17 7 20 31 17 28 9 27 NA 12 ...
## $ GRADE : chr "" "A" "" "" ...
## $ GRADE.DATE : Date, format: NA "2014-11-13" ...
## $ RECORD.DATE : Date, format: "2016-12-02" "2016-12-02" ...
## $ INSPECTION.TYPE : chr "Cycle Inspection / Initial Inspection" "Cycle Inspection / Re-inspection" "Cycle Inspection / Initial Inspection" "Cycle Inspection / Initial Inspection" ...
nyc_violations_mice <- nyc_data_formatted %>% filter(year(nyc_data_formatted$INSPECTION.DATE)==2016) %>%
filter(str_detect(VIOLATION.DESCRIPTION, ignore.case("mice")))
nyc_violations_hair <- nyc_data_formatted %>% filter(year(nyc_data_formatted$INSPECTION.DATE)==2016) %>%
filter(str_detect(VIOLATION.DESCRIPTION, ignore.case("hair")))
nyc_violations_sewage <- nyc_data_formatted %>% filter(year(nyc_data_formatted$INSPECTION.DATE)==2016) %>%
filter(str_detect(VIOLATION.DESCRIPTION, ignore.case("sewage")))
violations_2016 <- list()
violations_2016["mice"] <- nrow(nyc_violations_mice)
violations_2016["hair"] <- nrow(nyc_violations_hair)
violations_2016["sewage"] <- nrow(nyc_violations_sewage)
paste("There were",violations_2016[["mice"]], " mice violations,", violations_2016[["hair"]], " hair violations and ", violations_2016[["sewage"]], " sewage violations in 2016")
## [1] "There were 8283 mice violations, 2132 hair violations and 13671 sewage violations in 2016"
check_violations <- function(year,expression)
{
violations_data <- nyc_data_formatted %>% filter(year(nyc_data_formatted$INSPECTION.DATE)==year) %>%
filter(str_detect(VIOLATION.DESCRIPTION, ignore.case(expression))) %>% group_by(DBA) %>%
summarise(n=n()) %>% top_n(20)
ggplot(data=violations_data, aes(y=n,x=reorder(DBA,-n),fill="firebrick")) + geom_bar(stat="identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
ggtitle(paste("NYC restaurants with the most violation counts for",expression))+
labs(x="Restaurant", y ="Violation Count")
}
check_violations(2016,"mice")
check_violations(2016,"hair")
check_violations(2016,"sewage")