Loading packages, checking the class of each variable
library(RSocrata)
library(readr)
library(purrr)
library(tidyverse)
library(stringr)
library(lubridate)
library(tidyr)
library(dplyr)
library(curl)
# Show the current working directory.
getwd()
## [1] "C:/Users/Prarthana/Documents"
# Download the DOHMH restaurant inspection results from NYC Open Data.
NYCData <- read.socrata(
  "https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59"
)
# Keep a local copy of the downloaded data frame.
save(list = "NYCData", file = "C:/Users/Prarthana/Documents/destinat")
# Preview the first rows.
head(NYCData)
## CAMIS DBA BORO BUILDING STREET ZIPCODE
## 1 41606387 LA CUARTA RESTAURANT BROOKLYN 782 4 AVENUE 11232
## 2 50007091 NEW PEKING RESTAURANT BROOKLYN 1581 FLATBUSH AVE 11210
## 3 50012185 CASTILLO RESTAURNAT BROOKLYN 709 5TH AVE 11215
## 4 40750062 PANEANTICO BAKERY BROOKLYN 9124 3 AVENUE 11209
## 5 50034621 SHI LI XIANG QUEENS 13358 41ST AVE 11355
## 6 50007874 vapor lounge BRONX 3758 E TREMONT AVE 10465
## PHONE CUISINE.DESCRIPTION INSPECTION.DATE
## 1 7183694964 Peruvian 2016-02-16
## 2 7188591554 Chinese 2014-11-13
## 3 7184994910 Spanish 2015-03-04
## 4 7186802347 Bakery 2015-08-22
## 5 7188863370 Chinese 2015-05-12
## 6 3472815995 American 2016-02-02
## ACTION VIOLATION.CODE
## 1 Violations were cited in the following area(s). 04N
## 2 Violations were cited in the following area(s). 02G
## 3 Violations were cited in the following area(s). 04N
## 4 Violations were cited in the following area(s). 06C
## 5 Violations were cited in the following area(s). 10B
## 6 Violations were cited in the following area(s). 10J
## VIOLATION.DESCRIPTION
## 1 Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies include house flies, little house flies, blow flies, bottle flies and flesh flies. Food/refuse/sewage-associated flies include fruit flies, drain flies and Phorid flies.
## 2 Cold food item held above 41º F (smoked fish and reduced oxygen packaged foods above 38 ºF) except during necessary preparation.
## 3 Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies include house flies, little house flies, blow flies, bottle flies and flesh flies. Food/refuse/sewage-associated flies include fruit flies, drain flies and Phorid flies.
## 4 Food not protected from potential source of contamination during storage, preparation, transportation, display or service.
## 5 Plumbing not properly installed or maintained; anti-siphonage or backflow prevention device not provided where required; equipment or floor not properly drained; sewage disposal system in disrepair or not functioning properly.
## 6 ''''Wash hands\032 sign not posted at hand wash facility.
## CRITICAL.FLAG SCORE GRADE GRADE.DATE RECORD.DATE
## 1 Critical 17 <NA> 2016-12-02
## 2 Critical 7 A 2014-11-13 2016-12-02
## 3 Critical 20 <NA> 2016-12-02
## 4 Critical 31 <NA> 2016-12-02
## 5 Not Critical 17 <NA> 2016-12-02
## 6 Not Critical 28 <NA> 2016-12-02
## INSPECTION.TYPE
## 1 Cycle Inspection / Initial Inspection
## 2 Cycle Inspection / Re-inspection
## 3 Cycle Inspection / Initial Inspection
## 4 Cycle Inspection / Initial Inspection
## 5 Pre-permit (Operational) / Compliance Inspection
## 6 Cycle Inspection / Initial Inspection
# List the class of every column in the downloaded data.
NYCData %>% map(class)
## $CAMIS
## [1] "integer"
##
## $DBA
## [1] "character"
##
## $BORO
## [1] "character"
##
## $BUILDING
## [1] "character"
##
## $STREET
## [1] "character"
##
## $ZIPCODE
## [1] "integer"
##
## $PHONE
## [1] "character"
##
## $CUISINE.DESCRIPTION
## [1] "character"
##
## $INSPECTION.DATE
## [1] "POSIXct" "POSIXt"
##
## $ACTION
## [1] "character"
##
## $VIOLATION.CODE
## [1] "character"
##
## $VIOLATION.DESCRIPTION
## [1] "character"
##
## $CRITICAL.FLAG
## [1] "character"
##
## $SCORE
## [1] "integer"
##
## $GRADE
## [1] "character"
##
## $GRADE.DATE
## [1] "POSIXct" "POSIXt"
##
## $RECORD.DATE
## [1] "POSIXct" "POSIXt"
##
## $INSPECTION.TYPE
## [1] "character"
Changing POSIXt and POSIXct to date type, converting to tibble
# Report the class a vector would have once date-times are collapsed to dates.
#
# x: a vector (typically one column of a data frame).
# Returns: "Date" when x is a date-time vector (the class of as.Date(x));
#   otherwise class(x) unchanged.
#
# Note: only the class name is returned; the caller's data is not modified.
CheckPOSIX <- function(x) {
  # inherits() yields a single TRUE/FALSE even though date-time vectors carry
  # a two-element class such as c("POSIXct", "POSIXt"). Comparing class(x)
  # with == gives a length-2 logical, which `||` rejects in R >= 4.3.
  if (inherits(x, "POSIXt")) {
    x <- as.Date(x)
  }
  class(x)
}
# Apply CheckPOSIX to every column and list the class each one reports.
NYCData %>% map(CheckPOSIX)
## $CAMIS
## [1] "integer"
##
## $DBA
## [1] "character"
##
## $BORO
## [1] "character"
##
## $BUILDING
## [1] "character"
##
## $STREET
## [1] "character"
##
## $ZIPCODE
## [1] "integer"
##
## $PHONE
## [1] "character"
##
## $CUISINE.DESCRIPTION
## [1] "character"
##
## $INSPECTION.DATE
## [1] "Date"
##
## $ACTION
## [1] "character"
##
## $VIOLATION.CODE
## [1] "character"
##
## $VIOLATION.DESCRIPTION
## [1] "character"
##
## $CRITICAL.FLAG
## [1] "character"
##
## $SCORE
## [1] "integer"
##
## $GRADE
## [1] "character"
##
## $GRADE.DATE
## [1] "Date"
##
## $RECORD.DATE
## [1] "Date"
##
## $INSPECTION.TYPE
## [1] "character"
# Convert the data frame to a tibble and print it.
NYCDataT <- NYCData %>% as_tibble()
NYCDataT
## # A tibble: 436,612 × 18
## CAMIS DBA BORO BUILDING
## <int> <chr> <chr> <chr>
## 1 41606387 LA CUARTA RESTAURANT BROOKLYN 782
## 2 50007091 NEW PEKING RESTAURANT BROOKLYN 1581
## 3 50012185 CASTILLO RESTAURNAT BROOKLYN 709
## 4 40750062 PANEANTICO BAKERY BROOKLYN 9124
## 5 50034621 SHI LI XIANG QUEENS 13358
## 6 50007874 vapor lounge BRONX 3758
## 7 40552965 GROUND LEVEL PUB & GRUB STATEN ISLAND 958
## 8 41650546 KING KABAB QUEENS 16709
## 9 41524468 STARBUCKS MANHATTAN 1491
## 10 41435999 YOUR HOUSE CAFE BROOKLYN 6916
## # ... with 436,602 more rows, and 14 more variables: STREET <chr>,
## # ZIPCODE <int>, PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## # INSPECTION.DATE <dttm>, ACTION <chr>, VIOLATION.CODE <chr>,
## # VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## # GRADE <chr>, GRADE.DATE <dttm>, RECORD.DATE <dttm>,
## # INSPECTION.TYPE <chr>
Filtering for violation descriptions related to mice, hair or sewage
# Keep only the inspections carried out in 2016.
NYC2016 <- filter(NYCDataT, year(INSPECTION.DATE) == 2016)
# 2016 rows whose violation description contains each keyword (case-sensitive).
NYCmice <- filter(NYC2016, grepl("mice", VIOLATION.DESCRIPTION))
NYChair <- filter(NYC2016, grepl("hair", VIOLATION.DESCRIPTION))
NYCsewage <- filter(NYC2016, grepl("sewage", VIOLATION.DESCRIPTION))
Graph and top 20 restaurants with most violations
# Print and plot the restaurants with the most matching violations in a year.
#
# NYC:     data frame / tibble with columns INSPECTION.DATE (date or
#          date-time), VIOLATION.DESCRIPTION and DBA.
# year:    calendar year of the inspections to keep, e.g. 2016.
# pattern: regular expression matched case-insensitively against
#          VIOLATION.DESCRIPTION, e.g. "mice".
#
# Prints the summary table and a bar chart of the 20 highest counts (ties at
# the cut-off are kept, so more than 20 rows can appear) and passes back the
# value of print(graph).
top_restaurant <- function(NYC, year, pattern) {
  stopifnot(is.character(pattern), length(pattern) == 1)

  # Copy the argument so it cannot be confused with lubridate::year() below.
  target_year <- year

  ByYear <- NYC %>%
    # Filter on the columns of the data passed in, not on a global table.
    filter(lubridate::year(INSPECTION.DATE) == target_year) %>%
    # Match the caller's pattern rather than a hard-coded one.
    filter(str_detect(VIOLATION.DESCRIPTION,
                      regex(pattern, ignore_case = TRUE))) %>%
    group_by(DBA) %>%
    summarise(count = n()) %>%
    # Rank explicitly by count instead of relying on the last column.
    top_n(20, count)
  print(ByYear)

  graph <- ggplot(
    data = ByYear,
    aes(x = reorder(DBA, desc(count)), y = count, colour = DBA, fill = DBA)
  ) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = count), na.rm = TRUE, hjust = 0.3, vjust = -0.7) +
    ggtitle("Top 20 Restaurants with max violations") +
    ylab("Violations") +
    xlab("Top 20 Restaurants")
  print(graph)
}
# Sample function testing
ans <- top_restaurant(NYC = NYCDataT, year = 2016, pattern = "mice")
## # A tibble: 20 × 2
## DBA count
## <chr> <int>
## 1 BURGER KING 230
## 2 CAFFE BENE 114
## 3 CARVEL ICE CREAM 120
## 4 CHECKERS 136
## 5 CHIPOTLE MEXICAN GRILL 170
## 6 CROWN FRIED CHICKEN 384
## 7 DOMINO'S 309
## 8 DUNKIN' DONUTS 908
## 9 DUNKIN' DONUTS, BASKIN ROBBINS 319
## 10 GOLDEN KRUST CARIBBEAN BAKERY & GRILL 248
## 11 KENNEDY FRIED CHICKEN 345
## 12 KFC 134
## 13 LE PAIN QUOTIDIEN 110
## 14 LITTLE CAESARS 131
## 15 MCDONALD'S 729
## 16 PAPA JOHN'S 238
## 17 POPEYES LOUISIANA KITCHEN 245
## 18 STARBUCKS 475
## 19 SUBWAY 1108
## 20 WENDY'S 112