Question 1

Loading packages, checking the class of each variable

library(RSocrata)
library(readr)
library(purrr)
library(tidyverse)
library(stringr)
library(lubridate)
library(tidyr)
library(dplyr)
library(curl)
# Show the working directory of the current session.
getwd()
## [1] "C:/Users/Prarthana/Documents"
# Download the restaurant inspection results through the Socrata API.
NYCData <- read.socrata(
  "https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59"
)
# Store the downloaded object on disk under its own name.
save(list = "NYCData", file = "C:/Users/Prarthana/Documents/destinat")
# Preview the first six rows.
head(NYCData, n = 6L)
##      CAMIS                   DBA     BORO BUILDING        STREET ZIPCODE
## 1 41606387  LA CUARTA RESTAURANT BROOKLYN      782      4 AVENUE   11232
## 2 50007091 NEW PEKING RESTAURANT BROOKLYN     1581  FLATBUSH AVE   11210
## 3 50012185   CASTILLO RESTAURNAT BROOKLYN      709       5TH AVE   11215
## 4 40750062     PANEANTICO BAKERY BROOKLYN     9124      3 AVENUE   11209
## 5 50034621          SHI LI XIANG   QUEENS    13358      41ST AVE   11355
## 6 50007874          vapor lounge    BRONX     3758 E TREMONT AVE   10465
##        PHONE CUISINE.DESCRIPTION INSPECTION.DATE
## 1 7183694964            Peruvian      2016-02-16
## 2 7188591554             Chinese      2014-11-13
## 3 7184994910             Spanish      2015-03-04
## 4 7186802347              Bakery      2015-08-22
## 5 7188863370             Chinese      2015-05-12
## 6 3472815995            American      2016-02-02
##                                            ACTION VIOLATION.CODE
## 1 Violations were cited in the following area(s).            04N
## 2 Violations were cited in the following area(s).            02G
## 3 Violations were cited in the following area(s).            04N
## 4 Violations were cited in the following area(s).            06C
## 5 Violations were cited in the following area(s).            10B
## 6 Violations were cited in the following area(s).            10J
##                                                                                                                                                                                                                                                                                  VIOLATION.DESCRIPTION
## 1 Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies include house flies, little house flies, blow flies, bottle flies and flesh flies. Food/refuse/sewage-associated flies include fruit flies, drain flies and Phorid flies.
## 2                                                                                                                                                                   Cold food item held above 41º F (smoked fish and reduced oxygen packaged foods above 38 ºF) except during necessary preparation.
## 3 Filth flies or food/refuse/sewage-associated (FRSA) flies present in facility\032s food and/or non-food areas. Filth flies include house flies, little house flies, blow flies, bottle flies and flesh flies. Food/refuse/sewage-associated flies include fruit flies, drain flies and Phorid flies.
## 4                                                                                                                                                                           Food not protected from potential source of contamination during storage, preparation, transportation, display or service.
## 5                                                                   Plumbing not properly installed or maintained; anti-siphonage or backflow prevention device not provided where required; equipment or floor not properly drained; sewage disposal system in disrepair or not functioning properly.
## 6                                                                                                                                                                                                                                            ''''Wash hands\032 sign not posted at hand wash facility.
##   CRITICAL.FLAG SCORE GRADE GRADE.DATE RECORD.DATE
## 1      Critical    17             <NA>  2016-12-02
## 2      Critical     7     A 2014-11-13  2016-12-02
## 3      Critical    20             <NA>  2016-12-02
## 4      Critical    31             <NA>  2016-12-02
## 5  Not Critical    17             <NA>  2016-12-02
## 6  Not Critical    28             <NA>  2016-12-02
##                                    INSPECTION.TYPE
## 1            Cycle Inspection / Initial Inspection
## 2                 Cycle Inspection / Re-inspection
## 3            Cycle Inspection / Initial Inspection
## 4            Cycle Inspection / Initial Inspection
## 5 Pre-permit (Operational) / Compliance Inspection
## 6            Cycle Inspection / Initial Inspection
# List the class of every column in the downloaded data.
NYCData %>% map(class)
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "character"
## 
## $BORO
## [1] "character"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "character"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "character"
## 
## $INSPECTION.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $ACTION
## [1] "character"
## 
## $VIOLATION.CODE
## [1] "character"
## 
## $VIOLATION.DESCRIPTION
## [1] "character"
## 
## $CRITICAL.FLAG
## [1] "character"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "character"
## 
## $GRADE.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $RECORD.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $INSPECTION.TYPE
## [1] "character"

Question 2

Changing POSIXt and POSIXct to Date type, converting to tibble

# Report the class a vector has after converting date-times to Date.
#
# Args:
#   x: a vector, e.g. one column of a data frame.
# Returns:
#   "Date" when x is a date-time (anything inheriting from "POSIXt", which
#   covers both POSIXct and POSIXlt); otherwise class(x) as is. Only the
#   class is returned -- the caller's data is not modified.
CheckPOSIX <- function(x) {
  # class() of a date-time has length 2 (e.g. c("POSIXct", "POSIXt")), so
  # comparing it with == inside || is not a scalar condition; inherits()
  # gives a single TRUE/FALSE regardless of how many classes x carries.
  if (inherits(x, "POSIXt")) {
    x <- as.Date(x)
  }
  class(x)
}

# Apply CheckPOSIX to every column and list the resulting classes.
NYCData %>% map(CheckPOSIX)
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "character"
## 
## $BORO
## [1] "character"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "character"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "character"
## 
## $INSPECTION.DATE
## [1] "Date"
## 
## $ACTION
## [1] "character"
## 
## $VIOLATION.CODE
## [1] "character"
## 
## $VIOLATION.DESCRIPTION
## [1] "character"
## 
## $CRITICAL.FLAG
## [1] "character"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "character"
## 
## $GRADE.DATE
## [1] "Date"
## 
## $RECORD.DATE
## [1] "Date"
## 
## $INSPECTION.TYPE
## [1] "character"
# Convert the data frame to a tibble. Column types are carried over as they
# are, so the date columns are still date-times (<dttm>) in the result.
NYCDataT <- NYCData %>% as_tibble()
print(NYCDataT)
## # A tibble: 436,612 × 18
##       CAMIS                      DBA          BORO BUILDING
##       <int>                    <chr>         <chr>    <chr>
## 1  41606387     LA CUARTA RESTAURANT      BROOKLYN      782
## 2  50007091    NEW PEKING RESTAURANT      BROOKLYN     1581
## 3  50012185      CASTILLO RESTAURNAT      BROOKLYN      709
## 4  40750062        PANEANTICO BAKERY      BROOKLYN     9124
## 5  50034621             SHI LI XIANG        QUEENS    13358
## 6  50007874             vapor lounge         BRONX     3758
## 7  40552965 GROUND LEVEL  PUB & GRUB STATEN ISLAND      958
## 8  41650546               KING KABAB        QUEENS    16709
## 9  41524468                STARBUCKS     MANHATTAN     1491
## 10 41435999          YOUR HOUSE CAFE      BROOKLYN     6916
## # ... with 436,602 more rows, and 14 more variables: STREET <chr>,
## #   ZIPCODE <int>, PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## #   INSPECTION.DATE <dttm>, ACTION <chr>, VIOLATION.CODE <chr>,
## #   VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## #   GRADE <chr>, GRADE.DATE <dttm>, RECORD.DATE <dttm>,
## #   INSPECTION.TYPE <chr>

Question 3

Filtering for violation descriptions related to mice, hair, or sewage

# Rows whose inspection took place in 2016.
NYC2016 <- NYCDataT %>%
  filter(year(INSPECTION.DATE) == 2016)

# 2016 rows whose violation description contains "mice" (case-sensitive).
NYCmice <- NYC2016 %>%
  filter(grepl("mice", VIOLATION.DESCRIPTION))

# 2016 rows whose violation description contains "hair" (case-sensitive).
NYChair <- NYC2016 %>%
  filter(grepl("hair", VIOLATION.DESCRIPTION))

# 2016 rows whose violation description contains "sewage" (case-sensitive).
NYCsewage <- NYC2016 %>%
  filter(grepl("sewage", VIOLATION.DESCRIPTION))

Question 4

Graph and top 20 restaurants with the most violations

# Print and plot the 20 restaurant names (DBA) with the most violation
# records matching a pattern in a given inspection year.
#
# Args:
#   NYC:     data frame or tibble with INSPECTION.DATE, VIOLATION.DESCRIPTION
#            and DBA columns.
#   year:    inspection year to keep, e.g. 2016.
#   pattern: regular expression matched case-insensitively against
#            VIOLATION.DESCRIPTION, e.g. "mice".
# Returns:
#   The ggplot bar chart, invisibly. The summary table and the chart are
#   printed as side effects. Ties at the 20th place are all kept, so the
#   table can hold more than 20 rows.
top_restaurant <- function(NYC, year, pattern) {
  ByYear <- NYC %>%
    # Filter the data that was passed in, not a global table. The call is
    # namespaced because the `year` argument shares its name with
    # lubridate's year() function.
    filter(lubridate::year(INSPECTION.DATE) == year) %>%
    # Match the caller's pattern rather than a fixed string.
    filter(str_detect(VIOLATION.DESCRIPTION, regex(pattern, ignore_case = TRUE))) %>%
    group_by(DBA) %>%
    summarise(count = n()) %>%
    # Name the ranking column explicitly instead of relying on the default.
    top_n(20, count) %>%
    arrange(desc(count))
  print(ByYear)
  graph <- ggplot(
    data = ByYear,
    aes(x = reorder(DBA, desc(count)), y = count, col = DBA, fill = DBA)
  ) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = count), na.rm = TRUE, hjust = 0.3, vjust = -0.7) +
    ggtitle("Top 20 Restaurants with max violations") +
    ylab("Violations") +
    xlab("Top 20 Restaurants")
  print(graph)
  invisible(graph)
}
# Sample function testing
ans <- top_restaurant(NYC = NYCDataT, year = 2016, pattern = "mice")
## # A tibble: 20 × 2
##                                      DBA count
##                                    <chr> <int>
## 1                            BURGER KING   230
## 2                             CAFFE BENE   114
## 3                       CARVEL ICE CREAM   120
## 4                               CHECKERS   136
## 5                 CHIPOTLE MEXICAN GRILL   170
## 6                    CROWN FRIED CHICKEN   384
## 7                               DOMINO'S   309
## 8                         DUNKIN' DONUTS   908
## 9         DUNKIN' DONUTS, BASKIN ROBBINS   319
## 10 GOLDEN KRUST CARIBBEAN BAKERY & GRILL   248
## 11                 KENNEDY FRIED CHICKEN   345
## 12                                   KFC   134
## 13                     LE PAIN QUOTIDIEN   110
## 14                        LITTLE CAESARS   131
## 15                            MCDONALD'S   729
## 16                           PAPA JOHN'S   238
## 17             POPEYES LOUISIANA KITCHEN   245
## 18                             STARBUCKS   475
## 19                                SUBWAY  1108
## 20                               WENDY'S   112