Synopsis

In this Week 7 homework we download the NYC restaurant inspection data from the site “https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59” and explore it as directed in the questions that follow.

Packages Used

library(RSocrata) #To download the data
library(tidyverse) # TO use the purr package and tibble commands
library(lubridate) # To use the year function
library(stringr) #To subset strings

Data Import

# Socrata endpoint of the DOHMH restaurant inspection results.
url <- "https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59"

# Download the dataset over the network.
Resdata <- read.socrata(url)

# Optional local cache of the download (left disabled):
# write_rds(Resdata, path = 'Desktop/Data')
# Rest <- read_rds('.../Desktop/Data')

# Continue with a tibble and preview its first rows.
Rest <- as_tibble(Resdata)
head(Rest)
## # A tibble: 6 × 18
##      CAMIS                   DBA     BORO BUILDING        STREET ZIPCODE
##      <int>                 <chr>    <chr>    <chr>         <chr>   <int>
## 1 41606387  LA CUARTA RESTAURANT BROOKLYN      782      4 AVENUE   11232
## 2 50007091 NEW PEKING RESTAURANT BROOKLYN     1581  FLATBUSH AVE   11210
## 3 50012185   CASTILLO RESTAURNAT BROOKLYN      709       5TH AVE   11215
## 4 40750062     PANEANTICO BAKERY BROOKLYN     9124      3 AVENUE   11209
## 5 50034621          SHI LI XIANG   QUEENS    13358      41ST AVE   11355
## 6 50007874          vapor lounge    BRONX     3758 E TREMONT AVE   10465
## # ... with 12 more variables: PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## #   INSPECTION.DATE <dttm>, ACTION <chr>, VIOLATION.CODE <chr>,
## #   VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## #   GRADE <chr>, GRADE.DATE <dttm>, RECORD.DATE <dttm>,
## #   INSPECTION.TYPE <chr>

Questions

# Solution 1

# Report the class vector of every column in the imported data.
map(Rest, class)
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "character"
## 
## $BORO
## [1] "character"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "character"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "character"
## 
## $INSPECTION.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $ACTION
## [1] "character"
## 
## $VIOLATION.CODE
## [1] "character"
## 
## $VIOLATION.DESCRIPTION
## [1] "character"
## 
## $CRITICAL.FLAG
## [1] "character"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "character"
## 
## $GRADE.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $RECORD.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $INSPECTION.TYPE
## [1] "character"
#Solution 2

# Convert a POSIXct vector to Date; return any other input unchanged.
#
# x: a vector (typically one column of a data frame).
#
# Returns a Date vector when `x` inherits from "POSIXct", otherwise `x` itself.
Date_check <- function(x) {
  if (!inherits(x, "POSIXct")) {
    return(x)
  }

  # as.Date() on POSIXct defaults to tz = "UTC", which moves a local-midnight
  # timestamp to the previous calendar day in zones east of UTC. Convert in the
  # vector's own time zone instead; "" means the session's local zone.
  tz <- attr(x, "tzone")
  if (is.null(tz)) {
    tz <- ""
  }
  as.Date(x, tz = tz[1L])
}

# Run Date_check over every column and rebuild the tibble from the result.
Restnew <- as_tibble(map(Rest, Date_check))

# Show the column classes after the conversion.
map(Restnew, class)
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "character"
## 
## $BORO
## [1] "character"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "character"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "character"
## 
## $INSPECTION.DATE
## [1] "Date"
## 
## $ACTION
## [1] "character"
## 
## $VIOLATION.CODE
## [1] "character"
## 
## $VIOLATION.DESCRIPTION
## [1] "character"
## 
## $CRITICAL.FLAG
## [1] "character"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "character"
## 
## $GRADE.DATE
## [1] "Date"
## 
## $RECORD.DATE
## [1] "Date"
## 
## $INSPECTION.TYPE
## [1] "character"
#Solution 3

# Count 2016 inspection rows whose violation description mentions mice, hair
# or sewage. A description is assigned to the first matching category, checked
# in that order.
Restnew %>%
  filter(year(INSPECTION.DATE) == 2016) %>%
  mutate(
    # Rows matching none of the keywords get a real missing value so they can
    # be dropped with is.na(), instead of comparing label strings that must be
    # spelled identically in two places.
    ViolationType = ifelse(
      grepl("mice", VIOLATION.DESCRIPTION), "Mice Violation",
      ifelse(
        grepl("hair", VIOLATION.DESCRIPTION), "Hair Violation",
        ifelse(
          grepl("sewage", VIOLATION.DESCRIPTION), "Sewage Violation",
          NA_character_
        )
      )
    )
  ) %>%
  filter(!is.na(ViolationType)) %>%
  group_by(ViolationType) %>%
  summarise(ViolRest = n())
## # A tibble: 2 × 2
##    ViolationType ViolRest
##            <chr>    <int>
## 1 Hair Violation     2132
## 2 Mice Violation     8283
# Solution 4

# Plot the restaurants with the most violations matching a pattern in one year.
#
# exp:  pattern handed to str_detect() and matched against
#       VIOLATION.DESCRIPTION.
# year: calendar year compared with the year of INSPECTION.DATE.
# data: inspection table to summarise. Defaults to the global `Restnew`, so
#       existing two-argument calls behave as before.
#       NOTE(review): if `data` ever has columns named `year` or `exp`, they
#       would mask the arguments inside filter() - confirm before reuse.
#
# Returns a ggplot object: a horizontal bar chart of the number of matching
# rows per DBA for the top 20 counts (top_n() keeps ties, so more than 20 bars
# can appear).
TopRestViol <- function(exp, year, data = Restnew) {
  # lubridate::year() is namespaced because the `year` argument shares its name.
  top_counts <- data %>%
    filter(
      lubridate::year(INSPECTION.DATE) == year,
      str_detect(VIOLATION.DESCRIPTION, exp)
    ) %>%
    count(DBA) %>%
    top_n(20, n)

  # reorder() fixes the bar order, so no separate sort of the rows is needed.
  ggplot(top_counts) +
    geom_col(
      mapping = aes(x = reorder(DBA, n), y = n),
      color = "darkblue",
      fill = "lightblue"
    ) +
    coord_flip() +
    labs(x = "Restaurant Name", y = "No. of Violations") +
    theme(text = element_text(size = 10))
}

TopRestViol("mice", 2016)