Assignment 7

Packages Used

library('RSocrata')
library('readr')
library("purrr")
library('dplyr')
library('tibble')
library('stringr')
library('lubridate')
library('ggplot2')

File Import

# One-time download and cache of the NYC restaurant inspection data, kept
# commented out so the document reads the cached copy instead.
# NOTE(review): the commented write_rds() target ends in 'data/' while the
# read below uses 'data/nyc' -- confirm the file name before re-enabling it.
##url <- 'https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59'
##RestaurantInspection <- read.socrata(url = url)
##write_rds(RestaurantInspection, 'Desktop/Course/R/Data Wrangling with R (BANA 8090)/data/')
# Load the cached data. The path is relative, so it resolves against the
# current working directory.
RestaurantInspection <- read_rds('Desktop/Course/R/Data Wrangling with R (BANA 8090)/data/nyc')

Q1

# List the class vector of every column in the inspection data.
RestaurantInspection %>% map(class)
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "factor"
## 
## $BORO
## [1] "factor"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "factor"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "factor"
## 
## $INSPECTION.DATE
## [1] "Date"
## 
## $ACTION
## [1] "factor"
## 
## $VIOLATION.CODE
## [1] "factor"
## 
## $VIOLATION.DESCRIPTION
## [1] "factor"
## 
## $CRITICAL.FLAG
## [1] "factor"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "factor"
## 
## $GRADE.DATE
## [1] "Date"
## 
## $RECORD.DATE
## [1] "Date"
## 
## $INSPECTION.TYPE
## [1] "factor"

Q2

# Report whether `x` is a POSIXlt date-time object.
#
# x: any R object (in this script, one column of the inspection data).
# Returns a single TRUE or FALSE.
checkfunction <- function(x){
  # inherits() tests class membership directly, so there is no need for a
  # scalar ifelse() or for the T/F shortcuts, which are ordinary variables
  # that can be reassigned.
  inherits(x, "POSIXlt")
}

# Convert `x` to Date when checkfunction() flags it as POSIXlt; any other
# input is handed back unchanged.
POSIXltToDate <- function(x){
  if (checkfunction(x)) as.Date(x) else x
}

# Run POSIXltToDate over every column, rebuild the tibble from the resulting
# list of columns, and display it.
RestaurantInspection <- as_tibble(map(RestaurantInspection, POSIXltToDate))
print(RestaurantInspection)
## # A tibble: 436,584 × 18
##       CAMIS                   DBA   BORO BUILDING          STREET ZIPCODE
##       <int>                <fctr> <fctr>    <chr>          <fctr>   <int>
## 1  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 2  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 3  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 4  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 5  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 6  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 7  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 8  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 9  30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## 10 30075445 MORRIS PARK BAKE SHOP  BRONX     1007 MORRIS PARK AVE   10462
## # ... with 436,574 more rows, and 12 more variables: PHONE <chr>,
## #   CUISINE.DESCRIPTION <fctr>, INSPECTION.DATE <date>, ACTION <fctr>,
## #   VIOLATION.CODE <fctr>, VIOLATION.DESCRIPTION <fctr>,
## #   CRITICAL.FLAG <fctr>, SCORE <int>, GRADE <fctr>, GRADE.DATE <date>,
## #   RECORD.DATE <date>, INSPECTION.TYPE <fctr>

Q3

# Q3: among 2016 inspections, tally violations whose description mentions
# "mice", "hair" or "sewage". Each row is labelled with the first keyword that
# matches, checked in that order; rows matching none get NA and are dropped.
RestaurantInspection %>%
  filter(year(INSPECTION.DATE) == 2016) %>%
  mutate(Violation = ifelse(grepl("mice", VIOLATION.DESCRIPTION), "mice",
                     ifelse(grepl("hair", VIOLATION.DESCRIPTION), "hair",
                     ifelse(grepl("sewage", VIOLATION.DESCRIPTION), "sewage",
                            NA_character_)))) %>%
  filter(!is.na(Violation)) %>%
  group_by(Violation, Year = year(INSPECTION.DATE)) %>%
  # The data holds one row per cited violation, so a row count is not a
  # restaurant count: count distinct CAMIS ids for restaurants and keep the
  # row count as a separate column.
  summarise(NumberOfRest = n_distinct(CAMIS),
            NumberOfViolations = n()) %>%
  ungroup()
## NOTE: output not regenerated after the fix above. The previous knit listed
## row counts under NumberOfRest (hair 2132, mice 8281, sewage 13640); those
## figures correspond to NumberOfViolations. Re-knit to obtain NumberOfRest.

Q4

# Bar chart of the restaurant names (DBA) with the most violations matching a
# pattern in a given year.
#
# findpattern: regular expression looked for in VIOLATION.DESCRIPTION. Plain
#              strings are matched case-insensitively; a stringr modifier
#              object (e.g. fixed()) is used exactly as supplied.
# findyear:    year compared with year(INSPECTION.DATE).
# data:        inspection records to search; defaults to the global
#              RestaurantInspection, so two-argument calls work as before.
#
# Returns the ggplot object (one bar per DBA, ordered by violation count).
top_violations <- function(findpattern, findyear, data = RestaurantInspection){
  # The description text is lower-cased before matching, so a plain pattern
  # containing capitals (e.g. "Mice") would otherwise never match.
  if (!inherits(findpattern, c("pattern", "stringr_pattern"))) {
    findpattern <- regex(findpattern, ignore_case = TRUE)
  }

  data %>%
    filter(year(INSPECTION.DATE) == findyear,
           str_detect(tolower(VIOLATION.DESCRIPTION), findpattern)) %>%
    count(DBA) %>%
    # top_n() keeps ties, so more than 20 names can be shown. No arrange() is
    # needed here because reorder() below sets the bar order.
    top_n(20, n) %>%
    ggplot() +
    geom_bar(mapping = aes(x = reorder(DBA, n), y = n),
             stat = "identity", color = "cyan", fill = "yellow") +
    labs(x = "Restaurant", y = "Violations") +
    theme(text = element_text(size = 7),
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

top_violations("mice", 2016)