Synopsis

This document is my homework report for week 7. Here, I use functions and iteration to explore the NYC restaurant inspection data.

Packages Required

library(dplyr) #to manipulate data
library(tidyr) #to make data tidy
library(tidyverse) #to manipulate data
library(RSocrata) ## to read socrata API datasets
library(tibble) #to create tibbles
library(stringr) #to use regx and other string functions
library(purrr) #for iteration functions like map
library(lubridate)#for date

Reading Data

# One-off download from the Socrata API, kept commented out; the analysis
# reads a cached RDS copy instead. The cache file name here matches the one
# passed to readRDS() below, so uncommenting these lines refreshes that file.
# NYC <- 'https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59'
# NYC_Data <- read.socrata(NYC)
# readr::write_rds(NYC_Data, "NYC_data.rds")

# Load the cached data set from the working directory.
NYC_Data <- readRDS(file = "NYC_data.rds")

Question 1

# Build a named list holding the class vector of every column in NYC_Data.
NYC_Data1 <- map(NYC_Data, class)
NYC_Data1
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "character"
## 
## $BORO
## [1] "character"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "character"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "character"
## 
## $INSPECTION.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $ACTION
## [1] "character"
## 
## $VIOLATION.CODE
## [1] "character"
## 
## $VIOLATION.DESCRIPTION
## [1] "character"
## 
## $CRITICAL.FLAG
## [1] "character"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "character"
## 
## $GRADE.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $RECORD.DATE
## [1] "POSIXct" "POSIXt" 
## 
## $INSPECTION.TYPE
## [1] "character"

Question 2

#' Convert a POSIXlt vector to Date, leaving any other input untouched
#'
#' Written to be applied to one column at a time (e.g. via `map()`).
#' Only objects inheriting from "POSIXlt" are converted; POSIXct values and
#' every other type are returned exactly as they were passed in.
#'
#' @param x Any R object, typically a single data-frame column.
#' @return `as.Date(x)` when `x` inherits from "POSIXlt", otherwise `x`.
POSIXlt_Check <- function(x) {
  # inherits() inspects the whole class vector, so the check does not depend
  # on "POSIXlt" being the first class entry.
  if (inherits(x, "POSIXlt")) {
    # Return the converted value directly (not via an assignment) so the
    # result is visible when the function is called interactively.
    as.Date(x)
  } else {
    x
  }
}

# Run POSIXlt_Check over every column of NYC_Data and store the result as a
# tibble.
NYC_Data2 <- NYC_Data %>%
  map_df(POSIXlt_Check) %>%
  as_tibble()
NYC_Data2
## # A tibble: 436,612 × 18
##       CAMIS                      DBA          BORO BUILDING
##       <int>                    <chr>         <chr>    <chr>
## 1  41606387     LA CUARTA RESTAURANT      BROOKLYN      782
## 2  50007091    NEW PEKING RESTAURANT      BROOKLYN     1581
## 3  50012185      CASTILLO RESTAURNAT      BROOKLYN      709
## 4  40750062        PANEANTICO BAKERY      BROOKLYN     9124
## 5  50034621             SHI LI XIANG        QUEENS    13358
## 6  50007874             vapor lounge         BRONX     3758
## 7  40552965 GROUND LEVEL  PUB & GRUB STATEN ISLAND      958
## 8  41650546               KING KABAB        QUEENS    16709
## 9  41524468                STARBUCKS     MANHATTAN     1491
## 10 41435999          YOUR HOUSE CAFE      BROOKLYN     6916
## # ... with 436,602 more rows, and 14 more variables: STREET <chr>,
## #   ZIPCODE <int>, PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## #   INSPECTION.DATE <dttm>, ACTION <chr>, VIOLATION.CODE <chr>,
## #   VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## #   GRADE <chr>, GRADE.DATE <dttm>, RECORD.DATE <dttm>,
## #   INSPECTION.TYPE <chr>

Question 3

# For inspections dated 2016, count how many distinct restaurants (CAMIS ids)
# have at least one violation description mentioning each of three keywords.
NYC_Data3 <- NYC_Data2 %>%
  filter(year(INSPECTION.DATE) == 2016) %>%
  # Tag each row with the first keyword found in its description, checked in
  # the order Mice, Hair, Sewage (case-insensitive). Rows matching none get a
  # real NA rather than the string "NA".
  mutate(
    Violation = case_when(
      grepl("Mice", VIOLATION.DESCRIPTION, ignore.case = TRUE) ~ "Mice",
      grepl("Hair", VIOLATION.DESCRIPTION, ignore.case = TRUE) ~ "Hair",
      grepl("Sewage", VIOLATION.DESCRIPTION, ignore.case = TRUE) ~ "Sewage",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(Violation)) %>%
  # Keep one row per restaurant/keyword pair so that a restaurant cited
  # several times for the same keyword is counted only once.
  distinct(CAMIS, Violation) %>%
  group_by(Violation) %>%
  summarise(Rest_Count = n())

NYC_Data3
## # A tibble: 3 × 2
##   Violation Rest_Count
##       <chr>      <int>
## 1      Hair       2000
## 2      Mice       5697
## 3    Sewage       9468

Question 4

#' Bar chart of the 20 restaurant names with the most matching violations
#'
#' Keeps the rows of `Data` whose inspection year equals `yr` and whose
#' violation description matches `Viol`, counts the remaining rows per
#' restaurant name (`DBA`), and plots the 20 largest counts.
#'
#' @param Data Data frame with `INSPECTION.DATE`, `VIOLATION.DESCRIPTION`
#'   and `DBA` columns.
#' @param yr Year compared against `year(INSPECTION.DATE)`.
#' @param Viol A single string. It is passed to `grepl()` as a
#'   case-insensitive regular expression and is also inserted in the title.
#' @return A ggplot object (bars ordered by violation count).
Filt_NYC_Data <- function(Data, yr, Viol) {
  # grepl() uses a single pattern and the title expects a single label.
  stopifnot(is.character(Viol), length(Viol) == 1)

  # Filter on the match itself instead of tagging rows with a "NA" string
  # sentinel, which would keep every row when Viol is literally "NA".
  top_restaurants <- Data %>%
    filter(
      year(INSPECTION.DATE) == yr,
      grepl(Viol, VIOLATION.DESCRIPTION, ignore.case = TRUE)
    ) %>%
    group_by(DBA) %>%
    summarise(RCount = n()) %>%
    arrange(desc(RCount)) %>%
    head(20)

  ggplot(top_restaurants, aes(x = reorder(DBA, RCount), y = RCount)) +
    geom_bar(stat = "identity") +
    labs(x = "Restaurant", y = "Violations") +
    ggtitle(paste0("Top 20 restaurants with the largest ", Viol, " violations"))
}

# Plot the top 20 restaurant names for "Sewage" violations in 2015.
Filt_NYC_Data(Data = NYC_Data2, yr = 2015, Viol = "Sewage")