Load Packages required for assignment

library(RSocrata) #read in data from website
library(readr)
library(tidyverse) # Package to tidy, manipulate, and display data

## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Conflicts with tidy packages ----------------------------------------------

## filter(): dplyr, stats
## lag():    dplyr, stats

library(stringr) #package for handling strings
library(base)

1 Import file, convert to tibble, and write to directory

# Download the restaurant inspection dataset from the Socrata URL below and
# keep it in NYCRest.
NYCRest<- read.socrata("https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59")
# Preview the data as a tibble. The result is not assigned, so NYCRest itself
# is left unchanged by this line; it only prints.
as_tibble(NYCRest)

## # A tibble: 436,612 × 18
##       CAMIS                      DBA          BORO BUILDING
##       <int>                    <chr>         <chr>    <chr>
## 1  41606387     LA CUARTA RESTAURANT      BROOKLYN      782
## 2  50007091    NEW PEKING RESTAURANT      BROOKLYN     1581
## 3  50012185      CASTILLO RESTAURNAT      BROOKLYN      709
## 4  40750062        PANEANTICO BAKERY      BROOKLYN     9124
## 5  50034621             SHI LI XIANG        QUEENS    13358
## 6  50007874             vapor lounge         BRONX     3758
## 7  40552965 GROUND LEVEL  PUB & GRUB STATEN ISLAND      958
## 8  41650546               KING KABAB        QUEENS    16709
## 9  41524468                STARBUCKS     MANHATTAN     1491
## 10 41435999          YOUR HOUSE CAFE      BROOKLYN     6916
## # ... with 436,602 more rows, and 14 more variables: STREET <chr>,
## #   ZIPCODE <int>, PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## #   INSPECTION.DATE <dttm>, ACTION <chr>, VIOLATION.CODE <chr>,
## #   VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## #   GRADE <chr>, GRADE.DATE <dttm>, RECORD.DATE <dttm>,
## #   INSPECTION.TYPE <chr>

# Save the downloaded data to NYCRest.rds in the working directory.
# The output path is passed positionally because readr renamed this argument
# from `path` to `file`; the positional form works with both old and new
# readr versions without a deprecation warning.
write_rds(NYCRest, "NYCRest.rds")

Preliminary Data Exploration

# Print the structure (column names, types, first values) of NYCRest.
str(NYCRest)
# Print per-column summary statistics of NYCRest.
summary(NYCRest)

2 Reformat Tibble using map

# Report the class a column has after date-times are reduced to dates.
#
# Args:
#   x: A vector, typically one column of a data frame.
#
# Returns:
#   A character vector: class(as.Date(x)) when x is a date-time (POSIXct or
#   POSIXlt, i.e. anything inheriting from "POSIXt"), otherwise class(x).
#   The converted values themselves are not returned, only the class.
NYCref <- function(x) {
  # A POSIXct vector has the length-2 class c("POSIXct", "POSIXt"), so
  # comparing class(x) with `==` inside `||` is an error in R >= 4.3.
  # inherits() is the length-safe test.
  if (inherits(x, "POSIXt")) {
    x <- as.Date(x)
  }
  class(x)
}
# Apply NYCref to every column of NYCRest and print the resulting list.
# The result is not assigned, so NYCRest is not modified by this call.
map(NYCRest, NYCref)

## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "character"
## 
## $BORO
## [1] "character"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "character"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "character"
## 
## $INSPECTION.DATE
## [1] "Date"
## 
## $ACTION
## [1] "character"
## 
## $VIOLATION.CODE
## [1] "character"
## 
## $VIOLATION.DESCRIPTION
## [1] "character"
## 
## $CRITICAL.FLAG
## [1] "character"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "character"
## 
## $GRADE.DATE
## [1] "Date"
## 
## $RECORD.DATE
## [1] "Date"
## 
## $INSPECTION.TYPE
## [1] "character"

# Replace NYCRest with its tibble version for the dplyr pipelines that follow.
NYCRest<-as_tibble(NYCRest)

3 Number of Restaurant violations in 2016 by mice|hair|sewage

# Among rows whose INSPECTION.DATE (matched as text) contains "2016" and whose
# VIOLATION.DESCRIPTION matches mice, hair or sewage, count how many mention
# "mice" (the TRUE row) versus the other two terms (the FALSE row).
# No ignore_case option is set, so the patterns are matched case-sensitively.
NYCRest %>%
  filter(str_detect(INSPECTION.DATE, "2016")) %>%
  filter(str_detect(VIOLATION.DESCRIPTION, "mice|hair|sewage")) %>%
  count(str_detect(VIOLATION.DESCRIPTION, "mice"))

## # A tibble: 2 × 2
##   `str_detect(VIOLATION.DESCRIPTION, "mi...`     n
##                                        <lgl> <int>
## 1                                      FALSE 15778
## 2                                       TRUE  8283

# Same two filters as the "mice" count (date text contains "2016"; description
# matches mice, hair or sewage), but the TRUE row now counts descriptions that
# mention "hair". Matching is case-sensitive as written.
NYCRest %>%
  filter(str_detect(INSPECTION.DATE, "2016")) %>%
  filter(str_detect(VIOLATION.DESCRIPTION, "mice|hair|sewage")) %>%
  count(str_detect(VIOLATION.DESCRIPTION, "hair"))

## # A tibble: 2 × 2
##   `str_detect(VIOLATION.DESCRIPTION, "ha...`     n
##                                        <lgl> <int>
## 1                                      FALSE 21929
## 2                                       TRUE  2132

# Same two filters again (date text contains "2016"; description matches mice,
# hair or sewage); the TRUE row counts descriptions that mention "sewage".
# Matching is case-sensitive as written.
NYCRest %>%
  filter(str_detect(INSPECTION.DATE, "2016")) %>%
  filter(str_detect(VIOLATION.DESCRIPTION, "mice|hair|sewage")) %>%
  count(str_detect(VIOLATION.DESCRIPTION, "sewage"))

## # A tibble: 2 × 2
##   `str_detect(VIOLATION.DESCRIPTION, "se...`     n
##                                        <lgl> <int>
## 1                                      FALSE 10415
## 2                                       TRUE 13646

4 Restaurants with Most Violations Function

# Tabulate and plot the restaurants with the most violations of a given kind
# in a given inspection year.
#
# Args:
#   Rest:      Data frame with columns DBA, INSPECTION.DATE and
#              VIOLATION.DESCRIPTION. INSPECTION.DATE is assumed to be a Date
#              or date-time column (as in NYCRest) -- confirm for other inputs.
#   year:      Inspection year to keep; a single number or a string such as
#              "2015".
#   violation: Regular expression matched case-insensitively against
#              VIOLATION.DESCRIPTION (e.g. "hair").
#
# Side effects:
#   Prints the summary table (DBA, count) and the bar chart.
#
# Returns:
#   The ggplot object, invisibly.
RestViol <- function(Rest, year, violation) {
  target_year <- as.integer(year)
  if (length(target_year) != 1L || is.na(target_year)) {
    stop("`year` must be a single value coercible to an integer year",
         call. = FALSE)
  }
  stopifnot(
    is.data.frame(Rest),
    is.character(violation),
    length(violation) == 1L
  )

  Year <- Rest %>%
    # Filter on the column of the data passed in (not the global NYCRest);
    # base format() extracts the year, so lubridate does not need loading.
    filter(as.integer(format(INSPECTION.DATE, "%Y")) == target_year) %>%
    # Match the caller-supplied pattern rather than a hard-coded literal.
    filter(str_detect(VIOLATION.DESCRIPTION,
                      regex(violation, ignore_case = TRUE))) %>%
    group_by(DBA) %>%
    summarise(count = n()) %>%
    # Rank explicitly by the violation count; ties may yield more than 20 rows.
    top_n(20, wt = count)
  print(Year)

  rv <- ggplot(
    data = Year,
    aes(x = reorder(DBA, desc(count)), y = count, colour = DBA, fill = DBA)
  ) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = count), na.rm = TRUE) +
    ggtitle("20 restaurants with Most Violation") +
    ylab("Violations") +
    xlab("Restaurants")
  print(rv)
  invisible(rv)
}
output <- RestViol(NYCRest, "2015", "hair")

Data Wrangling Homework 7

Lawrence Porter

December 3, 2016

Load Packages required for assignment

1 Import file, convert to tibble, and write to directory

Preliminary Data Exploration

2 Reformat Tibble using map

3 Number of Restaurant violations in 2016 by mice|hair|sewage

4 Restaurants with Most Violations Function