Synopsis

This is the homework report for week 7, produced with R Markdown. It focuses on working with functions and iteration.

Packages Required

library(dplyr)## used to manupulate data
library(tibble) ## used to create tibbles
library(stringr) ## using regx and other string functions
library(tidyverse)#group of packages used to summarise and visualize data
library(tidyr) ## functions which can be used to make data tidy
library(purrr) ## used for iteration functionns like map
library(RSocrata) ## sued for reading socrata API datasets

Reading Data

# Pull the NYC restaurant inspection results through the Socrata API
nyc_rest_data <- RSocrata::read.socrata(
  "https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59"
)

# Keep a local RDS copy of the downloaded dataset
readr::write_rds(nyc_rest_data, "nycrestaurants.rds")

Data Analysis

Question 1

Class of each variable

# List the class vector of every column in the raw data frame
map(.x = nyc_rest_data, .f = class)
## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "factor"
## 
## $BORO
## [1] "factor"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "factor"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "factor"
## 
## $INSPECTION.DATE
## [1] "POSIXlt" "POSIXt" 
## 
## $ACTION
## [1] "factor"
## 
## $VIOLATION.CODE
## [1] "factor"
## 
## $VIOLATION.DESCRIPTION
## [1] "factor"
## 
## $CRITICAL.FLAG
## [1] "factor"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "factor"
## 
## $GRADE.DATE
## [1] "POSIXlt" "POSIXt" 
## 
## $RECORD.DATE
## [1] "POSIXlt" "POSIXt" 
## 
## $INSPECTION.TYPE
## [1] "factor"

Question 2

Function to check each column's data type and convert POSIXlt columns to Date (the code below uses as.Date)

#' Convert a POSIXlt vector to Date, leaving any other input untouched
#'
#' @param x Any R object, typically a single column of a data frame.
#' @return `as.Date(x)` when `x` inherits from "POSIXlt"; otherwise `x`
#'   itself, unchanged.
date_conv <- function(x) {
  # inherits() finds "POSIXlt" anywhere in the class vector, so the check
  # does not depend on it being the first class listed.
  if (inherits(x, "POSIXlt")) {
    # Return the converted value directly (no assignment) so the result is
    # visible when the function is called at the console.
    as.Date(x)
  } else {
    x
  }
}

## Run date_conv over every column, then assemble the result into a tibble
nyc_rest_tib <- nyc_rest_data %>%
  map(date_conv) %>%
  as_tibble()
## str(nyc_rest_tib)

Question 3

Violations in NYC Restaurants

# Violation descriptions are matched as text below, so store them as character
nyc_rest_tib[["VIOLATION.DESCRIPTION"]] <-
  as.character(nyc_rest_tib[["VIOLATION.DESCRIPTION"]])

# 2016 inspection records whose violation description mentions "mice"
a <- filter(
  nyc_rest_tib,
  format(INSPECTION.DATE, "%Y") == 2016,
  str_detect(VIOLATION.DESCRIPTION, regex("mice", ignore_case = TRUE))
)

Number of restaurants with mice violations in 2016 is 5697

# 2016 inspection records whose violation description mentions "hair"
b <- filter(
  nyc_rest_tib,
  format(INSPECTION.DATE, "%Y") == 2016,
  str_detect(VIOLATION.DESCRIPTION, regex("hair", ignore_case = TRUE))
)

Number of restaurants with hair violations in 2016 is 2000

# 2016 inspection records whose violation description mentions "sewage"
c <- filter(
  nyc_rest_tib,
  format(INSPECTION.DATE, "%Y") == 2016,
  str_detect(VIOLATION.DESCRIPTION, regex("sewage", ignore_case = TRUE))
)

Number of restaurants with sewage violations in 2016 is 9468

Question 4

Top 20 Violations

#' Print and plot the 20 restaurants with the most matching violations
#'
#' Keeps the rows of `df` inspected in year `x` whose violation description
#' matches `y`, counts the rows per restaurant (CAMIS, DBA), prints the 20
#' largest counts and draws them as a bar chart.
#'
#' @param df Data frame with columns INSPECTION.DATE, VIOLATION.DESCRIPTION,
#'   CAMIS and DBA.
#' @param x Inspection year to keep; compared with
#'   `format(INSPECTION.DATE, "%Y")`.
#' @param y Pattern searched for, ignoring case, in VIOLATION.DESCRIPTION.
#' @return The value of `print(m)`, where `m` is the ggplot object; the
#'   function is called for its printed table and plot.
top_restaurant <- function(df, x, y) {
  # Resolve the arguments up front so the filters below cannot pick up
  # columns of `df` that happen to be called `x` or `y`.
  target_year <- as.character(x)
  # Use the *argument* y as the pattern; the literal "y" would match every
  # description containing the letter "y".
  violation_pattern <- regex(y, ignore_case = TRUE)

  dt <- df %>%
    filter(format(INSPECTION.DATE, "%Y") == target_year) %>%
    filter(str_detect(VIOLATION.DESCRIPTION, violation_pattern)) %>%
    group_by(CAMIS, DBA) %>%
    summarise(count = n()) %>%
    # summarise() leaves the result grouped by CAMIS; drop that grouping so
    # the ranking below works on the whole table.
    ungroup() %>%
    arrange(desc(count)) %>%
    head(20)
  print(dt)

  m <- ggplot(data = dt, aes(x = reorder(DBA, desc(count)), y = count, fill = DBA)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = count), na.rm = TRUE, hjust = 0.3, vjust = -0.8) +
    ggtitle("Restaurants with most violations") +
    ylab("# of Violations") +
    theme(axis.text = element_text(size = 1),
          axis.title = element_text(size = 5, face = "bold")) +
    theme(legend.key.size = unit(.5, "mm"))
  print(m)
}

top_restaurant(nyc_rest_tib,2015,"mice")
## Source: local data frame [20 x 3]
## Groups: CAMIS [20]
## 
##       CAMIS                                 DBA count
##       <int>                              <fctr> <int>
## 1  50033122   PARTY WELL REST & ORIENTAL BAKERY    50
## 2  41475257                    A-WAH RESTAURANT    39
## 3  41459659                       T. K. KITCHEN    37
## 4  41704655                             PADDY'S    36
## 5  41231660 LAS MARAVILLAS DE MEXICO RESTAURANT    35
## 6  50018727                        K ONE BUFFET    35
## 7  41320205                  DAI WAH YUMMY CITY    34
## 8  41683816             MAX BAKERY & RESTAURANT    34
## 9  50017092                   HE LIN RESTAURANT    34
## 10 50032737                       KAM'S KITCHEN    34
## 11 41485393                 MY CORAL RESTAURANT    33
## 12 41510404                            JUSTIN'S    31
## 13 50001637                  YOLANDA RESTAURANT    31
## 14 50032777                       SKYLINE DINER    31
## 15 41583748          YEE MEI FONG TAIWAN BAKERY    30
## 16 40743368                            DOMINO'S    29
## 17 41061893                         BURGER KING    29
## 18 50000855         CROWN FRIED CHICKEN & PIZZA    29
## 19 50014886        NEW LUCKY CHINESE RESTAURANT    29
## 20 41692971                    B BO SING BAKERY    28