This document is my homework report for week 7. Here, I used functions and iteration to explore the NYC restaurant inspection data.
library(dplyr) #to manipulate data
library(tidyr) #to make data tidy
library(tidyverse) #to manipulate data
library(RSocrata) ## to read socrata API datasets
library(tibble) #to create tibbles
library(stringr) #to use regx and other string functions
library(purrr) #for iteration functions like map
library(lubridate)#for date
# One-time download of the inspection results through the Socrata API. The
# result is cached on disk so the report can be rebuilt without the network;
# the cache file name has to match the one given to readRDS() below.
# NYC <- 'https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59'
# NYC_Data <- read.socrata(NYC)
# readr::write_rds(NYC_Data, "NYC_data.rds")

# Load the cached copy of the data.
NYC_Data <- readRDS(file = "NYC_data.rds")

# Gather the class vector of every column into a list named by column.
NYC_Data1 <- map(NYC_Data, class)
NYC_Data1
## $CAMIS
## [1] "integer"
##
## $DBA
## [1] "character"
##
## $BORO
## [1] "character"
##
## $BUILDING
## [1] "character"
##
## $STREET
## [1] "character"
##
## $ZIPCODE
## [1] "integer"
##
## $PHONE
## [1] "character"
##
## $CUISINE.DESCRIPTION
## [1] "character"
##
## $INSPECTION.DATE
## [1] "POSIXct" "POSIXt"
##
## $ACTION
## [1] "character"
##
## $VIOLATION.CODE
## [1] "character"
##
## $VIOLATION.DESCRIPTION
## [1] "character"
##
## $CRITICAL.FLAG
## [1] "character"
##
## $SCORE
## [1] "integer"
##
## $GRADE
## [1] "character"
##
## $GRADE.DATE
## [1] "POSIXct" "POSIXt"
##
## $RECORD.DATE
## [1] "POSIXct" "POSIXt"
##
## $INSPECTION.TYPE
## [1] "character"
# Convert a POSIXlt vector to Date; hand back any other input unchanged.
#
# x: a vector, e.g. one column of a data frame.
# Returns as.Date(x) when x inherits from "POSIXlt", otherwise x itself.
POSIXlt_Check <- function(x) {
  # inherits() also recognises subclasses of POSIXlt, which comparing against
  # class(x)[1] would miss.
  if (inherits(x, "POSIXlt")) {
    return(as.Date(x))
  }
  x
}
# Run POSIXlt_Check over every column and reassemble the results as a tibble.
NYC_Data2 <- NYC_Data %>%
  map_df(POSIXlt_Check) %>%
  as_tibble()
NYC_Data2
## # A tibble: 436,612 × 18
## CAMIS DBA BORO BUILDING
## <int> <chr> <chr> <chr>
## 1 41606387 LA CUARTA RESTAURANT BROOKLYN 782
## 2 50007091 NEW PEKING RESTAURANT BROOKLYN 1581
## 3 50012185 CASTILLO RESTAURNAT BROOKLYN 709
## 4 40750062 PANEANTICO BAKERY BROOKLYN 9124
## 5 50034621 SHI LI XIANG QUEENS 13358
## 6 50007874 vapor lounge BRONX 3758
## 7 40552965 GROUND LEVEL PUB & GRUB STATEN ISLAND 958
## 8 41650546 KING KABAB QUEENS 16709
## 9 41524468 STARBUCKS MANHATTAN 1491
## 10 41435999 YOUR HOUSE CAFE BROOKLYN 6916
## # ... with 436,602 more rows, and 14 more variables: STREET <chr>,
## # ZIPCODE <int>, PHONE <chr>, CUISINE.DESCRIPTION <chr>,
## # INSPECTION.DATE <dttm>, ACTION <chr>, VIOLATION.CODE <chr>,
## # VIOLATION.DESCRIPTION <chr>, CRITICAL.FLAG <chr>, SCORE <int>,
## # GRADE <chr>, GRADE.DATE <dttm>, RECORD.DATE <dttm>,
## # INSPECTION.TYPE <chr>
# Count, per violation keyword, how many distinct CAMIS values had a matching
# violation recorded during 2016.
NYC_Data3 <- NYC_Data2 %>%
  filter(year(INSPECTION.DATE) == 2016) %>%
  # Label each row with the first keyword found in its description, checked in
  # the order Mice, Hair, Sewage. Rows matching none get a real missing value
  # instead of the string "NA".
  mutate(
    Violation = case_when(
      grepl("Mice", VIOLATION.DESCRIPTION, ignore.case = TRUE) ~ "Mice",
      grepl("Hair", VIOLATION.DESCRIPTION, ignore.case = TRUE) ~ "Hair",
      grepl("Sewage", VIOLATION.DESCRIPTION, ignore.case = TRUE) ~ "Sewage",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(Violation)) %>%
  # Each CAMIS value is counted once per keyword, however many matching rows
  # it has.
  group_by(Violation) %>%
  summarise(Rest_Count = n_distinct(CAMIS))
NYC_Data3
## # A tibble: 3 × 2
## Violation Rest_Count
## <chr> <int>
## 1 Hair 2000
## 2 Mice 5697
## 3 Sewage 9468
# Plot the 20 restaurant names (DBA) with the most violation rows matching a
# keyword in a given year.
#
# Data: data frame with INSPECTION.DATE, VIOLATION.DESCRIPTION and DBA columns.
# yr:   calendar year of INSPECTION.DATE to keep.
# Viol: pattern searched for in VIOLATION.DESCRIPTION (regular expression,
#       case-insensitive); it is also inserted into the plot title.
# Returns a ggplot bar chart of the number of matching rows per DBA.
Filt_NYC_Data <- function(Data, yr, Viol) {
  top_restaurants <- Data %>%
    filter(year(INSPECTION.DATE) == yr) %>%
    # Keep matching rows directly. Writing a "NA" string sentinel into a
    # helper column and filtering on it kept every row when Viol was "NA".
    filter(grepl(Viol, VIOLATION.DESCRIPTION, ignore.case = TRUE)) %>%
    group_by(DBA) %>%
    summarise(RCount = n()) %>%
    arrange(desc(RCount)) %>%
    head(20)

  # reorder() sorts the bars by their count rather than alphabetically.
  ggplot(top_restaurants, aes(x = reorder(DBA, RCount), y = RCount)) +
    geom_bar(stat = "identity") +
    labs(x = "Restaurant", y = "Violations") +
    ggtitle(paste0("Top 20 restaurants with the largest ", Viol, " violations"))
}
# Draw the chart for sewage-related violations recorded in 2015.
Filt_NYC_Data(Data = NYC_Data2, yr = 2015, Viol = "Sewage")