Week-7

Synopsis

This is my homework report for week 7, produced with R Markdown. In this homework I am working on functions and iteration.

Packages Required

# Session setup: set the working directory, then attach the packages used below.
# NOTE(review): the working directory is a hard-coded, machine-specific path;
# the relative file name written later in this report resolves against it.
setwd("C:\\tauseef\\data_wrangling\\Data Wrangling with R (BANA 8090)")
library(dplyr)## used to manipulate data
library("tibble") ## used to create tibbles
library(stringr) ## regex and other string functions
library(tidyverse)# group of packages used to summarise and visualize data
library(tidyr) ## functions which can be used to make data tidy
library(purrr) ## used for iteration functions like map
library(RSocrata) ## used for reading Socrata API datasets

Reading Data

  # Download the restaurant inspection results from the Socrata endpoint.
  nyc_restra_data <- RSocrata::read.socrata(
    url = "https://nycopendata.socrata.com/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59"
  )

  # Keep a local copy of the downloaded data in RDS format.
  readr::write_rds(nyc_restra_data, "nyc_restra_data.rds")

Question 1

# Collect the class vector of every column into a named list, then display it.
data_typ <- nyc_restra_data %>% map(class)
print(data_typ)

## $CAMIS
## [1] "integer"
## 
## $DBA
## [1] "factor"
## 
## $BORO
## [1] "factor"
## 
## $BUILDING
## [1] "character"
## 
## $STREET
## [1] "factor"
## 
## $ZIPCODE
## [1] "integer"
## 
## $PHONE
## [1] "character"
## 
## $CUISINE.DESCRIPTION
## [1] "factor"
## 
## $INSPECTION.DATE
## [1] "POSIXlt" "POSIXt" 
## 
## $ACTION
## [1] "factor"
## 
## $VIOLATION.CODE
## [1] "factor"
## 
## $VIOLATION.DESCRIPTION
## [1] "factor"
## 
## $CRITICAL.FLAG
## [1] "factor"
## 
## $SCORE
## [1] "integer"
## 
## $GRADE
## [1] "factor"
## 
## $GRADE.DATE
## [1] "POSIXlt" "POSIXt" 
## 
## $RECORD.DATE
## [1] "POSIXlt" "POSIXt" 
## 
## $INSPECTION.TYPE
## [1] "factor"

Question 2

# Convert a date-time vector to a plain calendar date.
#
# Args:
#   x: any vector (typically one column of a data frame).
#
# Returns:
#   as.Date(x) when x is a POSIXlt object; otherwise x, unchanged.
date_typ_conv <- function(x) {
  # inherits() looks at the whole class vector instead of only its first entry.
  if (inherits(x, "POSIXlt")) {
    # A bare expression (not an assignment) so the converted value is returned
    # visibly, the same way the untouched value is in the other branch.
    as.Date(x)
  } else {
    x
  }
}


# Run the date conversion over every column and reassemble the result as a tibble.
b <- nyc_restra_data %>%
  map(date_typ_conv) %>%
  as_tibble()
# Show the resulting column types.
str(b)

## Classes 'tbl_df', 'tbl' and 'data.frame':    436612 obs. of  18 variables:
##  $ CAMIS                : int  30075445 30075445 30075445 30075445 30075445 30075445 30075445 30075445 30075445 30075445 ...
##  $ DBA                  : Factor w/ 20551 levels "1 EAST 66TH STREET KITCHEN",..: 28 28 28 28 28 28 28 28 28 28 ...
##  $ BORO                 : Factor w/ 6 levels "BRONX","BROOKLYN",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ BUILDING             : chr  "1007" "1007" "1007" "1007" ...
##  $ STREET               : Factor w/ 3304 levels "10 STREET","18 AVENUE",..: 40 40 40 40 40 40 40 40 40 40 ...
##  $ ZIPCODE              : int  10462 10462 10462 10462 10462 10462 10462 10462 10462 10462 ...
##  $ PHONE                : chr  "7188924968" "7188924968" "7188924968" "7188924968" ...
##  $ CUISINE.DESCRIPTION  : Factor w/ 84 levels "American","Bagels/Pretzels",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ INSPECTION.DATE      : Date, format: "2016-02-18" "2016-02-18" ...
##  $ ACTION               : Factor w/ 6 levels "Establishment Closed by DOHMH.  Violations were cited in the following area(s) and those requiring immediate action were addres"| __truncated__,..: 4 4 4 4 3 4 4 4 4 4 ...
##  $ VIOLATION.CODE       : Factor w/ 97 levels "","02A","02B",..: 13 25 20 35 1 13 15 8 13 18 ...
##  $ VIOLATION.DESCRIPTION: Factor w/ 95 levels "","''''No Smoking\032 and/or 'Smoking Permitted\032 sign not conspicuously posted. Health warning not present on 'Smoking Permitte"| __truncated__,..: 14 16 23 33 1 14 17 25 14 35 ...
##  $ CRITICAL.FLAG        : Factor w/ 3 levels "Critical","Not Applicable",..: 1 3 1 3 2 1 1 1 1 1 ...
##  $ SCORE                : int  10 10 6 2 NA 6 6 32 32 32 ...
##  $ GRADE                : Factor w/ 7 levels "","A","B","C",..: 2 2 2 2 1 2 2 1 1 1 ...
##  $ GRADE.DATE           : Date, format: "2016-02-18" "2016-02-18" ...
##  $ RECORD.DATE          : Date, format: "2016-12-02" "2016-12-02" ...
##  $ INSPECTION.TYPE      : Factor w/ 34 levels "Administrative Miscellaneous / Initial Inspection",..: 4 4 4 4 12 5 5 4 4 4 ...

Question 3

Count the distinct restaurants that had a 2016 inspection whose violation description mentions mice, hair, or sewage:

# Store the violation description as plain text before pattern matching.
b[["VIOLATION.DESCRIPTION"]] <- as.character(b[["VIOLATION.DESCRIPTION"]])

# Rows inspected in 2016 whose description contains "mice" (case-insensitive).
c <- filter(
  b,
  format(INSPECTION.DATE, "%Y") == 2016,
  str_detect(VIOLATION.DESCRIPTION, regex("mice", ignore_case = TRUE))
)

# Report how many distinct CAMIS values appear among those rows.
paste0("Number of restraunts having Violations mice is ", n_distinct(c[["CAMIS"]]))

## [1] "Number of restraunts having Violations mice is 5697"

# Rows inspected in 2016 whose description contains "hair" (case-insensitive).
d <- filter(
  b,
  format(INSPECTION.DATE, "%Y") == 2016,
  str_detect(VIOLATION.DESCRIPTION, regex("hair", ignore_case = TRUE))
)

# Report how many distinct CAMIS values appear among those rows.
paste0("Number of restraunts having Violations hair is ", n_distinct(d[["CAMIS"]]))

## [1] "Number of restraunts having Violations hair is 2000"

# Rows inspected in 2016 whose description contains "sewage" (case-insensitive).
e <- filter(
  b,
  format(INSPECTION.DATE, "%Y") == 2016,
  str_detect(VIOLATION.DESCRIPTION, regex("sewage", ignore_case = TRUE))
)

# Report how many distinct CAMIS values appear among those rows.
paste0("Number of restraunts having Violations sewage is ", n_distinct(e[["CAMIS"]]))

## [1] "Number of restraunts having Violations sewage is 9468"

Question 4

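The function below tabulates and plots the 20 restaurants with the most violation records in a given year, filtered on the violation description: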

# Tabulate and plot the 20 restaurants with the most matching violation rows.
#
# Args:
#   df: data frame with columns INSPECTION.DATE, VIOLATION.DESCRIPTION,
#       CAMIS and DBA.
#   x:  inspection year to keep (e.g. 2016); compared against the four-digit
#       year formatted from INSPECTION.DATE.
#   y:  pattern searched for, case-insensitively, in VIOLATION.DESCRIPTION.
#
# Side effects:
#   Prints the top-20 table and draws the bar chart.
#
# Returns:
#   The value of print() on the ggplot object.
top_restaurant <- function(df, x, y) {
  dt <- df %>%
    # format() yields a character year, so compare against a character value.
    filter(format(INSPECTION.DATE, "%Y") == as.character(x)) %>%
    # Match on the caller-supplied pattern `y` (previously the literal string
    # "y" was used, which ignored the argument). as.character() also covers a
    # description column that is still a factor.
    filter(str_detect(as.character(VIOLATION.DESCRIPTION),
                      regex(y, ignore_case = TRUE))) %>%
    group_by(CAMIS, DBA) %>%
    summarise(count = n()) %>%
    # Drop the grouping left over from summarise() before ranking all rows.
    ungroup() %>%
    arrange(desc(count)) %>%
    head(20)
  print(dt)
  m <- ggplot(data = dt,
              aes(x = reorder(DBA, desc(count)), y = count, fill = DBA)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = count), na.rm = TRUE, hjust = 0.3, vjust = -0.8) +
    ggtitle("Top 20 Restaurants with most violations") +
    ylab("Violations") +
    xlab("Top 20 Restaurants") +
    theme(axis.text = element_text(size = 1),
          axis.title = element_text(size = 10, face = "bold")) +
    theme(legend.key.size = unit(2.5, "mm"))
  print(m)
}
# Run the top-20 report on `b` for 2016 with "sewage" as the third argument.
dm <- top_restaurant(df = b, x = 2016, y = "sewage")

## Source: local data frame [20 x 3]
## Groups: CAMIS [20]
## 
##       CAMIS                                   DBA count
##       <int>                                <fctr> <int>
## 1  50045602                             POLKA DOT    33
## 2  50044942           FOGON LATINO BAR RESTAURANT    31
## 3  50043402                   ROYAL FRIED CHICKEN    29
## 4  41429788                             EL AGUILA    27
## 5  50046492                   I LAND FISH & GRILL    27
## 6  41683816               MAX BAKERY & RESTAURANT    25
## 7  50010805                           CAFE AU LEE    25
## 8  50015997 CARIBBEAN STYLE BAKERY AND RESTAURANT    25
## 9  40861669                NEW VICTORY RESTAURANT    24
## 10 41164793       LATINO'S BAR & GRILL RESTAURANT    24
## 11 41711975                      SZECHUAN GOURMET    24
## 12 50009773                           LAS LUNITAS    24
## 13 50012922                  TAIWANESE RESTAURANT    24
## 14 50044698   II FORNO PIZZA AND PASTA RESTAURANT    24
## 15 50046745       SAFAYA RESTAURANT & FISH MARKET    24
## 16 40810174                     BRASSERIE ATHENEE    23
## 17 41568640                HOT POT UNDER DE' TREE    23
## 18 41598995             PARAISO AZTECA RESTAURANT    23
## 19 41612269                          GREEN TOMATO    23
## 20 41695575                       SABA RESTAURANT    23

Week-7

Tauseef Alam

November 25, 2016

Synopsis

Packages Required

Reading Data

Question 1

Question 2

Question 3

Question 4