# Restore the objects saved by Restaurants1.R from MyRestaurants.rda.
# NOTE(review): load() restores saved objects only. The code below also calls filter(),
# inner_join(), %>%, tally(), xyplot(), bwplot() and ladd(), so the packages that provide
# them must already be attached -- presumably dplyr and mosaic; confirm.
load(file="MyRestaurants.rda")   # this assumes that Restaurants1.R has been run previously
# the datasets Cuisines, Restaurants, and ViolationCodes are now available

DATA CHECKING

Search the NYC website (http://a816-restaurantinspection.nyc.gov/RestaurantInspection/SearchBrowse.do) for records for `DJ REYNOLDS PUB AND RESTAURANT`. What is the history of inspections at this restaurant?

SOLUTION:

Display the results for this restaurant from the dataset.

# Keep only the rows whose DBA column matches this restaurant, then preview them.
DJR <- Restaurants %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
head(DJR)
##      CAMIS                            DBA BORO BUILDING         STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
##   ZIPCODE      PHONE CUISINECODE   INSPDATE ACTION VIOLCODE SCORE
## 1   10019 2122452912          47 2014-09-06      D      10F     2
## 2   10019 2122452912          47 2011-12-15      P      04L    21
## 3   10019 2122452912          47 2012-07-31      D      06C    12
## 4   10019 2122452912          47 2013-07-22      D      10B    11
## 5   10019 2122452912          47 2011-12-29      U      06F    12
## 6   10019 2122452912          47 2011-12-15      P      08A    21
##   CURRENTGRADE  GRADEDATE          RECORDDATE
## 1            A 2014-09-06 2014-10-09 06:01:44
## 2         <NA>       <NA> 2014-10-09 06:01:44
## 3            A 2012-07-31 2014-10-09 06:01:44
## 4            A 2013-07-22 2014-10-09 06:01:44
## 5            A 2011-12-29 2014-10-09 06:01:44
## 6         <NA>       <NA> 2014-10-09 06:01:44

Merge the cuisine name into the Restaurants dataframe (and call this merged) using the inner_join() function in the dplyr package.

# Attach the cuisine description to every Restaurants row by matching on CUISINECODE.
# An inner join keeps only rows whose code is present in both tables.
merged <- Restaurants %>%
  inner_join(Cuisines, by = "CUISINECODE")

Use this new dataset to determine what type of restaurant DJ Reynolds is.

# List the column names of the joined data; CUISINEDESC is the column added by the join.
names(merged)
##  [1] "CAMIS"        "DBA"          "BORO"         "BUILDING"    
##  [5] "STREET"       "ZIPCODE"      "PHONE"        "CUISINECODE" 
##  [9] "INSPDATE"     "ACTION"       "VIOLCODE"     "SCORE"       
## [13] "CURRENTGRADE" "GRADEDATE"    "RECORDDATE"   "CUISINEDESC"
# Pull this restaurant's rows from the joined data; CUISINEDESC shows its cuisine.
DJR2 <- merged %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
head(DJR2)
##      CAMIS                            DBA BORO BUILDING         STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
##   ZIPCODE      PHONE CUISINECODE   INSPDATE ACTION VIOLCODE SCORE
## 1   10019 2122452912          47 2014-09-06      D      10F     2
## 2   10019 2122452912          47 2011-12-15      P      04L    21
## 3   10019 2122452912          47 2012-07-31      D      06C    12
## 4   10019 2122452912          47 2013-07-22      D      10B    11
## 5   10019 2122452912          47 2011-12-29      U      06F    12
## 6   10019 2122452912          47 2011-12-15      P      08A    21
##   CURRENTGRADE  GRADEDATE          RECORDDATE CUISINEDESC
## 1            A 2014-09-06 2014-10-09 06:01:44       Irish
## 2         <NA>       <NA> 2014-10-09 06:01:44       Irish
## 3            A 2012-07-31 2014-10-09 06:01:44       Irish
## 4            A 2013-07-22 2014-10-09 06:01:44       Irish
## 5            A 2011-12-29 2014-10-09 06:01:44       Irish
## 6         <NA>       <NA> 2014-10-09 06:01:44       Irish

SOLUTION: DJ Reynolds is an Irish restaurant (CUISINECODE 47).

DESCRIPTIVE

How many cases are there? What do they represent?

# Count the rows (cases) in the joined data.
nrow(merged)
## [1] 526066

SOLUTION:

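In the merged data set, there are 526066 observations. Each one is a single violation code recorded at a single inspection of a restaurant, so a restaurant appears on many rows (for example, DJ Reynolds has two rows for its 2011-12-15 inspection, one for violation 04L and one for 08A).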

How many unique restaurants are there?

# Keep one row per restaurant, keyed on CAMIS (it is constant across every DJ REYNOLDS
# row in the listings above).
# PHONE is not a safe key: it is stored as an integer and many different restaurants share
# the capped value 2147483647 (see the head(Restaurants) output), so grouping by PHONE
# collapses all of them into a single "restaurant" and badly undercounts.
# NOTE: the printed results shown for `unique` in this document were produced with the old
# PHONE grouping and must be regenerated.
# The object name `unique` is kept because later code refers to it.
unique <- Restaurants %>%
  group_by(CAMIS) %>%
  filter(row_number(CAMIS) == 1)
nrow(unique)
## [1] 8480

SOLUTION: There are 8480 unique restaurants in the data set

What is the distribution of restaurants by borough?

# Show the columns available in the de-duplicated data.
names(unique)
##  [1] "CAMIS"        "DBA"          "BORO"         "BUILDING"    
##  [5] "STREET"       "ZIPCODE"      "PHONE"        "CUISINECODE" 
##  [9] "INSPDATE"     "ACTION"       "VIOLCODE"     "SCORE"       
## [13] "CURRENTGRADE" "GRADEDATE"    "RECORDDATE"
# Count the de-duplicated rows for each BORO value (missing values are reported as <NA>).
tally(~BORO, data=unique)
## 
##    0    1    2    3    4    5 <NA> 
##    2 8314    8   31   27    9   89

SOLUTION: There are 2 restaurants in Borough 0, 8314 restaurants in Borough 1, 8 restaurants in Borough 2, 31 restaurants in Borough 3, 27 restaurants in Borough 4, and 9 restaurants in Borough 5. A further 89 restaurants have a missing borough (NA).

What is the distribution of restaurants by CUISINE?

# Tabulate the de-duplicated rows by cuisine code, most frequent code first.
tally(~ CUISINECODE, data = unique) %>%
  sort(decreasing = TRUE)
## 
##    3   48   14   20   49   62   35   55   53    8   69   99   44   82    5 
## 2657  570  477  463  387  307  234  221  179  161  161  152  139  138  131 
##   27   39   54   47   77   63   70 <NA>   29   43   51   52   50   18   72 
##  129  114  108  107  106  104  104   89   86   82   72   69   68   67   58 
##   78   56   84    7   81   75   28   38   83   17   10   23   12   22   80 
##   56   51   51   49   43   38   33   31   30   29   22   22   20   20   18 
##   37    2    4   73   13   67   30   61   68   32   33   41   57    6    9 
##   17   15   15   15   14   14   12   11   11   10   10    7    7    6    5 
##   31   59   71    1   21   42   76    0   34   46   60   40   45   58   64 
##    5    5    5    4    4    4    4    3    3    3    3    2    2    2    2 
##   66   74   16   24   26 
##    2    2    1    1    1
# Look up the descriptions of the four most frequent cuisine codes.
Cuisines %>%
  filter(CUISINECODE %in% c(3, 48, 14, 20))
##   CUISINECODE        CUISINEDESC
## 1           3          American 
## 2          20            Chinese
## 3          48            Italian
## 4          14    Café/Coffee/Tea

SOLUTION: The four most common cuisines are American (2657), Italian (570), Café/Coffee/Tea (477), and Chinese (463).

How many distinct restaurant names are there? What is the most common name?

# NOTE(review): the disabled tally below looks like the intended way to rank restaurant
# names by frequency; it is left commented out here -- confirm why before re-enabling.
# head(sort(tally(~ DBA, data=unique), decreasing=TRUE))
# Preview the first rows of the de-duplicated data.
head(unique)
## Source: local data frame [6 x 15]
## Groups: PHONE
## 
##      CAMIS                            DBA BORO BUILDING           STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351   WEST 57 STREET
## 2 40356068             TOV KOSHER KITCHEN    4    97-22          63 ROAD
## 3 40359480     1 EAST 66TH STREET KITCHEN    1        1 EAST   66 STREET
## 4 40361521                  GLORIOUS FOOD    1      522 EAST   74 STREET
## 5 40361708                   BULLY'S DELI    1      759         BROADWAY
## 6 40362098              HARRIET'S KITCHEN    1      502 AMSTERDAM AVENUE
## Variables not shown: ZIPCODE (chr), PHONE (int), CUISINECODE (int),
##   INSPDATE (time), ACTION (chr), VIOLCODE (chr), SCORE (dbl), CURRENTGRADE
##   (chr), GRADEDATE (time), RECORDDATE (time)

There are

How many distinct locations does Dunkin Donuts have in Manhattan?

SOLUTION:

SCORE

What is the distribution of the SCORE variable in the Restaurants dataset? Can you determine the cutoffs for A, B, and C grades?

# hint try running the command:
# barchart(tally(~ SCORE, data=Restaurants), xlim=c(-1, 50), horizontal=FALSE)

What is the score distribution for restaurants with A grades?

# favstats(~ SCORE, data=filter(Restaurants, CURRENTGRADE=="A"))

What is the change in score for Dunkin Donuts restaurants in NYC over time?

# Collect every row for the Dunkin' Donuts chain, then glance at the full dataset.
DD <- Restaurants %>%
  filter(DBA == "DUNKIN' DONUTS")
head(Restaurants)
##      CAMIS                            DBA BORO BUILDING            STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351    WEST 57 STREET
## 2 40356068             TOV KOSHER KITCHEN    4    97-22           63 ROAD
## 3 40356151        BRUNOS ON THE BOULEVARD    4     8825 ASTORIA BOULEVARD
## 4 40356483             WILKEN'S FINE FOOD    3     7114          AVENUE U
## 5 30075445          MORRIS PARK BAKE SHOP    2     1007   MORRIS PARK AVE
## 6 30112340                        WENDY'S    3      469   FLATBUSH AVENUE
##   ZIPCODE      PHONE CUISINECODE   INSPDATE ACTION VIOLCODE SCORE
## 1   10019 2122452912          47 2014-09-06      D      10F     2
## 2   11374 2147483647          50 2013-01-17      D      02B    13
## 3   11369 2147483647           3 2014-05-02      F      06A    10
## 4   11234 2147483647          27 2014-05-29      D      08C    10
## 5   10462 2147483647           8 2014-03-03      D      10F     2
## 6   11225 2147483647          39 2014-07-01      F      06A    23
##   CURRENTGRADE  GRADEDATE          RECORDDATE
## 1            A 2014-09-06 2014-10-09 06:01:44
## 2         <NA> 2013-01-17 2014-10-09 06:01:44
## 3            A 2014-05-02 2014-10-09 06:01:44
## 4            A 2014-05-29 2014-10-09 06:01:44
## 5            A 2014-03-03 2014-10-09 06:01:44
## 6            B 2014-07-01 2014-10-09 06:01:44
# Collect the Wendy's rows for later use.
WENDY <- Restaurants %>%
  filter(DBA == "WENDY'S")
# Plot Dunkin' Donuts scores against grade date: translucent points plus a smoother,
# with the y-axis limited to 0-20.
xyplot(
  SCORE ~ GRADEDATE,
  data = DD,
  type = c("p", "smooth"),
  alpha = 0.2,
  ylim = c(0, 20)
)

plot of chunk unnamed-chunk-15

YOUR TURN

Use these datasets to explore the NYC violations data to answer an interesting statistical question. Prepare a single figure to share with the class.

# Vertical box-and-whisker plot of Wendy's scores, one box per borough code.
bwplot(
  SCORE ~ BORO,
  data = WENDY,
  horizontal = FALSE,
  main = "Distribution of Wendy's Scores by Borough",
  xlab = "Borough Number",
  ylab = "Score"
)

plot of chunk unnamed-chunk-16

# Add a horizontal reference line at a score of 13 to the current plot.
# NOTE(review): presumably a letter-grade cutoff -- confirm.
ladd(panel.abline(h=13))

plot of chunk unnamed-chunk-16

# Add a second horizontal reference line at a score of 27 to the current plot.
# NOTE(review): presumably a letter-grade cutoff -- confirm.
ladd(panel.abline(h=27))

plot of chunk unnamed-chunk-16 13, 27, Are there particular