# Restore the objects saved in MyRestaurants.rda; this assumes that
# Restaurants1.R has been run previously to create that file.
# After loading, the datasets Cuisines, Restaurants, and ViolationCodes
# are available.
# NOTE(review): the code below calls filter()/inner_join()/group_by()/%>% and
# tally()/favstats()/xyplot()/barchart(), but no library() call is visible
# here -- presumably dplyr and mosaic are attached elsewhere; confirm.
load("MyRestaurants.rda")

DATA CHECKING

Search the NYC website (http://a816-restaurantinspection.nyc.gov/RestaurantInspection/SearchBrowse.do) for records for `DJ REYNOLDS PUB AND RESTAURANT`. What is the history of inspections at this restaurant?

SOLUTION:

Display the results for this restaurant from the dataset.

# Keep only the rows whose DBA is exactly this restaurant's name,
# then preview the first few of them.
DJR <- Restaurants %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
head(DJR)
##      CAMIS                            DBA BORO BUILDING         STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
##   ZIPCODE      PHONE CUISINECODE   INSPDATE ACTION VIOLCODE SCORE
## 1   10019 2122452912          47 2014-09-06      D      10F     2
## 2   10019 2122452912          47 2011-12-15      P      04L    21
## 3   10019 2122452912          47 2012-07-31      D      06C    12
## 4   10019 2122452912          47 2013-07-22      D      10B    11
## 5   10019 2122452912          47 2011-12-29      U      06F    12
## 6   10019 2122452912          47 2011-12-15      P      08A    21
##   CURRENTGRADE  GRADEDATE          RECORDDATE
## 1            A 2014-09-06 2014-10-09 06:01:44
## 2         <NA>       <NA> 2014-10-09 06:01:44
## 3            A 2012-07-31 2014-10-09 06:01:44
## 4            A 2013-07-22 2014-10-09 06:01:44
## 5            A 2011-12-29 2014-10-09 06:01:44
## 6         <NA>       <NA> 2014-10-09 06:01:44

Merge the cuisine name into the Restaurants dataframe (and call the result merged) using the inner_join() function in the dplyr package.

# Add the columns of Cuisines to each Restaurants row by matching on
# CUISINECODE. An inner join keeps only rows whose code appears in both tables.
merged <- Restaurants %>%
  inner_join(Cuisines, by = "CUISINECODE")

Use this new dataset to determine what type of restaurant DJ Reynolds is.

SOLUTION:

DESCRIPTIVE

How many cases are there? What do they represent?

SOLUTION:

How many unique restaurants are there?

# Reduce the inspection records to one row per PHONE value, used here as a
# stand-in for "one row per restaurant".
# NOTE(review): CAMIS looks like a per-restaurant identifier (it is constant
# across the DJ Reynolds rows) -- confirm whether grouping by CAMIS would be
# a more reliable key than PHONE.
# NOTE(review): row_number(PHONE) presumably yields NA when PHONE is missing,
# so those rows would be dropped by filter() -- verify if that is intended.
# The result is still grouped by PHONE, and the name `unique` also shadows
# base::unique as a variable (calls to unique() still resolve to the function).
unique <- Restaurants %>%
  group_by(PHONE) %>%
  filter(row_number(PHONE) == 1)

SOLUTION:

What is the distribution of restaurants by borough?

SOLUTION:

What is the distribution of restaurants by CUISINE?

# Count the de-duplicated restaurants per CUISINECODE and list the counts
# from most to least common.
tally(~ CUISINECODE, data = unique) %>%
  sort(decreasing = TRUE)
## 
##    3   48   14   20   49   62   35   55   53    8   69   99   44   82    5 
## 2657  570  477  463  387  307  234  221  179  161  161  152  139  138  131 
##   27   39   54   47   77   63   70 <NA>   29   43   51   52   50   18   72 
##  129  114  108  107  106  104  104   89   86   82   72   69   68   67   58 
##   78   56   84    7   81   75   28   38   83   17   10   23   12   22   80 
##   56   51   51   49   43   38   33   31   30   29   22   22   20   20   18 
##   37    2    4   73   13   67   30   61   68   32   33   41   57    6    9 
##   17   15   15   15   14   14   12   11   11   10   10    7    7    6    5 
##   31   59   71    1   21   42   76    0   34   46   60   40   45   58   64 
##    5    5    5    4    4    4    4    3    3    3    3    2    2    2    2 
##   66   74   16   24   26 
##    2    2    1    1    1
# Look up the descriptions of the four most frequent cuisine codes
# from the tally above.
Cuisines %>%
  filter(CUISINECODE %in% c(3, 48, 14, 20))
##   CUISINECODE        CUISINEDESC
## 1           3          American 
## 2          20            Chinese
## 3          48            Italian
## 4          14 Caf\xe9/Coffee/Tea

How many distinct restaurant names are there? What is the most common name?

# head(sort(tally(~ DBA, data=unique), decreasing=TRUE))

How many distinct locations does Dunkin' Donuts have in Manhattan?

SOLUTION:

SCORE

What is the distribution of the SCORE variable in the Restaurants dataset? Can you determine the cutoffs for A, B, and C grades?

# Hint: try running the command:
# barchart(tally(~ SCORE, data=Restaurants), xlim=c(-1, 50), horizontal=FALSE)

What is the score distribution for restaurants with A grades?

# favstats(~ SCORE, data=filter(Restaurants, CURRENTGRADE=="A"))

What is the change in score for Dunkin' Donuts restaurants in NYC over time?

# Select every record whose DBA is exactly "DUNKIN' DONUTS".
DD <- Restaurants %>%
  filter(DBA == "DUNKIN' DONUTS")

# Scatterplot of SCORE against GRADEDATE with a smoother overlaid; points are
# drawn semi-transparent and the y-axis is limited to scores between 0 and 20.
# NOTE(review): GRADEDATE is NA for some inspections (see the DJ Reynolds
# rows), so those records presumably do not appear here -- confirm whether
# INSPDATE was intended for a "score over time" plot.
xyplot(
  SCORE ~ GRADEDATE,
  data = DD,
  type = c("p", "smooth"),
  alpha = 0.2,
  ylim = c(0, 20)
)

plot of chunk unnamed-chunk-15

YOUR TURN

Use these datasets to explore the NYC violations data to answer an interesting statistical question. Prepare a single figure to share with the class.