# Restore the objects saved earlier; Restaurants1.R must already have been run.
# After this call the datasets Cuisines, Restaurants, and ViolationCodes exist.
load("MyRestaurants.rda")

DATA CHECKING

Search the NYC website (http://a816-restaurantinspection.nyc.gov/RestaurantInspection/SearchBrowse.do) for records for the `DJ REYNOLDS PUB AND RESTAURANT`. What is the history of inspections at this restaurant?

SOLUTION:

Display the results for this restaurant from the dataset.

# Keep only the rows whose DBA is exactly this restaurant's name, then preview them.
DJR <- Restaurants %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
head(DJR)
##      CAMIS                            DBA BORO BUILDING         STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351 WEST 57 STREET
##   ZIPCODE      PHONE CUISINECODE   INSPDATE ACTION VIOLCODE SCORE
## 1   10019 2122452912          47 2014-09-06      D      10F     2
## 2   10019 2122452912          47 2011-12-15      P      04L    21
## 3   10019 2122452912          47 2012-07-31      D      06C    12
## 4   10019 2122452912          47 2013-07-22      D      10B    11
## 5   10019 2122452912          47 2011-12-29      U      06F    12
## 6   10019 2122452912          47 2011-12-15      P      08A    21
##   CURRENTGRADE  GRADEDATE          RECORDDATE
## 1            A 2014-09-06 2014-10-09 06:01:44
## 2         <NA>       <NA> 2014-10-09 06:01:44
## 3            A 2012-07-31 2014-10-09 06:01:44
## 4            A 2013-07-22 2014-10-09 06:01:44
## 5            A 2011-12-29 2014-10-09 06:01:44
## 6         <NA>       <NA> 2014-10-09 06:01:44

Merge the cuisine name into the Restaurants dataframe (and call this merged) using the inner_join() function in the dplyr package.

# Join the cuisine lookup onto the restaurant rows by their shared CUISINECODE,
# keeping only rows whose code has a match in Cuisines.
merged <- Restaurants %>%
  inner_join(Cuisines, by = "CUISINECODE")

Use this new dataset to determine what type of restaurant DJ Reynolds is.

# Preview the first rows of the joined data, including the new CUISINEDESC column.
merged %>%
  head()
##      CAMIS                            DBA BORO BUILDING            STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT    1      351    WEST 57 STREET
## 2 40356068             TOV KOSHER KITCHEN    4    97-22           63 ROAD
## 3 40356151        BRUNOS ON THE BOULEVARD    4     8825 ASTORIA BOULEVARD
## 4 40356483             WILKEN'S FINE FOOD    3     7114          AVENUE U
## 5 30075445          MORRIS PARK BAKE SHOP    2     1007   MORRIS PARK AVE
## 6 30112340                        WENDY'S    3      469   FLATBUSH AVENUE
##   ZIPCODE      PHONE CUISINECODE   INSPDATE ACTION VIOLCODE SCORE
## 1   10019 2122452912          47 2014-09-06      D      10F     2
## 2   11374 2147483647          50 2013-01-17      D      02B    13
## 3   11369 2147483647           3 2014-05-02      F      06A    10
## 4   11234 2147483647          27 2014-05-29      D      08C    10
## 5   10462 2147483647           8 2014-03-03      D      10F     2
## 6   11225 2147483647          39 2014-07-01      F      06A    23
##   CURRENTGRADE  GRADEDATE          RECORDDATE   CUISINEDESC
## 1            A 2014-09-06 2014-10-09 06:01:44         Irish
## 2         <NA> 2013-01-17 2014-10-09 06:01:44 Jewish/Kosher
## 3            A 2014-05-02 2014-10-09 06:01:44     American 
## 4            A 2014-05-29 2014-10-09 06:01:44  Delicatessen
## 5            A 2014-03-03 2014-10-09 06:01:44        Bakery
## 6            B 2014-07-01 2014-10-09 06:01:44    Hamburgers

SOLUTION: DJ Reynolds is an Irish restaurant.

DESCRIPTIVE

How many cases are there? What do they represent?

# Number of rows (cases) in the joined dataset.
dim(merged)[1L]
## [1] 526066

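SOLUTION: There are 526066 cases. Each case represents one violation recorded during an inspection of a restaurant, so a single inspection can appear in several rows (for example, the two 2011-12-15 rows for DJ Reynolds shown above).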

How many unique restaurants are there?

# Group the rows by PHONE, keep the row ranked first within each group, and
# summarise the result; each kept row is treated as one restaurant.
# NOTE(review): PHONE looks unreliable as a restaurant key -- the head(merged)
# output shows five different restaurants sharing PHONE 2147483647 (the largest
# 32-bit integer), so they would all fall into a single group here. CAMIS
# appears to identify a restaurant; consider grouping by CAMIS -- TODO confirm.
# The object name `unique` is also the name of base R's unique() function.
unique = Restaurants %>% group_by(PHONE) %>% filter(row_number(PHONE)==1)
summary(unique)
##     CAMIS               DBA                 BORO        BUILDING        
##  Length:8480        Length:8480        Min.   :0.00   Length:8480       
##  Class :character   Class :character   1st Qu.:1.00   Class :character  
##  Mode  :character   Mode  :character   Median :1.00   Mode  :character  
##                                        Mean   :1.02                     
##                                        3rd Qu.:1.00                     
##                                        Max.   :5.00                     
##                                        NA's   :89                       
##     STREET            ZIPCODE              PHONE           CUISINECODE  
##  Length:8480        Length:8480        Min.   :0.00e+00   Min.   : 0.0  
##  Class :character   Class :character   1st Qu.:2.12e+09   1st Qu.: 3.0  
##  Mode  :character   Mode  :character   Median :2.13e+09   Median :27.0  
##                                        Mean   :2.10e+09   Mean   :31.3  
##                                        3rd Qu.:2.13e+09   3rd Qu.:51.0  
##                                        Max.   :2.15e+09   Max.   :99.0  
##                                        NA's   :1          NA's   :89    
##     INSPDATE                      ACTION            VIOLCODE        
##  Min.   :1900-01-01 00:00:00   Length:8480        Length:8480       
##  1st Qu.:2014-03-11 00:00:00   Class :character   Class :character  
##  Median :2014-05-28 00:00:00   Mode  :character   Mode  :character  
##  Mean   :2012-12-22 21:23:15                                        
##  3rd Qu.:2014-07-31 00:00:00                                        
##  Max.   :2014-10-07 00:00:00                                        
##  NA's   :89                                                         
##      SCORE       CURRENTGRADE         GRADEDATE                  
##  Min.   : -1.0   Length:8480        Min.   :2011-06-16 00:00:00  
##  1st Qu.:  7.0   Class :character   1st Qu.:2014-03-14 00:00:00  
##  Median : 10.0   Mode  :character   Median :2014-05-29 00:00:00  
##  Mean   : 11.2                      Mean   :2014-05-14 03:34:17  
##  3rd Qu.: 12.0                      3rd Qu.:2014-07-31 00:00:00  
##  Max.   :119.0                      Max.   :2014-10-07 00:00:00  
##  NA's   :187                        NA's   :275                  
##    RECORDDATE                 
##  Min.   :2013-09-18 00:00:00  
##  1st Qu.:2014-10-09 06:01:44  
##  Median :2014-10-09 06:01:44  
##  Mean   :2014-10-07 15:10:53  
##  3rd Qu.:2014-10-09 06:01:44  
##  Max.   :2014-10-09 06:01:59  
##  NA's   :13

SOLUTION: There are 8480 unique restaurants.

What is the distribution of restaurants by borough?

# Count restaurants per borough code. tally() expects a formula plus a data
# argument, as in the other tally() calls in this file; passing a bare vector
# only works through a fallback that prints a "First argument should be a
# formula" message.
tally(~ BORO, data=unique)
## 
##    0    1    2    3    4    5 <NA> 
##    2 8314    8   31   27    9   89

SOLUTION: There are 2 restaurants in Jamaica, 8314 in Manhattan, 8 in the Bronx, 31 in Brooklyn, 27 in Queens, and 9 in Staten Island. Additionally, there are 89 restaurants that do not have an entry for borough.

What is the distribution of restaurants by CUISINE?

# Tabulate restaurants by cuisine code and list the most frequent codes first.
tally(~ CUISINECODE, data = unique) %>%
  sort(decreasing = TRUE)
## 
##    3   48   14   20   49   62   35   55   53    8   69   99   44   82    5 
## 2657  570  477  463  387  307  234  221  179  161  161  152  139  138  131 
##   27   39   54   47   77   63   70 <NA>   29   43   51   52   50   18   72 
##  129  114  108  107  106  104  104   89   86   82   72   69   68   67   58 
##   78   56   84    7   81   75   28   38   83   17   10   23   12   22   80 
##   56   51   51   49   43   38   33   31   30   29   22   22   20   20   18 
##   37    2    4   73   13   67   30   61   68   32   33   41   57    6    9 
##   17   15   15   15   14   14   12   11   11   10   10    7    7    6    5 
##   31   59   71    1   21   42   76    0   34   46   60   40   45   58   64 
##    5    5    5    4    4    4    4    3    3    3    3    2    2    2    2 
##   66   74   16   24   26 
##    2    2    1    1    1
# Look up the descriptions of the four most frequent cuisine codes.
Cuisines %>%
  filter(CUISINECODE %in% c(3, 48, 14, 20))
##   CUISINECODE        CUISINEDESC
## 1           3          American 
## 2          20            Chinese
## 3          48            Italian
## 4          14 Caf\xe9/Coffee/Tea

How many distinct restaurant names are there? What is the most common name?

# Tabulate restaurants by name (DBA), order by frequency, and show the top six.
tally(~ DBA, data = unique) %>%
  sort(decreasing = TRUE) %>%
  head()
## 
##       STARBUCKS COFFEE                 SUBWAY         DUNKIN' DONUTS 
##                    147                    134                    122 
##             MCDONALD'S CHIPOTLE MEXICAN GRILL          PRET A MANGER 
##                     60                     27                     25

How many distinct locations does Dunkin Donuts have in Manhattan?

SOLUTION:

SCORE

What is the distribution of the SCORE variable in the Restaurants dataset? Can you determine the cutoffs for A, B, and C grades?

# Hint: try running this command. It draws vertical bars of the SCORE counts
# with xlim set to c(-1, 50).
barchart(
  tally(~ SCORE, data = Restaurants),
  xlim = c(-1, 50),
  horizontal = FALSE
)

plot of chunk unnamed-chunk-13

What is the score distribution for restaurants with A grades?

# Summary statistics of SCORE restricted to rows whose current grade is "A".
grade_a_rows <- filter(Restaurants, CURRENTGRADE == "A")
favstats(~ SCORE, data = grade_a_rows)
##  min Q1 median Q3 max  mean    sd      n missing
##    0  9     11 12  27 9.878 2.911 156332       0

What is the change in score for Dunkin Donuts restaurants in NYC over time?

# Pull every record for Dunkin' Donuts, then plot SCORE against GRADEDATE as
# semi-transparent points plus a smoother, with ylim set to c(0, 20).
DD <- Restaurants %>%
  filter(DBA == "DUNKIN' DONUTS")
xyplot(
  SCORE ~ GRADEDATE,
  data = DD,
  type = c("p", "smooth"),
  alpha = 0.2,
  ylim = c(0, 20)
)

plot of chunk unnamed-chunk-15

YOUR TURN

Use these datasets to explore the NYC violations data to answer an interesting statistical question. Prepare a single figure to share with the class.

Looking at ten restaurants over time

# Select ten restaurants by their CAMIS identifiers and order the rows by name
# and inspection date so that each restaurant's line is drawn chronologically.
# %in% replaces the long chain of == comparisons joined with |.
TenRestaurants <- merged %>%
  filter(CAMIS %in% c(
    "41542018", "40980389", "40730396", "41713429", "50000616",
    "41282506", "40900039", "40874688", "40510389", "41246747"
  )) %>%
  arrange(DBA, INSPDATE)

# One line per restaurant name (DBA). The lattice argument is `groups`; it is
# spelled out in full rather than relying on partial matching of `group`.
xyplot(
  SCORE ~ INSPDATE,
  groups = DBA,
  data = TenRestaurants,
  xlab = "Inspection Date",
  ylab = "Score",
  auto.key = TRUE,
  main = "Restaurant Scores By Inspection Dates",
  type = "l"
)

plot of chunk unnamed-chunk-16