# Prerequisite: Restaurants1.R has been run previously, producing MyRestaurants.rda.
# After loading, the datasets Cuisines, Restaurants, and ViolationCodes are available.
load("MyRestaurants.rda")
Search the NYC website (http://a816-restaurantinspection.nyc.gov/RestaurantInspection/SearchBrowse.do) for records for the `DJ REYNOLDS PUB AND RESTAURANT`. What is the history of inspections at this restaurant?
SOLUTION:
Display the results for this restaurant from the dataset.
# Keep only the records whose DBA is exactly this establishment's name.
DJR = Restaurants %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
# Preview the first few matching rows.
head(DJR)
## CAMIS DBA BORO BUILDING STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## ZIPCODE PHONE CUISINECODE INSPDATE ACTION VIOLCODE SCORE
## 1 10019 2122452912 47 2014-09-06 D 10F 2
## 2 10019 2122452912 47 2011-12-15 P 04L 21
## 3 10019 2122452912 47 2012-07-31 D 06C 12
## 4 10019 2122452912 47 2013-07-22 D 10B 11
## 5 10019 2122452912 47 2011-12-29 U 06F 12
## 6 10019 2122452912 47 2011-12-15 P 08A 21
## CURRENTGRADE GRADEDATE RECORDDATE
## 1 A 2014-09-06 2014-10-09 06:01:44
## 2 <NA> <NA> 2014-10-09 06:01:44
## 3 A 2012-07-31 2014-10-09 06:01:44
## 4 A 2013-07-22 2014-10-09 06:01:44
## 5 A 2011-12-29 2014-10-09 06:01:44
## 6 <NA> <NA> 2014-10-09 06:01:44
Merge the cuisine name into the Restaurants dataframe (and call this merged) using the inner_join() function in the dplyr package.
# Attach the cuisine description to every restaurant record by matching on
# CUISINECODE; only rows with a match in both tables are kept.
merged = Restaurants %>%
  inner_join(Cuisines, by = "CUISINECODE")
Use this new dataset to determine what type of restaurant DJ Reynolds is.
# List the columns of the joined data to see what the join added.
names(merged)
## [1] "CAMIS" "DBA" "BORO" "BUILDING"
## [5] "STREET" "ZIPCODE" "PHONE" "CUISINECODE"
## [9] "INSPDATE" "ACTION" "VIOLCODE" "SCORE"
## [13] "CURRENTGRADE" "GRADEDATE" "RECORDDATE" "CUISINEDESC"
# Same lookup as before, but on the joined data so the cuisine description
# column is included in the result.
DJR2 = merged %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
# Preview the first few matching rows.
head(DJR2)
## CAMIS DBA BORO BUILDING STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## ZIPCODE PHONE CUISINECODE INSPDATE ACTION VIOLCODE SCORE
## 1 10019 2122452912 47 2014-09-06 D 10F 2
## 2 10019 2122452912 47 2011-12-15 P 04L 21
## 3 10019 2122452912 47 2012-07-31 D 06C 12
## 4 10019 2122452912 47 2013-07-22 D 10B 11
## 5 10019 2122452912 47 2011-12-29 U 06F 12
## 6 10019 2122452912 47 2011-12-15 P 08A 21
## CURRENTGRADE GRADEDATE RECORDDATE CUISINEDESC
## 1 A 2014-09-06 2014-10-09 06:01:44 Irish
## 2 <NA> <NA> 2014-10-09 06:01:44 Irish
## 3 A 2012-07-31 2014-10-09 06:01:44 Irish
## 4 A 2013-07-22 2014-10-09 06:01:44 Irish
## 5 A 2011-12-29 2014-10-09 06:01:44 Irish
## 6 <NA> <NA> 2014-10-09 06:01:44 Irish
SOLUTION: Irish Restaurant with Cuisinecode 47
How many cases are there? What do they represent?
# Number of rows in the joined data. Rows are not restaurants: a single
# restaurant (e.g. DJ Reynolds above) occupies several rows.
nrow(merged)
## [1] 526066
SOLUTION:
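In the merged data set, there are 526066 observations. Each observation appears to be a single violation recorded at an inspection, so one restaurant contributes many rows (for example, DJ REYNOLDS PUB AND RESTAURANT appears above with several inspection dates and violation codes).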
How many unique restaurants are there?
# Keep one row per restaurant (the first record seen for each CAMIS value).
# Rows are de-duplicated on CAMIS rather than PHONE: PHONE is stored as an
# integer and prints as 2147483647 for many different restaurants (see the
# head(Restaurants) output), so keying on it merges distinct restaurants.
# CAMIS is presumably the per-restaurant identifier (all DJ Reynolds rows share
# one value) -- confirm against the data dictionary.
# NOTE: the printed results that follow were produced with the PHONE-based
# grouping and must be regenerated after this change.
# The name `unique` is kept because later statements refer to it, even though
# it shares its name with base::unique().
unique = Restaurants %>%
  filter(!duplicated(CAMIS))
# Number of distinct restaurants.
nrow(unique)
## [1] 8480
SOLUTION: There are 8480 unique restaurants in the data set
What is the distribution of restaurants by borough?
# Column names of `unique` (the de-duplicated data frame built above, not base::unique()).
names(unique)
## [1] "CAMIS" "DBA" "BORO" "BUILDING"
## [5] "STREET" "ZIPCODE" "PHONE" "CUISINECODE"
## [9] "INSPDATE" "ACTION" "VIOLCODE" "SCORE"
## [13] "CURRENTGRADE" "GRADEDATE" "RECORDDATE"
# Count the rows of `unique` for each BORO value, using the formula form of
# tally() (presumably mosaic's -- confirm which package is attached).
tally(~BORO, data=unique)
##
## 0 1 2 3 4 5 <NA>
## 2 8314 8 31 27 9 89
SOLUTION: There are 2 restaurants in Borough 0, 8314 restaurants in Borough 1, 8 restaurants in Borough 2, 31 restaurants in Borough 3, 27 restaurants in Borough 4, and 9 restaurants in Borough 5
What is the distribution of restaurants by CUISINE?
# Count the rows of `unique` for each CUISINECODE, most frequent code first.
sort(tally(~ CUISINECODE, data=unique), decreasing=TRUE)
##
## 3 48 14 20 49 62 35 55 53 8 69 99 44 82 5
## 2657 570 477 463 387 307 234 221 179 161 161 152 139 138 131
## 27 39 54 47 77 63 70 <NA> 29 43 51 52 50 18 72
## 129 114 108 107 106 104 104 89 86 82 72 69 68 67 58
## 78 56 84 7 81 75 28 38 83 17 10 23 12 22 80
## 56 51 51 49 43 38 33 31 30 29 22 22 20 20 18
## 37 2 4 73 13 67 30 61 68 32 33 41 57 6 9
## 17 15 15 15 14 14 12 11 11 10 10 7 7 6 5
## 31 59 71 1 21 42 76 0 34 46 60 40 45 58 64
## 5 5 5 4 4 4 4 3 3 3 3 2 2 2 2
## 66 74 16 24 26
## 2 2 1 1 1
# Look up the descriptions of the four most frequent cuisine codes.
Cuisines %>%
  filter(CUISINECODE %in% c(3, 48, 14, 20))
## CUISINECODE CUISINEDESC
## 1 3 American
## 2 20 Chinese
## 3 48 Italian
## 4 14 Caf\xe9/Coffee/Tea
SOLUTION: The most common cuisines are American (2657), Italian (570), Cafe/Coffee/Tea (477), and Chinese (463).
How many distinct restaurant names are there? What is the most common name?
# Disabled: would tabulate DBA in `unique` and show the most frequent names.
# head(sort(tally(~ DBA, data=unique), decreasing=TRUE))
# Preview the first rows of `unique`.
head(unique)
## Source: local data frame [6 x 15]
## Groups: PHONE
##
## CAMIS DBA BORO BUILDING STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 2 40356068 TOV KOSHER KITCHEN 4 97-22 63 ROAD
## 3 40359480 1 EAST 66TH STREET KITCHEN 1 1 EAST 66 STREET
## 4 40361521 GLORIOUS FOOD 1 522 EAST 74 STREET
## 5 40361708 BULLY'S DELI 1 759 BROADWAY
## 6 40362098 HARRIET'S KITCHEN 1 502 AMSTERDAM AVENUE
## Variables not shown: ZIPCODE (chr), PHONE (int), CUISINECODE (int),
## INSPDATE (time), ACTION (chr), VIOLCODE (chr), SCORE (dbl), CURRENTGRADE
## (chr), GRADEDATE (time), RECORDDATE (time)
There are
How many distinct locations does Dunkin Donuts have in Manhattan?
SOLUTION:
What is the distribution of the SCORE variable in the Restaurants dataset? Can you determine the cutoffs for A, B, and C grades?
# hint try running the command:
# barchart(tally(~ SCORE, data=Restaurants), xlim=c(-1, 50), horizontal=FALSE)
What is the score distribution for restaurants with A grades?
# favstats(~ SCORE, data=filter(Restaurants, CURRENTGRADE=="A"))
What is the change in score for Dunkin Donuts restaurants in NYC over time?
# All records whose DBA is exactly "DUNKIN' DONUTS".
DD = Restaurants %>%
  filter(DBA == "DUNKIN' DONUTS")
# Preview the full Restaurants data (not DD).
head(Restaurants)
## CAMIS DBA BORO BUILDING STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 2 40356068 TOV KOSHER KITCHEN 4 97-22 63 ROAD
## 3 40356151 BRUNOS ON THE BOULEVARD 4 8825 ASTORIA BOULEVARD
## 4 40356483 WILKEN'S FINE FOOD 3 7114 AVENUE U
## 5 30075445 MORRIS PARK BAKE SHOP 2 1007 MORRIS PARK AVE
## 6 30112340 WENDY'S 3 469 FLATBUSH AVENUE
## ZIPCODE PHONE CUISINECODE INSPDATE ACTION VIOLCODE SCORE
## 1 10019 2122452912 47 2014-09-06 D 10F 2
## 2 11374 2147483647 50 2013-01-17 D 02B 13
## 3 11369 2147483647 3 2014-05-02 F 06A 10
## 4 11234 2147483647 27 2014-05-29 D 08C 10
## 5 10462 2147483647 8 2014-03-03 D 10F 2
## 6 11225 2147483647 39 2014-07-01 F 06A 23
## CURRENTGRADE GRADEDATE RECORDDATE
## 1 A 2014-09-06 2014-10-09 06:01:44
## 2 <NA> 2013-01-17 2014-10-09 06:01:44
## 3 A 2014-05-02 2014-10-09 06:01:44
## 4 A 2014-05-29 2014-10-09 06:01:44
## 5 A 2014-03-03 2014-10-09 06:01:44
## 6 B 2014-07-01 2014-10-09 06:01:44
# All records whose DBA is exactly "WENDY'S".
WENDY = Restaurants %>%
  filter(DBA == "WENDY'S")
# Dunkin' Donuts scores against grade date: semi-transparent points plus a
# smoother, with the score axis limited to 0-20.
xyplot(
  SCORE ~ GRADEDATE,
  data = DD,
  type = c("p", "smooth"),
  alpha = 0.2,
  ylim = c(0, 20)
)
Use these datasets to explore the NYC violations data to answer an interesting statistical question. Prepare a single figure to share with the class.
# Vertical box plots of the Wendy's scores, one box per BORO value.
bwplot(
  SCORE ~ BORO,
  data = WENDY,
  horizontal = FALSE,
  main = "Distribution of Wendy's Scores by Borough",
  xlab = "Borough Number",
  ylab = "Score"
)
# Overlay horizontal reference lines at scores 13 and 27.
ladd(panel.abline(h = 13))
ladd(panel.abline(h = 27))
13, 27, Are there particular