# Assumes Restaurants1.R has been run previously.
# After loading, the datasets Cuisines, Restaurants, and ViolationCodes are available.
load("MyRestaurants.rda")
Search the NYC website (http://a816-restaurantinspection.nyc.gov/RestaurantInspection/SearchBrowse.do) for records for the `DJ REYNOLDS PUB AND RESTAURANT`. What is the history of inspections at this restaurant?
SOLUTION:
Display the results for this restaurant from the dataset.
# Keep only the rows whose DBA value is exactly this restaurant's name,
# then preview the first few of them.
DJR <- Restaurants %>%
  filter(DBA == "DJ REYNOLDS PUB AND RESTAURANT")
head(DJR)
## CAMIS DBA BORO BUILDING STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 2 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 3 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 4 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 5 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 6 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## ZIPCODE PHONE CUISINECODE INSPDATE ACTION VIOLCODE SCORE
## 1 10019 2122452912 47 2014-09-06 D 10F 2
## 2 10019 2122452912 47 2011-12-15 P 04L 21
## 3 10019 2122452912 47 2012-07-31 D 06C 12
## 4 10019 2122452912 47 2013-07-22 D 10B 11
## 5 10019 2122452912 47 2011-12-29 U 06F 12
## 6 10019 2122452912 47 2011-12-15 P 08A 21
## CURRENTGRADE GRADEDATE RECORDDATE
## 1 A 2014-09-06 2014-10-09 06:01:44
## 2 <NA> <NA> 2014-10-09 06:01:44
## 3 A 2012-07-31 2014-10-09 06:01:44
## 4 A 2013-07-22 2014-10-09 06:01:44
## 5 A 2011-12-29 2014-10-09 06:01:44
## 6 <NA> <NA> 2014-10-09 06:01:44
Merge the cuisine name into the Restaurants dataframe (and call this merged) using the inner_join() function in the dplyr package.
# Attach the cuisine description to every Restaurants row by matching on
# CUISINECODE; rows with no matching code in Cuisines are not kept.
merged <- Restaurants %>%
  inner_join(Cuisines, by = "CUISINECODE")
Use this new dataset to determine what type of restaurant DJ Reynolds is.
# Preview the first six merged rows (CUISINEDESC is the newly joined column).
head(merged, n = 6L)
## CAMIS DBA BORO BUILDING STREET
## 1 30191841 DJ REYNOLDS PUB AND RESTAURANT 1 351 WEST 57 STREET
## 2 40356068 TOV KOSHER KITCHEN 4 97-22 63 ROAD
## 3 40356151 BRUNOS ON THE BOULEVARD 4 8825 ASTORIA BOULEVARD
## 4 40356483 WILKEN'S FINE FOOD 3 7114 AVENUE U
## 5 30075445 MORRIS PARK BAKE SHOP 2 1007 MORRIS PARK AVE
## 6 30112340 WENDY'S 3 469 FLATBUSH AVENUE
## ZIPCODE PHONE CUISINECODE INSPDATE ACTION VIOLCODE SCORE
## 1 10019 2122452912 47 2014-09-06 D 10F 2
## 2 11374 2147483647 50 2013-01-17 D 02B 13
## 3 11369 2147483647 3 2014-05-02 F 06A 10
## 4 11234 2147483647 27 2014-05-29 D 08C 10
## 5 10462 2147483647 8 2014-03-03 D 10F 2
## 6 11225 2147483647 39 2014-07-01 F 06A 23
## CURRENTGRADE GRADEDATE RECORDDATE CUISINEDESC
## 1 A 2014-09-06 2014-10-09 06:01:44 Irish
## 2 <NA> 2013-01-17 2014-10-09 06:01:44 Jewish/Kosher
## 3 A 2014-05-02 2014-10-09 06:01:44 American
## 4 A 2014-05-29 2014-10-09 06:01:44 Delicatessen
## 5 A 2014-03-03 2014-10-09 06:01:44 Bakery
## 6 B 2014-07-01 2014-10-09 06:01:44 Hamburgers
SOLUTION: DJ Reynolds is an Irish restaurant.
How many cases are there? What do they represent?
# Number of rows (cases) in the merged data.
dim(merged)[1L]
## [1] 526066
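SOLUTION: There are 526066 cases. Each case is one inspection record for a restaurant: typically one violation cited during an inspection, so a single inspection can span several rows (for example, rows 2 and 6 for DJ Reynolds share the inspection date 2011-12-15 but have different violation codes).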
How many unique restaurants are there?
# Build a one-row-per-restaurant table.
# Restaurants are identified by CAMIS: every DJ Reynolds row above carries the
# same CAMIS, while different restaurants carry different ones. PHONE is not a
# usable key, because many different restaurants share the placeholder value
# 2147483647, so grouping on it collapses distinct restaurants into one row.
# The result is ungrouped so later steps do not inherit per-restaurant grouping.
# The name `unique` is kept because later code refers to it; calls to the base
# function unique() still work, since R skips non-function objects when
# resolving a call.
# NOTE: printed results below that derive from `unique` were produced by the
# earlier PHONE-based version and must be regenerated by re-running.
unique = Restaurants %>%
  group_by(CAMIS) %>%
  filter(row_number() == 1) %>%
  ungroup()
summary(unique)
## CAMIS DBA BORO BUILDING
## Length:8480 Length:8480 Min. :0.00 Length:8480
## Class :character Class :character 1st Qu.:1.00 Class :character
## Mode :character Mode :character Median :1.00 Mode :character
## Mean :1.02
## 3rd Qu.:1.00
## Max. :5.00
## NA's :89
## STREET ZIPCODE PHONE CUISINECODE
## Length:8480 Length:8480 Min. :0.00e+00 Min. : 0.0
## Class :character Class :character 1st Qu.:2.12e+09 1st Qu.: 3.0
## Mode :character Mode :character Median :2.13e+09 Median :27.0
## Mean :2.10e+09 Mean :31.3
## 3rd Qu.:2.13e+09 3rd Qu.:51.0
## Max. :2.15e+09 Max. :99.0
## NA's :1 NA's :89
## INSPDATE ACTION VIOLCODE
## Min. :1900-01-01 00:00:00 Length:8480 Length:8480
## 1st Qu.:2014-03-11 00:00:00 Class :character Class :character
## Median :2014-05-28 00:00:00 Mode :character Mode :character
## Mean :2012-12-22 21:23:15
## 3rd Qu.:2014-07-31 00:00:00
## Max. :2014-10-07 00:00:00
## NA's :89
## SCORE CURRENTGRADE GRADEDATE
## Min. : -1.0 Length:8480 Min. :2011-06-16 00:00:00
## 1st Qu.: 7.0 Class :character 1st Qu.:2014-03-14 00:00:00
## Median : 10.0 Mode :character Median :2014-05-29 00:00:00
## Mean : 11.2 Mean :2014-05-14 03:34:17
## 3rd Qu.: 12.0 3rd Qu.:2014-07-31 00:00:00
## Max. :119.0 Max. :2014-10-07 00:00:00
## NA's :187 NA's :275
## RECORDDATE
## Min. :2013-09-18 00:00:00
## 1st Qu.:2014-10-09 06:01:44
## Median :2014-10-09 06:01:44
## Mean :2014-10-07 15:10:53
## 3rd Qu.:2014-10-09 06:01:44
## Max. :2014-10-09 06:01:59
## NA's :13
SOLUTION: There are 8480 unique restaurants there.
What is the distribution of restaurants by borough?
# Count restaurants per borough code. The formula interface matches the other
# tally() calls in this file and avoids the "First argument should be a
# formula" guess that passing a bare vector triggers.
tally(~ BORO, data=unique)
##
## 0 1 2 3 4 5 <NA>
## 2 8314 8 31 27 9 89
SOLUTION: There are 2 restaurants in Jamaica, 8314 in Manhattan, 8 in the Bronx, 31 in Brooklyn, 27 in Queens, and 9 in Staten Island. Additionally, there are 89 restaurants that do not have an entry for borough.
What is the distribution of restaurants by CUISINE?
# Count restaurants per cuisine code, most frequent code first.
tally(~ CUISINECODE, data = unique) %>%
  sort(decreasing = TRUE)
##
## 3 48 14 20 49 62 35 55 53 8 69 99 44 82 5
## 2657 570 477 463 387 307 234 221 179 161 161 152 139 138 131
## 27 39 54 47 77 63 70 <NA> 29 43 51 52 50 18 72
## 129 114 108 107 106 104 104 89 86 82 72 69 68 67 58
## 78 56 84 7 81 75 28 38 83 17 10 23 12 22 80
## 56 51 51 49 43 38 33 31 30 29 22 22 20 20 18
## 37 2 4 73 13 67 30 61 68 32 33 41 57 6 9
## 17 15 15 15 14 14 12 11 11 10 10 7 7 6 5
## 31 59 71 1 21 42 76 0 34 46 60 40 45 58 64
## 5 5 5 4 4 4 4 3 3 3 3 2 2 2 2
## 66 74 16 24 26
## 2 2 1 1 1
# Look up the descriptions of the four most frequent cuisine codes.
Cuisines %>%
  filter(CUISINECODE %in% c(3, 48, 14, 20))
## CUISINECODE CUISINEDESC
## 1 3 American
## 2 20 Chinese
## 3 48 Italian
## 4 14 Caf\xe9/Coffee/Tea
How many distinct restaurant names are there? What is the most common name?
# Count restaurants per name (DBA) and show the six most frequent names.
tally(~ DBA, data = unique) %>%
  sort(decreasing = TRUE) %>%
  head()
##
## STARBUCKS COFFEE SUBWAY DUNKIN' DONUTS
## 147 134 122
## MCDONALD'S CHIPOTLE MEXICAN GRILL PRET A MANGER
## 60 27 25
How many distinct locations does Dunkin Donuts have in Manhattan?
SOLUTION:
What is the distribution of the SCORE variable in the Restaurants dataset? Can you determine the cutoffs for A, B, and C grades?
# Hint: try this command. It draws vertical bars of how often each SCORE value
# occurs in Restaurants, with the axis limits set to c(-1, 50).
barchart(
  tally(~ SCORE, data = Restaurants),
  horizontal = FALSE,
  xlim = c(-1, 50)
)
What is the score distribution for restaurants with A grades?
# Summary statistics of SCORE restricted to rows whose current grade is "A".
Restaurants %>%
  filter(CURRENTGRADE == "A") %>%
  favstats(~ SCORE, data = .)
## min Q1 median Q3 max mean sd n missing
## 0 9 11 12 27 9.878 2.911 156332 0
What is the change in score for Dunkin Donuts restaurants in NYC over time?
# Score over time for every row whose DBA is exactly "DUNKIN' DONUTS".
# The x-axis is the inspection date rather than GRADEDATE: GRADEDATE is missing
# for inspections that produced no grade (the DJ Reynolds rows above include
# scored inspections with GRADEDATE <NA>), so plotting against it silently
# drops those inspections from the trend. INSPDATE is the date variable the
# other score-over-time plot in this file uses.
DD = filter(Restaurants, DBA=="DUNKIN' DONUTS")
# Semi-transparent points plus a smoother; the y-axis is limited to scores 0-20.
xyplot(SCORE ~ INSPDATE, alpha=0.2, type=c("p", "smooth"), ylim=c(0, 20), data=DD)
Use these datasets to explore the NYC violations data to answer an interesting statistical question. Prepare a single figure to share with the class.
# Select the rows for ten hand-picked restaurants (identified by CAMIS) and
# order them by name and inspection date so each line is drawn in time order.
# %in% replaces the ten-way chain of == / | comparisons; it keeps the same rows.
ten_camis <- c(
  "41542018", "40980389", "40730396", "41713429", "50000616",
  "41282506", "40900039", "40874688", "40510389", "41246747"
)
TenRestaurants <- merged %>%
  filter(CAMIS %in% ten_camis) %>%
  arrange(DBA, INSPDATE)

# One line per restaurant name. `groups` is spelled out in full rather than
# relying on partial matching of `group`.
xyplot(
  SCORE ~ INSPDATE,
  groups = DBA,
  data = TenRestaurants,
  type = "l",
  auto.key = TRUE,
  xlab = "Inspection Date",
  ylab = "Score",
  main = "Restaurant Scores By Inspection Dates"
)