# Data source: https://data.baltimorecity.gov/Community/Restaurants/k5ry-ef3g
# Load the Baltimore restaurants data from a CSV file in the working
# directory. The printed output in these notes shows the text columns as
# factors (see the str() and summary() output), which is what read.csv()
# produced by default before R 4.0.0. Request it explicitly so the results
# are the same on current versions of R.
restData <- read.csv("Restaurants.csv", stringsAsFactors = TRUE)
# Show the first six rows to get a feel for the columns.
head(restData)
## name zipCode neighborhood councilDistrict
## 1 410 21206 Frankford 2
## 2 1919 21231 Fells Point 1
## 3 SAUTE 21224 Canton 1
## 4 #1 CHINESE KITCHEN 21211 Hampden 14
## 5 #1 chinese restaurant 21223 Millhill 9
## 6 19TH HOLE 21218 Clifton Park 14
## policeDistrict Location.1
## 1 NORTHEASTERN 4509 BELAIR ROAD\nBaltimore, MD\n
## 2 SOUTHEASTERN 1919 FLEET ST\nBaltimore, MD\n
## 3 SOUTHEASTERN 2844 HUDSON ST\nBaltimore, MD\n
## 4 NORTHERN 3998 ROLAND AVE\nBaltimore, MD\n
## 5 SOUTHWESTERN 2481 frederick ave\nBaltimore, MD\n
## 6 NORTHEASTERN 2722 HARFORD RD\nBaltimore, MD\n
# Per-column overview: counts of the most frequent levels for factor columns
# and min / quartiles / median / mean / max for numeric columns.
summary(restData)
## name zipCode neighborhood
## MCDONALD'S : 8 Min. :-21226 Downtown :128
## POPEYES FAMOUS FRIED CHICKEN: 7 1st Qu.: 21202 Fells Point : 91
## SUBWAY : 6 Median : 21218 Inner Harbor: 89
## KENTUCKY FRIED CHICKEN : 5 Mean : 21185 Canton : 81
## BURGER KING : 4 3rd Qu.: 21226 Federal Hill: 42
## DUNKIN DONUTS : 4 Max. : 21287 Mount Vernon: 33
## (Other) :1293 (Other) :863
## councilDistrict policeDistrict
## Min. : 1.000 SOUTHEASTERN:385
## 1st Qu.: 2.000 CENTRAL :288
## Median : 9.000 SOUTHERN :213
## Mean : 7.191 NORTHERN :157
## 3rd Qu.:11.000 NORTHEASTERN: 72
## Max. :14.000 EASTERN : 67
## (Other) :145
## Location.1
## 1101 RUSSELL ST\nBaltimore, MD\n: 9
## 201 PRATT ST\nBaltimore, MD\n : 8
## 2400 BOSTON ST\nBaltimore, MD\n : 8
## 300 LIGHT ST\nBaltimore, MD\n : 5
## 300 CHARLES ST\nBaltimore, MD\n : 4
## 301 LIGHT ST\nBaltimore, MD\n : 4
## (Other) :1289
# Compact structure of the data frame: its dimensions, then the type and the
# first few values of every column.
str(restData)
## 'data.frame': 1327 obs. of 6 variables:
## $ name : Factor w/ 1277 levels "#1 CHINESE KITCHEN",..: 9 3 992 1 2 4 5 6 7 8 ...
## $ zipCode : int 21206 21231 21224 21211 21223 21218 21205 21211 21205 21231 ...
## $ neighborhood : Factor w/ 173 levels "Abell","Arlington",..: 53 52 18 66 104 33 98 133 98 157 ...
## $ councilDistrict: int 2 1 1 14 9 14 13 7 13 1 ...
## $ policeDistrict : Factor w/ 9 levels "CENTRAL","EASTERN",..: 3 6 6 4 8 3 6 4 6 6 ...
## $ Location.1 : Factor w/ 1210 levels "1 BIDDLE ST\nBaltimore, MD\n",..: 835 334 554 755 492 537 505 530 507 569 ...
# Quantiles of the council district number at the default probabilities
# (0%, 25%, 50%, 75%, 100%). The column name is spelled out in full instead of
# relying on partial matching by `$`, and TRUE is used instead of the
# reassignable shorthand T.
quantile(restData$councilDistrict, na.rm = TRUE)
## 0% 25% 50% 75% 100%
## 1 2 9 11 14
# Same column at chosen probabilities (median, upper quartile, 90th
# percentile). na.rm = TRUE matches the call above and keeps quantile() from
# stopping with an error if the column ever contains missing values.
quantile(restData$councilDistrict, probs = c(0.5, 0.75, 0.9), na.rm = TRUE)
## 50% 75% 90%
## 9 11 12
# Frequency of each zip code; useNA = "ifany" adds a count of missing values
# to the table whenever there are any.
table(restData[["zipCode"]], useNA = "ifany")
##
## -21226 21201 21202 21205 21206 21207 21208 21209 21210 21211
## 1 136 201 27 30 4 1 8 23 41
## 21212 21213 21214 21215 21216 21217 21218 21220 21222 21223
## 28 31 17 54 10 32 69 1 7 56
## 21224 21225 21226 21227 21229 21230 21231 21234 21237 21239
## 199 19 18 4 13 156 127 7 1 3
## 21251 21287
## 2 1
# Check whether there are any missing values in the data.
# Number of missing council district values.
sum(is.na(restData[["councilDistrict"]]))
## [1] 0
# TRUE if at least one council district value is missing.
any(is.na(restData[["councilDistrict"]]))
## [1] FALSE
# TRUE only if every zip code is strictly positive.
all(restData[["zipCode"]] > 0)
## [1] FALSE
# Check the entire data set for missing values.
# Missing-value count for each column: is.na() on the data frame gives a
# logical matrix, and colSums() adds up the TRUEs column by column.
colSums(is.na(restData))
## name zipCode neighborhood councilDistrict
## 0 0 0 0
## policeDistrict Location.1
## 0 0
# TRUE when no column has any missing value.
all(colSums(is.na(restData)) == 0)
## [1] TRUE
# Find values with specific characteristics.
# How many restaurants are in zip code 21212: %in% yields one TRUE/FALSE per
# row and table() tallies them.
table(restData[["zipCode"]] %in% "21212")
##
## FALSE TRUE
## 1299 28
# The same logical vector can be used to subset the rows of the data frame.
# Keep only the rows whose zip code is 21212 or 21213; the empty column index
# retains every column.
restData[restData[["zipCode"]] %in% c("21212", "21213"), ]
## name zipCode
## 29 BAY ATLANTIC CLUB 21212
## 39 BERMUDA BAR 21213
## 92 ATWATER'S 21212
## 111 BALTIMORE ESTONIAN SOCIETY 21213
## 187 CAFE ZEN 21212
## 220 CERIELLO FINE FOODS 21212
## 266 CLIFTON PARK GOLF COURSE SNACK BAR 21213
## 276 CLUB HOUSE BAR & GRILL 21213
## 289 CLUBHOUSE BAR & GRILL 21213
## 291 COCKY LOU'S 21213
## 362 DREAM TAVERN, CARRIBEAN U.S.A. 21213
## 373 DUNKIN DONUTS 21212
## 383 EASTSIDE SPORTS SOCIAL CLUB 21213
## 417 FIELDS OLD TRAIL 21212
## 475 GRAND CRU 21212
## 545 RANDY'S BAR 21213
## 604 MURPHY'S NEIGHBORHOOD BAR & GRILL 21212
## 616 NEOPOL 21212
## 620 NEW CLUB THUNDERBIRD INC. 21213
## 626 NEW MAYFIELD, INC. 21213
## 678 IKAN SEAFOOD 21212
## 711 KAY-CEE CLUB 21212
## 763 LA'RAE 21213
## 777 LEMONGRASS BALTIMORE 21213
## 779 LEN'S SANDWICH SHOP 21213
## 845 MCDONALD'S 21213
## 852 MCDONALD'S 21212
## 873 NEW REX LIQUORS,INC. 21212
## 895 OK TAVERN 21213
## 919 PANERA BREAD 21212
## 940 PEIWEI ASIAN DINER 21212
## 949 PERGUSA ENTERPRISES 21212
## 957 PHANTOM'S BAR AND GRILL 21213
## 976 POPEYES FAMOUS FRIED CHICKEN 21212
## 994 ROBBIE'S NEST 21213
## 1017 RUTLAND BAR 21213
## 1018 RYAN'S DAUGHTER 21212
## 1022 saigon remembered restaurant 21212
## 1053 SHIRLEY'S HONEY HOLE 21213
## 1120 STEEPLE CHASE II 21213
## 1122 SUBWAY 21213
## 1153 TAM-TAM 21212
## 1155 TASTE 21212
## 1159 TAYLORS EAST 21213
## 1186 THE EDGE BAR & LOUNGE 21213
## 1187 THE EDGE BAR & LOUNGE - KITCHEN AREA 21213
## 1198 THE HOLLOW BAR & GRILL 21212
## 1209 THE NEW BUCKETT'S LOUNGE 21213
## 1232 THREE ACE'S 21213
## 1246 TORAIN'S HIDE-A-WAY 21213
## 1259 TSUNAMI BALTIMORE 21213
## 1287 VITO'S PIZZA 21212
## 1298 WENDY'S OLD FASHIONED HAMBURGERS #96 21212
## 1304 WHITTEN'S (4502-04) 21213
## 1312 wozi lounge 21212
## 1319 YETI RESTAURANT & CARRYOUT 21212
## 1320 YORK CLUB TAVERN 21212
## 1323 ZEN WEST ROADSIDE CANTINA 21212
## 1325 ZINK'S CAFÂ 21213
## neighborhood councilDistrict policeDistrict
## 29 Downtown 11 CENTRAL
## 39 Broadway East 12 EASTERN
## 92 Chinquapin Park-Belvedere 4 NORTHERN
## 111 South Clifton Park 12 EASTERN
## 187 Rosebank 4 NORTHERN
## 220 Chinquapin Park-Belvedere 4 NORTHERN
## 266 Darley Park 14 NORTHEASTERN
## 276 Orangeville Industrial Area 13 EASTERN
## 289 Orangeville Industrial Area 13 EASTERN
## 291 Broadway East 12 EASTERN
## 362 Broadway East 13 EASTERN
## 373 Homeland 4 NORTHERN
## 383 Broadway East 13 EASTERN
## 417 Mid-Govans 4 NORTHERN
## 475 Chinquapin Park-Belvedere 4 NORTHERN
## 545 Broadway East 12 EASTERN
## 604 Mid-Govans 4 NORTHERN
## 616 Chinquapin Park-Belvedere 4 NORTHERN
## 620 Middle East 13 EASTERN
## 626 Belair-Edison 13 NORTHEASTERN
## 678 Chinquapin Park-Belvedere 4 NORTHERN
## 711 Homeland 4 NORTHERN
## 763 Oliver 12 EASTERN
## 777 Little Italy 1 SOUTHEASTERN
## 779 Broadway East 12 EASTERN
## 845 South Clifton Park 12 EASTERN
## 852 Radnor-Winston 4 NORTHERN
## 873 Wilson Park 4 NORTHERN
## 895 Biddle Street 13 EASTERN
## 919 Lake Walker 4 NORTHERN
## 940 Cedarcroft 4 NORTHERN
## 949 Rosebank 4 NORTHERN
## 957 Belair-Edison 3 NORTHEASTERN
## 976 Winston-Govans 4 NORTHERN
## 994 Broadway East 12 EASTERN
## 1017 Broadway East 12 EASTERN
## 1018 Chinquapin Park-Belvedere 4 NORTHERN
## 1022 Mid-Govans 4 NORTHERN
## 1053 Broadway East 13 EASTERN
## 1120 Biddle Street 13 EASTERN
## 1122 Oliver 12 EASTERN
## 1153 Mid-Govans 4 NORTHERN
## 1155 Mid-Govans 4 NORTHERN
## 1159 Berea 13 EASTERN
## 1186 Broadway East 12 EASTERN
## 1187 Broadway East 12 EASTERN
## 1198 Rosebank 4 NORTHERN
## 1209 Broadway East 13 EASTERN
## 1232 Belair-Edison 3 NORTHEASTERN
## 1246 Broadway East 12 EASTERN
## 1259 Little Italy 1 SOUTHEASTERN
## 1287 Cedarcroft 4 NORTHERN
## 1298 Homeland 4 NORTHERN
## 1304 Claremont-Freedom 13 NORTHEASTERN
## 1312 Guilford 4 NORTHERN
## 1319 Rosebank 4 NORTHERN
## 1320 Homeland 4 NORTHERN
## 1323 Rosebank 4 NORTHERN
## 1325 Belair-Edison 13 NORTHEASTERN
## Location.1
## 29 206 REDWOOD ST\nBaltimore, MD\n
## 39 1801 NORTH AVE\nBaltimore, MD\n
## 92 529 BELVEDERE AVE\nBaltimore, MD\n
## 111 1932 BELAIR RD\nBaltimore, MD\n
## 187 438 BELVEDERE AVE\nBaltimore, MD\n
## 220 529 BELVEDERE AVE\nBaltimore, MD\n
## 266 2701 ST LO DR\nBaltimore, MD\n
## 276 4217 ERDMAN AVE\nBaltimore, MD\n
## 289 4217 ERDMAN AVE\nBaltimore, MD\n
## 291 2101 NORTH AVE\nBaltimore, MD\n
## 362 2300 LAFAYETTE AVE\nBaltimore, MD\n
## 373 5422 YORK RD\nBaltimore, MD\n
## 383 1203 COLLINGTON AVE\nBaltimore, MD\n
## 417 5723 YORK RD\nBaltimore, MD\n
## 475 527 BELVEDERE AVE\nBaltimore, MD\n
## 545 2135 NORTH AVE\nBaltimore, MD\n
## 604 5847 YORK RD\nBaltimore, MD\n
## 616 529 BELVEDERE AVE\nBaltimore, MD\n
## 620 2201 CHASE ST\nBaltimore, MD\n
## 626 3349 BELAIR RD\nBaltimore, MD\n
## 678 529 BELVEDERE AVE\nBaltimore, MD\n
## 711 201 HOMELAND AVE\nBaltimore, MD\n
## 763 1000 HOFFMAN ST\nBaltimore, MD\n
## 777 1300 BANK STREET\nBaltimore, MD\n
## 779 1500 WASHINGTON ST\nBaltimore, MD\n
## 845 2001 BROADWAY\nBaltimore, MD\n
## 852 5100 YORK RD\nBaltimore, MD\n
## 873 4637 YORK RD\nBaltimore, MD\n
## 895 2301 BIDDLE ST\nBaltimore, MD\n
## 919 6307 1 2 YORK RD\nBaltimore, MD\n
## 940 6302 YORK RD\nBaltimore, MD\n
## 949 5928 YORK RD\nBaltimore, MD\n
## 957 3539 BELAIR RD\nBaltimore, MD\n
## 976 5002 YORK RD\nBaltimore, MD\n
## 994 2250 NORTH AVE\nBaltimore, MD\n
## 1017 1508 RUTLAND AVE\nBaltimore, MD\n
## 1018 600 BELVEDERE AVE\nBaltimore, MD\n
## 1022 5857 york rd\nBaltimore, MD\n
## 1053 2300 OLIVER ST\nBaltimore, MD\n
## 1120 2401 CHASE ST\nBaltimore, MD\n
## 1122 1400 NORTH AVE\nBaltimore, MD\n
## 1153 5722 YORK RD\nBaltimore, MD\n
## 1155 510 BELVEDERE AVE\nBaltimore, MD\n
## 1159 1201 POTOMAC ST\nBaltimore, MD\n
## 1186 2015 FEDERAL ST\nBaltimore, MD\n
## 1187 2015 FEDERAL ST\nBaltimore, MD\n
## 1198 5921 YORK RD\nBaltimore, MD\n
## 1209 1432 CHESTER ST\nBaltimore, MD\n
## 1232 3534 belair RD\nBaltimore, MD\n
## 1246 1701 ELLSWORTH ST\nBaltimore, MD\n
## 1259 1300 BANK ST\nBaltimore, MD\n
## 1287 6304 YORK RD\nBaltimore, MD\n
## 1298 5615 YORK RD\nBaltimore, MD\n
## 1304 4502 ERDMAN AVE\nBaltimore, MD\n
## 1312 4515 YORK RD\nBaltimore, MD\n
## 1319 5926 YORK RD\nBaltimore, MD\n
## 1320 5407 YORK RD\nBaltimore, MD\n
## 1323 5916 YORK RD\nBaltimore, MD\n
## 1325 3300 LAWNVIEW AVE\nBaltimore, MD\n
# Load the built-in UCBAdmissions table and flatten it into a data frame with
# one row per Admit / Gender / Dept combination and its frequency.
data("UCBAdmissions")
DF <- as.data.frame(UCBAdmissions)
summary(DF)
## Admit Gender Dept Freq
## Admitted:12 Male :12 A:4 Min. : 8.0
## Rejected:12 Female:12 B:4 1st Qu.: 80.0
## C:4 Median :170.0
## D:4 Mean :188.6
## E:4 3rd Qu.:302.5
## F:4 Max. :512.0
# Cross-tabulate: total Freq for each Gender (rows) by Admit (columns),
# summed over the departments.
xt <- xtabs(Freq ~ Gender + Admit, data = DF)
xt
## Admit
## Gender Admitted Rejected
## Male 1198 1493
## Female 557 1278
# Sequence from 1 to 10 in steps of 2.
s1 <- seq(1, 10, by = 2)
s1
## [1] 1 3 5 7 9
# Four equally spaced values from 1 to 10. The argument name length.out is
# written in full instead of the partially matched `length`.
s2 <- seq(1, 10, length.out = 4)
s2
## [1] 1 4 7 10
# Index vector for x (one index per element). seq_along() replaces the
# partially matched seq(along = x).
x <- c(1, 2, 8, 25, 100)
seq_along(x)
## [1] 1 2 3 4 5
# Let us use this knowledge to find the restaurants in the neighborhood.
# Flag restaurants located in either of the two listed neighborhoods and store
# the flag as a new logical column.
restData[["nearMe"]] <-
  restData[["neighborhood"]] %in% c("Roland Park", "Homeland")
# Tally how many restaurants are flagged (TRUE) and not flagged (FALSE).
table(restData[["nearMe"]])
##
## FALSE TRUE
## 1314 13
# Let's create a binary variable.
# zipWrong is TRUE for rows whose zip code is negative. The comparison already
# yields a logical vector, so it is stored directly.
restData$zipWrong <- restData$zipCode < 0
# Cross-check the new column against the condition it was built from.
table(restData$zipWrong, restData$zipCode < 0)
##
## FALSE TRUE
## FALSE 1326 0
## TRUE 0 1