For this project, I was interested in testing whether there is a relationship between the perceived health of neighborhood trees and average rent prices. The tree data came from the City of New York, and the rent prices were scraped from apartments.com.
# Load tree data: open a connection to the NYC Open Data JSON endpoint,
# parse what it returns, and preview the first six rows.
url <- curl("https://data.cityofnewyork.us/resource/uvpi-gqnh.json")
tree_df <- fromJSON(url)
head(tree_df, n = 6L)
## address bbl bin block_id boro_ct borocode boroname
## 1 108-005 70 AVENUE 4022210001 4052307 348711 4073900 4 Queens
## 2 147-074 7 AVENUE 4044750045 4101931 315986 4097300 4 Queens
## 3 390 MORGAN AVENUE 3028870001 3338310 218365 3044900 3 Brooklyn
## 4 1027 GRAND STREET 3029250001 3338342 217969 3044900 3 Brooklyn
## 5 603 6 STREET 3010850052 3025654 223043 3016500 3 Brooklyn
## 6 8 COLUMBUS AVENUE 1011310031 1076229 106099 1014500 1 Manhattan
## brch_light brch_other brch_shoe cb_num census_tract cncldist
## 1 No No No 406 739 29
## 2 No No No 407 973 19
## 3 No No No 301 449 34
## 4 No No No 301 449 34
## 5 No No No 306 165 39
## 6 No No No 107 145 3
## council_district created_at curb_loc guards health
## 1 29 2015-08-27T00:00:00.000 OnCurb None Fair
## 2 19 2015-09-03T00:00:00.000 OnCurb None Fair
## 3 34 2015-09-05T00:00:00.000 OnCurb None Good
## 4 34 2015-09-05T00:00:00.000 OnCurb None Good
## 5 39 2015-08-30T00:00:00.000 OnCurb None Good
## 6 3 2015-08-30T00:00:00.000 OnCurb Helpful Good
## latitude longitude nta nta_name problems root_grate
## 1 40.72309177 -73.84421522 QN17 Forest Hills None No
## 2 40.79411067 -73.81867946 QN49 Whitestone Stones No
## 3 40.71758074 -73.9366077 BK90 East Williamsburg None No
## 4 40.71353749 -73.93445616 BK90 East Williamsburg Stones No
## 5 40.66677776 -73.97597938 BK37 Park Slope-Gowanus Stones No
## 6 40.77004563 -73.98494997 MN14 Lincoln Square None No
## root_other root_stone sidewalk spc_common
## 1 No No NoDamage red maple
## 2 No Yes Damage pin oak
## 3 No No Damage honeylocust
## 4 No Yes Damage honeylocust
## 5 No Yes Damage American linden
## 6 No No NoDamage honeylocust
## spc_latin st_assem st_senate state status
## 1 Acer rubrum 28 16 New York Alive
## 2 Quercus palustris 27 11 New York Alive
## 3 Gleditsia triacanthos var. inermis 50 18 New York Alive
## 4 Gleditsia triacanthos var. inermis 53 18 New York Alive
## 5 Tilia americana 44 21 New York Alive
## 6 Gleditsia triacanthos var. inermis 67 27 New York Alive
## steward stump_diam tree_dbh tree_id trnk_light trnk_other trunk_wire
## 1 None 0 3 180683 No No No
## 2 None 0 21 200540 No No No
## 3 1or2 0 3 204026 No No No
## 4 None 0 10 204337 No No No
## 5 None 0 21 189565 No No No
## 6 1or2 0 11 190422 No No No
## user_type x_sp y_sp zip_city zipcode
## 1 TreesCount Staff 1027431.148 202756.7687 Forest Hills 11375
## 2 TreesCount Staff 1034455.701 228644.8374 Whitestone 11357
## 3 Volunteer 1001822.831 200716.8913 Brooklyn 11211
## 4 Volunteer 1002420.358 199244.2531 Brooklyn 11211
## 5 Volunteer 990913.775 182202.426 Brooklyn 11215
## 6 Volunteer 988418.6997 219825.5227 New York 10023
# Scrape the neighborhood / ZIP code table from the health.ny.gov page and
# convert it to a plain data frame.
link <- "https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm"
zip_table <- link %>%
  read_html() %>%
  html_nodes("table") %>%
  html_table(fill = TRUE) %>%
  data.frame(stringsAsFactors = FALSE)

# Add UHF42 neighborhood code (NCode) column.
# NOTE(review): the codes are assigned purely by row position, so they are
# only correct while the scraped table keeps the row order assumed here --
# confirm against the live page.
zip_table$NCode <- c(
  105, 103, 106, 107, 101, 102, 104,
  203, 209, 206, 208, 210, 202, 207, 204, 201, 205, 211,
  302, 306, 303, 307, 308, 310, 309, 305, 304, 301,
  404, 403, 406, 408, 401, 405, 410, 409, 407, 402,
  501, 503, 502, 504
)

# Re-arrange columns so that NCode becomes the second column.
zip_table <- zip_table[, c(1, 4, 2, 3)]

# Styled, scrollable rendering of the table for the report.
zip_uhf <- kable(zip_table) %>%
  kable_styling(font_size = 10) %>%
  scroll_box(height = "500px", width = "350px")
### Bronx
# Borough-wide sample: listing pages 1-20. For every page, take the text of
# the "#placards" element(s) and extract each price-like token; the pooled
# tokens are then converted to numbers.
# NOTE(review): the pattern is "$", a digit, any one character, then digits,
# so a price such as "$10,500" is not matched -- confirm this is acceptable.
bronx_url <- paste0("https://www.apartments.com/bronx-ny/", 1:20)
bronx <- lapply(bronx_url, function(page_url) {
  placard_text <- html_text(html_nodes(read_html(page_url), "#placards"))
  str_extract_all(placard_text, "\\$\\d.\\d+")
})
bronx <- parse_number(unlist(bronx))

# Neighborhood listing pages, one per Bronx rent variable.
# NOTE(review): Bxurl4 and Bxurl6 hold the same Mount Hope address as Bxurl1,
# so huntspoint and north_eastbx are computed from the same page as
# central_bronx -- confirm the intended neighborhood pages.
Bxurl1 <- "https://www.apartments.com/mount-hope-bronx-ny/"
Bxurl2 <- "https://www.apartments.com/fordham-heights-bronx-ny/"
Bxurl3 <- "https://www.apartments.com/highbridge-bronx-ny/"
Bxurl4 <- "https://www.apartments.com/mount-hope-bronx-ny/"
Bxurl5 <- "https://www.apartments.com/kingsbridge-bronx-ny/"
Bxurl6 <- "https://www.apartments.com/mount-hope-bronx-ny/"
Bxurl7 <- "https://www.apartments.com/parkchester-bronx-ny/"

bronx_pages <- c(
  central_bronx = Bxurl1, fordham = Bxurl2, high_bridge = Bxurl3,
  huntspoint = Bxurl4, kingsbridge = Bxurl5, north_eastbx = Bxurl6,
  south_eastbx = Bxurl7
)

# Mean of the price tokens found in the "#placards" element of each page.
bronx_means <- vapply(bronx_pages, function(page_url) {
  price_tokens <- page_url %>%
    read_html() %>%
    html_node("#placards") %>%
    html_text() %>%
    str_extract_all("\\$\\d.\\d+") %>%
    unlist()
  mean(parse_number(price_tokens))
}, numeric(1))

central_bronx <- bronx_means[["central_bronx"]]
fordham <- bronx_means[["fordham"]]
high_bridge <- bronx_means[["high_bridge"]]
huntspoint <- bronx_means[["huntspoint"]]
kingsbridge <- bronx_means[["kingsbridge"]]
north_eastbx <- bronx_means[["north_eastbx"]]
south_eastbx <- bronx_means[["south_eastbx"]]
####Brooklyn
# Borough-wide sample: listing pages 1-20. For every page, take the text of
# the "#placards" element(s) and extract each price-like token; the pooled
# tokens are then converted to numbers.
# NOTE(review): the pattern is "$", a digit, any one character, then digits,
# so a price such as "$10,500" is not matched -- confirm this is acceptable.
brooklyn_url <- paste0("https://www.apartments.com/brooklyn-ny/", 1:20)
brooklyn <- lapply(brooklyn_url, function(page_url) {
  placard_text <- html_text(html_nodes(read_html(page_url), "#placards"))
  str_extract_all(placard_text, "\\$\\d.\\d+")
})
brooklyn <- parse_number(unlist(brooklyn))

# Neighborhood listing pages, one per Brooklyn rent variable.
Burl1 <- "https://www.apartments.com/crown-heights-brooklyn-ny/"
Burl2 <- "https://www.apartments.com/southwest-brooklyn-brooklyn-ny/"
Burl3 <- "https://www.apartments.com/borough-park-brooklyn-ny/"
Burl4 <- "https://www.apartments.com/flatlands-brooklyn-ny/"
Burl5 <- "https://www.apartments.com/coney-island-brooklyn-ny/"
Burl6 <- "https://www.apartments.com/park-slope-brooklyn-ny/?bb=62kx98lnvHihhqwB"
Burl7 <- "https://www.apartments.com/flatbush-brooklyn-ny/"
Burl8 <- "https://www.apartments.com/east-new-york-brooklyn-ny/"
Burl9 <- "https://www.apartments.com/greenpoint-brooklyn-ny/"
Burl10 <- "https://www.apartments.com/sunset-park-brooklyn-ny/"
Burl11 <- "https://www.apartments.com/bushwick-brooklyn-ny/"

brooklyn_pages <- c(
  central_brooklyn = Burl1, sw_brooklyn = Burl2, borough_park = Burl3,
  canarsie_flatlands = Burl4, south_brooklyn = Burl5, nw_brooklyn = Burl6,
  flatbush = Burl7, east_ny = Burl8, greenpoint = Burl9,
  sunset_park = Burl10, bushwick = Burl11
)

# Mean of the price tokens found in the "#placards" element of each page.
brooklyn_means <- vapply(brooklyn_pages, function(page_url) {
  price_tokens <- page_url %>%
    read_html() %>%
    html_node("#placards") %>%
    html_text() %>%
    str_extract_all("\\$\\d.\\d+") %>%
    unlist()
  mean(parse_number(price_tokens))
}, numeric(1))

central_brooklyn <- brooklyn_means[["central_brooklyn"]]
sw_brooklyn <- brooklyn_means[["sw_brooklyn"]]
borough_park <- brooklyn_means[["borough_park"]]
canarsie_flatlands <- brooklyn_means[["canarsie_flatlands"]]
south_brooklyn <- brooklyn_means[["south_brooklyn"]]
nw_brooklyn <- brooklyn_means[["nw_brooklyn"]]
flatbush <- brooklyn_means[["flatbush"]]
east_ny <- brooklyn_means[["east_ny"]]
greenpoint <- brooklyn_means[["greenpoint"]]
sunset_park <- brooklyn_means[["sunset_park"]]
bushwick <- brooklyn_means[["bushwick"]]
#### Manhattan
# Borough-wide sample: listing pages 1-20. For every page, take the text of
# the "#placards" element(s) and extract each price-like token; the pooled
# tokens are then converted to numbers.
# NOTE(review): the pattern is "$", a digit, any one character, then digits,
# so a price such as "$10,500" is not matched -- confirm this is acceptable.
manhattan_url <- paste0("https://www.apartments.com/manhattan-ny/", 1:20)
manhattan <- lapply(manhattan_url, function(page_url) {
  placard_text <- html_text(html_nodes(read_html(page_url), "#placards"))
  str_extract_all(placard_text, "\\$\\d.\\d+")
})
manhattan <- parse_number(unlist(manhattan))

# Neighborhood listing pages, one per Manhattan rent variable.
Murl1 <- "https://www.apartments.com/central-harlem-new-york-ny/"
Murl2 <- "https://www.apartments.com/chelsea-new-york-ny/"
Murl3 <- "https://www.apartments.com/east-harlem-new-york-ny/"
Murl4 <- "https://www.apartments.com/gramercy-park-new-york-ny/"
Murl5 <- "https://www.apartments.com/greenwich-village-new-york-ny/"
Murl6 <- "https://www.apartments.com/lower-manhattan-new-york-ny/"
Murl7 <- "https://www.apartments.com/lower-east-side-new-york-ny/"
Murl8 <- "https://www.apartments.com/upper-east-side-new-york-ny/"
Murl9 <- "https://www.apartments.com/upper-west-side-new-york-ny/"
Murl10 <- "https://www.apartments.com/washington-heights-new-york-ny/"

manhattan_pages <- c(
  central_harlem = Murl1, chelsea_clinton = Murl2, east_harlem = Murl3,
  gramercy_murray = Murl4, greenwich_soho = Murl5, lower_manhattan = Murl6,
  lower_east = Murl7, upper_east = Murl8, upper_west = Murl9,
  inwood_washington = Murl10
)

# Mean of the price tokens found in the "#placards" element of each page.
manhattan_means <- vapply(manhattan_pages, function(page_url) {
  price_tokens <- page_url %>%
    read_html() %>%
    html_node("#placards") %>%
    html_text() %>%
    str_extract_all("\\$\\d.\\d+") %>%
    unlist()
  mean(parse_number(price_tokens))
}, numeric(1))

central_harlem <- manhattan_means[["central_harlem"]]
chelsea_clinton <- manhattan_means[["chelsea_clinton"]]
east_harlem <- manhattan_means[["east_harlem"]]
gramercy_murray <- manhattan_means[["gramercy_murray"]]
greenwich_soho <- manhattan_means[["greenwich_soho"]]
lower_manhattan <- manhattan_means[["lower_manhattan"]]
lower_east <- manhattan_means[["lower_east"]]
upper_east <- manhattan_means[["upper_east"]]
upper_west <- manhattan_means[["upper_west"]]
inwood_washington <- manhattan_means[["inwood_washington"]]
### Queens
# Borough-wide sample: listing pages 1-20. For every page, take the text of
# the "#placards" element(s) and extract each price-like token; the pooled
# tokens are then converted to numbers.
# NOTE(review): the pattern is "$", a digit, any one character, then digits,
# so a price such as "$10,500" is not matched -- confirm this is acceptable.
queens_url <- paste0("https://www.apartments.com/queens-ny/", 1:20)
queens <- lapply(queens_url, function(page_url) {
  placard_text <- html_text(html_nodes(read_html(page_url), "#placards"))
  str_extract_all(placard_text, "\\$\\d.\\d+")
})
queens <- parse_number(unlist(queens))

# Neighborhood listing pages, one per Queens rent variable.
# NOTE(review): Qurl10 holds the same address as Qurl1, so west is computed
# from the same page as north_east -- confirm the intended page for west.
Qurl1 <- "https://www.apartments.com/northeast-queens-flushing-ny/"
Qurl2 <- "https://www.apartments.com/bayside-ny/"
Qurl3 <- "https://www.apartments.com/central-queens-queens-ny/"
Qurl4 <- "https://www.apartments.com/southeast-queens-jamaica-ny/"
Qurl5 <- "https://www.apartments.com/long-island-city-ny/"
Qurl6 <- "https://www.apartments.com/forest-hills-ny/"
Qurl7 <- "https://www.apartments.com/south-shore-queens-far-rockaway-ny/"
Qurl8 <- "https://www.apartments.com/hollis-ny/"
Qurl9 <- "https://www.apartments.com/howard-beach-ny/"
Qurl10 <- "https://www.apartments.com/northeast-queens-flushing-ny/"

queens_pages <- c(
  north_east = Qurl1, north = Qurl2, central = Qurl3, jamaica = Qurl4,
  north_west = Qurl5, west_central = Qurl6, rockaway = Qurl7,
  south_east = Qurl8, south_west = Qurl9, west = Qurl10
)

# Mean of the price tokens found in the "#placards" element of each page.
queens_means <- vapply(queens_pages, function(page_url) {
  price_tokens <- page_url %>%
    read_html() %>%
    html_node("#placards") %>%
    html_text() %>%
    str_extract_all("\\$\\d.\\d+") %>%
    unlist()
  mean(parse_number(price_tokens))
}, numeric(1))

north_east <- queens_means[["north_east"]]
north <- queens_means[["north"]]
central <- queens_means[["central"]]
jamaica <- queens_means[["jamaica"]]
north_west <- queens_means[["north_west"]]
west_central <- queens_means[["west_central"]]
rockaway <- queens_means[["rockaway"]]
south_east <- queens_means[["south_east"]]
south_west <- queens_means[["south_west"]]
west <- queens_means[["west"]]
#### Staten Island
# Borough-wide sample: listing pages 1-20. For every page, take the text of
# the "#placards" element(s) and extract each price-like token; the pooled
# tokens are then converted to numbers.
# NOTE(review): the pattern is "$", a digit, any one character, then digits,
# so a price such as "$10,500" is not matched -- confirm this is acceptable.
SI_url <- paste0("https://www.apartments.com/staten-island-ny/", 1:20)
Staten_Island <- lapply(SI_url, function(page_url) {
  placard_text <- html_text(html_nodes(read_html(page_url), "#placards"))
  str_extract_all(placard_text, "\\$\\d.\\d+")
})
Staten_Island <- parse_number(unlist(Staten_Island))

# Neighborhood / ZIP listing pages, one per Staten Island rent variable.
Surl1 <- "https://www.apartments.com/port-richmond-staten-island-ny/"
Surl2 <- "https://www.apartments.com/stapleton-staten-island-ny/"
Surl3 <- "https://www.apartments.com/staten-island-ny-10306/"
Surl4 <- "https://www.apartments.com/staten-island-ny-10314/"

staten_island_pages <- c(
  port_richmond = Surl1, stapleton = Surl2,
  south_shore = Surl3, mid_island = Surl4
)

# Mean of the price tokens found in the "#placards" element of each page.
staten_island_means <- vapply(staten_island_pages, function(page_url) {
  price_tokens <- page_url %>%
    read_html() %>%
    html_node("#placards") %>%
    html_text() %>%
    str_extract_all("\\$\\d.\\d+") %>%
    unlist()
  mean(parse_number(price_tokens))
}, numeric(1))

port_richmond <- staten_island_means[["port_richmond"]]
stapleton <- staten_island_means[["stapleton"]]
south_shore <- staten_island_means[["south_shore"]]
mid_island <- staten_island_means[["mid_island"]]
# Read the UHF42 reference PDF and collect the distinct three-digit numbers
# it contains, sorted ascending, as the UHF codes.
file <- 'http://a816-dohbesp.nyc.gov/IndicatorPublic/EPHTPDF/uhf42.pdf'
text <- pdf_text(file)
text2 <- strsplit(text, "\n")
# strsplit() returns a list (one element per string from pdf_text()).
# Flatten it to a character vector first so str_extract_all() gets an atomic
# vector instead of coercing the list with a warning.
uhf42_codes <- text2 %>%
  unlist() %>%
  str_extract_all(pattern = "\\d{3}") %>%
  unlist() %>%
  parse_number() %>%
  unique() %>%
  sort()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# UHF42 neighborhood names, typed by hand in ascending code order so that
# they line up positionally with the sorted uhf42_codes.
uhf42_names <- c(
  "Kingsbridge - Riverdale", "Northeast Bronx", "Fordham - Bronx Park",
  "Pelham - Throgs Neck", "Crotona - Tremont", "High Bridge - Morrisania",
  "Hunts Point - Mott Haven", "Greenpoint", "Downtown - Heights - Slope",
  "Bedford Stuyvesant - Crown Heights", "East New York", "Sunset Park",
  "Borough Park", "East Flatbush - Flatbush", "Canarsie - Flatlands",
  "Bensonhurst - Bay Ridge", "Coney Island - Sheepshead Bay",
  "Williamsburg - Bushwick", "Washington Heights - Inwood",
  "Central Harlem - Morningside Heights", "East Harlem", "Upper West Side",
  "Upper East Side", "Chelsea - Clinton", "Gramercy Park - Murray Hill",
  "Greenwich Village - Soho", "Union Square - Lower East Side",
  "Lower Manhattan", "Long Island City - Astoria", "West Queens",
  "Flushing - Clearview", "Bayside - Little Neck", "Ridgewood - Forest Hills",
  "Fresh Meadows", "Southwest Queens", "Jamaica", "Southeast Queens",
  "Rockaway", "Port Richmond", "Stapleton - St. George", "Willowbrook",
  "South Beach - Tottenville"
)
# Earlier attempt at parsing the names out of the PDF text, kept for reference:
#uhf42_names <- gsub("NYC UHF 42 Neighborhoods", "", text2)
#uhf42_names <- gsub("UHF Neighborhood Name", "", uhf42_names)
#uhf42_names <- gsub("UHF Code", "", uhf42_names)
#uhf42_names <- gsub("Neighborhood Definition:.+", "", uhf42_names)
#uhf42_names <- gsub("\\d{3}", "", uhf42_names)
#uhf42_names <- unlist(str_extract_all(uhf42_names, "(\\s[A-Za-z]+.\\-?.[A-Za-z]+.\\-?.[A-Za-z]+.[A-Za-z]+.\\-?.[A-Za-z]+| Jamaica| Rockaway| Sunset Park)"))
#uhf42_names <- gsub("\\\\r", "", uhf42_names)

# data.frame() silently recycles the shorter column when the lengths divide
# evenly, so fail loudly if the PDF produced an unexpected number of codes.
stopifnot(length(uhf42_codes) == length(uhf42_names))
neighborhood <- data.frame(
  Code = uhf42_codes,
  Location = uhf42_names,
  stringsAsFactors = FALSE
)
kable(neighborhood) %>%
  kable_styling(font_size = 12) %>%
  scroll_box(height = "500px", width = "300px")
| Code | Location |
|---|---|
| 101 | Kingsbridge - Riverdale |
| 102 | Northeast Bronx |
| 103 | Fordham - Bronx Park |
| 104 | Pelham - Throgs Neck |
| 105 | Crotona - Tremont |
| 106 | High Bridge - Morrisania |
| 107 | Hunts Point - Mott Haven |
| 201 | Greenpoint |
| 202 | Downtown - Heights - Slope |
| 203 | Bedford Stuyvesant - Crown Heights |
| 204 | East New York |
| 205 | Sunset Park |
| 206 | Borough Park |
| 207 | East Flatbush - Flatbush |
| 208 | Canarsie - Flatlands |
| 209 | Bensonhurst - Bay Ridge |
| 210 | Coney Island - Sheepshead Bay |
| 211 | Williamsburg - Bushwick |
| 301 | Washington Heights - Inwood |
| 302 | Central Harlem - Morningside Heights |
| 303 | East Harlem |
| 304 | Upper West Side |
| 305 | Upper East Side |
| 306 | Chelsea - Clinton |
| 307 | Gramercy Park - Murray Hill |
| 308 | Greenwich Village - Soho |
| 309 | Union Square - Lower East Side |
| 310 | Lower Manhattan |
| 401 | Long Island City - Astoria |
| 402 | West Queens |
| 403 | Flushing - Clearview |
| 404 | Bayside - Little Neck |
| 405 | Ridgewood - Forest Hills |
| 406 | Fresh Meadows |
| 407 | Southwest Queens |
| 408 | Jamaica |
| 409 | Southeast Queens |
| 410 | Rockaway |
| 501 | Port Richmond |
| 502 | Stapleton - St. George |
| 503 | Willowbrook |
| 504 | South Beach - Tottenville |
#write.csv(neighborhood, file = "Neighborhood.csv", row.names = F)
# Average scraped rent per neighborhood, listed in the row order of zip_table.
# NOTE(review): the pairing with zip_table$NCode below is purely positional.
# south_shore is scraped from the ZIP 10306 page, yet the hand-entered
# zipcode/averagerent vectors further down pair 10306 with the second
# Staten Island value (stapleton's position here) -- confirm that stapleton
# and south_shore are in the right order.
Rent_prices <- c(
  central_bronx, fordham, high_bridge, huntspoint, kingsbridge,
  north_eastbx, south_eastbx,
  central_brooklyn, sw_brooklyn, borough_park, canarsie_flatlands,
  south_brooklyn, nw_brooklyn, flatbush, east_ny, greenpoint, sunset_park,
  bushwick,
  central_harlem, chelsea_clinton, east_harlem, gramercy_murray,
  greenwich_soho, lower_manhattan, lower_east, upper_east, upper_west,
  inwood_washington,
  north_east, north, central, jamaica, north_west, west_central, rockaway,
  south_east, south_west, west,
  port_richmond, stapleton, south_shore, mid_island
)
# data.frame() silently recycles the shorter column when the lengths divide
# evenly, so require one rent value per zip_table row.
stopifnot(length(Rent_prices) == nrow(zip_table))
rent <- data.frame(
  UHF42Code = zip_table$NCode,
  avg_Rentprice = Rent_prices,
  stringsAsFactors = FALSE
)
#write.csv(rent, file = "Rent.csv", row.names = F) #store file
kable(rent) %>%
  kable_styling(font_size = 12) %>%
  scroll_box(height = "500px", width = "250px")
| UHF42Code | avg_Rentprice |
|---|---|
| 105 | 1683.625 |
| 103 | 1680.000 |
| 106 | 1934.773 |
| 107 | 1683.625 |
| 101 | 2225.600 |
| 102 | 1683.625 |
| 104 | 1642.143 |
| 203 | 2207.000 |
| 209 | 2016.880 |
| 206 | 2053.000 |
| 208 | 2085.320 |
| 210 | 2195.826 |
| 202 | 2788.500 |
| 207 | 2122.320 |
| 204 | 2146.417 |
| 201 | 2776.640 |
| 205 | 2134.375 |
| 211 | 2036.375 |
| 302 | 2723.480 |
| 306 | 3484.120 |
| 303 | 2302.400 |
| 307 | 3719.400 |
| 308 | 4063.600 |
| 310 | 3655.500 |
| 309 | 3650.600 |
| 305 | 3356.875 |
| 304 | 3924.200 |
| 301 | 2258.958 |
| 404 | 2148.960 |
| 403 | 2427.500 |
| 406 | 2129.042 |
| 408 | 1916.040 |
| 401 | 2540.462 |
| 405 | 2172.000 |
| 410 | 1989.957 |
| 409 | 2458.192 |
| 407 | 2166.818 |
| 402 | 2148.960 |
| 501 | 1800.000 |
| 503 | 1850.000 |
| 502 | 2148.824 |
| 504 | 2427.500 |
# Rent_prices and rent are already built earlier in the document from the
# same inputs, so they are reused here instead of being rebuilt verbatim.
# Styled, scrollable rendering of the rent table, stored for later use.
rent_uhf <- kable(rent) %>%
  kable_styling(font_size = 12) %>%
  scroll_box(height = "500px", width = "250px")
#joining average rent with zipcodes
ziprent <- inner_join(zip_table, rent, by = c("NCode" = "UHF42Code"))
class(zip_uhf)
## [1] "kableExtra" "knitr_kable"
# Drop the first and third columns of the joined table in one step (the same
# columns removed by dropping column 1 and then column 2 of what remains).
# NOTE: this ziprent is replaced further down by a table built from
# hand-entered zipcode / averagerent vectors.
ziprent <- ziprent[, -c(1, 3)]
# Hand-entered ZIP codes. They are paired positionally with averagerent below
# when the two vectors are combined into a data frame, so the order of the
# two vectors must stay in step.
zipcode <- c(10453, 10457, 10460, 10458, 10467, 10468, 10451, 10452, 10456, 10454, 10455, 10459, 10474, 10463, 10471, 10466, 10469, 10470, 10475, 10461, 10462,10464, 10465, 10472, 10473, 11212, 11213, 11216, 11233, 11238, 11209, 11214, 11228, 11204, 11218, 11219, 11230, 11234, 11236, 11239, 11223, 11224, 11229, 11235, 11201, 11205, 11215, 11217, 11231, 11203, 11210, 11225, 11226, 11207, 11208, 11211, 11222, 11220, 11232, 11206, 11221, 11237, 10026, 10027, 10030, 10037, 10039, 10001, 10011, 10018, 10019, 10020, 10036, 10029, 10035, 10010, 10016, 10017, 10022, 10012, 10013, 10014, 10004, 10005, 10006, 10007, 10038, 10280, 10002, 10003, 10009, 10021, 10028, 10044, 10065, 10075, 10128, 10023, 10024, 10025, 10031, 10032, 10033, 10034, 10040, 11361, 11362, 11363, 11364, 11354, 11355, 11356, 11357, 11358, 11359, 11360, 11365, 11366, 11367, 11412, 11423, 11432, 11433, 11434, 11435, 11436, 11101, 11102, 11103, 11104, 11105, 11106, 11374, 11375, 11379, 11385, 11691, 11692, 11693, 11694, 11695, 11697, 11004, 11005, 11411, 11413, 11422, 11426, 11427, 11428, 11429, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11368, 11369, 11370, 11372, 11373, 11377, 11378, 10302, 10303, 10310, 10306, 10307, 10308, 10309, 10312, 10301, 10304, 10305, 10314)
# Hand-entered average rent for each ZIP code above (same value repeated for
# the ZIP codes of one neighborhood).
# NOTE(review): several values here (e.g. 3212.783, 2874.522) do not appear in
# the printed scraped rent table -- presumably they come from an earlier
# scrape; confirm which figures the analysis should use.
averagerent <- c(1683.625, 1683.625, 1683.625, 1680.000, 1680.000, 1680.000, 1934.773, 1934.773, 1934.773, 1683.625, 1683.625, 1683.625, 1683.625, 2195.600, 2195.600, 1683.625, 1683.625, 1683.625, 1683.625, 1642.143, 1642.143, 1642.143, 1642.143, 1642.143, 1642.143, 2207.000, 2207.000, 2207.000, 2207.000, 2207.000, 1996.880, 1996.880, 1996.880, 2193.400, 2193.400, 2193.400, 2193.400, 2210.360, 2210.360, 2210.360, 2195.826, 2195.826, 2195.826, 2195.826, 3212.783, 3212.783, 3212.783, 3212.783, 3212.783, 2112.320, 2112.320, 2112.320, 2112.320, 2146.417, 2146.417, 2874.522, 2874.522, 2088.720, 2088.720, 2099.917, 2099.917, 2099.917, 2723.480, 2723.480, 2723.480, 2723.480, 2723.480, 3484.120, 3484.120, 3484.120, 3484.120, 3484.120, 3484.120, 2302.400, 2302.400, 3719.400, 3719.400, 3719.400, 3719.400, 4063.600, 4063.600, 4063.600, 3616.083, 3616.083, 3616.083, 3616.083, 3616.083, 3616.083, 3650.600, 3650.600, 3650.600, 3256.875, 3256.875, 3256.875, 3256.875, 3256.875, 3256.875, 3924.200, 3924.200, 3924.200, 2258.958, 2258.958, 2258.958, 2258.958, 2258.958, 2148.960, 2148.960, 2148.960, 2148.960, 2149.615, 2149.615, 2149.615, 2149.615, 2149.615, 2149.615, 2149.615, 2110.292, 2110.292, 2110.292, 1880.040, 1880.040, 1880.040, 1880.040, 1880.040, 1880.040, 1880.040, 2526.000, 2526.000, 2526.000, 2526.000, 2526.000, 2526.000, 2172.000, 2172.000, 2172.000, 2172.000, 1891.625, 1891.625, 1891.625, 1891.625, 1891.625, 1891.625, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2148.960, 2148.960, 2148.960, 2148.960, 2148.960, 2148.960, 2148.960, 1800.000, 1800.000, 1800.000, 1850.000, 1850.000, 1850.000, 1850.000, 1850.000, 2148.824, 2148.824, 2148.824, 2427.500)
# Build the ZIP-level rent table. ZIP codes are kept as character strings
# (stringsAsFactors = FALSE prevents factor conversion on R < 4.0), so no
# second conversion or re-wrapping in data.frame() is needed.
zipcode <- as.character(zipcode)
ziprent <- data.frame(zipcode, averagerent, stringsAsFactors = FALSE)
# Joining average rent with the tree data frame: both key columns must be
# character for the join, and only trees whose ZIP code has a rent value
# are kept.
tree_df$zipcode <- as.character(tree_df$zipcode)
tree_rent <- inner_join(tree_df, ziprent, by = "zipcode")
# Box plot of neighborhood average rent for each tree health category.
ggplot(tree_rent, aes(x = factor(health), y = averagerent)) +
  geom_boxplot(aes(fill = health)) +
  xlab("")
#plot(air_rent2$avg_Rentprice ~ air_rent2$Measurement)
#obs <- lm(air_rent2$avg_Rentprice ~ air_rent2$Measurement)
#summary(obs)
#abline(obs)
# Mean and standard deviation of average rent within each health category,
# rounded to whole dollars.
treehealth <- tree_rent %>% group_by(health)
treehealth2 <- treehealth %>%
  summarise(
    mean = round(mean(averagerent), digits = 0),
    stdev = round(sd(averagerent), digits = 0)
  )
knitr::kable(treehealth2)
| health | mean | stdev |
|---|---|---|
| Fair | 2390 | 660 |
| Good | 2585 | 698 |
| Poor | 2320 | 596 |
| NA | 2293 | 593 |
Next, I want to check whether there is a statistically significant difference in average rent price among the tree health groups. My null hypothesis is that there is no difference. I treat the conditions for a one-way ANOVA as met: each tree is assumed to be independent of the others, and there are well over 50 trees, so we don’t need to worry about the shape of the distribution. Trees with a missing health rating are dropped from the test, so it compares the three rated groups. \(H_0:\mu_{fair} = \mu_{good}=\mu_{poor}\) \(H_1:\text{at least one of } \mu_{fair},\ \mu_{good},\ \mu_{poor} \text{ differs from the others}\)
# One-way ANOVA of neighborhood average rent across tree health categories.
# Rows with a missing health value are dropped by aov() (reported as
# "observations deleted due to missingness" in the summary output).
# NOTE(review): the object name anova masks stats::anova() in this session.
anova <- aov(tree_rent$averagerent ~ tree_rent$health)
# Print the ANOVA table (Df, sums of squares, F value, p-value).
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## tree_rent$health 2 8111833 4055916 8.631 0.000193 ***
## Residuals 963 452525813 469913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 27 observations deleted due to missingness
Since the p-value is well below .05, we reject the null hypothesis. There is sufficient evidence that average rent differs across tree health categories, i.e. that there appears to be a relationship between the average rent price of a neighborhood and the health of its trees.
In conclusion, the group means (Good > Fair > Poor) suggest a positive relationship between rent prices and tree health, although the ANOVA itself only shows that the groups differ. This information could be of use to the parks department, or to local communities working to improve their own neighborhood tree health.