For this project, I was interested in testing whether there is a relationship between the perceived health of neighborhood trees and average rent prices. The tree data came from the City of New York's open data portal, and the rent prices were scraped from apartments.com.

# Load tree data: open a connection to the City of New York JSON endpoint and
# parse the response into a data frame.
# NOTE(review): no row limit is requested here, and the ANOVA further down
# accounts for only about 1,000 observations, so this presumably returns just
# the endpoint's default first page rather than every tree -- confirm.
url <- curl("https://data.cityofnewyork.us/resource/uvpi-gqnh.json")
tree_df <- fromJSON(url)
# Preview the first rows to see which columns came back.
head(tree_df, n = 6L)
##             address        bbl     bin block_id boro_ct borocode  boroname
## 1 108-005 70 AVENUE 4022210001 4052307   348711 4073900        4    Queens
## 2  147-074 7 AVENUE 4044750045 4101931   315986 4097300        4    Queens
## 3 390 MORGAN AVENUE 3028870001 3338310   218365 3044900        3  Brooklyn
## 4 1027 GRAND STREET 3029250001 3338342   217969 3044900        3  Brooklyn
## 5      603 6 STREET 3010850052 3025654   223043 3016500        3  Brooklyn
## 6 8 COLUMBUS AVENUE 1011310031 1076229   106099 1014500        1 Manhattan
##   brch_light brch_other brch_shoe cb_num census_tract cncldist
## 1         No         No        No    406          739       29
## 2         No         No        No    407          973       19
## 3         No         No        No    301          449       34
## 4         No         No        No    301          449       34
## 5         No         No        No    306          165       39
## 6         No         No        No    107          145        3
##   council_district              created_at curb_loc  guards health
## 1               29 2015-08-27T00:00:00.000   OnCurb    None   Fair
## 2               19 2015-09-03T00:00:00.000   OnCurb    None   Fair
## 3               34 2015-09-05T00:00:00.000   OnCurb    None   Good
## 4               34 2015-09-05T00:00:00.000   OnCurb    None   Good
## 5               39 2015-08-30T00:00:00.000   OnCurb    None   Good
## 6                3 2015-08-30T00:00:00.000   OnCurb Helpful   Good
##      latitude    longitude  nta           nta_name problems root_grate
## 1 40.72309177 -73.84421522 QN17       Forest Hills     None         No
## 2 40.79411067 -73.81867946 QN49         Whitestone   Stones         No
## 3 40.71758074  -73.9366077 BK90  East Williamsburg     None         No
## 4 40.71353749 -73.93445616 BK90  East Williamsburg   Stones         No
## 5 40.66677776 -73.97597938 BK37 Park Slope-Gowanus   Stones         No
## 6 40.77004563 -73.98494997 MN14     Lincoln Square     None         No
##   root_other root_stone sidewalk      spc_common
## 1         No         No NoDamage       red maple
## 2         No        Yes   Damage         pin oak
## 3         No         No   Damage     honeylocust
## 4         No        Yes   Damage     honeylocust
## 5         No        Yes   Damage American linden
## 6         No         No NoDamage     honeylocust
##                            spc_latin st_assem st_senate    state status
## 1                        Acer rubrum       28        16 New York  Alive
## 2                  Quercus palustris       27        11 New York  Alive
## 3 Gleditsia triacanthos var. inermis       50        18 New York  Alive
## 4 Gleditsia triacanthos var. inermis       53        18 New York  Alive
## 5                    Tilia americana       44        21 New York  Alive
## 6 Gleditsia triacanthos var. inermis       67        27 New York  Alive
##   steward stump_diam tree_dbh tree_id trnk_light trnk_other trunk_wire
## 1    None          0        3  180683         No         No         No
## 2    None          0       21  200540         No         No         No
## 3    1or2          0        3  204026         No         No         No
## 4    None          0       10  204337         No         No         No
## 5    None          0       21  189565         No         No         No
## 6    1or2          0       11  190422         No         No         No
##          user_type        x_sp        y_sp     zip_city zipcode
## 1 TreesCount Staff 1027431.148 202756.7687 Forest Hills   11375
## 2 TreesCount Staff 1034455.701 228644.8374   Whitestone   11357
## 3        Volunteer 1001822.831 200716.8913     Brooklyn   11211
## 4        Volunteer 1002420.358 199244.2531     Brooklyn   11211
## 5        Volunteer  990913.775  182202.426     Brooklyn   11215
## 6        Volunteer 988418.6997 219825.5227     New York   10023

Collect Zipcode and Matching Neighborhood with UHF42 Code

# Scrape the neighborhood / ZIP code table from the health.ny.gov page and
# flatten it into a single data frame.
link <- "https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm"
zip_table <- link %>%
  read_html() %>%
  html_nodes("table") %>%
  html_table(fill = TRUE) %>%
  data.frame(stringsAsFactors = FALSE)
# add UHF42 Neighborhood code (NCode) column
# The codes are entered by hand, one per scraped row, in page order.
uhf42_by_row <- c(
  105, 103, 106, 107, 101, 102, 104,
  203, 209, 206, 208, 210, 202, 207, 204, 201, 205, 211,
  302, 306, 303, 307, 308, 310, 309, 305, 304, 301,
  404, 403, 406, 408, 401, 405, 410, 409, 407, 402,
  501, 503, 502, 504
)
# Stop early if the page layout changed: a row count that is a multiple of 42
# would otherwise be filled by silent recycling of the codes.
stopifnot(nrow(zip_table) == length(uhf42_by_row))
zip_table$NCode <- uhf42_by_row
# re-arrange columns so the new NCode column comes second
zip_table <- zip_table[, c(1, 4, 2, 3)]
zip_uhf <- kable(zip_table) %>%
  kable_styling(font_size = 10) %>%
  scroll_box(height = "500px", width = "350px")

Collect Rent Data

# ---- Rent scraping helpers ----

# Pattern used to pull rent figures out of the listing text.
# NOTE(review): the "." is unescaped, so it matches any single character, not
# only a thousands separator. It is left exactly as it was so the scraped
# numbers stay comparable with earlier runs -- confirm the intent.
rent_pattern <- "\\$\\d.\\d+"

# Collect every rent figure found on a set of result pages.
#
# page_urls: character vector of page URLs to visit.
# Returns a numeric vector with one entry per matched price, across all pages.
scrape_page_rents <- function(page_urls) {
  tokens <- lapply(page_urls, function(pg) {
    pg %>%
      read_html() %>%
      html_nodes("#placards") %>%
      html_text() %>%
      str_extract_all(rent_pattern)
  })
  parse_number(unlist(tokens))
}

# Average rent over the listings shown on a single neighborhood page.
#
# listing_url: URL of one neighborhood results page.
# Returns the mean of the matched prices (NaN, with a warning, if none match).
mean_listing_rent <- function(listing_url) {
  rents <- listing_url %>%
    read_html() %>%
    html_node("#placards") %>%
    html_text() %>%
    str_extract_all(rent_pattern) %>%
    unlist() %>%
    parse_number()
  if (length(rents) == 0) {
    # mean(numeric(0)) is NaN; surface it instead of letting it pass silently.
    warning("No rent figures found at ", listing_url, call. = FALSE)
  }
  mean(rents)
}

### Bronx
bronx_url <- paste0("https://www.apartments.com/bronx-ny/", 1:20)
bronx <- scrape_page_rents(bronx_url)
Bxurl1 <- "https://www.apartments.com/mount-hope-bronx-ny/"
central_bronx <- mean_listing_rent(Bxurl1)
Bxurl2 <- "https://www.apartments.com/fordham-heights-bronx-ny/"
fordham <- mean_listing_rent(Bxurl2)
Bxurl3 <- "https://www.apartments.com/highbridge-bronx-ny/"
high_bridge <- mean_listing_rent(Bxurl3)
# FIXME(review): same URL as Bxurl1 (mount-hope), so huntspoint always equals
# central_bronx. This looks like a copy-paste slip -- supply the intended page.
Bxurl4 <- "https://www.apartments.com/mount-hope-bronx-ny/"
huntspoint <- mean_listing_rent(Bxurl4)
Bxurl5 <- "https://www.apartments.com/kingsbridge-bronx-ny/"
kingsbridge <- mean_listing_rent(Bxurl5)
# FIXME(review): same URL as Bxurl1 (mount-hope), so north_eastbx always equals
# central_bronx. This looks like a copy-paste slip -- supply the intended page.
Bxurl6 <- "https://www.apartments.com/mount-hope-bronx-ny/"
north_eastbx <- mean_listing_rent(Bxurl6)
Bxurl7 <- "https://www.apartments.com/parkchester-bronx-ny/"
south_eastbx <- mean_listing_rent(Bxurl7)

#### Brooklyn
brooklyn_url <- paste0("https://www.apartments.com/brooklyn-ny/", 1:20)
brooklyn <- scrape_page_rents(brooklyn_url)
Burl1 <- "https://www.apartments.com/crown-heights-brooklyn-ny/"
central_brooklyn <- mean_listing_rent(Burl1)
Burl2 <- "https://www.apartments.com/southwest-brooklyn-brooklyn-ny/"
sw_brooklyn <- mean_listing_rent(Burl2)
Burl3 <- "https://www.apartments.com/borough-park-brooklyn-ny/"
borough_park <- mean_listing_rent(Burl3)
Burl4 <- "https://www.apartments.com/flatlands-brooklyn-ny/"
canarsie_flatlands <- mean_listing_rent(Burl4)
Burl5 <- "https://www.apartments.com/coney-island-brooklyn-ny/"
south_brooklyn <- mean_listing_rent(Burl5)
Burl6 <- "https://www.apartments.com/park-slope-brooklyn-ny/?bb=62kx98lnvHihhqwB"
nw_brooklyn <- mean_listing_rent(Burl6)
Burl7 <- "https://www.apartments.com/flatbush-brooklyn-ny/"
flatbush <- mean_listing_rent(Burl7)
Burl8 <- "https://www.apartments.com/east-new-york-brooklyn-ny/"
east_ny <- mean_listing_rent(Burl8)
Burl9 <- "https://www.apartments.com/greenpoint-brooklyn-ny/"
greenpoint <- mean_listing_rent(Burl9)
Burl10 <- "https://www.apartments.com/sunset-park-brooklyn-ny/"
sunset_park <- mean_listing_rent(Burl10)
Burl11 <- "https://www.apartments.com/bushwick-brooklyn-ny/"
bushwick <- mean_listing_rent(Burl11)

#### Manhattan
manhattan_url <- paste0("https://www.apartments.com/manhattan-ny/", 1:20)
manhattan <- scrape_page_rents(manhattan_url)
Murl1 <- "https://www.apartments.com/central-harlem-new-york-ny/"
central_harlem <- mean_listing_rent(Murl1)
Murl2 <- "https://www.apartments.com/chelsea-new-york-ny/"
chelsea_clinton <- mean_listing_rent(Murl2)
Murl3 <- "https://www.apartments.com/east-harlem-new-york-ny/"
east_harlem <- mean_listing_rent(Murl3)
Murl4 <- "https://www.apartments.com/gramercy-park-new-york-ny/"
gramercy_murray <- mean_listing_rent(Murl4)
Murl5 <- "https://www.apartments.com/greenwich-village-new-york-ny/"
greenwich_soho <- mean_listing_rent(Murl5)
Murl6 <- "https://www.apartments.com/lower-manhattan-new-york-ny/"
lower_manhattan <- mean_listing_rent(Murl6)
Murl7 <- "https://www.apartments.com/lower-east-side-new-york-ny/"
lower_east <- mean_listing_rent(Murl7)
Murl8 <- "https://www.apartments.com/upper-east-side-new-york-ny/"
upper_east <- mean_listing_rent(Murl8)
Murl9 <- "https://www.apartments.com/upper-west-side-new-york-ny/"
upper_west <- mean_listing_rent(Murl9)
Murl10 <- "https://www.apartments.com/washington-heights-new-york-ny/"
inwood_washington <- mean_listing_rent(Murl10)

### Queens
queens_url <- paste0("https://www.apartments.com/queens-ny/", 1:20)
queens <- scrape_page_rents(queens_url)
Qurl1 <- "https://www.apartments.com/northeast-queens-flushing-ny/"
north_east <- mean_listing_rent(Qurl1)
Qurl2 <- "https://www.apartments.com/bayside-ny/"
north <- mean_listing_rent(Qurl2)
Qurl3 <- "https://www.apartments.com/central-queens-queens-ny/"
central <- mean_listing_rent(Qurl3)
Qurl4 <- "https://www.apartments.com/southeast-queens-jamaica-ny/"
jamaica <- mean_listing_rent(Qurl4)
Qurl5 <- "https://www.apartments.com/long-island-city-ny/"
north_west <- mean_listing_rent(Qurl5)
Qurl6 <- "https://www.apartments.com/forest-hills-ny/"
west_central <- mean_listing_rent(Qurl6)
Qurl7 <- "https://www.apartments.com/south-shore-queens-far-rockaway-ny/"
rockaway <- mean_listing_rent(Qurl7)
Qurl8 <- "https://www.apartments.com/hollis-ny/"
south_east <- mean_listing_rent(Qurl8)
Qurl9 <- "https://www.apartments.com/howard-beach-ny/"
south_west <- mean_listing_rent(Qurl9)
# FIXME(review): same URL as Qurl1 (northeast-queens-flushing), so west always
# equals north_east. This looks like a copy-paste slip -- supply the intended
# page.
Qurl10 <- "https://www.apartments.com/northeast-queens-flushing-ny/"
west <- mean_listing_rent(Qurl10)

#### Staten Island
SI_url <- paste0("https://www.apartments.com/staten-island-ny/", 1:20)
Staten_Island <- scrape_page_rents(SI_url)
Surl1 <- "https://www.apartments.com/port-richmond-staten-island-ny/"
port_richmond <- mean_listing_rent(Surl1)
Surl2 <- "https://www.apartments.com/stapleton-staten-island-ny/"
stapleton <- mean_listing_rent(Surl2)
Surl3 <- "https://www.apartments.com/staten-island-ny-10306/"
south_shore <- mean_listing_rent(Surl3)
Surl4 <- "https://www.apartments.com/staten-island-ny-10314/"
mid_island <- mean_listing_rent(Surl4)

Collect Neighborhood and Matching UHF42 Code from pdf file on web

# Read the UHF42 reference PDF and pull out its distinct three-digit codes.
file <- 'http://a816-dohbesp.nyc.gov/IndicatorPublic/EPHTPDF/uhf42.pdf'
text <- pdf_text(file)
# One list element per page, each holding that page's lines.
text2 <- strsplit(text, "\n")
# str_extract_all() expects an atomic character vector; text2 is a list, so
# flatten it first rather than relying on implicit coercion (which warns).
uhf42_codes <- text2 %>%
  unlist() %>%
  str_extract_all(pattern = "\\d{3}") %>%
  unlist() %>%
  parse_number() %>%
  unique() %>%
  sort()
# UHF42 neighborhood names, typed by hand so that they line up with the sorted
# uhf42_codes vector (first name pairs with the smallest code, and so on).
uhf42_names <- c(
  "Kingsbridge - Riverdale", "Northeast Bronx", "Fordham - Bronx Park",
  "Pelham - Throgs Neck", "Crotona - Tremont", "High Bridge - Morrisania",
  "Hunts Point - Mott Haven", "Greenpoint", "Downtown - Heights - Slope",
  "Bedford Stuyvesant - Crown Heights", "East New York", "Sunset Park",
  "Borough Park", "East Flatbush - Flatbush", "Canarsie - Flatlands",
  "Bensonhurst - Bay Ridge", "Coney Island - Sheepshead Bay",
  "Williamsburg - Bushwick", "Washington Heights - Inwood",
  "Central Harlem - Morningside Heights", "East Harlem", "Upper West Side",
  "Upper East Side", "Chelsea - Clinton", "Gramercy Park - Murray Hill",
  "Greenwich Village - Soho", "Union Square - Lower East Side",
  "Lower Manhattan", "Long Island City - Astoria", "West Queens",
  "Flushing - Clearview", "Bayside - Little Neck", "Ridgewood - Forest Hills",
  "Fresh Meadows", "Southwest Queens", "Jamaica", "Southeast Queens",
  "Rockaway", "Port Richmond", "Stapleton - St. George", "Willowbrook",
  "South Beach - Tottenville"
)
# Earlier attempt at parsing the names out of the PDF text, kept for reference:
#uhf42_names <- gsub("NYC UHF 42 Neighborhoods", "", text2)
#uhf42_names <- gsub("UHF Neighborhood Name", "", uhf42_names)
#uhf42_names <- gsub("UHF Code", "", uhf42_names)
#uhf42_names <- gsub("Neighborhood Definition:.+", "", uhf42_names)
#uhf42_names <- gsub("\\d{3}", "", uhf42_names)
#uhf42_names <- unlist(str_extract_all(uhf42_names, "(\\s[A-Za-z]+.\\-?.[A-Za-z]+.\\-?.[A-Za-z]+.[A-Za-z]+.\\-?.[A-Za-z]+| Jamaica| Rockaway| Sunset Park)"))
#uhf42_names <- gsub("\\\\r", "", uhf42_names)
# The pairing is purely positional, so insist on a one-to-one match: extra
# three-digit hits in the PDF must not be recycled against the names.
stopifnot(length(uhf42_codes) == length(uhf42_names))
neighborhood <- data.frame(
  Code = uhf42_codes,
  Location = uhf42_names,
  stringsAsFactors = FALSE
)
kable(neighborhood) %>%
  kable_styling(font_size = 12) %>%
  scroll_box(height = "500px", width = "300px")
Code Location
101 Kingsbridge - Riverdale
102 Northeast Bronx
103 Fordham - Bronx Park
104 Pelham - Throgs Neck
105 Crotona - Tremont
106 High Bridge - Morrisania
107 Hunts Point - Mott Haven
201 Greenpoint
202 Downtown - Heights - Slope
203 Bedford Stuyvesant - Crown Heights
204 East New York
205 Sunset Park
206 Borough Park
207 East Flatbush - Flatbush
208 Canarsie - Flatlands
209 Bensonhurst - Bay Ridge
210 Coney Island - Sheepshead Bay
211 Williamsburg - Bushwick
301 Washington Heights - Inwood
302 Central Harlem - Morningside Heights
303 East Harlem
304 Upper West Side
305 Upper East Side
306 Chelsea - Clinton
307 Gramercy Park - Murray Hill
308 Greenwich Village - Soho
309 Union Square - Lower East Side
310 Lower Manhattan
401 Long Island City - Astoria
402 West Queens
403 Flushing - Clearview
404 Bayside - Little Neck
405 Ridgewood - Forest Hills
406 Fresh Meadows
407 Southwest Queens
408 Jamaica
409 Southeast Queens
410 Rockaway
501 Port Richmond
502 Stapleton - St. George
503 Willowbrook
504 South Beach - Tottenville
#write.csv(neighborhood, file = "Neighborhood.csv", row.names = F)

# Gather the scraped neighborhood averages into one vector. The order must
# follow the row order of zip_table, because the UHF42 codes are attached by
# position below.
# NOTE(review): the hard-coded ZIP/rent vectors later in this file suggest the
# scraped table lists South Shore before Stapleton on Staten Island, whereas
# stapleton precedes south_shore here -- confirm the pairing for codes 502/503.
Rent_prices <- c(
  central_bronx, fordham, high_bridge, huntspoint, kingsbridge, north_eastbx,
  south_eastbx,
  central_brooklyn, sw_brooklyn, borough_park, canarsie_flatlands,
  south_brooklyn, nw_brooklyn, flatbush, east_ny, greenpoint, sunset_park,
  bushwick,
  central_harlem, chelsea_clinton, east_harlem, gramercy_murray,
  greenwich_soho, lower_manhattan, lower_east, upper_east, upper_west,
  inwood_washington,
  north_east, north, central, jamaica, north_west, west_central, rockaway,
  south_east, south_west, west,
  port_richmond, stapleton, south_shore, mid_island
)
# Positional pairing: require exactly one price per table row.
stopifnot(length(Rent_prices) == nrow(zip_table))
rent <- data.frame(
  UHF42Code = zip_table$NCode,
  avg_Rentprice = Rent_prices,
  stringsAsFactors = FALSE
)
#write.csv(rent, file = "Rent.csv", row.names = F) #store file
kable(rent) %>%
  kable_styling(font_size = 12) %>%
  scroll_box(height = "500px", width = "250px")
UHF42Code avg_Rentprice
105 1683.625
103 1680.000
106 1934.773
107 1683.625
101 2225.600
102 1683.625
104 1642.143
203 2207.000
209 2016.880
206 2053.000
208 2085.320
210 2195.826
202 2788.500
207 2122.320
204 2146.417
201 2776.640
205 2134.375
211 2036.375
302 2723.480
306 3484.120
303 2302.400
307 3719.400
308 4063.600
310 3655.500
309 3650.600
305 3356.875
304 3924.200
301 2258.958
404 2148.960
403 2427.500
406 2129.042
408 1916.040
401 2540.462
405 2172.000
410 1989.957
409 2458.192
407 2166.818
402 2148.960
501 1800.000
503 1850.000
502 2148.824
504 2427.500

UHF42 Codes and Rent Prices

# Rent_prices and rent are already built, from the same inputs, in the chunk
# above; reuse them here instead of constructing identical copies.
#write.csv(rent, file = "Rent.csv", row.names = F) #store file
rent_uhf <- kable(rent) %>%
  kable_styling(font_size = 12) %>%
  scroll_box(height = "500px", width = "250px")
#joining average rent with zipcodes
ziprent <- inner_join(zip_table, rent, by = c("NCode" = "UHF42Code"))
class(zip_uhf)
## [1] "kableExtra"  "knitr_kable"
# Drop the first column, then the second column of what remains (i.e. the
# first and third columns of the joined table).
ziprent <- ziprent[, -1]
ziprent <- ziprent[, -2]

# Hard-coded list of ZIP codes used to attach a rent figure to each tree.
# Each entry pairs, by position, with the same-index entry of averagerent.
zipcode <- c(10453, 10457, 10460, 10458, 10467, 10468, 10451, 10452, 10456, 10454, 10455, 10459, 10474, 10463, 10471, 10466, 10469, 10470, 10475, 10461, 10462,10464, 10465, 10472, 10473, 11212, 11213, 11216, 11233, 11238, 11209, 11214, 11228, 11204, 11218, 11219, 11230, 11234, 11236, 11239, 11223, 11224, 11229, 11235, 11201, 11205, 11215, 11217, 11231, 11203, 11210, 11225, 11226, 11207, 11208, 11211, 11222, 11220, 11232, 11206, 11221, 11237, 10026, 10027, 10030, 10037, 10039, 10001, 10011, 10018, 10019, 10020, 10036, 10029, 10035, 10010, 10016, 10017, 10022, 10012, 10013, 10014, 10004, 10005, 10006, 10007, 10038, 10280, 10002, 10003, 10009, 10021, 10028, 10044, 10065, 10075, 10128, 10023, 10024, 10025, 10031, 10032, 10033, 10034, 10040, 11361, 11362, 11363, 11364, 11354, 11355, 11356, 11357, 11358, 11359, 11360, 11365, 11366, 11367, 11412, 11423, 11432, 11433, 11434, 11435, 11436, 11101, 11102, 11103, 11104, 11105, 11106, 11374, 11375, 11379, 11385, 11691, 11692, 11693, 11694, 11695, 11697, 11004, 11005, 11411, 11413, 11422, 11426, 11427, 11428, 11429, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11368, 11369, 11370, 11372, 11373, 11377, 11378, 10302, 10303, 10310, 10306, 10307, 10308, 10309, 10312, 10301, 10304, 10305, 10314)

# Hard-coded average rent for each ZIP code above, repeated once per ZIP.
# NOTE(review): these look like a hand-copied snapshot of the scraped
# neighborhood averages -- many values match the rent table printed earlier
# (e.g. 1683.625, 3484.120) while others differ (e.g. 2195.600 here vs
# 2225.600 there), so they presumably come from a different scrape run.
# TODO confirm the source and regenerate from `rent` if possible.
averagerent <- c(1683.625, 1683.625, 1683.625, 1680.000, 1680.000, 1680.000, 1934.773, 1934.773, 1934.773, 1683.625, 1683.625, 1683.625, 1683.625, 2195.600, 2195.600, 1683.625, 1683.625, 1683.625, 1683.625, 1642.143, 1642.143, 1642.143, 1642.143, 1642.143, 1642.143, 2207.000, 2207.000, 2207.000, 2207.000, 2207.000, 1996.880, 1996.880, 1996.880, 2193.400, 2193.400, 2193.400, 2193.400, 2210.360, 2210.360, 2210.360, 2195.826, 2195.826, 2195.826, 2195.826, 3212.783, 3212.783, 3212.783, 3212.783, 3212.783, 2112.320, 2112.320, 2112.320, 2112.320, 2146.417, 2146.417, 2874.522, 2874.522, 2088.720, 2088.720, 2099.917, 2099.917, 2099.917, 2723.480, 2723.480, 2723.480, 2723.480, 2723.480, 3484.120, 3484.120, 3484.120, 3484.120, 3484.120, 3484.120, 2302.400, 2302.400, 3719.400, 3719.400, 3719.400, 3719.400, 4063.600, 4063.600, 4063.600, 3616.083, 3616.083, 3616.083, 3616.083, 3616.083, 3616.083, 3650.600, 3650.600, 3650.600, 3256.875, 3256.875, 3256.875, 3256.875, 3256.875, 3256.875, 3924.200, 3924.200, 3924.200, 2258.958, 2258.958, 2258.958, 2258.958, 2258.958, 2148.960, 2148.960, 2148.960, 2148.960, 2149.615, 2149.615, 2149.615, 2149.615, 2149.615, 2149.615, 2149.615, 2110.292, 2110.292, 2110.292, 1880.040, 1880.040, 1880.040, 1880.040, 1880.040, 1880.040, 1880.040, 2526.000, 2526.000, 2526.000, 2526.000, 2526.000, 2526.000, 2172.000, 2172.000, 2172.000, 2172.000, 1891.625, 1891.625, 1891.625, 1891.625, 1891.625, 1891.625, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2458.192, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2166.818, 2148.960, 2148.960, 2148.960, 2148.960, 2148.960, 2148.960, 2148.960, 1800.000, 1800.000, 1800.000, 1850.000, 1850.000, 1850.000, 1850.000, 1850.000, 2148.824, 2148.824, 2148.824, 2427.500)
# Build the ZIP -> average rent lookup with the ZIP stored as text, so that it
# joins cleanly against the character ZIP column of the tree data.
zipcode <- as.character(zipcode)
ziprent <- data.frame(
  zipcode = zipcode,
  averagerent = averagerent,
  stringsAsFactors = FALSE
)
# joining average rent with tree data frame: make sure both keys are character
tree_df$zipcode <- as.character(tree_df$zipcode)

# Keep only trees whose ZIP code has a rent figure.
tree_rent <- inner_join(tree_df, ziprent, by = "zipcode")
# Compare average rent across tree-health categories: first with a boxplot,
# then with a per-category mean and standard deviation.

ggplot(data = tree_rent, mapping = aes(x = factor(health), y = averagerent)) +
  geom_boxplot(mapping = aes(fill = health)) +
  xlab("")

#plot(air_rent2$avg_Rentprice ~ air_rent2$Measurement)
#obs <- lm(air_rent2$avg_Rentprice ~ air_rent2$Measurement)
#summary(obs)
#abline(obs)
# Group by health rating (trees with a missing rating form their own NA group).
treehealth <- tree_rent %>% group_by(health)
treehealth2 <- treehealth %>%
  summarise(
    mean = round(mean(averagerent), 0),
    stdev = round(sd(averagerent), 0)
  )
knitr::kable(treehealth2)
health mean stdev
Fair 2390 660
Good 2585 698
Poor 2320 596
NA 2293 593

Next, I want to check whether there is a statistically significant difference in average rent price among the tree-health groups. My null hypothesis is that there is no difference. The conditions are met: each tree is independent of the others, and there are well over 50 trees, so we don’t need to worry about the shape of the distribution. Trees with a missing (NA) health rating are dropped by the test, which leaves three groups to compare. \(H_0:\mu_{fair} = \mu_{good}=\mu_{poor}\) \(H_1:\) at least one group mean differs from the others.

# One-way ANOVA of average rent by tree-health rating.
# The columns are referenced with `$` inside the formula, so the term is
# labelled "tree_rent$health" in the printed summary.
# Note: the result is stored under the name `anova`, which masks the
# stats::anova() function for plain variable lookups in this session.
anova <- aov(formula = tree_rent$averagerent ~ tree_rent$health)
summary(object = anova)
##                   Df    Sum Sq Mean Sq F value   Pr(>F)    
## tree_rent$health   2   8111833 4055916   8.631 0.000193 ***
## Residuals        963 452525813  469913                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 27 observations deleted due to missingness

Since the p-value is well below .05, we reject the null hypothesis. There is sufficient evidence to say that there appears to be a relationship between the average rent price of a neighborhood and the health of the trees in that neighborhood.

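In conclusion, we would say there is a positive relationship between rent prices and tree health: trees rated Good sit in areas with the highest average rent ($2,585), followed by Fair ($2,390) and Poor ($2,320). This information could be of use to the parks department, or to local communities looking to improve their own neighborhood tree health.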