#require libraries
#install.packages("sf")
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
# tidyverse provides dplyr (rename, left_join) and the %>% pipe used below.
# Loaded with library() so a missing package stops the script immediately.
library(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# leaflet provides the interactive map functions used below
# (leaflet, addProviderTiles, addPolygons, addLegend, colorNumeric).
# Loaded with library() so a missing package stops the script immediately.
library(leaflet)
## Loading required package: leaflet
# Load the census-tract polygons from the shapefile as an sf object
geo_tracts <- st_read(
  dsn = "~/Documents/PPUA5262/Airbnb/AIRBNB.CT/AIRBNB.CT.shp"
)
## Reading layer `AIRBNB.CT' from data source
## `/Users/yasmelcuriel/Documents/PPUA5262/Airbnb/AIRBNB.CT/AIRBNB.CT.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 178 features and 74 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -71.19115 ymin: 42.22788 xmax: -70.98471 ymax: 42.40493
## Geodetic CRS: +proj=longlat +datum=NAD83 +no_defs
# Load the tract-level table that will be joined onto the polygons
merged_dt <- read.csv(file = "~/Documents/PPUA5262/Airbnb/merged_dt.csv")
# Rename the tract identifier to CT_ID_10, the key name used in merged_dt
geo_tracts <- rename(geo_tracts, CT_ID_10 = CT_ID)
# List the columns to confirm the rename took effect
names(geo_tracts)
## [1] "CT_ID_10" "TOWN" "COUNT" "LsF_C" "LF_C_2022"
## [6] "LF_C_2021" "LF_C_2020" "LF_C_201" "LF_M_2022" "LF_J_2022"
## [11] "LF_S_2022" "LF_D_2022" "LF_J_2021" "LF_S_2021" "LF_D_2021"
## [16] "ListingsFr" "LF_F_202" "LstngsFrq_" "LstngsFr_1" "LstngsFr_2"
## [21] "Listings_1" "LstngsFr_3" "LF_S_2020" "LF_O_202" "LF_N_202"
## [26] "LF_D_2020" "Listings_2" "LF_F_201" "LstngsFr_4" "LstngsFr_5"
## [31] "LstngsFr_6" "Listings_3" "LstngsFr_7" "LstngsFr_8" "LF_S_201"
## [36] "LF_O_201" "LF_N_201" "LF_D_201" "MdR_C" "MR_C_2022"
## [41] "MR_C_2021" "MR_C_2020" "MR_C_201" "MR_M_2022" "MR_J_2022"
## [46] "MR_S_2022" "MR_D_2022" "MR_J_2021" "MR_S_2021" "MR_D_2021"
## [51] "MedianRent" "MR_F_202" "MdnRnt_Mr_" "MdnRnt_Ap_" "MdnRnt_My_"
## [56] "MedianRe_1" "MdnRnt_Ag_" "MR_S_2020" "MR_O_202" "MR_N_202"
## [61] "MR_D_2020" "MedianRe_2" "MR_F_201" "MdnRnt_M_1" "MdnRnt_A_1"
## [66] "MdnRnt_M_2" "MedianRe_3" "MdnRnt_Jl_" "MdnRnt_A_2" "MR_S_201"
## [71] "MR_O_201" "MR_N_201" "MR_D_201" "Ngh_E" "geometry"
In the spatial data, the census tract identifier column was named CT_ID, whereas in the aggregated variables it was named CT_ID_10, so the two datasets could not be merged directly. To fix this, I renamed the column in the spatial data to “CT_ID_10” so that both datasets use the same key name.
# Join the tract polygons to the manifest variables.
# First compare the type of the join key in each table.
class(geo_tracts[["CT_ID_10"]])
## [1] "character"
class(merged_dt[["CT_ID_10"]])
## [1] "numeric"
# Coerce both keys to character so the join columns have the same type
geo_tracts$CT_ID_10 <- as.character(geo_tracts[["CT_ID_10"]])
merged_dt$CT_ID_10 <- as.character(merged_dt[["CT_ID_10"]])
# Keep every tract polygon and attach the matching merged_dt columns
merged_geo <- left_join(geo_tracts, merged_dt, by = "CT_ID_10")
head(merged_geo)
## Simple feature collection with 6 features and 152 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -71.10718 ymin: 42.33566 xmax: -71.07046 ymax: 42.35581
## Geodetic CRS: +proj=longlat +datum=NAD83 +no_defs
## CT_ID_10 TOWN.x COUNT LsF_C LF_C_2022 LF_C_2021 LF_C_2020
## 1 25025010405 Boston Suffolk County 222 29 16 41
## 2 25025010404 Boston Suffolk County 374 40 26 79
## 3 25025010801 Boston Suffolk County 706 61 42 149
## 4 25025010702 Boston Suffolk County 924 80 64 282
## 5 25025010204 Boston Suffolk County 1477 120 59 393
## 6 25025010802 Boston Suffolk County 1254 120 79 344
## LF_C_201 LF_M_2022 LF_J_2022 LF_S_2022 LF_D_2022 LF_J_2021 LF_S_2021
## 1 136 7 7 7 8 7 2
## 2 229 10 9 9 12 9 7
## 3 454 15 16 15 15 16 11
## 4 498 21 18 20 21 22 20
## 5 905 27 28 35 30 16 17
## 6 711 28 31 30 31 28 25
## LF_D_2021 ListingsFr LF_F_202 LstngsFrq_ LstngsFr_1 LstngsFr_2 Listings_1
## 1 7 5 3 3 3 3 3
## 2 10 5 6 6 6 7 6
## 3 15 18 12 12 12 12 11
## 4 22 25 25 24 27 26 27
## 5 26 49 47 45 45 34 33
## 6 26 31 31 27 34 44 27
## LstngsFr_3 LF_S_2020 LF_O_202 LF_N_202 LF_D_2020 Listings_2 LF_F_201
## 1 2 5 3 4 4 12 13
## 2 6 7 8 9 8 23 22
## 3 11 11 12 13 13 49 52
## 4 23 26 18 19 17 44 42
## 5 24 26 21 21 19 83 85
## 6 23 26 27 26 26 68 67
## LstngsFr_4 LstngsFr_5 LstngsFr_6 Listings_3 LstngsFr_7 LstngsFr_8 LF_S_201
## 1 12 13 13 15 14 14 8
## 2 21 21 22 22 20 20 18
## 3 50 40 38 37 36 41 33
## 4 41 39 40 42 41 45 46
## 5 79 81 78 73 82 77 78
## 6 70 68 70 66 66 63 46
## LF_O_201 LF_N_201 LF_D_201 MdR_C MR_C_2022 MR_C_2021 MR_C_2020 MR_C_201
## 1 8 8 6 150 250.0 200 150.0 150
## 2 18 17 5 109 99.0 62 137.5 109
## 3 28 33 17 200 225.0 177 150.0 200
## 4 49 44 25 163 171.5 162 159.0 175
## 5 75 67 47 186 299.0 229 185.0 185
## 6 49 43 35 179 170.0 150 150.0 190
## MR_M_2022 MR_J_2022 MR_S_2022 MR_D_2022 MR_J_2021 MR_S_2021 MR_D_2021
## 1 200.0 270.0 250.0 206 200.0 537.5 200.0
## 2 80.5 77.0 99.0 109 62.0 62.0 80.5
## 3 195.0 272.5 250.0 166 198.0 154.0 173.0
## 4 162.0 280.0 299.0 150 162.0 201.0 137.0
## 5 299.0 343.0 399.0 267 192.5 216.0 256.5
## 6 163.5 226.0 222.5 157 152.5 145.0 137.5
## MedianRent MR_F_202 MdnRnt_Mr_ MdnRnt_Ap_ MdnRnt_My_ MedianRe_1 MdnRnt_Ag_
## 1 150.0 150.0 150.0 150.0 150.0 150 149.6
## 2 150.0 150.0 150.0 150.0 150.0 106 61.0
## 3 187.5 162.5 162.5 162.5 162.5 150 152.0
## 4 159.0 159.0 155.5 159.0 162.0 150 163.0
## 5 176.0 183.0 199.0 186.0 180.5 182 380.5
## 6 200.0 197.0 199.0 186.0 150.0 199 150.0
## MR_S_2020 MR_O_202 MR_N_202 MR_D_2020 MedianRe_2 MR_F_201 MdnRnt_M_1
## 1 141 131.0 140.5 140.5 145.0 140.0 145.0
## 2 62 93.5 125.0 93.5 111.0 104.5 100.0
## 3 151 114.5 110.0 83.0 199.0 200.0 199.5
## 4 155 160.0 159.0 140.0 172.0 175.0 175.0
## 5 249 199.0 199.0 185.0 151.0 188.0 200.0
## 6 150 110.0 105.0 110.0 179.5 180.0 180.0
## MdnRnt_A_1 MdnRnt_M_2 MedianRe_3 MdnRnt_Jl_ MdnRnt_A_2 MR_S_201 MR_O_201
## 1 140.0 123.0 150 152.0 157 150.0 150.0
## 2 109.0 115.5 110 110.0 110 104.5 105.5
## 3 200.0 200.0 200 200.0 209 200.0 200.0
## 4 175.0 164.5 160 160.0 175 175.0 179.0
## 5 199.0 186.0 199 185.0 185 184.0 185.0
## 6 187.5 200.0 204 187.5 197 192.5 180.0
## MR_N_201 MR_D_201 Ngh_E Neighborhood_Effects ListingsFreq_Cross
## 1 150 155 25.02361 25.02361 222
## 2 111 150 34.95489 34.95489 374
## 3 200 150 92.81968 92.81968 706
## 4 175 152 82.41783 82.41783 924
## 5 176 182 35.62100 35.62100 1477
## 6 179 180 96.80879 96.80879 1254
## MedianRent_Cross ListingsFreq_Cross_2022 MedianRent_Cross_2022
## 1 150 29 250.0
## 2 109 40 99.0
## 3 200 61 225.0
## 4 163 80 171.5
## 5 186 120 299.0
## 6 179 120 170.0
## ListingsFreq_Mar_2022 ListingsFreq_June_2022 ListingsFreq_Sept_2022
## 1 7 7 7
## 2 10 9 9
## 3 15 16 15
## 4 21 18 20
## 5 27 28 35
## 6 28 31 30
## ListingsFreq_Dec_2022 MedianRent_Mar_2022 MedianRent_June_2022
## 1 8 200.0 270.0
## 2 12 80.5 77.0
## 3 15 195.0 272.5
## 4 21 162.0 280.0
## 5 30 299.0 343.0
## 6 31 163.5 226.0
## MedianRent_Sept_2022 MedianRent_Dec_2022 TOWN.y COUNTY
## 1 250.0 206 Boston Suffolk County
## 2 99.0 109 Boston Suffolk County
## 3 250.0 166 Boston Suffolk County
## 4 299.0 150 Boston Suffolk County
## 5 399.0 267 Boston Suffolk County
## 6 222.5 157 Boston Suffolk County
## ListingsFreq_Cross_2021 MedianRent_Cross_2021 ListingsFreq_June_2021
## 1 16 200 7
## 2 26 62 9
## 3 42 177 16
## 4 64 162 22
## 5 59 229 16
## 6 79 150 28
## ListingsFreq_Sept_2021 ListingsFreq_Dec_2021 MedianRent_June_2021
## 1 2 7 200.0
## 2 7 10 62.0
## 3 11 15 198.0
## 4 20 22 162.0
## 5 17 26 192.5
## 6 25 26 152.5
## MedianRent_Sept_2021 MedianRent_Dec_2021 ListingsFreq_Cross_2020
## 1 537.5 200.0 41
## 2 62.0 80.5 79
## 3 154.0 173.0 149
## 4 201.0 137.0 282
## 5 216.0 256.5 393
## 6 145.0 137.5 344
## MedianRent_Cross_2020 ListingsFreq_Jan_2020 ListingsFreq_Feb_2020
## 1 150.0 5 3
## 2 137.5 5 6
## 3 150.0 18 12
## 4 159.0 25 25
## 5 185.0 49 47
## 6 150.0 31 31
## ListingsFreq_Mar_2020 ListingsFreq_Apr_2020 ListingsFreq_May_2020
## 1 3 3 3
## 2 6 6 7
## 3 12 12 12
## 4 24 27 26
## 5 45 45 34
## 6 27 34 44
## ListingsFreq_June_2020 ListingsFreq_Aug_2020 ListingsFreq_Sept_2020
## 1 3 2 5
## 2 6 6 7
## 3 11 11 11
## 4 27 23 26
## 5 33 24 26
## 6 27 23 26
## ListingsFreq_Oct_2020 ListingsFreq_Nov_2020 ListingsFreq_Dec_2020
## 1 3 4 4
## 2 8 9 8
## 3 12 13 13
## 4 18 19 17
## 5 21 21 19
## 6 27 26 26
## MedianRent_Jan_2020 MedianRent_Feb_2020 MedianRent_Mar_2020
## 1 150.0 150.0 150.0
## 2 150.0 150.0 150.0
## 3 187.5 162.5 162.5
## 4 159.0 159.0 155.5
## 5 176.0 183.0 199.0
## 6 200.0 197.0 199.0
## MedianRent_Apr_2020 MedianRent_May_2020 MedianRent_June_2020
## 1 150.0 150.0 150
## 2 150.0 150.0 106
## 3 162.5 162.5 150
## 4 159.0 162.0 150
## 5 186.0 180.5 182
## 6 186.0 150.0 199
## MedianRent_Aug_2020 MedianRent_Sept_2020 MedianRent_Oct_2020
## 1 149.6 141 131.0
## 2 61.0 62 93.5
## 3 152.0 151 114.5
## 4 163.0 155 160.0
## 5 380.5 249 199.0
## 6 150.0 150 110.0
## MedianRent_Nov_2020 MedianRent_Dec_2020 ListingsFreq_Cross_2019
## 1 140.5 140.5 136
## 2 125.0 93.5 229
## 3 110.0 83.0 454
## 4 159.0 140.0 498
## 5 199.0 185.0 905
## 6 105.0 110.0 711
## MedianRent_Cross_2019 ListingsFreq_Jan_2019 ListingsFreq_Feb_2019
## 1 150 12 13
## 2 109 23 22
## 3 200 49 52
## 4 175 44 42
## 5 185 83 85
## 6 190 68 67
## ListingsFreq_Mar_2019 ListingsFreq_Apr_2019 ListingsFreq_May_2019
## 1 12 13 13
## 2 21 21 22
## 3 50 40 38
## 4 41 39 40
## 5 79 81 78
## 6 70 68 70
## ListingsFreq_June_2019 ListingsFreq_July_2019 ListingsFreq_Aug_2019
## 1 15 14 14
## 2 22 20 20
## 3 37 36 41
## 4 42 41 45
## 5 73 82 77
## 6 66 66 63
## ListingsFreq_Sept_2019 ListingsFreq_Oct_2019 ListingsFreq_Nov_2019
## 1 8 8 8
## 2 18 18 17
## 3 33 28 33
## 4 46 49 44
## 5 78 75 67
## 6 46 49 43
## ListingsFreq_Dec_2019 MedianRent_Jan_2019 MedianRent_Feb_2019
## 1 6 145.0 140.0
## 2 5 111.0 104.5
## 3 17 199.0 200.0
## 4 25 172.0 175.0
## 5 47 151.0 188.0
## 6 35 179.5 180.0
## MedianRent_Mar_2019 MedianRent_Apr_2019 MedianRent_May_2019
## 1 145.0 140.0 123.0
## 2 100.0 109.0 115.5
## 3 199.5 200.0 200.0
## 4 175.0 175.0 164.5
## 5 200.0 199.0 186.0
## 6 180.0 187.5 200.0
## MedianRent_June_2019 MedianRent_July_2019 MedianRent_Aug_2019
## 1 150 152.0 157
## 2 110 110.0 110
## 3 200 200.0 209
## 4 160 160.0 175
## 5 199 185.0 185
## 6 204 187.5 197
## MedianRent_Sept_2019 MedianRent_Oct_2019 MedianRent_Nov_2019
## 1 150.0 150.0 150
## 2 104.5 105.5 111
## 3 200.0 200.0 200
## 4 175.0 179.0 175
## 5 184.0 185.0 176
## 6 192.5 180.0 179
## MedianRent_Dec_2019 num_listings avg_community_lang avg_lux_lang
## 1 155 16 0.3125000 0.8750000
## 2 150 26 0.3846154 0.6538462
## 3 150 42 0.5476190 0.7380952
## 4 152 64 0.5937500 0.7812500
## 5 182 59 0.3728814 0.6101695
## 6 180 79 0.3164557 0.6962025
## avg_marketing_lang prop_local_hosts geometry
## 1 0.1250000 0.7500000 POLYGON ((-71.09009 42.3466...
## 2 0.3076923 0.3076923 POLYGON ((-71.09066 42.3397...
## 3 0.6904762 0.3095238 POLYGON ((-71.08159 42.3537...
## 4 0.4375000 0.4843750 POLYGON ((-71.07066 42.3518...
## 5 0.3559322 0.2033898 POLYGON ((-71.10683 42.3487...
## 6 0.3670886 0.5063291 POLYGON ((-71.08159 42.3537...
I got an error saying I could not merge these two dataframes because the key column was of class character in one and numeric in the other. I decided to make both of them character, since I wasn’t sure if making the key numeric would interfere with the spatial component of the data.
# taking a look at the data as a map
# plot() on an sf object draws one panel per attribute. State the panel limit
# explicitly rather than relying on the implicit default, which only plots the
# first 9 attributes and raises a truncation warning.
plot(merged_geo, max.plot = 9)
## Warning: plotting the first 9 out of 152 attributes; use max.plot = 152 to plot
## all
# Neighborhood appeal: unweighted mean of the four tract-level indicators
# (luxury, community and marketing language, plus proportion of local hosts).
# A tract with a missing value in any indicator gets a missing score.
merged_geo$neighborhood_appeal <- with(
  merged_geo,
  (avg_lux_lang + avg_community_lang + avg_marketing_lang +
    prop_local_hosts) / 4
)
# Preview the first few scores
head(merged_geo$neighborhood_appeal)
## [1] 0.5156250 0.4134615 0.5714286 0.5742188 0.3855932 0.4715190
I decided to combine the manifest variables (use of luxury language, community language, marketing language, and the proportion of local hosts) into one measure called “neighborhood appeal”. This was done in order to measure the appeal of Airbnb listings at the census tract level. To visualize this measure, I created a map. As shown below, the choropleth map illustrates the appeal of the listings within each census tract. The closer the color is to red, the lower the neighborhood appeal score is, and the closer to green, the higher the neighborhood appeal score is.
Making a map based on the neighborhood appeal score
# Red-yellow-green colour scale over the observed appeal scores;
# tracts with a missing score are drawn transparent
pal <- colorNumeric(
  "RdYlGn",
  domain = merged_geo[["neighborhood_appeal"]],
  na.color = "transparent"
)
# make map
# The tract polygons are stored in NAD83 longitude/latitude, but leaflet
# expects WGS84 (EPSG:4326). Reproject before handing the layer to leaflet so
# it no longer warns about an inconsistent datum.
mymap <- merged_geo %>%
  st_transform(crs = 4326) %>%
  leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>% # basemap
  setView(lng = -71.09, lat = 42.32, zoom = 11) %>%
  addPolygons(
    # fill each tract by its appeal score using the palette defined above
    fillColor = ~pal(neighborhood_appeal),
    color = "white",
    weight = 0.5,
    fillOpacity = 0.8,
    # HTML popup shown when a tract is clicked
    popup = ~paste(
      "<b>Census Tract:</b>", CT_ID_10, "<br>",
      "<b>Listings:</b>", num_listings, "<br>",
      "<b>Luxury Language:</b>", round(avg_lux_lang, 2), "<br>",
      "<b>Community Language:</b>", round(avg_community_lang, 2), "<br>",
      "<b>Marketing Language:</b>", round(avg_marketing_lang, 2), "<br>",
      "<b>Local Hosts (%):</b>", round(prop_local_hosts * 100, 1), "<br>",
      "<b>Neighborhood Appeal score:</b>", round(neighborhood_appeal, 2)
    ),
    # full argument name instead of the partially matched `highlight`
    highlightOptions = highlightOptions( # outline the tract under the cursor
      weight = 3,
      color = "red",
      bringToFront = TRUE
    )
  ) %>%
  addLegend( # creating the legend
    pal = pal,
    values = ~neighborhood_appeal,
    title = "Neighborhood Appeal (avg)",
    position = "bottomright"
  )
# display the map
mymap
After making and visualizing the map, some census tracts that stood out to me are: