library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(leaflet)
library(htmlwidgets)
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(sfheaders)
library(tidyr)
# Load the locality boundary polygons from the NSW localities shapefile.
# `shp_path` is kept as a separate variable so the location is easy to change.
shp_path <- "/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/GDA94/nsw_localities.shp"
syd <- st_read(dsn = shp_path)
## Reading layer `nsw_localities' from data source
## `/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/GDA94/nsw_localities.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4610 features and 6 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 140.9993 ymin: -37.50534 xmax: 159.1054 ymax: -28.15702
## Geodetic CRS: GDA94
# View column names and attribute data
# colnames(syd)
# head(syd)
Loading the dataset
# Read the Airbnb listings summary table (one row per listing, presumably --
# TODO confirm against the CSV) into a plain data frame.
airbnb <- read.csv(
  file = "/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/listings_summary_dec18.csv"
)
General picture
# General picture ----
# Count how many listings are recorded for each neighbourhood.
suburb_listing_counts <- airbnb %>%
  group_by(neighbourhood) %>%
  summarise(listings_count = n(), .groups = "drop")

# Keep the ten neighbourhoods with the most listings, largest first.
top_10_suburbs <- suburb_listing_counts %>%
  arrange(desc(listings_count)) %>%
  slice_head(n = 10)

# Preview the ranked table
head(top_10_suburbs)

# Horizontal bar chart of the listing counts.
# The discrete axis limits are set explicitly so that the bars follow the
# ranking (largest at the top after coord_flip()) instead of ggplot2's default
# alphabetical ordering of character values.
# No axis-text rotation is applied: after coord_flip() the suburb names sit on
# the vertical axis and already read horizontally, and any theme() tweak placed
# before the complete theme_minimal() would be overwritten by it anyway.
bar_plot <- ggplot(
  top_10_suburbs,
  aes(x = neighbourhood, y = listings_count)
) +
  geom_bar(stat = "identity", fill = "#E69F00", width = 0.5) +
  scale_x_discrete(limits = rev(top_10_suburbs$neighbourhood)) +
  labs(
    title = "Top 10 the Number of Listings Suburbs",
    x = "Suburb",
    y = "Number of Listings"
  ) +
  coord_flip() +
  theme_minimal()

# Interactive (plotly) version of the same chart
interactive_bar_plot <- ggplotly(bar_plot)
interactive_bar_plot
The chart illustrates the top ten suburbs in Sydney with the highest number of Airbnb listings.
Sydney (CBD): Unsurprisingly, the city center (CBD) tops the list with the largest number of Airbnb listings. This is expected due to its iconic landmarks, such as the Opera House and the Harbour Bridge, which attract significant tourist traffic. Additionally, Sydney’s central location and excellent public transportation make it a convenient base for visitors to explore other parts of the city.
Waverley, Randwick, and Manly: These suburbs rank second, third, and fourth respectively in the number of listings. Their proximity to Sydney’s famous beaches — Bondi (Waverley), Coogee (Randwick), and Manly — makes them highly sought after by travelers. These areas offer not only scenic coastal views but also vibrant local services, increasing their appeal as short-term rental hotspots.
Warringah: This suburb stands out for its proximity to a national park, which attracts nature enthusiasts and outdoor adventurers. The combination of natural beauty and recreational opportunities contributes to its popularity among Airbnb hosts.
Suburbs near major tourist attractions, scenic coastal areas, or natural landmarks tend to attract more Airbnb listings due to their high demand among travelers. For property managers and hosts, investing in properties in these strategic locations can maximize occupancy rates and profitability.
# Drop listings that have no recorded price.
syd_airbnb <- airbnb %>%
  filter(!is.na(price))

# Turn the listings into point features from their longitude/latitude columns
# (EPSG:4326).
airbnb_sf <- st_as_sf(
  syd_airbnb,
  coords = c("longitude", "latitude"),
  crs = 4326
)

# Keep only the listing id and its point geometry for the spatial join.
# A per-id st_union() + st_convex_hull() is deliberately avoided: with one
# point per id it adds no spatial information, yet it costs one geometry
# operation per listing and is very slow on the full table.
# NOTE(review): assumes `id` is unique per listing -- confirm against the CSV.
airbnb_polygon <- airbnb_sf %>%
  dplyr::select(id)

# Perform spatial join: reproject the listings to the CRS of the suburb
# polygons, then attach listing ids to every suburb polygon they intersect.
airbnb_polygon_tranformed <- st_transform(airbnb_polygon, crs = st_crs(syd))
airbnb_polygon_joined <- st_join(
  syd,
  airbnb_polygon_tranformed,
  join = st_intersects
)

# st_join() is a left join, so suburbs without any listing carry id = NA;
# remove those rows.
airbnb_with_suburbs <- airbnb_polygon_joined %>%
  filter(!is.na(id))

# Bring the listing attributes (including price) back in via the listing id.
airbnb_with_suburbs_with_price <- merge(
  syd_airbnb,
  airbnb_with_suburbs,
  by = "id"
)

# Calculate price statistics per suburb
price_stats <- airbnb_with_suburbs_with_price %>%
  group_by(LOC_NAME) %>%
  summarise(
    avg_price = mean(price, na.rm = TRUE),
    min_price = min(price, na.rm = TRUE),
    max_price = max(price, na.rm = TRUE),
    median_price = median(price, na.rm = TRUE)
  )
# Interactive box plots of listing price, with a dropdown to pick a suburb ----
# Traces are added in this order: one hidden box per suburb, then a final
# visible box trace built from the whole table ("All Suburbs").
suburbs <- unique(airbnb_with_suburbs_with_price$LOC_NAME)
n_suburbs <- length(suburbs)
all_trace_index <- n_suburbs + 1L

price_suburbs <- plot_ly()
for (suburb in suburbs) {
  suburb_rows <- filter(airbnb_with_suburbs_with_price, LOC_NAME == suburb)
  price_suburbs <- add_trace(
    price_suburbs,
    data = suburb_rows,
    x = ~LOC_NAME,
    y = ~price,
    type = "box",
    name = suburb,
    visible = FALSE
  )
}

# Last trace: every listing, shown by default.
price_suburbs <- add_trace(
  price_suburbs,
  data = airbnb_with_suburbs_with_price,
  x = ~LOC_NAME,
  y = ~price,
  type = "box",
  name = "All Suburbs",
  visible = TRUE
)

# Dropdown buttons: each one passes a logical visibility mask with exactly one
# TRUE (the trace to show) and updates the plot title accordingly.
trace_positions <- seq_len(all_trace_index)
buttons <- vector("list", all_trace_index)
buttons[[1]] <- list(
  label = "All Suburbs",
  method = "update",
  args = list(
    list(visible = trace_positions == all_trace_index),
    list(title = "Price Distribution: All Suburbs")
  )
)
for (i in seq_along(suburbs)) {
  buttons[[i + 1L]] <- list(
    label = suburbs[i],
    method = "update",
    args = list(
      list(visible = trace_positions == i),
      list(title = paste("Price Distribution:", suburbs[i]))
    )
  )
}

# Axis titles, initial plot title and the dropdown menu itself.
suburb_menu <- list(
  type = "dropdown",
  x = 0.1, y = 1.2,
  buttons = buttons
)
price_suburbs <- layout(
  price_suburbs,
  title = "Price Distribution Across Suburbs",
  xaxis = list(title = "Suburb", tickangle = 45),
  yaxis = list(title = "Price ($)"),
  updatemenus = list(suburb_menu)
)
price_suburbs
# Choropleth of the average listing price per suburb ----
# Mean price of the listings matched to each suburb name.
price_summary <- airbnb_with_suburbs_with_price %>%
  group_by(LOC_NAME) %>%
  summarise(avg_price = mean(price, na.rm = TRUE)) %>%
  filter(!is.na(avg_price))

# Attach the averages to the suburb polygons and keep only suburbs that have
# price data. The polygons are then reprojected to EPSG:4326 (WGS84 lon/lat):
# the shapefile is read in GDA94, and passing that to leaflet raises
# "sf layer has inconsistent datum ... Need '+proj=longlat +datum=WGS84'".
# NOTE(review): the join key is the suburb name; if LOC_NAME is not unique in
# the shapefile, same-named localities will share one average -- confirm.
syd_with_prices <- syd %>%
  left_join(price_summary, by = "LOC_NAME") %>%
  filter(!is.na(avg_price)) %>%
  st_transform(crs = 4326)

# Continuous colour scale spanning the observed average prices.
palette <- colorNumeric(
  palette = "YlOrRd",
  domain = syd_with_prices$avg_price
)

# Create the interactive choropleth map
price_suburbs_map <- leaflet(syd_with_prices) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~palette(avg_price),
    weight = 1,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    # Emphasise the polygon under the cursor with a thicker, solid outline.
    highlightOptions = highlightOptions(
      weight = 5,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    # Hover label: suburb name followed by its rounded average price.
    label = ~paste0(
      LOC_NAME, "\n ",
      ": $", round(avg_price, 2)
    ),
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    )
  ) %>%
  addLegend(
    pal = palette,
    values = ~avg_price,
    title = "Average Price ($)",
    position = "bottomright"
  )
price_suburbs_map