# Load the Austin Airbnb listings export; column types are guessed by readr.
airbnb <- read_delim(
  file = "./airbnb_austin.csv",
  delim = ","
)
## Rows: 15244 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, host_name, room_type
## dbl (12): id, host_id, neighbourhood, latitude, longitude, price, minimum_n...
## lgl (2): neighbourhood_group, license
## date (1): last_review
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the columns needed for the price time series.
#
# De-duplicate on the *full* record before dropping the other columns.
# Calling `distinct()` after `select()` would also collapse different
# listings that merely share the same review date, price and room type,
# which biases the per-day average price computed downstream.
airbnb_ <- airbnb |>
  distinct() |>
  select(last_review, price, room_type)
# airbnb_
# Build a regular daily series of the mean listing price, keyed on the date
# of each listing's last review.
airbnb_ts <- airbnb_ |>
  # Listings without a review date cannot be placed on the time axis.
  filter(!is.na(last_review)) |>
  # One group per calendar day; the grouping column is created in place.
  group_by(date = as.Date(last_review)) |>
  summarise(avg_price = mean(price, na.rm = TRUE)) |>
  as_tsibble(index = date) |>
  # Make days without any review explicit (as NA rows) ...
  fill_gaps() |>
  # ... and interpolate linearly across them. `na.rm = FALSE` keeps the
  # vector length, so leading/trailing NAs are left as NA.
  mutate(avg_price = na.approx(avg_price, na.rm = FALSE))
# Same daily series as an xts object, with its single column named "airbnb"
# so that it can be referenced by name after fortifying.
airbnb_xts <- xts(
  x = airbnb_ts[["avg_price"]],
  order.by = airbnb_ts[["date"]]
) |>
  setNames("airbnb")
# Line chart of the (interpolated) daily average price.
ggplot(data = airbnb_ts, mapping = aes(x = date, y = avg_price)) +
  geom_line() +
  theme_hc()
## Warning: Removed 360 rows containing missing values or values outside the scale range
## (`geom_line()`).
# 30-day rolling mean of the daily average price.
#
# `fill` is the value used to pad the positions where a full 30-day window
# is not available. It must be `NA` so those positions are left out of the
# line; `fill = FALSE` pads them with FALSE (i.e. 0) and draws a spurious
# drop to zero at both ends of the series.
airbnb_xts |>
  rollapply(width = 30, FUN = \(x) mean(x, na.rm = TRUE), fill = NA) |>
  # Convert explicitly to a data frame with `Index` and `airbnb` columns.
  fortify.zoo() |>
  ggplot(mapping = aes(x = Index, y = airbnb)) +
  geom_line() +
  labs(title = "Airbnb Average Price Trends Over Time",
       subtitle = "30-Day Rolling Average of Listing Prices Based on Last Review Date") +
  theme_hc()
# Presentation version of the 30-day rolling average: labelled axes and
# month-year tick marks every four months.
airbnb_xts |>
  rollapply(
    width = 30,
    FUN = function(window) mean(window, na.rm = TRUE),
    fill = NA # incomplete windows are padded with NA
  ) |>
  fortify.zoo() |>
  ggplot(aes(x = Index, y = airbnb)) +
  geom_line() +
  labs(
    title = "Airbnb Average Price Trends Over Time",
    subtitle = "30-Day Rolling Average of Listing Prices Based on Last Review Date",
    x = "Date",
    y = "Average Price"
  ) +
  # Tick labels such as "Jan 2024".
  scale_x_date(date_breaks = "4 months", date_labels = "%b %Y") +
  theme_hc() +
  # Must come after theme_hc() so the rotation is not overwritten.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 360 rows containing missing values or values outside the scale range
## (`geom_line()`).
Prices peak during the summer months (June–August).
A dip in prices is visible from December to April.
# Make sure the series is in chronological order before modelling.
# (The former `rename(date = date)` renamed the column to itself and has
# been dropped as a no-op.)
airbnb_ts <- airbnb_ts |>
  arrange(date)
# Linear time-trend regression of the daily average price.
model_trend <- model(
  airbnb_ts,
  trend_model = TSLM(avg_price ~ trend())
)
# Classical decomposition (trend / seasonal / random) of the price series.
# NOTE(review): the seasonal period is whatever `as.ts()` infers from the
# tsibble's interval -- presumably weekly (7) for daily data, not yearly.
# Confirm this matches the seasonality discussed in the write-up.
airbnb_dc <- decompose(as.ts(airbnb_ts))
plot(airbnb_dc)
Price generally trends upwards, possibly due to increased demand or inflation.
The summer period shows massive price surges, which are likely due to holidays.
The residual spikes suggest the model doesn’t fully explain all price dynamics; this might need further investigation.
# Autocorrelation function of the daily average price.
autoplot(ACF(airbnb_ts, avg_price)) +
  labs(title = "ACF: Autocorrelation of Average Price")
# Partial autocorrelation function of the daily average price.
autoplot(PACF(airbnb_ts, avg_price)) +
  labs(title = "PACF: Partial Autocorrelation of Average Price")
The autocorrelations remain strong and statistically significant for many lags, indicating that past Airbnb prices have a significant influence on future prices.
Hosts adjust prices based on past performance.
The PACF has a significant spike at lag 1 and much weaker spikes beyond that, which suggests that the current price is mostly influenced by the immediately preceding value, with a diminishing effect from earlier lags.
There are strong seasonal patterns, with the summer period experiencing the highest prices.
Yesterday’s or last week’s price is the biggest influence on today’s price.
Hosts might adjust pricing seasonally, which can be influenced by events and seasons, e.g., Christmas or summer.