# Load the Austin Airbnb listings from a comma-delimited file in the
# working directory; column types are guessed by the reader.
airbnb <- read_delim(
  file = "./airbnb_austin.csv",
  delim = ","
)
## Rows: 15244 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (3): name, host_name, room_type
## dbl (12): id, host_id, neighbourhood, latitude, longitude, price, minimum_n...
## lgl (2): neighbourhood_group, license
## date (1): last_review
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy so the raw `airbnb` table stays untouched for later steps.
temp_airbnb <- airbnb

# Derive price_per_night as price divided by the minimum stay length.
# NOTE(review): if `price` is already a nightly rate, this division
# understates it for listings with long minimum stays -- confirm against
# the data dictionary.
df <- mutate(temp_airbnb, price_per_night = price / minimum_nights)
# Scatter plot of the derived per-night price against the minimum stay.
# Semi-transparent points (alpha = 0.5) keep overlapping listings visible.
ggplot(df, aes(x = minimum_nights, y = price_per_night)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Price per Night vs. Minimum Nights",
    x = "Minimum Nights",
    # Name the variable actually plotted (price_per_night) rather than the
    # raw price column, so the axis is not misread.
    y = "Price per Night (USD)"
  ) +
  theme_minimal()
## Warning: Removed 4061 rows containing missing values (geom_point).
# Pearson correlation between the derived per-night price and the minimum
# stay; rows with a missing value in either column are dropped first.
cor_price_minnights <- with(
  df,
  cor(price_per_night, minimum_nights, use = "complete.obs")
)
cat(
  "Correlation (Price per Night vs. Minimum Nights):",
  round(cor_price_minnights, 2)
)
## Correlation (Price per Night vs. Minimum Nights): -0.08
The correlation between price_per_night and minimum_nights (r = -0.08)
suggests a very weak negative relationship, meaning there is no strong
linear association between a listing's price per night and the minimum
number of nights required.
# Bootstrap statistic: mean of one resample, ignoring missing values.
#
# df      - the data being resampled; it is indexed with a single
#           subscript, so a plain vector is expected despite the name.
# indices - positions selected for this resample.
#
# Returns the mean of the selected elements with NAs removed.
mean_fun <- function(df, indices) {
  resampled <- df[indices]
  mean(resampled, na.rm = TRUE)
}
# Fix the RNG seed so the resampling is reproducible.
set.seed(123)

# Resample price_per_night 1000 times, recording the mean of each resample.
# Missing values are resampled too and dropped inside mean_fun.
boot_price_per_night <- boot(
  data = df$price_per_night,
  statistic = mean_fun,
  R = 1000
)

# Basic bootstrap interval; elements 4 and 5 of `$basic` are the lower and
# upper limits.
ci_price_per_night <- boot.ci(
  boot_price_per_night,
  conf = 0.95,
  type = "basic"
)
cat(
  "95% Bootstrap Confidence Interval for Price (USD):",
  ci_price_per_night$basic[c(4, 5)]
)
## 95% Bootstrap Confidence Interval for Price (USD): 164.6281 192.6599
# Histogram of the bootstrap means with the basic CI limits marked by
# dashed red lines.
# The replicates (`$t`, a one-column matrix) are flattened into a named
# column so aes() maps a variable of the plot data instead of reaching
# back into the boot object with `$`.
ggplot(
  data.frame(mean_price = as.numeric(boot_price_per_night$t)),
  aes(x = mean_price)
) +
  geom_histogram(binwidth = 5, fill = "skyblue", color = "black") +
  geom_vline(
    xintercept = ci_price_per_night$basic[4:5],
    linetype = "dashed",
    color = "red"
  ) +
  labs(
    title = "Bootstrap Distribution of Mean Price Per Night",
    x = "Mean Price per Night (USD)",
    y = "Frequency"
  ) +
  theme_minimal()
The 95% bootstrap confidence interval for the
mean price per night
runs from 164.6281 to
192.6599 USD. This indicates the range within which the
true average price of Airbnb listings is likely to fall. The interval
is only about $28 wide, which means the average is estimated fairly
precisely; note that it describes uncertainty about the mean, not the
spread of individual listing prices, so on its own it does not show that
most listings are priced similarly. If you’re an Airbnb host,
you might use this range as a benchmark for the typical price when
setting your own, to stay competitive.
# Bucket each listing's availability_365 into three ordered bands:
# [0, 120] -> "low", (120, 240] -> "medium", (240, 365] -> "high".
# include.lowest = TRUE closes the first interval on the left; without it
# cut() uses (0, 120] and listings with availability_365 == 0 become NA.
df <- temp_airbnb |>
  mutate(
    availability_category = cut(
      availability_365,
      breaks = c(0, 120, 240, 365),
      labels = c("low", "medium", "high"),
      include.lowest = TRUE
    )
  )
# Reviews per month (x) plotted against the availability band (y).
# Semi-transparent points (alpha = 0.5) keep overlapping listings visible.
ggplot(df, aes(x = reviews_per_month, y = availability_category)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Availability Category vs. Reviews per Month",
    # Axis labels follow the aes() mapping above; they were previously
    # swapped, so each axis was named for the other variable.
    x = "Reviews per Month",
    y = "Availability Category"
  ) +
  theme_minimal()
## Warning: Removed 3060 rows containing missing values (geom_point).
# Pearson correlation between availability_365 and reviews_per_month;
# rows with a missing value in either column are dropped first.
cor_availability_reviews <- with(
  df,
  cor(availability_365, reviews_per_month, use = "complete.obs")
)
cat(
  "Correlation (Availability 365 vs. Reviews per Month):",
  round(cor_availability_reviews, 2)
)
## Correlation (Availability 365 vs. Reviews per Month): 0.18
The correlation
between availability_365
and reviews_per_month
suggests a weak positive relationship, meaning listings with higher
availability tend to have slightly more reviews per month.
# Fix the RNG seed so the resampling is reproducible.
set.seed(123)

# Resample availability_365 1000 times, recording the mean of each resample.
boot_availability <- boot(
  data = df$availability_365,
  statistic = mean_fun,
  R = 1000
)

# Basic bootstrap interval; elements 4 and 5 of `$basic` are the lower and
# upper limits.
ci_availability <- boot.ci(
  boot_availability,
  conf = 0.95,
  type = "basic"
)
cat(
  "95% Bootstrap Confidence Interval for Availability 365 (Days):",
  ci_availability$basic[c(4, 5)]
)
## 95% Bootstrap Confidence Interval for Availability 365 (Days): 155.0955 159.3176
# Histogram of the bootstrap means with the basic CI limits marked by
# dashed red lines.
# The replicates (`$t`, a one-column matrix) are flattened into a named
# column so aes() maps a variable of the plot data instead of reaching
# back into the boot object with `$`.
ggplot(
  data.frame(mean_availability = as.numeric(boot_availability$t)),
  aes(x = mean_availability)
) +
  geom_histogram(binwidth = 2, fill = "lightgreen", color = "black") +
  geom_vline(
    xintercept = ci_availability$basic[4:5],
    linetype = "dashed",
    color = "red"
  ) +
  labs(
    title = "Bootstrap Distribution of Mean Availability",
    x = "Mean Availability (Days)",
    y = "Frequency"
  ) +
  theme_minimal()
The 95% bootstrap confidence interval for the
mean availability_365
runs from 155.0955 to
159.3176 days. This indicates the range within which the true
average availability of Airbnb listings is likely to fall. For a host,
this means the average listing is available for only about
155–159 days per year, which can help in planning
pricing strategy and revenue expectations.