---
title: "Airbnb CDMX"
author: "Yahir"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
runtime: shiny
---
### Ingresos promedio por alcaldía (últimos 365 días)
# Mean of `estimated_revenue_l365d` for each alcaldía, ranked from highest to
# lowest.
df_ing <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_revenue = mean(estimated_revenue_l365d, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_revenue)) %>%
  # Freeze the ranked order as factor levels so the bars are drawn in it.
  mutate(
    neighbourhood_cleansed = factor(
      neighbourhood_cleansed,
      levels = neighbourhood_cleansed
    )
  )

# Bar chart; the bar colour encodes the same value as the bar height.
plot_ly(
  data = df_ing,
  x = ~neighbourhood_cleansed,
  y = ~avg_revenue,
  type = "bar",
  marker = list(color = ~avg_revenue, colorscale = "Viridis")
) %>%
  layout(
    title = "Ingresos Promedio por Alcaldía (últimos 365 días)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Ingreso promedio (MXN)")
  )
# Mean of `price` for every alcaldía / room-type pair, highest first.
df_prec <- airbnb %>%
  group_by(neighbourhood_cleansed, room_type) %>%
  summarise(
    avg_price = mean(price, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_price)) %>%
  # NOTE(review): `order_factor()` is not defined in the visible part of this
  # file; presumably it returns the alcaldía as a factor ordered by
  # `avg_price` -- confirm against its definition.
  mutate(
    neighbourhood_cleansed = order_factor(neighbourhood_cleansed, avg_price)
  )

# Grouped bars: one bar per room type within each alcaldía.
plot_ly(
  data = df_prec,
  x = ~neighbourhood_cleansed,
  y = ~avg_price,
  color = ~room_type,
  type = "bar"
) %>%
  layout(
    title = "Precio Promedio por Tipo",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Precio promedio (MXN)"),
    barmode = "group"
  )
# Rows usable for the box plot: both the price and the alcaldía are present.
df_box <- airbnb %>%
  filter(!is.na(price), !is.na(neighbourhood_cleansed))

# Alcaldías sorted by ascending median price.
ord_med <- df_box %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(med = median(price), .groups = "drop") %>%
  arrange(med) %>%
  pull(neighbourhood_cleansed)

# Apply the ordering to the local copy only. Re-levelling the shared `airbnb`
# data frame would turn every alcaldía without a priced listing into NA for
# all later chunks and would make later `sort(unique(...))` calls follow the
# median-price order instead of the alphabetical one.
df_box$neighbourhood_cleansed <- factor(
  df_box$neighbourhood_cleansed,
  levels = as.character(ord_med)
)

# One box per alcaldía, ordered left to right by median price.
plot_ly(
  df_box,
  x = ~neighbourhood_cleansed,
  y = ~price,
  type = "box"
) %>%
  layout(
    title = "Distribución de Precios por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Precio (MXN)")
  )
# Listings that have both a price and an estimated 365-day revenue.
df_sc1 <- airbnb %>%
  filter(!is.na(price), !is.na(estimated_revenue_l365d))

# Explicit palette for the alcaldía colours. Without `colors`, plotly falls
# back to the RColorBrewer "Set2" palette, which only has 8 colours and emits
# an "n too large" warning when there are more alcaldías than that.
n_alcaldias <- length(unique(df_sc1$neighbourhood_cleansed))
pal_alcaldias <- viridis::viridis(max(n_alcaldias, 2L))

# Scatter of price against revenue, one colour per alcaldía.
plot_ly(
  df_sc1,
  x = ~price,
  y = ~estimated_revenue_l365d,
  color = ~neighbourhood_cleansed,
  colors = pal_alcaldias,
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    title = "Precio vs Ingresos (últimos 365 días)",
    xaxis = list(title = "Precio (MXN)"),
    yaxis = list(title = "Ingresos (MXN)")
  )
### Tipos por alcaldía (barras apiladas)
# Number of listings per alcaldía and room type.
df_tz <- count(airbnb, neighbourhood_cleansed, room_type, name = "n")

# Alcaldías ranked by their total number of listings, largest first.
ord_tot <- df_tz %>%
  count(neighbourhood_cleansed, wt = n, name = "total") %>%
  arrange(desc(total)) %>%
  pull(neighbourhood_cleansed)

# Use that ranking as the factor level order for the x axis.
df_tz <- mutate(
  df_tz,
  neighbourhood_cleansed = factor(neighbourhood_cleansed, levels = ord_tot)
)

# Stacked bars: the room types pile up within each alcaldía.
plot_ly(
  data = df_tz,
  x = ~neighbourhood_cleansed,
  y = ~n,
  color = ~room_type,
  type = "bar"
) %>%
  layout(
    title = "Tipos de Alojamiento por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Cantidad"),
    barmode = "stack"
  )
# City-wide number of listings for each room type.
df_types <- count(airbnb, room_type, name = "n")

# Pie chart with one viridis colour per room type; labels and percentages are
# drawn inside the slices.
plot_ly(
  data = df_types,
  labels = ~room_type,
  values = ~n,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent",
  hoverinfo = "label+value+percent",
  marker = list(colors = viridis::viridis(nrow(df_types)))
) %>%
  layout(title = "Participación por Tipo (Total Ciudad)")
### Calificaciones promedio por alcaldía
# Mean of `review_scores_rating` per alcaldía, ranked from highest to lowest.
df_rate <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_rating = mean(review_scores_rating, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_rating))

# Freeze the ranked order as factor levels so the bars are drawn in it.
df_rate$neighbourhood_cleansed <- factor(
  df_rate$neighbourhood_cleansed,
  levels = df_rate$neighbourhood_cleansed
)

# "Inferno" is not one of the palette names plotly.js accepts for
# `colorscale`, so passing the string leaves the bars on the default scale.
# Spell the scale out as explicit (position, colour) stops instead.
rate_scale <- unname(Map(
  list,
  seq(0, 1, length.out = 11),
  substr(viridis::viridis(11, option = "inferno"), 1, 7) # drop alpha suffix
))

plot_ly(
  df_rate,
  x = ~neighbourhood_cleansed,
  y = ~avg_rating,
  type = "bar",
  marker = list(color = ~avg_rating, colorscale = rate_scale)
) %>%
  layout(
    title = "Calificaciones Promedio por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Puntuación promedio")
  )
# Mean of `review_scores_cleanliness` per alcaldía, highest first.
df_cln <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_clean = mean(review_scores_cleanliness, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_clean))

# Freeze the ranked order as factor levels so the bars are drawn in it.
df_cln$neighbourhood_cleansed <- factor(
  df_cln$neighbourhood_cleansed,
  levels = df_cln$neighbourhood_cleansed
)

# "Plasma" is not one of the palette names plotly.js accepts for
# `colorscale`, so passing the string leaves the bars on the default scale.
# Spell the scale out as explicit (position, colour) stops instead.
cln_scale <- unname(Map(
  list,
  seq(0, 1, length.out = 11),
  substr(viridis::viridis(11, option = "plasma"), 1, 7) # drop alpha suffix
))

plot_ly(
  df_cln,
  x = ~neighbourhood_cleansed,
  y = ~avg_clean,
  type = "bar",
  marker = list(color = ~avg_clean, colorscale = cln_scale)
) %>%
  layout(
    title = "Puntuación de Limpieza",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Puntuación")
  )
# Mean of `review_scores_value` per alcaldía, highest first.
df_val <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_value = mean(review_scores_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_value))

# Freeze the ranked order as factor levels so the bars are drawn in it.
df_val$neighbourhood_cleansed <- factor(
  df_val$neighbourhood_cleansed,
  levels = df_val$neighbourhood_cleansed
)

# "Turbo" is not one of the palette names plotly.js accepts for `colorscale`,
# so passing the string leaves the bars on the default scale. Spell the scale
# out as explicit (position, colour) stops instead.
val_scale <- unname(Map(
  list,
  seq(0, 1, length.out = 11),
  substr(viridis::viridis(11, option = "turbo"), 1, 7) # drop alpha suffix
))

plot_ly(
  df_val,
  x = ~neighbourhood_cleansed,
  y = ~avg_value,
  type = "bar",
  marker = list(color = ~avg_value, colorscale = val_scale)
) %>%
  layout(
    title = "Relación Calidad–Precio",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Puntuación")
  )
### Evolución de reseñas mensuales
# Total `number_of_reviews`, bucketed by the calendar month of each listing's
# first review.
df_rev <- airbnb %>%
  group_by(month = lubridate::floor_date(first_review, unit = "month")) %>%
  summarise(
    total_reviews = sum(number_of_reviews, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(month)

# Time series drawn as a line with a marker on every month.
plot_ly(
  data = df_rev,
  x = ~month,
  y = ~total_reviews,
  type = "scatter",
  mode = "lines+markers"
) %>%
  layout(
    title = "Evolución de Reseñas Mensuales",
    xaxis = list(title = "Fecha"),
    yaxis = list(title = "Número de reseñas")
  )
# Mean reviews per month per alcaldía; a missing `reviews_per_month` is
# counted as zero rather than dropped.
df_pop <- airbnb %>%
  mutate(rpm = ifelse(is.na(reviews_per_month), 0, reviews_per_month)) %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_reviews_per_month = mean(rpm, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_reviews_per_month))

# Freeze the ranked order as factor levels so the bars are drawn in it.
df_pop$neighbourhood_cleansed <- factor(
  df_pop$neighbourhood_cleansed,
  levels = df_pop$neighbourhood_cleansed
)

# "Turbo" is not one of the palette names plotly.js accepts for `colorscale`,
# so passing the string leaves the bars on the default scale. Spell the scale
# out as explicit (position, colour) stops instead.
pop_scale <- unname(Map(
  list,
  seq(0, 1, length.out = 11),
  substr(viridis::viridis(11, option = "turbo"), 1, 7) # drop alpha suffix
))

plot_ly(
  df_pop,
  x = ~neighbourhood_cleansed,
  y = ~avg_reviews_per_month,
  type = "bar",
  marker = list(color = ~avg_reviews_per_month, colorscale = pop_scale)
) %>%
  layout(
    title = "Reseñas por Mes (Promedio) por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Reseñas por mes (promedio)")
  )
### Porcentaje de Superhosts por alcaldía
# Mean of `host_is_superhost` per alcaldía, expressed as a percentage and
# ranked from highest to lowest.
# NOTE(review): this assumes `host_is_superhost` is logical or 0/1 -- confirm
# how the column is loaded.
df_sh <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    percentage = mean(host_is_superhost, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(percentage)) %>%
  # Freeze the ranked order as factor levels so the bars are drawn in it.
  mutate(
    neighbourhood_cleansed = factor(
      neighbourhood_cleansed,
      levels = neighbourhood_cleansed
    )
  )

plot_ly(
  data = df_sh,
  x = ~neighbourhood_cleansed,
  y = ~percentage,
  type = "bar",
  marker = list(color = ~percentage, colorscale = "Viridis")
) %>%
  layout(
    title = "Superhosts por Alcaldía (%)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Porcentaje (%)")
  )
# Mean of `instant_bookable` per alcaldía, expressed as a percentage and
# ranked from highest to lowest.
# NOTE(review): this assumes `instant_bookable` is logical or 0/1 -- confirm
# how the column is loaded.
df_ib <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    percentage = mean(instant_bookable, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(percentage)) %>%
  # Freeze the ranked order as factor levels so the bars are drawn in it.
  mutate(
    neighbourhood_cleansed = factor(
      neighbourhood_cleansed,
      levels = neighbourhood_cleansed
    )
  )

plot_ly(
  data = df_ib,
  x = ~neighbourhood_cleansed,
  y = ~percentage,
  type = "bar",
  marker = list(color = ~percentage, colorscale = "Cividis")
) %>%
  layout(
    title = "Reservas Instantáneas (%)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Porcentaje (%)")
  )
### Capacidad promedio por alcaldía
# Mean of `accommodates` per alcaldía, ranked from highest to lowest.
df_cap <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_capacity = mean(accommodates, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_capacity))

# Freeze the ranked order as factor levels so the bars are drawn in it.
df_cap$neighbourhood_cleansed <- factor(
  df_cap$neighbourhood_cleansed,
  levels = df_cap$neighbourhood_cleansed
)

# "Plasma" is not one of the palette names plotly.js accepts for
# `colorscale`, so passing the string leaves the bars on the default scale.
# Spell the scale out as explicit (position, colour) stops instead.
cap_scale <- unname(Map(
  list,
  seq(0, 1, length.out = 11),
  substr(viridis::viridis(11, option = "plasma"), 1, 7) # drop alpha suffix
))

plot_ly(
  df_cap,
  x = ~neighbourhood_cleansed,
  y = ~avg_capacity,
  type = "bar",
  marker = list(color = ~avg_capacity, colorscale = cap_scale)
) %>%
  layout(
    title = "Capacidad Promedio por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Capacidad (personas)")
  )
# Mean of `host_listings_count` per alcaldía, ranked from highest to lowest.
df_list <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_listings = mean(host_listings_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_listings))

# Freeze the ranked order as factor levels so the bars are drawn in it.
df_list$neighbourhood_cleansed <- factor(
  df_list$neighbourhood_cleansed,
  levels = df_list$neighbourhood_cleansed
)

# "Magma" is not one of the palette names plotly.js accepts for `colorscale`,
# so passing the string leaves the bars on the default scale. Spell the scale
# out as explicit (position, colour) stops instead.
list_scale <- unname(Map(
  list,
  seq(0, 1, length.out = 11),
  substr(viridis::viridis(11, option = "magma"), 1, 7) # drop alpha suffix
))

plot_ly(
  df_list,
  x = ~neighbourhood_cleansed,
  y = ~avg_listings,
  type = "bar",
  marker = list(color = ~avg_listings, colorscale = list_scale)
) %>%
  layout(
    title = "Alojamientos por Anfitrión (Promedio)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Alojamientos")
  )
### Mapa de calor: densidad de alojamientos
# Unweighted density heatmap of listing coordinates. Prints a message instead
# of a map when the coordinate columns are absent or entirely missing.
if (all(c("latitude", "longitude") %in% names(airbnb))) {
  df_map1 <- dplyr::filter(airbnb, !is.na(latitude), !is.na(longitude))
  if (nrow(df_map1) > 0) {
    # Bounding box of the plotted points, used to frame the map.
    lng_rng <- range(df_map1[["longitude"]], na.rm = TRUE)
    lat_rng <- range(df_map1[["latitude"]], na.rm = TRUE)

    m <- leaflet::leaflet(df_map1)
    m <- leaflet::addProviderTiles(m, leaflet::providers$CartoDB.Positron)
    m <- leaflet.extras::addHeatmap(
      m,
      lng = ~longitude, lat = ~latitude,
      blur = 20, max = 0.05, radius = 15
    )

    # Fit to the bounding box when leaflet exports fitBounds(); otherwise
    # centre on the middle of the box at a fixed zoom.
    m <- if ("fitBounds" %in% getNamespaceExports("leaflet")) {
      leaflet::fitBounds(
        m,
        lng1 = lng_rng[1], lat1 = lat_rng[1],
        lng2 = lng_rng[2], lat2 = lat_rng[2]
      )
    } else {
      leaflet::setView(m, lng = mean(lng_rng), lat = mean(lat_rng), zoom = 11)
    }
    m
  } else {
    cat("No hay coordenadas para mostrar.")
  }
} else {
  cat("No existen columnas latitude/longitude en el dataset.")
}
# Column check, then an unweighted density heatmap of listing coordinates.
# NOTE(review): this chunk builds the same map as the density-heatmap chunk
# directly above it in the file -- confirm that both panels are intended.
if (all(c("latitude", "longitude") %in% names(airbnb))) {
  df_map1 <- dplyr::filter(airbnb, !is.na(latitude), !is.na(longitude))
  if (nrow(df_map1) > 0) {
    lng_rng <- range(df_map1[["longitude"]], na.rm = TRUE)
    lat_rng <- range(df_map1[["latitude"]], na.rm = TRUE)

    m <- leaflet.extras::addHeatmap(
      leaflet::addProviderTiles(
        leaflet::leaflet(df_map1),
        leaflet::providers$CartoDB.Positron
      ),
      lng = ~longitude, lat = ~latitude,
      blur = 20, max = 0.05, radius = 15
    )

    # Use fitBounds() if leaflet exports it; otherwise fall back to setView()
    # on the centre of the bounding box.
    can_fit <- "fitBounds" %in% getNamespaceExports("leaflet")
    if (can_fit) {
      m <- leaflet::fitBounds(
        m,
        lng1 = lng_rng[1], lat1 = lat_rng[1],
        lng2 = lng_rng[2], lat2 = lat_rng[2]
      )
    } else {
      m <- leaflet::setView(
        m,
        lng = mean(lng_rng), lat = mean(lat_rng), zoom = 11
      )
    }
    m
  } else {
    cat("No hay coordenadas para mostrar.")
  }
} else {
  cat("No existen columnas latitude/longitude en el dataset.")
}
# Heatmap of listing coordinates weighted by price. Prints a message instead
# of a map when the required columns are absent or no row is complete.
if (all(c("latitude", "longitude", "price") %in% names(airbnb))) {
  df_map2 <- dplyr::filter(
    airbnb,
    !is.na(latitude), !is.na(longitude), !is.na(price)
  )
  if (nrow(df_map2) > 0) {
    # Min-max rescale the price into a weight between 1 and 5. If the price
    # range is degenerate or not finite, every point gets weight 1.
    rng <- range(df_map2$price, na.rm = TRUE)
    w <- if (is.finite(rng[1]) && is.finite(rng[2]) && (rng[2] - rng[1]) > 0) {
      1 + 4 * (df_map2$price - rng[1]) / (rng[2] - rng[1])
    } else {
      rep(1, nrow(df_map2))
    }
    df_map2$w <- pmax(1, pmin(5, w))

    # Bounding box of the plotted points, used to frame the map.
    lng_rng <- range(df_map2[["longitude"]], na.rm = TRUE)
    lat_rng <- range(df_map2[["latitude"]], na.rm = TRUE)

    m <- leaflet::leaflet(df_map2)
    m <- leaflet::addProviderTiles(m, leaflet::providers$CartoDB.Positron)
    m <- leaflet.extras::addHeatmap(
      m,
      lng = ~longitude, lat = ~latitude, intensity = ~w,
      blur = 25, max = 1, radius = 18
    )

    # Fit to the bounding box when leaflet exports fitBounds(); otherwise
    # centre on the middle of the box at a fixed zoom.
    m <- if ("fitBounds" %in% getNamespaceExports("leaflet")) {
      leaflet::fitBounds(
        m,
        lng1 = lng_rng[1], lat1 = lat_rng[1],
        lng2 = lng_rng[2], lat2 = lat_rng[2]
      )
    } else {
      leaflet::setView(m, lng = mean(lng_rng), lat = mean(lat_rng), zoom = 11)
    }
    m
  } else {
    cat("No hay datos suficientes para el mapa ponderado por precio.")
  }
} else {
  cat("Faltan columnas latitude/longitude/price.")
}
### Histograma de precios
# Price histogram (40 bins) built with ggplot2 and converted to an interactive
# plotly widget; prints a message when no price is available.
df_hist <- dplyr::filter(airbnb, !is.na(price))
if (nrow(df_hist) > 0) {
  g <- ggplot(data = df_hist, mapping = aes(x = price)) +
    geom_histogram(bins = 40) +
    labs(
      title = "Distribución de Precios",
      x = "Precio (MXN)",
      y = "Frecuencia"
    ) +
    theme_bw()
  ggplotly(g)
} else {
  cat("No hay precios para graficar.")
}
# Scatter of price against capacity; the hover text shows the alcaldía.
# Prints a message when the columns are absent or no row is complete.
if (all(c("price", "accommodates") %in% names(airbnb))) {
  df_sc2 <- dplyr::filter(airbnb, !is.na(price), !is.na(accommodates))
  if (nrow(df_sc2) > 0) {
    plot_ly(
      data = df_sc2,
      x = ~accommodates,
      y = ~price,
      type = "scatter",
      mode = "markers",
      text = ~neighbourhood_cleansed,
      hoverinfo = "text+x+y"
    ) %>%
      layout(
        title = "Precio vs Capacidad",
        xaxis = list(title = "Capacidad (personas)"),
        yaxis = list(title = "Precio (MXN)")
      )
  } else {
    cat("No hay datos suficientes para dispersión precio vs capacidad.")
  }
} else {
  cat("Faltan columnas price/accommodates.")
}
# Scatter of price against the overall review score; the hover text shows the
# alcaldía. Prints a message when the columns are absent or no row is complete.
# NOTE(review): the x-axis label states a 0-100 scale -- confirm it matches
# the scale of `review_scores_rating` in this dataset.
if (all(c("price", "review_scores_rating") %in% names(airbnb))) {
  df_sc3 <- dplyr::filter(airbnb, !is.na(price), !is.na(review_scores_rating))
  if (nrow(df_sc3) > 0) {
    plot_ly(
      data = df_sc3,
      x = ~review_scores_rating,
      y = ~price,
      type = "scatter",
      mode = "markers",
      text = ~neighbourhood_cleansed,
      hoverinfo = "text+x+y"
    ) %>%
      layout(
        title = "Precio vs Calificación",
        xaxis = list(title = "Calificación (0–100)"),
        yaxis = list(title = "Precio (MXN)")
      )
  } else {
    cat("No hay datos suficientes para dispersión precio vs calificación.")
  }
} else {
  cat("Faltan columnas price/review_scores_rating.")
}
# Scatter of price against capacity, with the alcaldía as hover text.
# NOTE(review): an identical price-vs-capacity chunk appears earlier in the
# file -- confirm that both panels are intended.
if (all(c("price", "accommodates") %in% names(airbnb))) {
  df_sc2 <- airbnb %>%
    dplyr::filter(!is.na(price)) %>%
    dplyr::filter(!is.na(accommodates))
  if (nrow(df_sc2) > 0) {
    layout(
      plot_ly(
        df_sc2,
        x = ~accommodates, y = ~price,
        type = "scatter", mode = "markers",
        text = ~neighbourhood_cleansed, hoverinfo = "text+x+y"
      ),
      title = "Precio vs Capacidad",
      xaxis = list(title = "Capacidad (personas)"),
      yaxis = list(title = "Precio (MXN)")
    )
  } else {
    cat("No hay datos suficientes para dispersión precio vs capacidad.")
  }
} else {
  cat("Faltan columnas price/accommodates.")
}
# Scatter of price against the overall review score, with the alcaldía as
# hover text.
# NOTE(review): an identical price-vs-rating chunk appears earlier in the
# file -- confirm that both panels are intended.
if (all(c("price", "review_scores_rating") %in% names(airbnb))) {
  df_sc3 <- airbnb %>%
    dplyr::filter(!is.na(price)) %>%
    dplyr::filter(!is.na(review_scores_rating))
  if (nrow(df_sc3) > 0) {
    layout(
      plot_ly(
        df_sc3,
        x = ~review_scores_rating, y = ~price,
        type = "scatter", mode = "markers",
        text = ~neighbourhood_cleansed, hoverinfo = "text+x+y"
      ),
      title = "Precio vs Calificación",
      xaxis = list(title = "Calificación (0–100)"),
      yaxis = list(title = "Precio (MXN)")
    )
  } else {
    cat("No hay datos suficientes para dispersión precio vs calificación.")
  }
} else {
  cat("Faltan columnas price/review_scores_rating.")
}
### Tabla interactiva de listados
# Columns to show in the listing table, in display order; any column that the
# dataset does not have is skipped.
cols_preferidas <- c(
  "neighbourhood_cleansed", "room_type", "price", "accommodates",
  "review_scores_rating", "reviews_per_month",
  "host_is_superhost", "instant_bookable",
  "estimated_revenue_l365d", "first_review", "last_review"
)
cols <- cols_preferidas[cols_preferidas %in% names(airbnb)]
df_tabla <- airbnb[cols]

# DT table with per-column filters and copy / CSV / Excel export buttons.
tabla <- DT::datatable(
  df_tabla,
  class = "cell-border stripe",
  rownames = FALSE,
  filter = "top",
  extensions = "Buttons",
  options = list(
    pageLength = 15,
    scrollX = TRUE,
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel")
  )
)

# Format whichever money columns are present as MXN with no decimals.
for (col_moneda in c("price", "estimated_revenue_l365d")) {
  if (col_moneda %in% names(df_tabla)) {
    tabla <- DT::formatCurrency(
      tabla, col_moneda,
      currency = "MXN", mark = ",", digits = 0
    )
  }
}
tabla
# Choices for the filter widgets. The alcaldía column is coerced to character
# first so the list is alphabetical even if the column is a factor with
# custom level order.
alcaldias <- sort(unique(as.character(airbnb$neighbourhood_cleansed)))
tipos <- sort(unique(airbnb$room_type))

# Slider bounds come from the finite prices only. With no finite price,
# min()/max() would return Inf/-Inf and sliderInput() cannot use those, so a
# small placeholder range is used instead.
precios_validos <- airbnb$price[is.finite(airbnb$price)]
if (length(precios_validos) > 0) {
  min_p <- floor(min(precios_validos))
  max_p <- ceiling(max(precios_validos))
} else {
  min_p <- 0
  max_p <- 1
}

# Multi-select filters start with every option selected.
shiny::selectInput(
  "f_alcaldia", "Alcaldía(s):",
  choices = alcaldias, selected = alcaldias, multiple = TRUE
)
shiny::selectInput(
  "f_tipo", "Tipo(s) de alojamiento:",
  choices = tipos, selected = tipos, multiple = TRUE
)
# Price-range slider with about 100 steps across the range (step at least 1).
shiny::sliderInput(
  "f_precio", "Rango de precio (MXN):",
  min = min_p, max = max_p, value = c(min_p, max_p),
  step = max(1, round((max_p - min_p) / 100))
)