---
title: "Airbnb CDMX"
author: "Yahir"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
runtime: shiny
---

Column

Ingresos promedio por alcaldía

#  (últimos 365 días)
# Mean estimated revenue (last 365 days) per borough, highest first.
# The borough column is turned into a factor whose level order follows the
# sorted rows so the bars are drawn in that order.
df_ing <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    avg_revenue = mean(estimated_revenue_l365d, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_revenue)) %>%
  mutate(
    neighbourhood_cleansed = factor(neighbourhood_cleansed,
                                    levels = neighbourhood_cleansed)
  )

# Bar chart; bar colour encodes the same value as bar height.
df_ing %>%
  plot_ly(
    x = ~neighbourhood_cleansed,
    y = ~avg_revenue,
    type = "bar",
    marker = list(color = ~avg_revenue, colorscale = "Viridis")
  ) %>%
  layout(
    title = "Ingresos Promedio por Alcaldía (últimos 365 días)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Ingreso promedio (MXN)")
  )

Column

Precio promedio por tipo y alcaldía

# Mean price for every borough / room-type combination, highest first.
df_prec <- summarise(
  group_by(airbnb, neighbourhood_cleansed, room_type),
  avg_price = mean(price, na.rm = TRUE),
  .groups = "drop"
)
df_prec <- arrange(df_prec, desc(avg_price))

# NOTE(review): order_factor() is not defined in the visible part of this
# file; presumably it reorders the borough levels by avg_price — confirm.
df_prec <- mutate(
  df_prec,
  neighbourhood_cleansed = order_factor(neighbourhood_cleansed, avg_price)
)

# Grouped bars: one bar per room type within each borough.
df_prec %>%
  plot_ly(
    x = ~neighbourhood_cleansed,
    y = ~avg_price,
    color = ~room_type,
    type = "bar"
  ) %>%
  layout(
    title = "Precio Promedio por Tipo",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Precio promedio (MXN)"),
    barmode = "group"
  )

Column

Distribución de precios por alcaldía

# Borough names ordered by ascending median price (rows without a price or
# without a borough are ignored when computing the order).
ord_med <- airbnb %>%
  filter(!is.na(price), !is.na(neighbourhood_cleansed)) %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(med = median(price, na.rm = TRUE), .groups = "drop") %>%
  arrange(med) %>%
  pull(neighbourhood_cleansed)

# This overwrites the column on the shared `airbnb` data frame itself, so all
# code that runs afterwards sees a factor with these levels; boroughs that are
# absent from `ord_med` become NA.
airbnb[["neighbourhood_cleansed"]] <- factor(
  airbnb[["neighbourhood_cleansed"]],
  levels = ord_med
)

# One box per borough, in the median-price order fixed above.
airbnb %>%
  plot_ly(
    x = ~neighbourhood_cleansed,
    y = ~price,
    type = "box"
  ) %>%
  layout(
    title = "Distribución de Precios por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Precio (MXN)")
  )

Column

Relación entre precio e ingresos

# Listings that have both a price and an estimated revenue value.
df_sc1 <- airbnb %>% filter(!is.na(price), !is.na(estimated_revenue_l365d))

# Scatter of price against revenue, one colour per borough.
# An explicit palette is supplied because the default qualitative palette
# (RColorBrewer "Set2") has at most 8 colours and emits "n too large"
# warnings when there are more boroughs than that. max(1L, ...) keeps the
# palette non-empty even if the filtered data has no rows.
plot_ly(df_sc1,
        x = ~price,
        y = ~estimated_revenue_l365d,
        color = ~neighbourhood_cleansed,
        colors = viridis::viridis(
          max(1L, length(unique(df_sc1$neighbourhood_cleansed)))
        ),
        type = "scatter",
        mode = "markers") %>%
  layout(title = "Precio vs Ingresos (últimos 365 días)",
         xaxis = list(title = "Precio (MXN)"),
         yaxis = list(title = "Ingresos (MXN)"))
## Warning in RColorBrewer::brewer.pal(max(N, 3L), "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(max(N, 3L), "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Column

Tipos de Alojamiento

### Tipos por alcaldía (barras apiladas)
# Number of listings for each borough / room-type pair.
df_tz <- count(airbnb, neighbourhood_cleansed, room_type, name = "n")

# Boroughs ordered by their total number of listings, largest first.
ord_tot <- df_tz %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(total = sum(n), .groups = "drop") %>%
  arrange(desc(total)) %>%
  pull(neighbourhood_cleansed)

# Fix the x-axis order to the ranking computed above.
df_tz <- mutate(
  df_tz,
  neighbourhood_cleansed = factor(neighbourhood_cleansed, levels = ord_tot)
)

# Stacked bars: room types stacked within each borough.
df_tz %>%
  plot_ly(
    x = ~neighbourhood_cleansed,
    y = ~n,
    color = ~room_type,
    type = "bar"
  ) %>%
  layout(
    title = "Tipos de Alojamiento por Alcaldía",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Cantidad"),
    barmode = "stack"
  )

Column

Distribución de tipos (ciudad)

# City-wide number of listings per room type.
df_types <- count(airbnb, room_type, name = "n")

# Pie chart with one viridis colour per room type; labels and percentages are
# drawn inside the slices.
df_types %>%
  plot_ly(
    labels = ~room_type,
    values = ~n,
    type = "pie",
    textposition = "inside",
    textinfo = "label+percent",
    hoverinfo = "label+value+percent",
    marker = list(colors = viridis::viridis(nrow(df_types)))
  ) %>%
  layout(title = "Participación por Tipo (Total Ciudad)")

Column

Calificaciones

### Calificaciones promedio por alcaldía
# Mean overall review score per borough, highest first.
df_rate <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_rating = mean(review_scores_rating, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_rating))

# Freeze the sorted order as factor levels so the bars keep it.
df_rate$neighbourhood_cleansed <- factor(df_rate$neighbourhood_cleansed,
                                         levels = df_rate$neighbourhood_cleansed)

# "Inferno" is not one of plotly.js's built-in colorscale names (an unknown
# name is silently replaced by the default scale), so the scale is built
# explicitly as (position, colour) pairs from the viridis package.
inferno_hex <- substr(viridis::inferno(11), 1, 7)  # drop the alpha channel
inferno_scale <- lapply(seq_along(inferno_hex), function(i) {
  list((i - 1) / (length(inferno_hex) - 1), inferno_hex[i])
})

plot_ly(df_rate,
        x = ~neighbourhood_cleansed, y = ~avg_rating,
        type = "bar",
        marker = list(color = ~avg_rating, colorscale = inferno_scale)) %>%
  layout(title = "Calificaciones Promedio por Alcaldía",
         xaxis = list(title = "Alcaldía"),
         yaxis = list(title = "Puntuación promedio"))

Column

Limpieza por alcaldía

# Mean cleanliness review score per borough, highest first.
df_cln <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_clean = mean(review_scores_cleanliness, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_clean))

# Freeze the sorted order as factor levels so the bars keep it.
df_cln$neighbourhood_cleansed <- factor(df_cln$neighbourhood_cleansed,
                                        levels = df_cln$neighbourhood_cleansed)

# "Plasma" is not one of plotly.js's built-in colorscale names (an unknown
# name is silently replaced by the default scale), so the scale is built
# explicitly as (position, colour) pairs from the viridis package.
plasma_hex <- substr(viridis::plasma(11), 1, 7)  # drop the alpha channel
plasma_scale <- lapply(seq_along(plasma_hex), function(i) {
  list((i - 1) / (length(plasma_hex) - 1), plasma_hex[i])
})

plot_ly(df_cln,
        x = ~neighbourhood_cleansed, y = ~avg_clean,
        type = "bar",
        marker = list(color = ~avg_clean, colorscale = plasma_scale)) %>%
  layout(title = "Puntuación de Limpieza",
         xaxis = list(title = "Alcaldía"),
         yaxis = list(title = "Puntuación"))

Column

Relación calidad–precio (Value)

# Mean "value" review score per borough, highest first.
df_val <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_value = mean(review_scores_value, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_value))

# Freeze the sorted order as factor levels so the bars keep it.
df_val$neighbourhood_cleansed <- factor(df_val$neighbourhood_cleansed,
                                        levels = df_val$neighbourhood_cleansed)

# "Turbo" is not one of plotly.js's built-in colorscale names (an unknown
# name is silently replaced by the default scale), so the scale is built
# explicitly as (position, colour) pairs.
# NOTE(review): viridis::turbo() needs a reasonably recent viridis release —
# confirm the installed version provides it.
turbo_hex <- substr(viridis::turbo(11), 1, 7)  # drop the alpha channel
turbo_scale <- lapply(seq_along(turbo_hex), function(i) {
  list((i - 1) / (length(turbo_hex) - 1), turbo_hex[i])
})

plot_ly(df_val,
        x = ~neighbourhood_cleansed, y = ~avg_value,
        type = "bar",
        marker = list(color = ~avg_value, colorscale = turbo_scale)) %>%
  layout(title = "Relación Calidad–Precio",
         xaxis = list(title = "Alcaldía"),
         yaxis = list(title = "Puntuación"))

Column

Tendencias

### Evolución de reseñas mensuales
# Total number of reviews, bucketed by the calendar month of each listing's
# first review.
df_rev <- mutate(
  airbnb,
  month = lubridate::floor_date(first_review, "month")
)
df_rev <- summarise(
  group_by(df_rev, month),
  total_reviews = sum(number_of_reviews, na.rm = TRUE),
  .groups = "drop"
)
df_rev <- arrange(df_rev, month)

# Time series drawn as a line with point markers.
df_rev %>%
  plot_ly(
    x = ~month,
    y = ~total_reviews,
    type = "scatter",
    mode = "lines+markers"
  ) %>%
  layout(
    title = "Evolución de Reseñas Mensuales",
    xaxis = list(title = "Fecha"),
    yaxis = list(title = "Número de reseñas")
  )

Column

Reseñas por mes (promedio) por alcaldía

# Mean reviews-per-month per borough, highest first. Listings with a missing
# reviews_per_month are counted as 0 rather than dropped.
df_pop <- airbnb %>%
  mutate(rpm = ifelse(is.na(reviews_per_month), 0, reviews_per_month)) %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_reviews_per_month = mean(rpm, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_reviews_per_month))

# Freeze the sorted order as factor levels so the bars keep it.
df_pop$neighbourhood_cleansed <- factor(df_pop$neighbourhood_cleansed,
                                        levels = df_pop$neighbourhood_cleansed)

# "Turbo" is not one of plotly.js's built-in colorscale names (an unknown
# name is silently replaced by the default scale), so the scale is built
# explicitly as (position, colour) pairs.
# NOTE(review): viridis::turbo() needs a reasonably recent viridis release —
# confirm the installed version provides it.
turbo_hex <- substr(viridis::turbo(11), 1, 7)  # drop the alpha channel
turbo_scale <- lapply(seq_along(turbo_hex), function(i) {
  list((i - 1) / (length(turbo_hex) - 1), turbo_hex[i])
})

plot_ly(df_pop,
        x = ~neighbourhood_cleansed, y = ~avg_reviews_per_month,
        type = "bar",
        marker = list(color = ~avg_reviews_per_month, colorscale = turbo_scale)) %>%
  layout(title = "Reseñas por Mes (Promedio) por Alcaldía",
         xaxis = list(title = "Alcaldía"),
         yaxis = list(title = "Reseñas por mes (promedio)"))

Column

Anfitriones

### Porcentaje de Superhosts por alcaldía
# Share of superhost listings per borough, as a percentage, highest first.
# NOTE(review): mean() assumes host_is_superhost is logical or 0/1 — confirm
# it is not the raw "t"/"f" text.
df_sh <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    percentage = mean(host_is_superhost, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(percentage)) %>%
  mutate(
    neighbourhood_cleansed = factor(neighbourhood_cleansed,
                                    levels = neighbourhood_cleansed)
  )

# Bars in the sorted order; colour encodes the same percentage.
df_sh %>%
  plot_ly(
    x = ~neighbourhood_cleansed,
    y = ~percentage,
    type = "bar",
    marker = list(color = ~percentage, colorscale = "Viridis")
  ) %>%
  layout(
    title = "Superhosts por Alcaldía (%)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Porcentaje (%)")
  )

Column

Reservas instantáneas por alcaldía

# Share of instantly bookable listings per borough, as a percentage,
# highest first.
# NOTE(review): mean() assumes instant_bookable is logical or 0/1 — confirm.
df_ib <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(
    percentage = mean(instant_bookable, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(percentage)) %>%
  mutate(
    neighbourhood_cleansed = factor(neighbourhood_cleansed,
                                    levels = neighbourhood_cleansed)
  )

# Bars in the sorted order; colour encodes the same percentage.
df_ib %>%
  plot_ly(
    x = ~neighbourhood_cleansed,
    y = ~percentage,
    type = "bar",
    marker = list(color = ~percentage, colorscale = "Cividis")
  ) %>%
  layout(
    title = "Reservas Instantáneas (%)",
    xaxis = list(title = "Alcaldía"),
    yaxis = list(title = "Porcentaje (%)")
  )

Column

Capacidad

### Capacidad promedio por alcaldía
# Mean guest capacity (accommodates) per borough, highest first.
df_cap <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_capacity = mean(accommodates, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_capacity))

# Freeze the sorted order as factor levels so the bars keep it.
df_cap$neighbourhood_cleansed <- factor(df_cap$neighbourhood_cleansed,
                                        levels = df_cap$neighbourhood_cleansed)

# "Plasma" is not one of plotly.js's built-in colorscale names (an unknown
# name is silently replaced by the default scale), so the scale is built
# explicitly as (position, colour) pairs from the viridis package.
plasma_hex <- substr(viridis::plasma(11), 1, 7)  # drop the alpha channel
plasma_scale <- lapply(seq_along(plasma_hex), function(i) {
  list((i - 1) / (length(plasma_hex) - 1), plasma_hex[i])
})

plot_ly(df_cap,
        x = ~neighbourhood_cleansed, y = ~avg_capacity,
        type = "bar",
        marker = list(color = ~avg_capacity, colorscale = plasma_scale)) %>%
  layout(title = "Capacidad Promedio por Alcaldía",
         xaxis = list(title = "Alcaldía"),
         yaxis = list(title = "Capacidad (personas)"))

Column

Alojamientos por anfitrión (promedio)

# Mean host_listings_count per borough, highest first.
df_list <- airbnb %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_listings = mean(host_listings_count, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_listings))

# Freeze the sorted order as factor levels so the bars keep it.
df_list$neighbourhood_cleansed <- factor(df_list$neighbourhood_cleansed,
                                         levels = df_list$neighbourhood_cleansed)

# "Magma" is not one of plotly.js's built-in colorscale names (an unknown
# name is silently replaced by the default scale), so the scale is built
# explicitly as (position, colour) pairs from the viridis package.
magma_hex <- substr(viridis::magma(11), 1, 7)  # drop the alpha channel
magma_scale <- lapply(seq_along(magma_hex), function(i) {
  list((i - 1) / (length(magma_hex) - 1), magma_hex[i])
})

plot_ly(df_list,
        x = ~neighbourhood_cleansed, y = ~avg_listings,
        type = "bar",
        marker = list(color = ~avg_listings, colorscale = magma_scale)) %>%
  layout(title = "Alojamientos por Anfitrión (Promedio)",
         xaxis = list(title = "Alcaldía"),
         yaxis = list(title = "Alojamientos"))

Column

Mapas de Calor

### Mapa de calor: densidad de alojamientos
# Unweighted heat map of listing locations. Prints a message instead of a map
# when the coordinate columns are missing or no row has both coordinates.
if (all(c("latitude","longitude") %in% names(airbnb))) {
  df_map1 <- dplyr::filter(airbnb, !is.na(latitude), !is.na(longitude))
  if (nrow(df_map1) > 0) {
    # Bounding box of the available coordinates.
    lng_rng <- range(df_map1$longitude, na.rm = TRUE)
    lat_rng <- range(df_map1$latitude,  na.rm = TRUE)

    m <- leaflet::leaflet(df_map1)
    m <- leaflet::addProviderTiles(m, leaflet::providers$CartoDB.Positron)
    m <- leaflet.extras::addHeatmap(
      m,
      lng = ~longitude, lat = ~latitude,
      blur = 20, max = 0.05, radius = 15
    )

    # Fit the view to the bounding box when leaflet exports fitBounds;
    # otherwise centre on the middle of the box at a fixed zoom.
    if ("fitBounds" %in% getNamespaceExports("leaflet")) {
      m <- leaflet::fitBounds(
        m,
        lng1 = lng_rng[1], lat1 = lat_rng[1],
        lng2 = lng_rng[2], lat2 = lat_rng[2]
      )
    } else {
      m <- leaflet::setView(m, lng = mean(lng_rng), lat = mean(lat_rng), zoom = 11)
    }
    m
  } else {
    cat("No hay coordenadas para mostrar.")
  }
} else {
  cat("No existen columnas latitude/longitude en el dataset.")
}

Column

Mapa de calor: densidad de alojamientos

# Unweighted heat map of listing locations (same logic as the previous map
# chunk). The column check comes first; a message is printed instead of a map
# when the columns are missing or no row has both coordinates.
if (all(c("latitude","longitude") %in% names(airbnb))) {
  df_map1 <- dplyr::filter(airbnb, !is.na(latitude), !is.na(longitude))
  if (nrow(df_map1) > 0) {
    # Bounding box of the available coordinates.
    lng_rng <- range(df_map1$longitude, na.rm = TRUE)
    lat_rng <- range(df_map1$latitude,  na.rm = TRUE)

    m <- leaflet::leaflet(df_map1)
    m <- leaflet::addProviderTiles(m, leaflet::providers$CartoDB.Positron)
    m <- leaflet.extras::addHeatmap(
      m,
      lng = ~longitude, lat = ~latitude,
      blur = 20, max = 0.05, radius = 15
    )

    # Use fitBounds when leaflet exports it; otherwise fall back to setView
    # on the centre of the bounding box.
    if ("fitBounds" %in% getNamespaceExports("leaflet")) {
      m <- leaflet::fitBounds(
        m,
        lng1 = lng_rng[1], lat1 = lat_rng[1],
        lng2 = lng_rng[2], lat2 = lat_rng[2]
      )
    } else {
      m <- leaflet::setView(m, lng = mean(lng_rng), lat = mean(lat_rng), zoom = 11)
    }
    m
  } else {
    cat("No hay coordenadas para mostrar.")
  }
} else {
  cat("No existen columnas latitude/longitude en el dataset.")
}

Column

Mapa de calor: ponderado por precio

# Heat map of listing locations weighted by price. Prints a message instead
# of a map when required columns are missing or no complete rows exist.
if (all(c("latitude","longitude","price") %in% names(airbnb))) {
  df_map2 <- dplyr::filter(
    airbnb,
    !is.na(latitude), !is.na(longitude), !is.na(price)
  )

  if (nrow(df_map2) > 0) {
    # Rescale price linearly onto [1, 5]; if the price range is not finite or
    # has zero width, every listing gets weight 1.
    rng <- range(df_map2$price, na.rm = TRUE)
    if (all(is.finite(rng)) && diff(rng) > 0) {
      w <- 1 + 4 * (df_map2$price - rng[1]) / diff(rng)
    } else {
      w <- rep(1, nrow(df_map2))
    }
    # Clamp to [1, 5] as a final safeguard.
    df_map2$w <- pmax(1, pmin(5, w))

    # Bounding box of the available coordinates.
    lng_rng <- range(df_map2$longitude, na.rm = TRUE)
    lat_rng <- range(df_map2$latitude,  na.rm = TRUE)

    m <- leaflet::leaflet(df_map2)
    m <- leaflet::addProviderTiles(m, leaflet::providers$CartoDB.Positron)
    m <- leaflet.extras::addHeatmap(
      m,
      lng = ~longitude, lat = ~latitude, intensity = ~w,
      blur = 25, max = 1, radius = 18
    )

    # Fit the view to the bounding box when leaflet exports fitBounds;
    # otherwise centre on the middle of the box at a fixed zoom.
    if ("fitBounds" %in% getNamespaceExports("leaflet")) {
      m <- leaflet::fitBounds(
        m,
        lng1 = lng_rng[1], lat1 = lat_rng[1],
        lng2 = lng_rng[2], lat2 = lat_rng[2]
      )
    } else {
      m <- leaflet::setView(m, lng = mean(lng_rng), lat = mean(lat_rng), zoom = 11)
    }
    m
  } else {
    cat("No hay datos suficientes para el mapa ponderado por precio.")
  }
} else {
  cat("Faltan columnas latitude/longitude/price.")
}

Column

Histograma

### Histograma de precios
# Price histogram (40 bins) built with ggplot2 and converted to an
# interactive plotly widget. Prints a message when no prices are available.
df_hist <- dplyr::filter(airbnb, !is.na(price))
if (nrow(df_hist) > 0) {
  g <- ggplot(df_hist, aes(x = price))
  g <- g + geom_histogram(bins = 40)
  g <- g + labs(
    title = "Distribución de Precios",
    x = "Precio (MXN)",
    y = "Frecuencia"
  )
  g <- g + theme_bw()
  ggplotly(g)
} else {
  cat("No hay precios para graficar.")
}

Column

Dispersión: precio vs capacidad

# Scatter of price against guest capacity; the hover text shows the borough.
# Prints a message instead of a chart when columns or complete rows are
# missing.
if (all(c("price","accommodates") %in% names(airbnb))) {
  df_sc2 <- dplyr::filter(airbnb, !is.na(price), !is.na(accommodates))
  if (nrow(df_sc2) > 0) {
    df_sc2 %>%
      plot_ly(
        x = ~accommodates,
        y = ~price,
        type = "scatter",
        mode = "markers",
        text = ~neighbourhood_cleansed,
        hoverinfo = "text+x+y"
      ) %>%
      layout(
        title = "Precio vs Capacidad",
        xaxis = list(title = "Capacidad (personas)"),
        yaxis = list(title = "Precio (MXN)")
      )
  } else {
    cat("No hay datos suficientes para dispersión precio vs capacidad.")
  }
} else {
  cat("Faltan columnas price/accommodates.")
}

Column

Dispersión: precio vs calificación

# Scatter of price against overall review score; the hover text shows the
# borough. Prints a message instead of a chart when columns or complete rows
# are missing.
if (all(c("price","review_scores_rating") %in% names(airbnb))) {
  df_sc3 <- dplyr::filter(airbnb, !is.na(price), !is.na(review_scores_rating))
  if (nrow(df_sc3) > 0) {
    df_sc3 %>%
      plot_ly(
        x = ~review_scores_rating,
        y = ~price,
        type = "scatter",
        mode = "markers",
        text = ~neighbourhood_cleansed,
        hoverinfo = "text+x+y"
      ) %>%
      layout(
        title = "Precio vs Calificación",
        xaxis = list(title = "Calificación (0–100)"),
        yaxis = list(title = "Precio (MXN)")
      )
  } else {
    cat("No hay datos suficientes para dispersión precio vs calificación.")
  }
} else {
  cat("Faltan columnas price/review_scores_rating.")
}

Column

Dispersión: precio vs capacidad

# Scatter of price against guest capacity (repeats the earlier
# price-vs-capacity chunk); the hover text shows the borough. Prints a message
# instead of a chart when columns or complete rows are missing.
if (all(c("price","accommodates") %in% names(airbnb))) {
  df_sc2 <- dplyr::filter(airbnb, !is.na(price), !is.na(accommodates))
  if (nrow(df_sc2) > 0) {
    df_sc2 %>%
      plot_ly(
        x = ~accommodates,
        y = ~price,
        type = "scatter",
        mode = "markers",
        text = ~neighbourhood_cleansed,
        hoverinfo = "text+x+y"
      ) %>%
      layout(
        title = "Precio vs Capacidad",
        xaxis = list(title = "Capacidad (personas)"),
        yaxis = list(title = "Precio (MXN)")
      )
  } else {
    cat("No hay datos suficientes para dispersión precio vs capacidad.")
  }
} else {
  cat("Faltan columnas price/accommodates.")
}

Column

Dispersión: precio vs calificación

# Scatter of price against overall review score (repeats the earlier
# price-vs-rating chunk); the hover text shows the borough. Prints a message
# instead of a chart when columns or complete rows are missing.
if (all(c("price","review_scores_rating") %in% names(airbnb))) {
  df_sc3 <- dplyr::filter(airbnb, !is.na(price), !is.na(review_scores_rating))
  if (nrow(df_sc3) > 0) {
    df_sc3 %>%
      plot_ly(
        x = ~review_scores_rating,
        y = ~price,
        type = "scatter",
        mode = "markers",
        text = ~neighbourhood_cleansed,
        hoverinfo = "text+x+y"
      ) %>%
      layout(
        title = "Precio vs Calificación",
        xaxis = list(title = "Calificación (0–100)"),
        yaxis = list(title = "Precio (MXN)")
      )
  } else {
    cat("No hay datos suficientes para dispersión precio vs calificación.")
  }
} else {
  cat("Faltan columnas price/review_scores_rating.")
}

Column

Tabla interactiva de listados

### Tabla interactiva de listados
# Columns to show in the table, in display order; only those actually present
# in `airbnb` are kept.
cols_preferidas <- c(
  "neighbourhood_cleansed", "room_type", "price", "accommodates",
  "review_scores_rating", "reviews_per_month",
  "host_is_superhost", "instant_bookable",
  "estimated_revenue_l365d", "first_review", "last_review"
)
cols <- intersect(cols_preferidas, names(airbnb))
df_tabla <- airbnb[, cols, drop = FALSE]

# DT table with per-column filters and copy/CSV/Excel export buttons.
tabla <- DT::datatable(
  df_tabla,
  options = list(
    pageLength = 15,
    scrollX = TRUE,
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel")
  ),
  filter = "top",
  rownames = FALSE,
  extensions = "Buttons",
  class = "cell-border stripe"
)

# Currency formatting is applied only to the money columns that exist.
if ("price" %in% names(df_tabla)) {
  tabla <- DT::formatCurrency(tabla, "price",
                              currency = "MXN", mark = ",", digits = 0)
}
if ("estimated_revenue_l365d" %in% names(df_tabla)) {
  tabla <- DT::formatCurrency(tabla, "estimated_revenue_l365d",
                              currency = "MXN", mark = ",", digits = 0)
}
tabla

Column

Filtros

# Choices for the filter widgets: every borough and room type in the data.
alcaldias <- sort(unique(airbnb$neighbourhood_cleansed))
tipos     <- sort(unique(airbnb$room_type))

# Slider bounds come from the finite prices only. When there are none,
# min()/max() would return Inf/-Inf (previously hidden by suppressWarnings)
# and those would reach sliderInput(); fall back to 0..0 instead.
precios_validos <- airbnb$price[is.finite(airbnb$price)]
if (length(precios_validos) > 0) {
  min_p <- floor(min(precios_validos))
  max_p <- ceiling(max(precios_validos))
} else {
  min_p <- 0
  max_p <- 0
}

# Multi-select inputs start with everything selected; the price slider starts
# at the full range and uses roughly 100 steps (never smaller than 1).
shiny::selectInput("f_alcaldia", "Alcaldía(s):",
                   choices = alcaldias, selected = alcaldias, multiple = TRUE)
shiny::selectInput("f_tipo", "Tipo(s) de alojamiento:",
                   choices = tipos, selected = tipos, multiple = TRUE)
shiny::sliderInput("f_precio", "Rango de precio (MXN):",
                   min = min_p, max = max_p, value = c(min_p, max_p),
                   step = max(1, round((max_p - min_p) / 100)))

Column

Segmentos de precio (Económico / Medio / Premium / Lujo)

# Segment labels and the colour (CSS colour name) tied to each one. Keeping
# the mapping by label means a segment keeps its colour no matter how the
# counts rank, and "lightgray" is used only for the fallback "Sin dato".
seg_economico <- "Económico (< $500)"
seg_medio     <- "Medio ($500–$1,499)"
seg_premium   <- "Premium ($1,500–$2,999)"
seg_lujo      <- "Lujo (≥ $3,000)"
seg_sin_dato  <- "Sin dato"
color_por_segmento <- stats::setNames(
  c("royalblue", "mediumseagreen", "tomato", "gold", "lightgray"),
  c(seg_economico, seg_medio, seg_premium, seg_lujo, seg_sin_dato)
)

# Listings per price segment, most common segment first.
df_donut <- airbnb %>%
  dplyr::filter(!is.na(price)) %>%
  dplyr::mutate(
    categoria_precio = dplyr::case_when(
      price < 500 ~ seg_economico,
      price >= 500 & price < 1500 ~ seg_medio,
      price >= 1500 & price < 3000 ~ seg_premium,
      price >= 3000 ~ seg_lujo,
      TRUE ~ seg_sin_dato
    )
  ) %>%
  dplyr::count(categoria_precio, name = "n") %>%
  dplyr::arrange(dplyr::desc(n))

if (nrow(df_donut) == 0) {
  cat("No hay precios para graficar.")
} else {
  # Look up each row's colour by its segment label (not by row position).
  cols <- unname(color_por_segmento[df_donut$categoria_precio])

  # Donut chart: a pie trace with a hole, labels and percentages inside.
  plot_ly(
    df_donut,
    labels = ~categoria_precio,
    values = ~n,
    type = "pie",
    hole = 0.45,
    textinfo = "label+percent",
    textposition = "inside",
    insidetextorientation = "radial",
    hoverinfo = "label+value+percent",
    marker = list(colors = cols,
                  line = list(color = "white", width = 1))
  ) %>%
    layout(
      title = "Distribución por Segmento de Precio",
      showlegend = FALSE
    )
}