# =========================
# app.R
# =========================
# ---------- Paquetes ----------
library(shiny)
library(ggplot2)
library(mxmaps)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
# Text mining
library(stringr)
library(tidytext)
library(SnowballC)
library(stopwords)
library(wordcloud)
## Cargando paquete requerido: RColorBrewer
# ---------- Base map data ----------
# Load the `df_mxstate_2020` dataset shipped with the mxmaps package; the
# server code below uses its `state_name` column to build join keys.
data("df_mxstate_2020", package = "mxmaps")
# ---------- Helpers ----------
# Normalize state names so that frequent spelling variants collapse to one
# canonical, upper-case, accented form that can be used as a join key.
#
# Args:
#   x: character vector (or anything coercible to character) of state names.
#
# Returns:
#   A character vector of the same length as `x`: trimmed, upper-cased, with
#   internal whitespace runs collapsed, and with known aliases replaced by
#   their canonical name. Names without an alias are returned upper-cased;
#   NA stays NA and empty input yields `character(0)`.
normaliza_estado <- function(x) {
  # Aliases are keyed WITHOUT accents. The lookup key is built by stripping
  # accents from the input, so "Estado de México" and "Estado de Mexico"
  # both resolve to the same canonical name.
  alias <- c(
    "ESTADO DE MEXICO" = "MÉXICO",
    "MEXICO" = "MÉXICO",
    "CDMX" = "CIUDAD DE MÉXICO",
    "DISTRITO FEDERAL" = "CIUDAD DE MÉXICO",
    "CIUDAD DE MEXICO" = "CIUDAD DE MÉXICO",
    "QUERETARO" = "QUERÉTARO",
    "YUCATAN" = "YUCATÁN",
    "MICHOACAN" = "MICHOACÁN",
    "MICHOACAN DE OCAMPO" = "MICHOACÁN",
    "COAHUILA DE ZARAGOZA" = "COAHUILA",
    "NUEVO LEON" = "NUEVO LEÓN",
    "VERACRUZ DE IGNACIO DE LA LLAVE" = "VERACRUZ",
    "SAN LUIS POTOSI" = "SAN LUIS POTOSÍ"
  )

  up <- toupper(trimws(as.character(x)))
  up <- gsub("[[:space:]]+", " ", up)

  # Lower-case accented letters are included in case toupper() left them
  # untouched (it is locale dependent).
  clave <- chartr("ÁÉÍÓÚÜáéíóúü", "AEIOUUAEIOUU", up)

  # match() instead of ifelse(): keeps the character type for empty input
  # and never indexes the alias table with a missing name.
  pos <- match(clave, names(alias))
  con_alias <- !is.na(pos)
  up[con_alias] <- unname(alias[pos[con_alias]])
  up
}
# =========================
# UI
# =========================
# Page layout: a navbar with three tabs (clustering map, house-price
# regression, text mining). Input/output ids declared here are the ones the
# server function reads through `input$...` and fills through `output$...`.
ui <- fluidPage(
  navbarPage(
    title = "Inteligencia Artificial",

    # ----- Tab 1: clustering map -----
    tabPanel(
      title = "Clustering",
      sidebarPanel(
        h4("Selecciona las siguientes opciones:"),
        selectInput(
          inputId = "fuente",
          label = "Fuente de datos:",
          choices = c(
            "Población (demo)",
            "Violencia (tabla)",
            "Obesidad (interno)",
            "Movilidad (interno)",
            "Vehículos (interno)",
            "Desempleo (tabla)"
          )
        ),
        # The cluster count is shown for every source listed in the
        # condition; "Vehículos (interno)" is the only one left out.
        conditionalPanel(
          condition = "['Población (demo)','Violencia (tabla)','Obesidad (interno)','Movilidad (interno)','Desempleo (tabla)'].includes(input.fuente)",
          numericInput(
            inputId = "clusters",
            label = "Número de clusters:",
            value = 3,
            min = 2,
            max = 10
          )
        )
      ),
      mainPanel(
        h1("Mapa de Clusters / Valores"),
        plotOutput(outputId = "mapa", height = "550px")
      )
    ),

    # ----- Tab 2: regression -----
    tabPanel(
      title = "Regresion",
      sidebarPanel(
        h4("Selecciona las siguientes opciones:"),
        sliderInput(
          inputId = "Dist_Taxi", label = "Dist_Taxi: ",
          min = 146, max = 16850, value = 8234
        ),
        sliderInput(
          inputId = "Dist_Market", label = "Dist_Market: ",
          min = 1666, max = 18281, value = 11015
        ),
        sliderInput(
          inputId = "Dist_Hospital", label = "Dist_Hospital: ",
          min = 3227, max = 22407, value = 13087
        ),
        sliderInput(
          inputId = "Carpet", label = "Carpet: ",
          min = 775, max = 2229, value = 1486
        ),
        sliderInput(
          inputId = "Builtup", label = "Builtup: ",
          min = 932, max = 2229, value = 1782
        ),
        sliderInput(
          inputId = "Rainfall", label = "Rainfall: ",
          min = -110, max = 1560, value = 785.3
        ),
        selectInput(
          inputId = "Parking",
          label = "Parking: ",
          choices = c("Covered", "No parking", "Not Provided", "Open")
        ),
        selectInput(
          inputId = "City_Category",
          label = "City_Category: ",
          choices = c("CAT A", "CAT B", "CAT C")
        )
      ),
      mainPanel(
        h2("La predicción del precio de la casa es:"),
        verbatimTextOutput(outputId = "prediccion")
      )
    ),

    # ----- Tab 3: text mining -----
    tabPanel(
      title = "Text Mining",
      sidebarPanel(
        h4("Ingresa el texto:"),
        textAreaInput(
          inputId = "tm_texto",
          label = NULL,
          width = "100%",
          height = "220px",
          placeholder = "Pega aquí el texto a analizar…"
        ),
        checkboxInput(
          inputId = "tm_usar_stop",
          label = "Quitar stopwords (español)",
          value = TRUE
        ),
        checkboxInput(
          inputId = "tm_stem",
          label = "Aplicar stemming (raíz de palabras)",
          value = TRUE
        ),
        numericInput(
          inputId = "tm_minfreq",
          label = "Frecuencia mínima para la nube",
          value = 2, min = 1, step = 1
        ),
        numericInput(
          inputId = "tm_maxwords",
          label = "Máx. palabras en la nube",
          value = 120, min = 10, step = 10
        ),
        actionButton(inputId = "tm_go", label = "Generar")
      ),
      mainPanel(
        h3("Nube de palabras"),
        plotOutput(outputId = "tm_nube", height = "400px"),
        hr(),
        h3("Conteo de palabras (Top 100)"),
        tableOutput(outputId = "tm_tabla")
      )
    )
  )
)
# =========================
# SERVER
# =========================
server <- function(input, output, session) {
# --- Base map with an upper-case key ---
# Reactive returning `df_mxstate_2020` with one extra column, `ENTIDAD_UP`,
# holding `toupper(state_name)`; the map renderer joins the data on it.
mapa_base <- reactive({
  mutate(df_mxstate_2020, ENTIDAD_UP = toupper(state_name))
})
# --- Population (demo) ---
# Reactive returning a hard-coded tibble with one row per state and two
# columns: `estado` (state name) and `poblacion` (numeric).
datos_pobl <- reactive({
  poblacion_por_estado <- c(
    "México" = 17741822,
    "Ciudad de México" = 9356765,
    "Jalisco" = 8743203,
    "Veracruz" = 8118547,
    "Puebla" = 6575295,
    "Guanajuato" = 6322118,
    "Nuevo León" = 6155818,
    "Chiapas" = 5917164,
    "Michoacán" = 4957200,
    "Oaxaca" = 4247773,
    "Baja California" = 3774269,
    "Chihuahua" = 3904131,
    "Guerrero" = 3597563,
    "Tamaulipas" = 3562828,
    "Coahuila" = 3445576,
    "Hidalgo" = 3237356,
    "Sinaloa" = 3154500,
    "Sonora" = 3037489,
    "San Luis Potosí" = 2880601,
    "Tabasco" = 2547170,
    "Querétaro" = 2514702,
    "Yucatán" = 2386022,
    "Morelos" = 1969732,
    "Quintana Roo" = 1857985,
    "Durango" = 1832650,
    "Zacatecas" = 1622138,
    "Aguascalientes" = 1425607,
    "Tlaxcala" = 1342977,
    "Nayarit" = 1235456,
    "Campeche" = 928363,
    "Baja California Sur" = 798447,
    "Colima" = 731391
  )
  tibble(
    estado = names(poblacion_por_estado),
    poblacion = unname(poblacion_por_estado)
  )
})
# --- Violence (uses the `viol_tot` table found in the environment) ---
# Reactive that clusters the rows of `viol_tot` by their (scaled) `total`
# with k-means.
#
# Requires: an object `viol_tot`, reachable via `exists(..., inherits = TRUE)`,
#   with columns `ENTIDAD_UP` and `total`.
# Returns: a data frame with `ENTIDAD_UP` (normalized with
#   `normaliza_estado()`) and `cluster_num`, the integer rank of the row's
#   cluster when clusters are ordered by centre (1 = lowest centre).
# Rows whose `total` is not a finite number are left out of the result.
datos_viol_tabla <- reactive({
  validate(need(exists("viol_tot", inherits = TRUE),
                "No encontré la tabla 'viol_tot' en el entorno. Cárgala antes de usar esta opción."))
  df <- get("viol_tot", inherits = TRUE)
  validate(need(all(c("ENTIDAD_UP", "total") %in% names(df)),
                "La tabla 'viol_tot' debe tener columnas: ENTIDAD_UP y total."))

  entidad <- normaliza_estado(df$ENTIDAD_UP)
  total <- suppressWarnings(as.numeric(df$total))

  # kmeans() stops on NA/NaN/Inf, so values that failed the numeric
  # conversion are dropped instead of crashing the plot.
  usable <- is.finite(total)
  entidad <- entidad[usable]
  total <- total[usable]

  n_distintos <- length(unique(total))
  validate(need(n_distintos >= 2,
                "La tabla 'viol_tot' necesita al menos dos valores numéricos distintos en 'total'."))

  k_pedido <- if (isTruthy(input$clusters)) max(2, min(10, input$clusters)) else 3
  # kmeans() needs k <= number of distinct points, and its default
  # Hartigan-Wong algorithm additionally needs k < number of rows.
  k <- max(1L, min(as.integer(k_pedido), n_distintos, length(total) - 1L))

  set.seed(42)
  km <- kmeans(scale(total), centers = k, nstart = 50)

  # Position of each cluster id in the ordering of the centres = its rank.
  cluster_num <- match(km$cluster, order(as.numeric(km$centers)))

  data.frame(
    ENTIDAD_UP = entidad,
    cluster_num = as.integer(cluster_num),
    stringsAsFactors = FALSE
  )
})
# --- Obesity (built-in data) ---
# Reactive returning a hard-coded tibble with one row per state and two
# columns: `estado` (state name) and `obesidad` (numeric).
datos_obesidad <- reactive({
  obesidad_por_estado <- c(
    "Aguascalientes" = 35,
    "Baja California" = 37,
    "Baja California Sur" = 35,
    "Campeche" = 41,
    "Coahuila" = 38,
    "Colima" = 36,
    "Chiapas" = 31,
    "Chihuahua" = 38,
    "Ciudad de México" = 36,
    "Durango" = 36,
    "Guanajuato" = 35,
    "Guerrero" = 32,
    "Hidalgo" = 34,
    "Jalisco" = 36,
    "México" = 37,
    "Michoacán" = 34,
    "Morelos" = 35,
    "Nayarit" = 36,
    "Nuevo León" = 40,
    "Oaxaca" = 32,
    "Puebla" = 35,
    "Querétaro" = 36,
    "Quintana Roo" = 37,
    "San Luis Potosí" = 35,
    "Sinaloa" = 36,
    "Sonora" = 38,
    "Tabasco" = 42,
    "Tamaulipas" = 40,
    "Tlaxcala" = 34,
    "Veracruz" = 37,
    "Yucatán" = 41,
    "Zacatecas" = 35
  )
  tibble(
    estado = names(obesidad_por_estado),
    obesidad = unname(obesidad_por_estado)
  )
})
# --- Mobility (built-in data) ---
# Reactive that clusters a hard-coded table of ten states on two scaled
# variables (`pasajeros`, `km`) with k-means.
#
# Returns: the tibble (`estado`, `pasajeros`, `km`) plus `cluster_num`, the
#   integer rank of the row's cluster when clusters are ordered by the mean
#   of the row sums of the scaled variables (1 = lowest).
datos_movilidad <- reactive({
  df <- tibble(
    estado = c(
      "Chihuahua", "Ciudad de México", "Guanajuato", "Guerrero",
      "Hidalgo", "Jalisco", "México", "Nuevo León", "Puebla", "Querétaro"
    ),
    pasajeros = c(
      8007618.2, 2087364185, 168118717.2, 8635228,
      22317857.75, 122275126.3, 68925404.61,
      103860939.7, 83322622, 3397287.444
    ),
    km = c(
      2256719, 189531317.9, 46267008, 2749668.667,
      8729435.75, 16997166.26, 15956772.77,
      25378985.58, 22240888.75, 1481487.111
    )
  )
  X <- scale(df[, c("pasajeros", "km")])

  k_pedido <- if (isTruthy(input$clusters)) max(2, min(10, input$clusters)) else 3
  # The table has only ten rows and kmeans()'s default Hartigan-Wong
  # algorithm requires k < nrow(X); without this cap, asking for 10 clusters
  # stops with "number of cluster centres must lie between 1 and nrow(x)".
  k <- as.integer(min(k_pedido, nrow(X) - 1L))

  set.seed(123)
  kmfit <- kmeans(X, centers = k, nstart = 50)

  # Order clusters by the mean composite score of their members.
  comp <- rowSums(X)
  medias <- tapply(comp, kmfit$cluster, mean)
  orden <- rank(medias, ties.method = "first")

  df$cluster_num <- as.integer(orden[as.character(kmfit$cluster)])
  df
})
# --- Vehicles (built-in data) ---
# Reactive returning a hard-coded numeric vector of 32 values. The map
# renderer assigns it positionally to the rows of `df_mxstate_2020`, so the
# order of the values matters.
datos_vehiculos <- reactive({
  vehiculos <- c(
    756781, 2404000, 624470, 404759, 1231890, 1879130, 6471738, 1257493,
    427510, 765038, 2605684, 1389021, 1023762, 4018531, 5922644, 2004532,
    967845, 653219, 2913478, 1328543, 3275691, 1358327, 1175942, 1456021,
    1698541, 2036891, 1187021, 1712458, 489632, 3102458, 1298745, 987654
  )
  vehiculos
})
# --- Unemployment (uses the `des_fijo` table found in the environment) ---
# Reactive that clusters the rows of `des_fijo` by `Promedio` with k-means.
#
# Requires: an object `des_fijo`, reachable via `exists(..., inherits = TRUE)`,
#   with columns `ESTADO_UP` and `Promedio`.
# Returns: a data frame with `ESTADO_UP` (normalized with
#   `normaliza_estado()`), `cluster_num` (integer rank of the row's cluster
#   when clusters are ordered by centre, 1 = lowest) and `Promedio`.
# Rows whose `Promedio` is not a finite number are left out of the result.
datos_desempleo_tabla <- reactive({
  validate(need(exists("des_fijo", inherits = TRUE),
                "No encontré la tabla 'des_fijo' en el entorno. Cárgala antes de usar esta opción."))
  df <- get("des_fijo", inherits = TRUE)
  validate(need(all(c("ESTADO_UP", "Promedio") %in% names(df)),
                "La tabla 'des_fijo' debe tener columnas: ESTADO_UP y Promedio."))

  estado <- normaliza_estado(df$ESTADO_UP)
  promedio <- suppressWarnings(as.numeric(df$Promedio))

  # kmeans() stops on NA/NaN/Inf, so values that failed the numeric
  # conversion are dropped instead of crashing the plot.
  usable <- is.finite(promedio)
  estado <- estado[usable]
  promedio <- promedio[usable]

  n_distintos <- length(unique(promedio))
  validate(need(n_distintos >= 2,
                "La tabla 'des_fijo' necesita al menos dos valores numéricos distintos en 'Promedio'."))

  k_pedido <- if (isTruthy(input$clusters)) max(2, min(10, input$clusters)) else 3
  # kmeans() needs k <= number of distinct points, and its default
  # Hartigan-Wong algorithm additionally needs k < number of rows.
  k <- max(1L, min(as.integer(k_pedido), n_distintos, length(promedio) - 1L))

  set.seed(123)
  km <- kmeans(promedio, centers = k, nstart = 50)

  # Position of each cluster id in the ordering of the centres = its rank.
  cluster_num <- match(km$cluster, order(as.numeric(km$centers)))

  data.frame(
    ESTADO_UP = estado,
    cluster_num = as.integer(cluster_num),
    Promedio = promedio,
    stringsAsFactors = FALSE
  )
})
# ---------- Map plot ----------
# Draws the state choropleth for the source chosen in `input$fuente`.
# Every source except vehicles is shown as k-means clusters, numbered by
# rank (1 = lowest centre); states without data get the value 0. The
# vehicles source is drawn from its raw values. The k printed in each title
# is the number of clusters actually used, not the raw input.
output$mapa <- renderPlot({
  req(input$fuente)
  mapa <- mapa_base()

  # Requested cluster count clamped to [2, 10]; 3 when the input is empty.
  k_pedido <- function() {
    if (isTruthy(input$clusters)) max(2, min(10, input$clusters)) else 3
  }

  # Run k-means on `valores` and return, for each observation, the rank of
  # its cluster when clusters are ordered by centre.
  clusters_ordenados <- function(valores, k) {
    set.seed(123)
    km <- kmeans(valores, centers = k, nstart = 25)
    match(km$cluster, order(as.numeric(km$centers)))
  }

  # Attach `clusters` to the base map by upper-case state key and draw it.
  pinta_clusters <- function(claves, clusters, titulo) {
    tabla <- mapa
    valor <- as.numeric(clusters[match(tabla$ENTIDAD_UP, claves)])
    valor[is.na(valor)] <- 0
    tabla$value <- valor
    mxstate_choropleth(tabla, title = titulo, legend = "Cluster")
  }

  if (input$fuente == "Población (demo)") {
    df1 <- datos_pobl()
    k <- k_pedido()
    pinta_clusters(
      toupper(df1$estado),
      clusters_ordenados(df1$poblacion, k),
      paste("Clusters por Población (k =", k, ")")
    )
  } else if (input$fuente == "Violencia (tabla)") {
    dfv <- datos_viol_tabla() # ENTIDAD_UP, cluster_num
    # Ranks run from 1 to k, so the largest rank is the k that was used.
    k <- max(dfv$cluster_num, na.rm = TRUE)
    pinta_clusters(
      dfv$ENTIDAD_UP,
      dfv$cluster_num,
      paste("Clusters por Incidencia Total (Violencia) (k =", k, ")")
    )
  } else if (input$fuente == "Obesidad (interno)") {
    dfo <- datos_obesidad()
    k <- k_pedido()
    pinta_clusters(
      toupper(dfo$estado),
      clusters_ordenados(scale(dfo$obesidad), k),
      paste("Clusters por Obesidad (%) (k =", k, ")")
    )
  } else if (input$fuente == "Movilidad (interno)") {
    dfm <- datos_movilidad() # estado, pasajeros, km, cluster_num
    k <- max(dfm$cluster_num, na.rm = TRUE)
    pinta_clusters(
      toupper(dfm$estado),
      dfm$cluster_num,
      paste("Clusters por Movilidad (k =", k, ")")
    )
  } else if (input$fuente == "Desempleo (tabla)") {
    dfd <- datos_desempleo_tabla() # ESTADO_UP, cluster_num, Promedio
    k <- max(dfd$cluster_num, na.rm = TRUE)
    pinta_clusters(
      dfd$ESTADO_UP,
      dfd$cluster_num,
      paste("Clusters por Desempleo (Promedio anual) (k =", k, ")")
    )
  } else { # Vehículos (interno): raw values, no clustering
    veh <- datos_vehiculos()
    # Values are assigned by position, so the lengths must agree.
    stopifnot(length(veh) == nrow(df_mxstate_2020))
    tabla <- df_mxstate_2020
    tabla$value <- veh
    mxstate_choropleth(
      tabla,
      num_colors = 6,
      title = "Vehículos particulares por estado",
      legend = "Vehículos"
    )
  }
})
# ---------- Regression: direct evaluation of a linear formula ----------
# Prints intercept + sum(coefficient * input) using the hard-coded
# coefficients below. The two categorical inputs contribute a fixed offset
# looked up by level; a level that is not in the table yields NA.
output$prediccion <- renderPrint({
  ajuste_parking <- c(
    "Covered" = 0,
    "No parking" = -612800,
    "Not Provided" = -492600,
    "Open" = -263500
  )
  ajuste_ciudad <- c(
    "CAT A" = 0,
    "CAT B" = -1877000,
    "CAT C" = -2895000
  )

  # unname() so the element name does not leak into the printed result.
  coeficiente_Parking <- unname(ajuste_parking[input$Parking])
  coeficiente_City_Category <- unname(ajuste_ciudad[input$City_Category])

  5595000 +
    29.79 * input$Dist_Taxi +
    11.94 * input$Dist_Market +
    49.34 * input$Dist_Hospital +
    524.2 * input$Carpet +
    1107 * input$Builtup +
    coeficiente_Parking +
    coeficiente_City_Category +
    99.53 * input$Rainfall
})
# =========================
# TEXT MINING
# =========================
# Word counts for the text in `input$tm_texto`, recomputed only when the
# `tm_go` button is pressed. Steps: lower-case, split into word tokens, drop
# all-digit tokens and tokens shorter than two characters, optionally drop
# Spanish stopwords, optionally stem, then count.
# Returns a table with columns `word` and `n`, most frequent first.
tm_tokens <- eventReactive(input$tm_go, {
  req(input$tm_texto)

  palabras <- tibble(text = tolower(input$tm_texto)) %>%
    unnest_tokens(word, text) %>%
    filter(
      !str_detect(word, "^[0-9]+$"),
      nchar(word) >= 2
    )

  # Spanish stopwords
  if (isTRUE(input$tm_usar_stop)) {
    vacias <- stopwords::stopwords("es")
    palabras <- palabras[!(palabras$word %in% vacias), ]
  }

  # Stemming (applied after stopword removal)
  if (isTRUE(input$tm_stem)) {
    palabras$word <- SnowballC::wordStem(palabras$word, language = "spanish")
  }

  count(palabras, word, sort = TRUE)
})
# Word cloud
# Draws the words from `tm_tokens()` whose count is at least
# `input$tm_minfreq`, keeping at most `input$tm_maxwords` of them (the table
# is already sorted by count, so the most frequent ones are kept).
output$tm_nube <- renderPlot({
  conteos <- tm_tokens()
  req(nrow(conteos) > 0)

  # A cleared numeric input arrives as NA; report it instead of letting the
  # comparison below silently discard every word.
  validate(
    need(isTruthy(input$tm_minfreq),
         "Indica una frecuencia mínima válida."),
    need(isTRUE(input$tm_maxwords >= 1),
         "Indica un máximo de palabras válido (al menos 1).")
  )
  min_freq <- input$tm_minfreq
  max_palabras <- as.integer(input$tm_maxwords)

  seleccion <- head(conteos[conteos$n >= min_freq, ], n = max_palabras)
  validate(need(nrow(seleccion) > 0,
                "No hay palabras que cumplan la frecuencia mínima. Baja el umbral o desactiva filtros."))

  wordcloud(
    words = seleccion$word,
    freq = seleccion$n,
    # wordcloud() defaults to min.freq = 3, which would override a user
    # threshold of 1 or 2; pass the chosen threshold explicitly.
    min.freq = min_freq,
    max.words = max_palabras,
    random.order = FALSE
  )
})
# Count table (top 100)
# Shows the first 100 rows of the word counts from `tm_tokens()`, which are
# already sorted by descending count.
output$tm_tabla <- renderTable(
  {
    head(tm_tokens(), n = 100)
  },
  striped = TRUE,
  bordered = TRUE,
  digits = 0
)
}
# Launch the app with the UI and server defined above
shinyApp(ui = ui, server = server)
## Shiny applications not supported in static R Markdown documents