# =========================
# app.R
# =========================

# ---------- Paquetes ----------
library(shiny)
library(ggplot2)
library(mxmaps)
library(dplyr)
## Console output captured when dplyr was attached (not code):
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)

# Text mining
library(stringr)
library(tidytext)
library(SnowballC)
library(stopwords)
library(wordcloud)
## Console output captured when wordcloud was attached (not code):
## Loading required package: RColorBrewer
# ---------- Base map data ----------
# Load the `df_mxstate_2020` data set shipped with mxmaps; the server code
# below uses it as the base table for every choropleth it draws.
data("df_mxstate_2020", package = "mxmaps")

# ---------- Helpers ----------
# Normalize state names so that frequent spelling variants do not break the
# joins against the upper-cased state names of the map table.
#
# @param x Character vector (or a factor / anything coercible to character)
#   with state names.
# @return Character vector of the same length as `x`: trimmed, with internal
#   runs of whitespace collapsed to one space, upper-cased, and with known
#   variants replaced by their canonical spelling. Unknown names are returned
#   upper-cased; NA stays NA; empty input yields `character(0)`.
normaliza_estado <- function(x) {
  # Variant (upper-case) -> canonical name.
  repl <- c(
    "ESTADO DE MÉXICO" = "MÉXICO",
    "ESTADO DE MEXICO" = "MÉXICO",
    "MEXICO"           = "MÉXICO",
    "CDMX"             = "CIUDAD DE MÉXICO",
    "CIUDAD DE MEXICO" = "CIUDAD DE MÉXICO",
    "DISTRITO FEDERAL" = "CIUDAD DE MÉXICO",
    "QUERETARO"        = "QUERÉTARO",
    "YUCATAN"          = "YUCATÁN",
    "MICHOACAN"        = "MICHOACÁN",
    "MICHOACAN DE OCAMPO" = "MICHOACÁN",
    "MICHOACÁN DE OCAMPO" = "MICHOACÁN",
    "COAHUILA DE ZARAGOZA" = "COAHUILA",
    "NUEVO LEON"       = "NUEVO LEÓN",
    "VERACRUZ DE IGNACIO DE LA LLAVE" = "VERACRUZ",
    "SAN LUIS POTOSI"  = "SAN LUIS POTOSÍ"
  )

  up <- toupper(trimws(as.character(x)))
  # "Nuevo  Leon" (double space) must hit the same table entry as "Nuevo Leon".
  up <- gsub("[[:space:]]+", " ", up)

  # match() + subassignment keeps the result a character vector even for
  # empty input (ifelse() would return logical(0) there).
  idx <- match(up, names(repl))
  hit <- !is.na(idx)
  up[hit] <- unname(repl[idx[hit]])
  up
}

# =========================
# UI
# =========================
ui <- local({
  # Each tab is assembled on its own and the three are combined at the end;
  # local() keeps the intermediate objects out of the global environment.

  # ----- Tab 1: Clustering -----
  fuentes <- c(
    "Población (demo)",
    "Violencia (tabla)",
    "Obesidad (interno)",
    "Movilidad (interno)",
    "Vehículos (interno)",
    "Desempleo (tabla)"
  )
  # JavaScript condition evaluated in the browser: the cluster-count input is
  # shown for every source in this list (all of them except the vehicles one).
  cond_clusters <- "['Población (demo)','Violencia (tabla)','Obesidad (interno)','Movilidad (interno)','Desempleo (tabla)'].includes(input.fuente)"

  tab_clustering <- tabPanel(
    title = "Clustering",
    sidebarPanel(
      h4("Selecciona las siguientes opciones:"),
      selectInput(inputId = "fuente", label = "Fuente de datos:", choices = fuentes),
      conditionalPanel(
        condition = cond_clusters,
        numericInput(
          inputId = "clusters",
          label   = "Número de clusters:",
          value   = 3,
          min     = 2,
          max     = 10
        )
      )
    ),
    mainPanel(
      tags$h1("Mapa de Clusters / Valores"),
      plotOutput(outputId = "mapa", height = "550px")
    )
  )

  # ----- Tab 2: Regression -----
  tab_regresion <- tabPanel(
    title = "Regresion",
    sidebarPanel(
      h4("Selecciona las siguientes opciones:"),
      sliderInput(inputId = "Dist_Taxi", label = "Dist_Taxi: ",
                  min = 146, max = 16850, value = 8234),
      sliderInput(inputId = "Dist_Market", label = "Dist_Market: ",
                  min = 1666, max = 18281, value = 11015),
      sliderInput(inputId = "Dist_Hospital", label = "Dist_Hospital: ",
                  min = 3227, max = 22407, value = 13087),
      sliderInput(inputId = "Carpet", label = "Carpet: ",
                  min = 775, max = 2229, value = 1486),
      sliderInput(inputId = "Builtup", label = "Builtup: ",
                  min = 932, max = 2229, value = 1782),
      sliderInput(inputId = "Rainfall", label = "Rainfall: ",
                  min = -110, max = 1560, value = 785.3),
      selectInput(inputId = "Parking", label = "Parking: ",
                  choices = c("Covered", "No parking", "Not Provided", "Open")),
      selectInput(inputId = "City_Category", label = "City_Category: ",
                  choices = c("CAT A", "CAT B", "CAT C"))
    ),
    mainPanel(
      tags$h2("La predicción del precio de la casa es:"),
      verbatimTextOutput(outputId = "prediccion")
    )
  )

  # ----- Tab 3: Text mining -----
  tab_texto <- tabPanel(
    title = "Text Mining",
    sidebarPanel(
      h4("Ingresa el texto:"),
      textAreaInput(
        inputId     = "tm_texto",
        label       = NULL,
        width       = "100%",
        height      = "220px",
        placeholder = "Pega aquí el texto a analizar…"
      ),
      checkboxInput(inputId = "tm_usar_stop",
                    label = "Quitar stopwords (español)", value = TRUE),
      checkboxInput(inputId = "tm_stem",
                    label = "Aplicar stemming (raíz de palabras)", value = TRUE),
      numericInput(inputId = "tm_minfreq",
                   label = "Frecuencia mínima para la nube",
                   value = 2, min = 1, step = 1),
      numericInput(inputId = "tm_maxwords",
                   label = "Máx. palabras en la nube",
                   value = 120, min = 10, step = 10),
      actionButton(inputId = "tm_go", label = "Generar")
    ),
    mainPanel(
      tags$h3("Nube de palabras"),
      plotOutput(outputId = "tm_nube", height = "400px"),
      hr(),
      tags$h3("Conteo de palabras (Top 100)"),
      tableOutput(outputId = "tm_tabla")
    )
  )

  fluidPage(
    navbarPage(
      "Inteligencia Artificial",
      tab_clustering,
      tab_regresion,
      tab_texto
    )
  )
})

# =========================
# SERVER
# =========================
# Shiny server function: draws the cluster / value map, prints the
# house-price prediction and runs the text-mining tab.
#
# @param input,output,session Standard Shiny server arguments.
server <- function(input, output, session) {

  # --- Base map table with an upper-case join key ---
  mapa_base <- reactive({
    df_mxstate_2020 %>% mutate(ENTIDAD_UP = toupper(state_name))
  })

  # ---------- Shared clustering helpers ----------

  # Requested number of clusters: 3 when the input is empty, otherwise the
  # input rounded to a whole number and clamped to [2, 10].
  k_solicitado <- reactive({
    if (isTruthy(input$clusters)) {
      as.integer(max(2, min(10, round(input$clusters))))
    } else {
      3L
    }
  })

  # Run k-means on `x` (vector or numeric matrix) and relabel the clusters
  # 1..k in ascending order of their centre (sum of the centre coordinates
  # when `x` has several columns).
  # Returns list(k = <clusters actually used>, cluster_num = <integer labels>).
  agrupa_ordenado <- function(x, seed, nstart) {
    x <- as.matrix(x)
    validate(
      need(nrow(x) >= 2,
           "Se necesitan al menos dos observaciones para agrupar."),
      need(all(is.finite(x)),
           "Los datos contienen valores no numéricos o sin variación; no se pueden agrupar.")
    )

    # kmeans() cannot use more centres than distinct rows, and its default
    # Hartigan-Wong algorithm additionally requires k < nrow(x); without this
    # cap, k = 10 on a 10-row table stops with an error.
    k <- max(1L, min(k_solicitado(), nrow(unique(x)), nrow(x) - 1L))

    set.seed(seed)
    ajuste <- kmeans(x, centers = k, nstart = nstart)

    # Rows of `centers` are in cluster-id order, so rank i is the new label
    # of original cluster i.
    rangos <- rank(rowSums(ajuste$centers), ties.method = "first")
    list(k = k, cluster_num = as.integer(rangos[ajuste$cluster]))
  }

  # Join a cluster assignment (columns ENTIDAD_UP, cluster_num) onto the base
  # map and draw it. States without an assignment get value 0.
  dibuja_clusters <- function(asignacion, tema, k) {
    tabla <- mapa_base() %>%
      left_join(select(asignacion, ENTIDAD_UP, cluster_num), by = "ENTIDAD_UP")
    tabla$value <- ifelse(is.na(tabla$cluster_num), 0, tabla$cluster_num)

    mxstate_choropleth(
      tabla,
      title  = paste0("Clusters por ", tema, " (k = ", k, " )"),
      legend = "Cluster"
    )
  }

  # --- Population (demo) ---
  datos_pobl <- reactive({
    tibble(
      estado = c("México","Ciudad de México","Jalisco","Veracruz","Puebla","Guanajuato",
                 "Nuevo León","Chiapas","Michoacán","Oaxaca","Baja California","Chihuahua",
                 "Guerrero","Tamaulipas","Coahuila","Hidalgo","Sinaloa","Sonora",
                 "San Luis Potosí","Tabasco","Querétaro","Yucatán","Morelos","Quintana Roo",
                 "Durango","Zacatecas","Aguascalientes","Tlaxcala","Nayarit",
                 "Campeche","Baja California Sur","Colima"),
      poblacion = c(17741822,9356765,8743203,8118547,6575295,6322118,6155818,5917164,4957200,
                    4247773,3774269,3904131,3597563,3562828,3445576,3237356,3154500,3037489,
                    2880601,2547170,2514702,2386022,1969732,1857985,1832650,1622138,1425607,
                    1342977,1235456,928363,798447,731391)
    )
  })

  # --- Violence (uses the table `viol_tot` found in the environment) ---
  # Returns list(datos = data frame ENTIDAD_UP / cluster_num, k = clusters used).
  datos_viol_tabla <- reactive({
    validate(need(exists("viol_tot", inherits = TRUE),
                  "No encontré la tabla 'viol_tot' en el entorno. Cárgala antes de usar esta opción."))
    df <- get("viol_tot", inherits = TRUE)

    validate(need(all(c("ENTIDAD_UP","total") %in% names(df)),
                  "La tabla 'viol_tot' debe tener columnas: ENTIDAD_UP y total."))

    estado <- normaliza_estado(df$ENTIDAD_UP)
    total  <- suppressWarnings(as.numeric(df$total))

    # Rows whose total is not numeric would make kmeans() fail; drop them so
    # the remaining states can still be clustered.
    ok <- !is.na(total)
    validate(need(sum(ok) >= 2,
                  "La columna 'total' de 'viol_tot' no tiene suficientes valores numéricos."))

    res <- agrupa_ordenado(scale(total[ok]), seed = 42, nstart = 50)

    list(
      datos = data.frame(
        ENTIDAD_UP  = estado[ok],
        cluster_num = res$cluster_num,
        stringsAsFactors = FALSE
      ),
      k = res$k
    )
  })

  # --- Obesity (built-in) ---
  datos_obesidad <- reactive({
    tibble(
      estado = c(
        "Aguascalientes","Baja California","Baja California Sur","Campeche","Coahuila","Colima",
        "Chiapas","Chihuahua","Ciudad de México","Durango","Guanajuato","Guerrero","Hidalgo",
        "Jalisco","México","Michoacán","Morelos","Nayarit","Nuevo León","Oaxaca","Puebla",
        "Querétaro","Quintana Roo","San Luis Potosí","Sinaloa","Sonora","Tabasco","Tamaulipas",
        "Tlaxcala","Veracruz","Yucatán","Zacatecas"
      ),
      obesidad = c(35,37,35,41,38,36,31,38,36,36,35,32,34,36,37,34,35,36,40,32,35,36,37,35,36,38,42,40,34,37,41,35)
    )
  })

  # --- Mobility (built-in) ---
  # Clusters on the two scaled columns; returns list(datos, k) like the
  # table-based sources.
  datos_movilidad <- reactive({
    df <- tibble(
      estado = c("Chihuahua", "Ciudad de México", "Guanajuato", "Guerrero",
                 "Hidalgo", "Jalisco", "México", "Nuevo León", "Puebla", "Querétaro"),
      pasajeros = c(8007618.2, 2087364185, 168118717.2, 8635228,
                    22317857.75, 122275126.3, 68925404.61,
                    103860939.7, 83322622, 3397287.444),
      km = c(2256719, 189531317.9, 46267008, 2749668.667,
             8729435.75, 16997166.26, 15956772.77,
             25378985.58, 22240888.75, 1481487.111)
    )

    res <- agrupa_ordenado(scale(df[, c("pasajeros","km")]), seed = 123, nstart = 50)

    list(
      datos = df %>% mutate(ENTIDAD_UP = toupper(estado),
                            cluster_num = res$cluster_num),
      k = res$k
    )
  })

  # --- Vehicles (built-in) ---
  # Bare vector of values, assigned positionally to the rows of
  # df_mxstate_2020 by the map code below.
  datos_vehiculos <- reactive({
    c(
      756781, 2404000, 624470, 404759,
      1231890, 1879130, 6471738, 1257493,
      427510, 765038, 2605684, 1389021,
      1023762, 4018531, 5922644, 2004532,
      967845, 653219, 2913478, 1328543,
      3275691, 1358327, 1175942, 1456021,
      1698541, 2036891, 1187021, 1712458,
      489632, 3102458, 1298745, 987654
    )
  })

  # --- Unemployment (uses the table `des_fijo` found in the environment) ---
  # Returns list(datos = data frame ENTIDAD_UP / cluster_num / Promedio, k).
  datos_desempleo_tabla <- reactive({
    validate(need(exists("des_fijo", inherits = TRUE),
                  "No encontré la tabla 'des_fijo' en el entorno. Cárgala antes de usar esta opción."))
    df <- get("des_fijo", inherits = TRUE)

    validate(need(all(c("ESTADO_UP","Promedio") %in% names(df)),
                  "La tabla 'des_fijo' debe tener columnas: ESTADO_UP y Promedio."))

    estado   <- normaliza_estado(df$ESTADO_UP)
    promedio <- suppressWarnings(as.numeric(df$Promedio))

    # Same NA guard as for the violence table.
    ok <- !is.na(promedio)
    validate(need(sum(ok) >= 2,
                  "La columna 'Promedio' de 'des_fijo' no tiene suficientes valores numéricos."))

    res <- agrupa_ordenado(promedio[ok], seed = 123, nstart = 50)

    list(
      datos = data.frame(
        ENTIDAD_UP  = estado[ok],
        cluster_num = res$cluster_num,
        Promedio    = promedio[ok],
        stringsAsFactors = FALSE
      ),
      k = res$k
    )
  })

  # ---------- Map plot ----------
  # Every clustered source reports the k that was actually used, so the
  # title always agrees with the map.
  output$mapa <- renderPlot({
    req(input$fuente)

    switch(
      input$fuente,

      "Población (demo)" = {
        df1 <- datos_pobl()
        res <- agrupa_ordenado(df1$poblacion, seed = 123, nstart = 25)
        dibuja_clusters(
          tibble(ENTIDAD_UP = toupper(df1$estado), cluster_num = res$cluster_num),
          tema = "Población",
          k    = res$k
        )
      },

      "Violencia (tabla)" = {
        res <- datos_viol_tabla()
        dibuja_clusters(res$datos, tema = "Incidencia Total (Violencia)", k = res$k)
      },

      "Obesidad (interno)" = {
        dfo <- datos_obesidad()
        res <- agrupa_ordenado(scale(dfo$obesidad), seed = 123, nstart = 25)
        dibuja_clusters(
          tibble(ENTIDAD_UP = toupper(dfo$estado), cluster_num = res$cluster_num),
          tema = "Obesidad (%)",
          k    = res$k
        )
      },

      "Movilidad (interno)" = {
        res <- datos_movilidad()
        dibuja_clusters(res$datos, tema = "Movilidad", k = res$k)
      },

      "Desempleo (tabla)" = {
        res <- datos_desempleo_tabla()
        dibuja_clusters(res$datos, tema = "Desempleo (Promedio anual)", k = res$k)
      },

      { # Default: "Vehículos (interno)" — raw values, no clustering
        veh <- datos_vehiculos()
        stopifnot(length(veh) == nrow(df_mxstate_2020))

        tabla <- df_mxstate_2020
        tabla$value <- veh

        mxstate_choropleth(
          tabla,
          num_colors = 6,
          title  = "Vehículos particulares por estado",
          legend = "Vehículos"
        )
      }
    )
  })

  # ---------- Regression: plain linear formula ----------
  # Adds the intercept, the slider terms and the offsets of the two
  # categorical inputs; "Covered" and "CAT A" contribute 0.
  output$prediccion <- renderPrint({
    req(input$Parking, input$City_Category)

    coeficiente_Parking <- switch(
      input$Parking,
      "Covered"      = 0,
      "No parking"   = -612800,
      "Not Provided" = -492600,
      "Open"         = -263500,
      NA_real_
    )

    coeficiente_City_Category <- switch(
      input$City_Category,
      "CAT A" = 0,
      "CAT B" = -1877000,
      "CAT C" = -2895000,
      NA_real_
    )

    5595000 +
      29.79 * input$Dist_Taxi +
      11.94 * input$Dist_Market +
      49.34 * input$Dist_Hospital +
      524.2 * input$Carpet +
      1107 * input$Builtup +
      coeficiente_Parking +
      coeficiente_City_Category +
      99.53 * input$Rainfall
  })

  # =========================
  # TEXT MINING
  # =========================
  # Recomputed only when the "Generar" button is pressed. Returns a table
  # with columns `word` and `n`, sorted by descending count.
  tm_tokens <- eventReactive(input$tm_go, {
    req(input$tm_texto)
    txt <- tolower(input$tm_texto)

    df_txt <- tibble(text = txt)

    # Split into words
    toks <- df_txt |>
      unnest_tokens(word, text)

    # Drop pure numbers and one-character tokens
    toks <- toks |>
      filter(!str_detect(word, "^[0-9]+$")) |>
      filter(nchar(word) >= 2)

    # Spanish stopwords
    if (isTRUE(input$tm_usar_stop)) {
      sw_es <- stopwords::stopwords("es")
      toks <- toks |>
        filter(!word %in% sw_es)
    }

    # Stemming
    if (isTRUE(input$tm_stem)) {
      toks <- toks |>
        mutate(word = SnowballC::wordStem(word, language = "spanish"))
    }

    # Final count
    toks |>
      count(word, sort = TRUE)
  })

  # Word cloud
  output$tm_nube <- renderPlot({
    df <- tm_tokens()
    req(nrow(df) > 0)

    # An emptied numeric field arrives as NA and would make wordcloud() fail.
    validate(need(
      isTruthy(input$tm_minfreq) && isTruthy(input$tm_maxwords),
      "Indica la frecuencia mínima y el máximo de palabras para dibujar la nube."
    ))
    min_freq  <- input$tm_minfreq
    max_words <- max(1, floor(input$tm_maxwords))

    df_f <- df |>
      filter(n >= min_freq) |>
      head(max_words)

    validate(need(nrow(df_f) > 0,
                  "No hay palabras que cumplan la frecuencia mínima. Baja el umbral o desactiva filtros."))

    wordcloud(
      words = df_f$word,
      freq  = df_f$n,
      # wordcloud() defaults to min.freq = 3, which would silently drop words
      # the user asked to keep with a lower threshold.
      min.freq  = min_freq,
      max.words = max_words,
      random.order = FALSE
    )
  })

  # Count table (top 100)
  output$tm_tabla <- renderTable({
    df <- tm_tokens()
    head(df, 100)
  }, striped = TRUE, bordered = TRUE, digits = 0)

}

# Build the Shiny app object from the `ui` and `server` defined above.
shinyApp(ui = ui, server = server)
# Rendered-output note (not code): Shiny applications not supported in static R Markdown documents