1. Librerías
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(xgboost)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(zoo)
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(corrplot)
## corrplot 0.95 loaded
2. Generación de datos simulados tipo investigación
set.seed(123)
n <- 600
data <- tibble(
date = seq.Date(as.Date("2020-01-01"), by="week", length.out=n),
temperature = rnorm(n, 25, 4),
humidity = runif(n, 60, 95),
rainfall = runif(n, 0, 250),
pollution = rnorm(n, 50, 10),
dengue_cases = rpois(n, lambda = 20)
)
3. Visualización temporal
ggplot(data, aes(date, dengue_cases)) +
geom_line(color="steelblue") +
theme_minimal()

4. Correlaciones
corrplot(cor(data %>% select(-date)))

5. Target binario (riesgo epidemiológico)
data <- data %>%
mutate(target = ifelse(dengue_cases > quantile(dengue_cases,0.75),1,0))
6. Split train/test
set.seed(123)
trainIndex <- createDataPartition(data$target, p=0.7, list=FALSE)
train <- data[trainIndex,]
test <- data[-trainIndex,]
7. Matrices XGBoost
features <- c("temperature", "humidity", "rainfall", "pollution")
train_matrix <- xgb.DMatrix(
data = as.matrix(train %>% select(all_of(features))),
label = train$target
)
test_matrix <- xgb.DMatrix(
data = as.matrix(test %>% select(all_of(features))),
label = test$target
)
8. Entrenamiento modelo
params <- list(objective="binary:logistic", eval_metric="auc")
model <- xgb.train(
params = params,
data = train_matrix,
nrounds = 150,
evals = list(train = train_matrix, test = test_matrix),
early_stopping_rounds = 15,
maximize = TRUE,
verbose = 1
)
## Multiple eval metrics are present. Will use test_auc for early stopping.
## Will train until test_auc hasn't improved in 15 rounds.
##
## [1] train-auc:0.801542 test-auc:0.429794
## [2] train-auc:0.819546 test-auc:0.487901
## [3] train-auc:0.874664 test-auc:0.506749
## [4] train-auc:0.897491 test-auc:0.511523
## [5] train-auc:0.917899 test-auc:0.479177
## [6] train-auc:0.926667 test-auc:0.464362
## [7] train-auc:0.952290 test-auc:0.470288
## [8] train-auc:0.957868 test-auc:0.472099
## [9] train-auc:0.961104 test-auc:0.466996
## [10] train-auc:0.967241 test-auc:0.469465
## [11] train-auc:0.970401 test-auc:0.477037
## [12] train-auc:0.972698 test-auc:0.472263
## [13] train-auc:0.976508 test-auc:0.468148
## [14] train-auc:0.977989 test-auc:0.478519
## [15] train-auc:0.982252 test-auc:0.485926
## [16] train-auc:0.984324 test-auc:0.496132
## [17] train-auc:0.987952 test-auc:0.486420
## [18] train-auc:0.990385 test-auc:0.482305
## Stopping. Best iteration:
## [19] train-auc:0.990385 test-auc:0.484280
##
## [19] train-auc:0.990385 test-auc:0.484280
9. Predicciones
pred <- predict(model, test_matrix)
10. Evaluación ROC
roc_obj <- roc(test$target, pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj)

auc(roc_obj)
## Area under the curve: 0.5115
11. Importancia variables
importance <- xgb.importance(model=model)
xgb.plot.importance(importance)

12. Insights
cat("Factores climáticos muestran relación con riesgo epidemiológico.")
## Factores climáticos muestran relación con riesgo epidemiológico.