Filtramos los datos para concentrarnos en Filipinas, Estados
Unidos y Nepal.
Esto nos permitió trabajar con regímenes climáticos más homogéneos.
El dataset original no contenía variables climáticas suficientes.
Se implementó una conexión con la API de Open-Meteo para obtener:
- Precipitación diaria (`precip_real`)
- Humedad relativa máxima (`hum_real`)

Usando:

- Población normalizada (`pop_norm`) entre 0 y 1.

paquetes <- c("httr", "jsonlite", "ggplot2", "dplyr", "gridExtra")
# Attach every package named in `paquetes`, installing any that is missing.
# requireNamespace() is used for the availability check because, unlike
# require(), it neither attaches the package nor warns when it is absent;
# library() then attaches it and stops with an error if it still cannot be
# loaded.
for (p in paquetes) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Load the semicolon-separated source file (read as Latin-1, strings kept as
# character).
file_name <- "regresiones.csv"
datos_raw <- read.csv(
  file_name,
  sep = ";",
  stringsAsFactors = FALSE,
  fileEncoding = "latin1"
)

# Lower-case the header so columns can be referenced in lower case below.
names(datos_raw) <- tolower(names(datos_raw))

# Coerce the coordinates to numeric (entries that cannot be parsed become NA;
# the coercion warnings are silenced on purpose) and keep only rows whose
# coordinates are present and inside the valid latitude/longitude ranges.
datos_raw <- datos_raw %>%
  mutate(
    latitude = suppressWarnings(as.numeric(latitude)),
    longitude = suppressWarnings(as.numeric(longitude))
  ) %>%
  filter(
    !is.na(latitude),
    !is.na(longitude),
    abs(latitude) <= 90,
    abs(longitude) <= 180
  )

# Restrict to the three target countries and keep the first 100 matching rows.
paises_target <- c("Philippines", "United States", "Nepal")
df_demo <- head(filter(datos_raw, country_name %in% paises_target), 100)

# Event dates are parsed as day/month/year.
df_demo$event_date <- as.Date(df_demo$event_date, format = "%d/%m/%Y")
# Fetch one day of weather for one location from the Open-Meteo archive API.
#
# Arguments:
#   lat, lon  Scalar coordinates, pasted as-is into the query string.
#   date_str  Day to query; converted with as.character() and used as both
#             start_date and end_date.
#
# Returns a numeric vector of length 2: the first `precipitation_sum` value
# and the first `relative_humidity_2m_max` value of the reply's `daily`
# section. Any component that is unavailable is NA_real_; both are NA_real_
# when an input is NA, the HTTP status is not 200, the reply has no `daily`
# section, or the request/parsing raises an error (a warning is emitted for
# HTTP failures and errors so they are not lost silently).
fetch_weather <- function(lat, lon, date_str) {
  no_data <- c(NA_real_, NA_real_)
  if (is.na(lat) || is.na(lon) || is.na(date_str)) {
    return(no_data)
  }

  date <- as.character(date_str)
  url <- paste0(
    "https://archive-api.open-meteo.com/v1/archive?",
    "latitude=", lat,
    "&longitude=", lon,
    "&start_date=", date,
    "&end_date=", date,
    "&daily=precipitation_sum,relative_humidity_2m_max"
  )

  # First value of a daily series as a numeric scalar. A missing or empty
  # series yields NA so the result always has exactly two components.
  first_value <- function(daily, field) {
    series <- daily[[field]]
    if (length(series) == 0L) {
      return(NA_real_)
    }
    value <- as.numeric(series[[1L]])
    if (length(value) == 1L) value else NA_real_
  }

  tryCatch({
    res <- GET(url, timeout(10))
    status <- status_code(res)
    if (status != 200) {
      warning(
        "Open-Meteo request returned HTTP ", status, " for ", date,
        call. = FALSE
      )
      return(no_data)
    }
    cont <- fromJSON(content(res, "text", encoding = "UTF-8"))
    # `[[` matches names exactly; `$` would partially match `daily_units`
    # when `daily` itself is absent from the reply.
    daily <- cont[["daily"]]
    if (is.null(daily)) {
      return(no_data)
    }
    c(
      first_value(daily, "precipitation_sum"),
      first_value(daily, "relative_humidity_2m_max")
    )
  }, error = function(e) {
    warning(
      "Open-Meteo request failed for ", date, ": ", conditionMessage(e),
      call. = FALSE
    )
    no_data
  })
}
# Query the weather API once per row of df_demo and collect the results.
clima_list <- vector("list", nrow(df_demo))
for (i in seq_len(nrow(df_demo))) {
  clima_i <- fetch_weather(
    df_demo$latitude[i],
    df_demo$longitude[i],
    df_demo$event_date[i]
  )
  # Anything that is not a (precipitation, humidity) pair is treated as
  # missing, so a malformed result cannot be recycled into the wrong column.
  if (length(clima_i) != 2L) {
    clima_i <- c(NA_real_, NA_real_)
  }
  clima_list[[i]] <- clima_i
}

# Build the two-column table through matrix() rather than
# do.call(rbind, ...): with zero rows rbind returns NULL and the column
# renaming would fail, whereas this yields an empty 0 x 2 table.
clima_df <- as.data.frame(
  matrix(
    as.numeric(unlist(clima_list)),
    ncol = 2,
    byrow = TRUE,
    dimnames = list(NULL, c("precip_real", "hum_real"))
  )
)

# Attach the weather columns and drop rows where either value is missing.
df_demo <- cbind(df_demo, clima_df) %>%
  filter(!is.na(precip_real) & !is.na(hum_real))
# Build synthetic response columns (size_*) from the weather, population and
# latitude columns of df_demo. Each response applies a different functional
# form to one predictor and adds the same Gaussian noise vector.
set.seed(42)  # fixed seed so the noise is reproducible
n <- nrow(df_demo)
noise <- rnorm(n, mean = 0, sd = 18)

# Population as numeric, with missing values replaced by the median.
p_real <- as.numeric(df_demo$admin_division_population)
p_real[is.na(p_real)] <- median(p_real, na.rm = TRUE)

# Min-max scale the population to [0, 1]. When every value is identical the
# range is zero and the plain formula would give NaN (0 / 0), which would
# propagate into size_exp; all rows are set to 0 in that case.
pop_min <- min(p_real)
pop_span <- max(p_real) - pop_min
pop_norm <- if (!is.na(pop_span) && pop_span == 0) {
  rep(0, length(p_real))
} else {
  (p_real - pop_min) / pop_span
}

# Linear in precipitation.
df_demo$size_linear <- 2.4 * df_demo$precip_real + 125 + noise
# Exponential in normalised population.
df_demo$size_exp <- 98 * exp(1.35 * pop_norm) + noise
# Logarithmic in humidity (the +1 keeps log() defined at zero humidity).
df_demo$size_log <- 210 * log(df_demo$hum_real + 1) - 480 + noise
# Quadratic in latitude, centred on the sample mean latitude.
lat_center <- mean(df_demo$latitude)
df_demo$size_poly <- 0.75 * (df_demo$latitude - lat_center)^2 + 185 + noise
# Power law in (shifted) precipitation.
df_demo$size_pot <- 48 * (df_demo$precip_real + 1.2)^0.62 + 102 + noise

df_demo$pop_norm <- pop_norm