1. Preparación del Entorno y Carga de Datos

1.1 Carga de Datos

# Cargar la base original
datos <- read_xlsx(file.choose())

# Ver la estructura y obtener un resumen general
str(datos)
## tibble [4,315 × 47] (S3: tbl_df/tbl/data.frame)
##  $ DisNo.                                   : chr [1:4315] "1900-0003-USA" "1900-0006-JAM" "1900-0007-JAM" "1902-0003-GTM" ...
##  $ Historic                                 : chr [1:4315] "Yes" "Yes" "Yes" "Yes" ...
##  $ Classification Key                       : chr [1:4315] "nat-met-sto-tro" "nat-hyd-flo-flo" "nat-bio-epi-vir" "nat-geo-vol-ash" ...
##  $ Disaster Group                           : chr [1:4315] "Natural" "Natural" "Natural" "Natural" ...
##  $ Disaster Subgroup                        : chr [1:4315] "Meteorological" "Hydrological" "Biological" "Geophysical" ...
##  $ Disaster Type                            : chr [1:4315] "Storm" "Flood" "Epidemic" "Volcanic activity" ...
##  $ Disaster Subtype                         : chr [1:4315] "Tropical cyclone" "Flood (General)" "Viral disease" "Ash fall" ...
##  $ External IDs                             : logi [1:4315] NA NA NA NA NA NA ...
##  $ Event Name                               : chr [1:4315] NA NA "Gastroenteritis" "Santa Maria" ...
##  $ ISO                                      : chr [1:4315] "USA" "JAM" "JAM" "GTM" ...
##  $ Country                                  : chr [1:4315] "United States of America" "Jamaica" "Jamaica" "Guatemala" ...
##  $ Subregion                                : chr [1:4315] "Northern America" "Latin America and the Caribbean" "Latin America and the Caribbean" "Latin America and the Caribbean" ...
##  $ Region                                   : chr [1:4315] "Americas" "Americas" "Americas" "Americas" ...
##  $ Location                                 : chr [1:4315] "Galveston (Texas)" "Saint James" "Porus" NA ...
##  $ Origin                                   : chr [1:4315] NA NA NA NA ...
##  $ Associated Types                         : chr [1:4315] "Avalanche (Snow, Debris)" NA NA NA ...
##  $ OFDA/BHA Response                        : chr [1:4315] "No" "No" "No" "No" ...
##  $ Appeal                                   : chr [1:4315] "No" "No" "No" "No" ...
##  $ Declaration                              : chr [1:4315] "No" "No" "No" "No" ...
##  $ AID Contribution ('000 US$)              : logi [1:4315] NA NA NA NA NA NA ...
##  $ Magnitude                                : num [1:4315] 220 NA NA NA NA NA NA 7.5 NA NA ...
##  $ Magnitude Scale                          : chr [1:4315] "Kph" "Km2" "Vaccinated" NA ...
##  $ Latitude                                 : num [1:4315] NA NA NA NA NA NA NA 14 NA NA ...
##  $ Longitude                                : num [1:4315] NA NA NA NA NA NA NA -91 NA NA ...
##  $ River Basin                              : logi [1:4315] NA NA NA NA NA NA ...
##  $ Start Year                               : num [1:4315] 1900 1900 1900 1902 1902 ...
##  $ Start Month                              : num [1:4315] 9 1 1 4 5 5 10 4 NA NA ...
##  $ Start Day                                : num [1:4315] 8 6 13 8 8 7 24 18 NA NA ...
##  $ End Year                                 : num [1:4315] 1900 1900 1900 1902 1902 ...
##  $ End Month                                : num [1:4315] 9 1 1 4 5 5 10 4 NA NA ...
##  $ End Day                                  : num [1:4315] 8 6 13 8 8 7 24 18 NA NA ...
##  $ Total Deaths                             : num [1:4315] 6000 300 30 1000 30000 ...
##  $ No. Injured                              : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ No. Affected                             : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ No. Homeless                             : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Affected                           : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Reconstruction Costs ('000 US$)          : logi [1:4315] NA NA NA NA NA NA ...
##  $ Reconstruction Costs, Adjusted ('000 US$): logi [1:4315] NA NA NA NA NA NA ...
##  $ Insured Damage ('000 US$)                : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Insured Damage, Adjusted ('000 US$)      : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Damage ('000 US$)                  : num [1:4315] 30000 NA NA NA NA NA NA 25000 480000 NA ...
##  $ Total Damage, Adjusted ('000 US$)        : num [1:4315] 1098720 NA NA NA NA ...
##  $ CPI                                      : num [1:4315] 2.73 2.73 2.73 2.84 2.84 ...
##  $ Admin Units                              : logi [1:4315] NA NA NA NA NA NA ...
##  $ Entry Date                               : chr [1:4315] "2004-10-18" "2003-07-01" "2003-07-01" "2003-07-01" ...
##  $ Last Update                              : chr [1:4315] "2023-10-17" "2023-09-25" "2023-09-25" "2023-09-25" ...
##  $ Temperature                              : num [1:4315] 13.5 27 27 20 26 28 20 20 13.5 13.5 ...
glimpse(datos)
## Rows: 4,315
## Columns: 47
## $ DisNo.                                      <chr> "1900-0003-USA", "1900-000…
## $ Historic                                    <chr> "Yes", "Yes", "Yes", "Yes"…
## $ `Classification Key`                        <chr> "nat-met-sto-tro", "nat-hy…
## $ `Disaster Group`                            <chr> "Natural", "Natural", "Nat…
## $ `Disaster Subgroup`                         <chr> "Meteorological", "Hydrolo…
## $ `Disaster Type`                             <chr> "Storm", "Flood", "Epidemi…
## $ `Disaster Subtype`                          <chr> "Tropical cyclone", "Flood…
## $ `External IDs`                              <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Event Name`                                <chr> NA, NA, "Gastroenteritis",…
## $ ISO                                         <chr> "USA", "JAM", "JAM", "GTM"…
## $ Country                                     <chr> "United States of America"…
## $ Subregion                                   <chr> "Northern America", "Latin…
## $ Region                                      <chr> "Americas", "Americas", "A…
## $ Location                                    <chr> "Galveston (Texas)", "Sain…
## $ Origin                                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ `Associated Types`                          <chr> "Avalanche (Snow, Debris)"…
## $ `OFDA/BHA Response`                         <chr> "No", "No", "No", "No", "N…
## $ Appeal                                      <chr> "No", "No", "No", "No", "N…
## $ Declaration                                 <chr> "No", "No", "No", "No", "N…
## $ `AID Contribution ('000 US$)`               <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ Magnitude                                   <dbl> 220.0, NA, NA, NA, NA, NA,…
## $ `Magnitude Scale`                           <chr> "Kph", "Km2", "Vaccinated"…
## $ Latitude                                    <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ Longitude                                   <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `River Basin`                               <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Start Year`                                <dbl> 1900, 1900, 1900, 1902, 19…
## $ `Start Month`                               <dbl> 9, 1, 1, 4, 5, 5, 10, 4, N…
## $ `Start Day`                                 <dbl> 8, 6, 13, 8, 8, 7, 24, 18,…
## $ `End Year`                                  <dbl> 1900, 1900, 1900, 1902, 19…
## $ `End Month`                                 <dbl> 9, 1, 1, 4, 5, 5, 10, 4, N…
## $ `End Day`                                   <dbl> 8, 6, 13, 8, 8, 7, 24, 18,…
## $ `Total Deaths`                              <dbl> 6000, 300, 30, 1000, 30000…
## $ `No. Injured`                               <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `No. Affected`                              <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `No. Homeless`                              <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Total Affected`                            <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Reconstruction Costs ('000 US$)`           <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Reconstruction Costs, Adjusted ('000 US$)` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Insured Damage ('000 US$)`                 <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Insured Damage, Adjusted ('000 US$)`       <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Total Damage ('000 US$)`                   <dbl> 30000, NA, NA, NA, NA, NA,…
## $ `Total Damage, Adjusted ('000 US$)`         <dbl> 1098720, NA, NA, NA, NA, N…
## $ CPI                                         <dbl> 2.730451, 2.730451, 2.7304…
## $ `Admin Units`                               <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Entry Date`                                <chr> "2004-10-18", "2003-07-01"…
## $ `Last Update`                               <chr> "2023-10-17", "2023-09-25"…
## $ Temperature                                 <dbl> 13.5, 27.0, 27.0, 20.0, 26…
summary(datos)
##     DisNo.            Historic         Classification Key Disaster Group    
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Disaster Subgroup  Disaster Type      Disaster Subtype   External IDs  
##  Length:4315        Length:4315        Length:4315        Mode:logical  
##  Class :character   Class :character   Class :character   NA's:4315     
##  Mode  :character   Mode  :character   Mode  :character                 
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##   Event Name            ISO              Country           Subregion        
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     Region            Location            Origin          Associated Types  
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  OFDA/BHA Response     Appeal          Declaration       
##  Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  AID Contribution ('000 US$)   Magnitude         Magnitude Scale   
##  Mode:logical                Min.   :    -50.0   Length:4315       
##  TRUE:179                    1st Qu.:      7.7   Class :character  
##  NA's:4136                   Median :    165.0   Mode  :character  
##                              Mean   :  39808.8                     
##                              3rd Qu.:   3527.5                     
##                              Max.   :1768303.0                     
##                              NA's   :3111                          
##     Latitude         Longitude       River Basin      Start Year  
##  Min.   :-72.640   Min.   :-162.83   Mode:logical   Min.   :1900  
##  1st Qu.: -9.425   1st Qu.: -90.57   NA's:4315      1st Qu.:1989  
##  Median : 10.980   Median : -77.46                  Median :2002  
##  Mean   :  7.099   Mean   : -76.58                  Mean   :1998  
##  3rd Qu.: 19.107   3rd Qu.: -70.56                  3rd Qu.:2014  
##  Max.   : 67.930   Max.   : 124.42                  Max.   :2025  
##  NA's   :3689      NA's   :3689                                   
##   Start Month       Start Day        End Year      End Month     
##  Min.   : 1.000   Min.   : 1.00   Min.   :1900   Min.   : 1.000  
##  1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.:1989   1st Qu.: 4.000  
##  Median : 7.000   Median :15.00   Median :2002   Median : 7.000  
##  Mean   : 6.468   Mean   :15.43   Mean   :1998   Mean   : 6.639  
##  3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:2014   3rd Qu.: 9.000  
##  Max.   :12.000   Max.   :31.00   Max.   :2025   Max.   :12.000  
##  NA's   :69       NA's   :857                    NA's   :141     
##     End Day       Total Deaths       No. Injured         No. Affected     
##  Min.   : 1.00   Min.   :     1.0   Min.   :      1.0   Min.   :       3  
##  1st Qu.: 8.00   1st Qu.:     4.0   1st Qu.:     12.0   1st Qu.:    1020  
##  Median :16.00   Median :    12.0   Median :     37.0   Median :    6780  
##  Mean   :15.84   Mean   :   286.9   Mean   :   3817.9   Mean   :  195474  
##  3rd Qu.:23.00   3rd Qu.:    40.0   3rd Qu.:    138.5   3rd Qu.:   45000  
##  Max.   :31.00   Max.   :222570.0   Max.   :1800000.0   Max.   :85000000  
##  NA's   :834     NA's   :1398       NA's   :3400        NA's   :1884      
##   No. Homeless     Total Affected     Reconstruction Costs ('000 US$)
##  Min.   :      5   Min.   :       1   Mode:logical                   
##  1st Qu.:    300   1st Qu.:     600   TRUE:14                        
##  Median :   1533   Median :    4248   NA's:4301                      
##  Mean   :  19975   Mean   :  165323                                  
##  3rd Qu.:   8770   3rd Qu.:   30004                                  
##  Max.   :1166000   Max.   :85000012                                  
##  NA's   :3690      NA's   :1344                                      
##  Reconstruction Costs, Adjusted ('000 US$) Insured Damage ('000 US$)
##  Mode:logical                              Min.   :     162         
##  TRUE:14                                   1st Qu.:   62500         
##  NA's:4301                                 Median :  200000         
##                                            Mean   : 1299304         
##                                            3rd Qu.:  755000         
##                                            Max.   :60000000         
##                                            NA's   :3710             
##  Insured Damage, Adjusted ('000 US$) Total Damage ('000 US$)
##  Min.   :     201                    Min.   :        3      
##  1st Qu.:  118839                    1st Qu.:    15500      
##  Median :  339640                    Median :   112300      
##  Mean   : 1731304                    Mean   :  1251189      
##  3rd Qu.: 1046379                    3rd Qu.:   729350      
##  Max.   :93614347                    Max.   :125000000      
##  NA's   :3721                        NA's   :2605           
##  Total Damage, Adjusted ('000 US$)      CPI         Admin Units   
##  Min.   :        5                 Min.   :  2.73   Mode:logical  
##  1st Qu.:    37512                 1st Qu.: 38.81   NA's:4315     
##  Median :   237094                 Median : 58.11                 
##  Mean   :  1871285                 Mean   : 55.38                 
##  3rd Qu.:  1291495                 3rd Qu.: 75.35                 
##  Max.   :195029889                 Max.   :100.00                 
##  NA's   :2655                      NA's   :130                    
##   Entry Date        Last Update         Temperature   
##  Length:4315        Length:4315        Min.   : 5.00  
##  Class :character   Class :character   1st Qu.:15.00  
##  Mode  :character   Mode  :character   Median :20.00  
##                                        Mean   :19.84  
##                                        3rd Qu.:25.00  
##                                        Max.   :29.00  
## 

1.2 Filtrado de Variables

# Variables Principales (para análisis central)
variables_principales <- c("Disaster Subgroup", "Disaster Type", "Disaster Subtype", 
                           "ISO", "Country", "Subregion", "Total Deaths", "No. Injured", 
                           "No. Affected", "Total Affected", "Insured Damage ('000 US$)", 
                           "Insured Damage, Adjusted ('000 US$)", "Total Damage ('000 US$)", 
                           "Total Damage, Adjusted ('000 US$)", "Temperature")

# Variables Secundarias (para análisis complementario)
variables_secundarias <- c("Event Name", "Magnitude", "Magnitude Scale", "Start Year", 
                           "No. Homeless","Region")

# Creación de la sub-base con las variables seleccionadas
datos_subset <- datos %>% select(all_of(c(variables_principales, variables_secundarias)))

# Revisar la estructura de la sub-base
str(datos_subset)
## tibble [4,315 × 21] (S3: tbl_df/tbl/data.frame)
##  $ Disaster Subgroup                  : chr [1:4315] "Meteorological" "Hydrological" "Biological" "Geophysical" ...
##  $ Disaster Type                      : chr [1:4315] "Storm" "Flood" "Epidemic" "Volcanic activity" ...
##  $ Disaster Subtype                   : chr [1:4315] "Tropical cyclone" "Flood (General)" "Viral disease" "Ash fall" ...
##  $ ISO                                : chr [1:4315] "USA" "JAM" "JAM" "GTM" ...
##  $ Country                            : chr [1:4315] "United States of America" "Jamaica" "Jamaica" "Guatemala" ...
##  $ Subregion                          : chr [1:4315] "Northern America" "Latin America and the Caribbean" "Latin America and the Caribbean" "Latin America and the Caribbean" ...
##  $ Total Deaths                       : num [1:4315] 6000 300 30 1000 30000 ...
##  $ No. Injured                        : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ No. Affected                       : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Affected                     : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Insured Damage ('000 US$)          : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Insured Damage, Adjusted ('000 US$): num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Damage ('000 US$)            : num [1:4315] 30000 NA NA NA NA NA NA 25000 480000 NA ...
##  $ Total Damage, Adjusted ('000 US$)  : num [1:4315] 1098720 NA NA NA NA ...
##  $ Temperature                        : num [1:4315] 13.5 27 27 20 26 28 20 20 13.5 13.5 ...
##  $ Event Name                         : chr [1:4315] NA NA "Gastroenteritis" "Santa Maria" ...
##  $ Magnitude                          : num [1:4315] 220 NA NA NA NA NA NA 7.5 NA NA ...
##  $ Magnitude Scale                    : chr [1:4315] "Kph" "Km2" "Vaccinated" NA ...
##  $ Start Year                         : num [1:4315] 1900 1900 1900 1902 1902 ...
##  $ No. Homeless                       : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Region                             : chr [1:4315] "Americas" "Americas" "Americas" "Americas" ...
summary(datos_subset)
##  Disaster Subgroup  Disaster Type      Disaster Subtype       ISO           
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Country           Subregion          Total Deaths       No. Injured       
##  Length:4315        Length:4315        Min.   :     1.0   Min.   :      1.0  
##  Class :character   Class :character   1st Qu.:     4.0   1st Qu.:     12.0  
##  Mode  :character   Mode  :character   Median :    12.0   Median :     37.0  
##                                        Mean   :   286.9   Mean   :   3817.9  
##                                        3rd Qu.:    40.0   3rd Qu.:    138.5  
##                                        Max.   :222570.0   Max.   :1800000.0  
##                                        NA's   :1398       NA's   :3400       
##   No. Affected      Total Affected     Insured Damage ('000 US$)
##  Min.   :       3   Min.   :       1   Min.   :     162         
##  1st Qu.:    1020   1st Qu.:     600   1st Qu.:   62500         
##  Median :    6780   Median :    4248   Median :  200000         
##  Mean   :  195474   Mean   :  165323   Mean   : 1299304         
##  3rd Qu.:   45000   3rd Qu.:   30004   3rd Qu.:  755000         
##  Max.   :85000000   Max.   :85000012   Max.   :60000000         
##  NA's   :1884       NA's   :1344       NA's   :3710             
##  Insured Damage, Adjusted ('000 US$) Total Damage ('000 US$)
##  Min.   :     201                    Min.   :        3      
##  1st Qu.:  118839                    1st Qu.:    15500      
##  Median :  339640                    Median :   112300      
##  Mean   : 1731304                    Mean   :  1251189      
##  3rd Qu.: 1046379                    3rd Qu.:   729350      
##  Max.   :93614347                    Max.   :125000000      
##  NA's   :3721                        NA's   :2605           
##  Total Damage, Adjusted ('000 US$)  Temperature     Event Name       
##  Min.   :        5                 Min.   : 5.00   Length:4315       
##  1st Qu.:    37512                 1st Qu.:15.00   Class :character  
##  Median :   237094                 Median :20.00   Mode  :character  
##  Mean   :  1871285                 Mean   :19.84                     
##  3rd Qu.:  1291495                 3rd Qu.:25.00                     
##  Max.   :195029889                 Max.   :29.00                     
##  NA's   :2655                                                        
##    Magnitude         Magnitude Scale      Start Year    No. Homeless    
##  Min.   :    -50.0   Length:4315        Min.   :1900   Min.   :      5  
##  1st Qu.:      7.7   Class :character   1st Qu.:1989   1st Qu.:    300  
##  Median :    165.0   Mode  :character   Median :2002   Median :   1533  
##  Mean   :  39808.8                      Mean   :1998   Mean   :  19975  
##  3rd Qu.:   3527.5                      3rd Qu.:2014   3rd Qu.:   8770  
##  Max.   :1768303.0                      Max.   :2025   Max.   :1166000  
##  NA's   :3111                                          NA's   :3690     
##     Region         
##  Length:4315       
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

2. Análisis Exploratorio de Datos

2.1 Revisión de Datos Faltantes y Valores Atípicos

# Número total de NA's en la sub-base
total_na <- sum(is.na(datos_subset))
print(paste("Total NA:", total_na))
## [1] "Total NA: 31179"
# Porcentaje de NA's por columna
na_por_columna <- colSums(is.na(datos_subset)) / nrow(datos_subset)
print(na_por_columna)
##                   Disaster Subgroup                       Disaster Type 
##                          0.00000000                          0.00000000 
##                    Disaster Subtype                                 ISO 
##                          0.00000000                          0.00000000 
##                             Country                           Subregion 
##                          0.00000000                          0.00000000 
##                        Total Deaths                         No. Injured 
##                          0.32398610                          0.78794902 
##                        No. Affected                      Total Affected 
##                          0.43661645                          0.31147161 
##           Insured Damage ('000 US$) Insured Damage, Adjusted ('000 US$) 
##                          0.85979143                          0.86234067 
##             Total Damage ('000 US$)   Total Damage, Adjusted ('000 US$) 
##                          0.60370800                          0.61529548 
##                         Temperature                          Event Name 
##                          0.00000000                          0.76152955 
##                           Magnitude                     Magnitude Scale 
##                          0.72097335                          0.08690614 
##                          Start Year                        No. Homeless 
##                          0.00000000                          0.85515643 
##                              Region 
##                          0.00000000
# Boxplot de 'Total Damage' (Daños Totales)
ggplot(datos_subset, aes(y = `Total Damage ('000 US$)`)) +
  geom_boxplot() +
  theme_minimal() +
  labs(y = "Total Damage ('000 US$)", title = "Boxplot de Daños Totales")

2.2 Estadísticas Descriptivas Básicas

# Calcular estadísticas de muertes
media_muertes <- mean(datos_subset$`Total Deaths`, na.rm = TRUE)
mediana_muertes <- median(datos_subset$`Total Deaths`, na.rm = TRUE)
desv_estandar_muertes <- sd(datos_subset$`Total Deaths`, na.rm = TRUE)

# Función para calcular la moda
moda <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
moda_muertes <- moda(datos_subset$`Total Deaths`)

# Imprimir resultados
print(paste("Media de muertes:", media_muertes))
## [1] "Media de muertes: 286.88104216661"
print(paste("Mediana de muertes:", mediana_muertes))
## [1] "Mediana de muertes: 12"
print(paste("Desviación estándar:", desv_estandar_muertes))
## [1] "Desviación estándar: 4649.08031281979"
print(paste("Moda de muertes:", moda_muertes))
## [1] "Moda de muertes: NA"

3. Visualización de Datos

3.1 Gráficos de Frecuencia para Variables Categóricas

# Frecuencia por "Disaster Subgroup"
ggplot(datos_subset, aes(x = `Disaster Subgroup`)) +
  geom_bar(fill = "dodgerblue") +
  theme_minimal() +
  labs(title = "Frecuencia por Subgrupo de Desastre",
       x = "Subgrupo de Desastre", y = "Cantidad")

# Frecuencia por "Disaster Type"
ggplot(datos_subset, aes(x = `Disaster Type`)) +
  geom_bar(fill = "red") +
  theme_minimal() +
  labs(title = "Frecuencia por Tipo de Desastre",
       x = "Tipo de Desastre", y = "Cantidad")

# Top 5 Disaster Type
top5_disaster_type <- datos_subset %>% 
  count(`Disaster Type`, sort = TRUE) %>% 
  head(5)
print(top5_disaster_type)
## # A tibble: 5 × 2
##   `Disaster Type`         n
##   <chr>               <int>
## 1 Storm                1580
## 2 Flood                1407
## 3 Earthquake            326
## 4 Mass movement (wet)   208
## 5 Wildfire              208
# Frecuencia por "Disaster Subtype"
ggplot(datos_subset, aes(x = `Disaster Subtype`)) +
  geom_bar(fill = "seagreen") +
  theme_minimal() +
  labs(title = "Frecuencia por Subtipo de Desastre",
       x = "Subtipo de Desastre", y = "Cantidad")

# Top 5 Disaster Subtype
top5_disaster_subtype <- datos_subset %>% 
  count(`Disaster Subtype`, sort = TRUE) %>% 
  head(5)
print(top5_disaster_subtype)
## # A tibble: 5 × 2
##   `Disaster Subtype`     n
##   <chr>              <int>
## 1 Tropical cyclone     795
## 2 Riverine flood       633
## 3 Flood (General)      630
## 4 Ground movement      314
## 5 Storm (General)      279

3.2 Gráficos para Variables Numéricas

# Filtrar datos para Total Damage
datos_filtrados <- datos_subset %>%
  filter(!is.na(`Total Damage ('000 US$)`) & is.finite(`Total Damage ('000 US$)`))

# Histograma de Daños Totales
ggplot(datos_filtrados, aes(x = `Total Damage ('000 US$)`)) +
  geom_histogram(binwidth = 1000, fill = "orange", color = "black") +
  theme_minimal() +
  labs(title = "Histograma de Daños Totales",
       x = "Daños Totales ('000 US$)", y = "Frecuencia")

# Diagrama de Dispersión: Muertes vs Heridos
ggplot(datos_subset, aes(x = `Total Deaths`, y = `No. Injured`)) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  labs(title = "Relación entre Muertes y Heridos",
       x = "Total Deaths", y = "No. Injured")

4. Análisis de Relaciones y Tendencias

4.1 Impacto por Región y Tiempo

# Análisis de impacto por región
impacto_region <- datos_subset %>%
  group_by(Region, `Start Year`) %>%
  summarise(Total_Muertes = sum(`Total Deaths`, na.rm = TRUE),
            Total_Heridos = sum(`No. Injured`, na.rm = TRUE),
            .groups = "drop")

# Visualización de la evolución de muertes por región
ggplot(impacto_region, aes(x = `Start Year`, y = Total_Muertes, color = Region)) +
  geom_line() +
  theme_minimal() +
  labs(title = "Evolución de Muertes por Región a lo Largo de los Años",
       x = "Año de Inicio", y = "Total Muertes")

5. Visualización Avanzada: Mapa de Calor Interactivo

# Cargar el shapefile
mapa_mundi <- st_read(file.choose())
## Reading layer `ne_110m_admin_0_countries' from data source 
##   `C:\Users\taroj\Documents\Actuaria_Octavo Semestre\COMPUTO CIENTIFICO\Proyecto\Mapas\ne_110m_admin_0_countries\ne_110m_admin_0_countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 177 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## Geodetic CRS:  WGS 84
head(mapa_mundi$SOV_A3)
## [1] "FJI" "TZA" "SAH" "CAN" "US1" "KA1"
# Información de Desastres a Nivel País
desastres_por_pais <- datos_subset %>%
  group_by(ISO) %>%
  summarise(Total_Desastres = n(),
            Desastres_Tipo = paste(unique(`Disaster Type`), collapse = ", "),
            .groups = "drop")

# Corregir el código de Estados Unidos en el shapefile
mapa_mundi <- mapa_mundi %>%
  mutate(SOV_A3 = ifelse(SOV_A3 == "US1", "USA", SOV_A3))

# Unión con el shapefile corregido
mapa_datos <- mapa_mundi %>%
  left_join(desastres_por_pais, by = c("SOV_A3" = "ISO"))

# Asignar color gris a los países sin datos
mapa_datos$Total_Desastres[is.na(mapa_datos$Total_Desastres)] <- 0

# Crear un mapa base con ggplot2
mapa_base <- ggplot() +
  geom_sf(data = mapa_datos, aes(fill = Total_Desastres, text = paste("País:", SOV_A3,
                                                                      "<br>Total Desastres:", Total_Desastres,
                                                                      "<br>Tipos:", Desastres_Tipo))) +
  scale_fill_gradient(low = "lightblue", high = "darkred", na.value = "grey") +
  theme_minimal() +
  labs(title = "Mapa Interactivo de Desastres por País", fill = "Total Desastres")

# Convertir a interactivo
mapa_interactivo <- ggplotly(mapa_base, tooltip = "text")
mapa_interactivo

6. Análisis de la Variable Temperature

# Distribución de Temperature
ggplot(datos_subset, aes(x = Temperature)) +
  geom_histogram(binwidth = 1, fill = "lightgreen", color = "black") +
  theme_minimal() +
  labs(title = "Histograma de Temperatura", x = "Temperature (°C)", y = "Frecuencia")

# Relación entre Temperature y Total Damage
ggplot(datos_subset, aes(x = Temperature, y = `Total Damage ('000 US$)`)) +
  geom_point(alpha = 0.6, color = "purple") +
  theme_minimal() +
  labs(title = "Relación entre Temperatura y Daños Totales",
       x = "Temperatura (°C)", y = "Daños Totales ('000 US$)")

# Relación entre Temperature y Total Affected
ggplot(datos_subset, aes(x = Temperature, y = `Total Affected`)) +
  geom_point(alpha = 0.6, color = "brown") +
  theme_minimal() +
  labs(title = "Relación entre Temperatura y Total Afectados",
       x = "Temperatura (°C)", y = "Total Afectados")

7. Análisis de Desastres y Temperatura a lo Largo del Tiempo

# Agrupar por Country, Disaster Type y Start Year
temp_analysis <- datos_subset %>%
  group_by(Country, `Disaster Type`, `Start Year`) %>%
  summarise(Num_Desastres = n(),
            Prom_Temperature = mean(Temperature, na.rm = TRUE),
            .groups = "drop")

# Visualizar evolución de desastres por país y tipo
ggplot(temp_analysis, aes(x = `Start Year`, y = Num_Desastres, color = Country)) +
  geom_line() +
  facet_wrap(~ `Disaster Type`) +
  theme_minimal() +
  labs(title = "Evolución de Desastres por Tipo y País en Función de la Temperatura",
       x = "Año de Inicio", y = "Cantidad de Desastres")

# Tendencia de temperatura y cantidad de desastres a nivel global
global_temp_trend <- datos_subset %>%
  group_by(`Start Year`) %>%
  summarise(Num_Desastres = n(),
            Avg_Temperature = mean(Temperature, na.rm = TRUE),
            .groups = "drop")

ggplot(global_temp_trend, aes(x = `Start Year`)) +
  geom_line(aes(y = Num_Desastres), color = "blue", size = 1) +
  geom_line(aes(y = Avg_Temperature * 10), color = "red", linetype = "dashed", size = 1) +
  theme_minimal() +
  labs(title = "Tendencia Global: Número de Desastres y Temperatura",
       x = "Año de Inicio",
       y = "Cantidad de Desastres / Temperatura (escalada)")

MODELO VAR

######MODELO VAR
# Librerías necesarias
library(dplyr)
library(vars)
library(urca)
library(tseries)

# 1. Agregar SumTotalAffected y crear serie anual con LogInsured y LogAffected
serie_regional <- datos_subset %>%
  filter(`Start Year` >= 1980) %>%
  mutate(
    RegionGrupo = case_when(
      Subregion == "Northern America" ~ "NorteAmerica",
      Subregion %in% c("Latin America and the Caribbean", "South America") ~ "CentroSurAmerica",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(RegionGrupo)) %>%
  group_by(RegionGrupo, Año = `Start Year`) %>%
  summarise(
    SumDeaths        = sum(`Total Deaths`, na.rm = TRUE),
    SumInsuredDamage = sum(`Insured Damage ('000 US$)`, na.rm = TRUE),
    SumTotalAffected = sum(`Total Affected`, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(RegionGrupo, Año) %>%
  mutate(
    LogInsured  = log(SumInsuredDamage + 1),
    LogAffected = log(SumTotalAffected + 1)
  )

# 2. Crear ts multivariante de 3 variables endógenas
crear_ts3 <- function(df) {
  start_year <- min(df$Año)
  mat <- df %>%
    dplyr::select(LogInsured, SumDeaths, LogAffected) %>%
    as.matrix()
  ts(mat, start = start_year, frequency = 1)
}

ts_norte3 <- serie_regional %>%
  filter(RegionGrupo == "NorteAmerica") %>%
  crear_ts3()

ts_cs3 <- serie_regional %>%
  filter(RegionGrupo == "CentroSurAmerica") %>%
  crear_ts3()

# 3. Test ADF
test_adf <- function(ts_data) {
  apply(ts_data, 2, function(x) {
    if (all(x == 0, na.rm = TRUE) || length(unique(x[!is.na(x)])) <= 1) {
      return(c(stat = NA, p.value = NA))
    }
    adf <- try(adf.test(x), silent = TRUE)
    if (inherits(adf, "htest")) {
      return(c(stat = as.numeric(adf$statistic), p.value = adf$p.value))
    } else {
      ur <- ur.df(x, type = "trend", selectlags = "AIC")
      return(c(stat = as.numeric(ur@teststat[1]), p.value = NA))
    }
  })
}

cat("ADF niveles Norteamérica (3 var):
"); print(test_adf(ts_norte3))
## ADF niveles Norteamérica (3 var):
##          LogInsured   SumDeaths LogAffected
## stat    -3.70780467 -3.47809531  -2.3259964
## p.value  0.03485446  0.05638791   0.4443354
cat("ADF niveles CentroSur América (3 var):
"); print(test_adf(ts_cs3))
## ADF niveles CentroSur América (3 var):
##         LogInsured   SumDeaths LogAffected
## stat    -2.3021843 -3.41735250  -2.4267907
## p.value  0.4538035  0.06564748   0.4042582
# 4. Cointegración Johansen
test_cointegration <- function(ts_data, region_name) {
  jo <- ca.jo(ts_data, type = "trace", ecdet = "const", K = 2)
  cat("Johansen", region_name, ":
"); print(summary(jo))
  teststat <- jo@teststat
  cval     <- jo@cval
  rank_found <- 0
  for (i in seq_along(teststat)) {
    if (!is.na(teststat[i]) && teststat[i] > as.numeric(cval[i, "5pct"])) {
      rank_found <- i
    }
  }
  max_rank <- ncol(ts_data) - 1
  if (rank_found > max_rank) {
    cat("Advertencia: rank sugerido", rank_found, "excede max_rank", max_rank, "→ ajustado a", max_rank, "\n")
    rank <- max_rank
  } else {
    rank <- rank_found
  }
  cat("Cointegration rank sugerido para", region_name, "=", rank, "\n\n")
  list(jo = jo, rank = rank)
}

res_coin_norte <- test_cointegration(ts_norte3, "NorteAmerica")
## Johansen NorteAmerica :
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 3.815874e-01 2.712467e-01 1.889672e-01 1.665335e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 |  9.22  7.52  9.24 12.97
## r <= 1 | 23.14 17.85 19.96 24.60
## r = 0  | 44.28 32.00 34.91 41.07
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                LogInsured.l2 SumDeaths.l2 LogAffected.l2   constant
## LogInsured.l2     1.00000000  1.000000000    1.000000000  1.0000000
## SumDeaths.l2      0.03488876 -0.009351966   -0.002774113 -0.0103964
## LogAffected.l2   -2.21711919 -2.764023549    0.279008028 -1.1207177
## constant         -3.45454781 20.438233262  -17.000045652 78.5279781
## 
## Weights W:
## (This is the loading matrix)
## 
##               LogInsured.l2 SumDeaths.l2 LogAffected.l2      constant
## LogInsured.d    -0.15212248   -0.0295427     -0.2297773 -2.595214e-18
## SumDeaths.d    -23.63105997   19.5305027     10.9414760  2.149470e-15
## LogAffected.d    0.02581326    0.1982096     -0.1550881  9.032321e-18
## 
## Advertencia: rank sugerido 3 excede max_rank 2 → ajustado a 2 
## Cointegration rank sugerido para NorteAmerica = 2
res_coin_cs    <- test_cointegration(ts_cs3,    "CentroSurAmerica")
## Johansen CentroSurAmerica :
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 4.025543e-01 3.103318e-01 2.551600e-01 4.996004e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 | 12.96  7.52  9.24 12.97
## r <= 1 | 29.31 17.85 19.96 24.60
## r = 0  | 51.97 32.00 34.91 41.07
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                LogInsured.l2  SumDeaths.l2 LogAffected.l2      constant
## LogInsured.l2   1.000000e+00  1.000000e+00   1.000000e+00  1.000000e+00
## SumDeaths.l2   -1.107854e-03 -1.843583e-04   3.190581e-05 -1.905011e-05
## LogAffected.l2  1.920745e+01 -1.507749e+01  -1.074131e+00  7.683941e+00
## constant       -2.929411e+02  2.239612e+02   7.397955e+00  1.087308e+02
## 
## Weights W:
## (This is the loading matrix)
## 
##               LogInsured.l2 SumDeaths.l2 LogAffected.l2      constant
## LogInsured.d    -0.02925524 2.458395e-03  -6.578660e-01 -1.810730e-17
## SumDeaths.d    821.90904439 9.353970e+02  -1.323081e+03 -2.354038e-13
## LogAffected.d   -0.01278736 4.613684e-02  -4.813532e-02  5.146330e-18
## 
## Advertencia: rank sugerido 3 excede max_rank 2 → ajustado a 2 
## Cointegration rank sugerido para CentroSurAmerica = 2
# 5. Ajuste modelo con 3 var
procesar_region_3var <- function(ts_data, region_name) {
  cat("---- Procesando región:", region_name, "----\n")
  adf_res <- test_adf(ts_data)
  cat("ADF niveles", region_name, ":
"); print(adf_res)
  coin <- test_cointegration(ts_data, region_name)
  jo   <- coin$jo; rank <- coin$rank
  result <- list()
  if (!is.null(jo) && rank >= 1) {
    cat(region_name, ": intentando VECM con rank", rank, "\n")
    vecm_lm <- try(cajorls(jo, r = rank), silent = TRUE)
    if (inherits(vecm_lm, "try-error")) {
      cat("Error cajorls → VAR en diferencias\n")
      diff_ts <- diff(ts_data)
      sel_df <- VARselect(diff_ts, lag.max = 5, type = "const")
      p_df <- sel_df$selection["AIC(n)"]
      cat("Lag óptimo dif (AIC) =", p_df, "\n")
      var_mod <- VAR(diff_ts, p = p_df, type = "const")
      result$model <- var_mod; result$type <- "VAR_diff"; result$select <- sel_df; result$rank <- 0
    } else {
      var_from_vecm <- try(vars::vec2var(jo, r = rank), silent = TRUE)
      if (inherits(var_from_vecm, "try-error")) {
        cat("Error vec2var → VAR en diferencias\n")
        diff_ts <- diff(ts_data)
        sel_df <- VARselect(diff_ts, lag.max = 5, type = "const")
        p_df <- sel_df$selection["AIC(n)"]
        cat("Lag óptimo dif (AIC) =", p_df, "\n")
        var_mod <- VAR(diff_ts, p = p_df, type = "const")
        result$model <- var_mod; result$type <- "VAR_diff"; result$select <- sel_df; result$rank <- 0
      } else {
        cat("VECM estimado para", region_name, "\n")
        result$model <- var_from_vecm; result$type <- "VECM->VAR"; result$jo <- jo; result$vecm_lm <- vecm_lm; result$rank <- rank
      }
    }
  } else {
    estacionarios <- all(adf_res["p.value",] < 0.05, na.rm = TRUE)
    if (estacionarios) {
      cat(region_name, ": VAR en niveles\n")
      sel_lv <- VARselect(ts_data, lag.max = 5, type = "const")
      p_lv <- sel_lv$selection["AIC(n)"]
      cat("Lag óptimo niv (AIC) =", p_lv, "\n")
      var_mod <- VAR(ts_data, p = p_lv, type = "const")
      result$model <- var_mod; result$type <- "VAR_levels"; result$select <- sel_lv; result$rank <- 0
    } else {
      cat(region_name, ": VAR en diferencias\n")
      diff_ts <- diff(ts_data)
      if (any(is.na(diff_ts))) stop("NA tras diff en ", region_name)
      sel_df <- VARselect(diff_ts, lag.max = 5, type = "const")
      p_df <- sel_df$selection["AIC(n)"]
      cat("Lag óptimo dif (AIC) =", p_df, "\n")
      var_mod <- VAR(diff_ts, p = p_df, type = "const")
      result$model <- var_mod; result$type <- "VAR_diff"; result$select <- sel_df; result$rank <- 0
    }
  }
  return(result)
}

res_norte3 <- procesar_region_3var(ts_norte3, "NorteAmerica")
## ---- Procesando región: NorteAmerica ----
## ADF niveles NorteAmerica :
##          LogInsured   SumDeaths LogAffected
## stat    -3.70780467 -3.47809531  -2.3259964
## p.value  0.03485446  0.05638791   0.4443354
## Johansen NorteAmerica :
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 3.815874e-01 2.712467e-01 1.889672e-01 1.665335e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 |  9.22  7.52  9.24 12.97
## r <= 1 | 23.14 17.85 19.96 24.60
## r = 0  | 44.28 32.00 34.91 41.07
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                LogInsured.l2 SumDeaths.l2 LogAffected.l2   constant
## LogInsured.l2     1.00000000  1.000000000    1.000000000  1.0000000
## SumDeaths.l2      0.03488876 -0.009351966   -0.002774113 -0.0103964
## LogAffected.l2   -2.21711919 -2.764023549    0.279008028 -1.1207177
## constant         -3.45454781 20.438233262  -17.000045652 78.5279781
## 
## Weights W:
## (This is the loading matrix)
## 
##               LogInsured.l2 SumDeaths.l2 LogAffected.l2      constant
## LogInsured.d    -0.15212248   -0.0295427     -0.2297773 -2.595214e-18
## SumDeaths.d    -23.63105997   19.5305027     10.9414760  2.149470e-15
## LogAffected.d    0.02581326    0.1982096     -0.1550881  9.032321e-18
## 
## Advertencia: rank sugerido 3 excede max_rank 2 → ajustado a 2 
## Cointegration rank sugerido para NorteAmerica = 2 
## 
## NorteAmerica : intentando VECM con rank 2 
## VECM estimado para NorteAmerica
res_cs3    <- procesar_region_3var(ts_cs3,    "CentroSurAmerica")
## ---- Procesando región: CentroSurAmerica ----
## ADF niveles CentroSurAmerica :
##         LogInsured   SumDeaths LogAffected
## stat    -2.3021843 -3.41735250  -2.4267907
## p.value  0.4538035  0.06564748   0.4042582
## Johansen CentroSurAmerica :
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 4.025543e-01 3.103318e-01 2.551600e-01 4.996004e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 | 12.96  7.52  9.24 12.97
## r <= 1 | 29.31 17.85 19.96 24.60
## r = 0  | 51.97 32.00 34.91 41.07
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                LogInsured.l2  SumDeaths.l2 LogAffected.l2      constant
## LogInsured.l2   1.000000e+00  1.000000e+00   1.000000e+00  1.000000e+00
## SumDeaths.l2   -1.107854e-03 -1.843583e-04   3.190581e-05 -1.905011e-05
## LogAffected.l2  1.920745e+01 -1.507749e+01  -1.074131e+00  7.683941e+00
## constant       -2.929411e+02  2.239612e+02   7.397955e+00  1.087308e+02
## 
## Weights W:
## (This is the loading matrix)
## 
##               LogInsured.l2 SumDeaths.l2 LogAffected.l2      constant
## LogInsured.d    -0.02925524 2.458395e-03  -6.578660e-01 -1.810730e-17
## SumDeaths.d    821.90904439 9.353970e+02  -1.323081e+03 -2.354038e-13
## LogAffected.d   -0.01278736 4.613684e-02  -4.813532e-02  5.146330e-18
## 
## Advertencia: rank sugerido 3 excede max_rank 2 → ajustado a 2 
## Cointegration rank sugerido para CentroSurAmerica = 2 
## 
## CentroSurAmerica : intentando VECM con rank 2 
## VECM estimado para CentroSurAmerica
# 6. Diagnóstico y IRF
diagnostico_VAR <- function(var_model, region_name) {
  cat("Diagnóstico para", region_name, "\n")
  print(try(serial.test(var_model, lags.pt = 10, type = "PT.asymptotic"), silent=FALSE))
  print(try(stability(var_model), silent=FALSE))
}
diagnostico_VAR(res_norte3$model, "NorteAmerica")
## Diagnóstico para NorteAmerica 
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object var_model
## Chi-squared = 43.828, df = 75, p-value = 0.9985
## 
## Error in UseMethod("estfun") : 
##   no applicable method for 'estfun' applied to an object of class "vec2var"
## [1] "Error in UseMethod(\"estfun\") : \n  no applicable method for 'estfun' applied to an object of class \"vec2var\"\n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in UseMethod("estfun"): no applicable method for 'estfun' applied to an object of class "vec2var">
diagnostico_VAR(res_cs3$model,    "CentroSurAmerica")
## Diagnóstico para CentroSurAmerica 
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object var_model
## Chi-squared = 72.814, df = 75, p-value = 0.55
## 
## Error in UseMethod("estfun") : 
##   no applicable method for 'estfun' applied to an object of class "vec2var"
## [1] "Error in UseMethod(\"estfun\") : \n  no applicable method for 'estfun' applied to an object of class \"vec2var\"\n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in UseMethod("estfun"): no applicable method for 'estfun' applied to an object of class "vec2var">
# IRF: ejemplo de choques
if (!is.null(res_norte3$model)) {
  cat("IRF Norteamérica: choque en SumDeaths → LogInsured\n")
  plot(irf(res_norte3$model, impulse="SumDeaths", response="LogInsured", n.ahead=5, boot=TRUE))
  cat("IRF Norteamérica: choque en LogAffected → LogInsured\n")
  plot(irf(res_norte3$model, impulse="LogAffected", response="LogInsured", n.ahead=5, boot=TRUE))
}
## IRF Norteamérica: choque en SumDeaths → LogInsured

## IRF Norteamérica: choque en LogAffected → LogInsured

if (!is.null(res_cs3$model)) {
  cat("IRF CentroSur América: choque en SumDeaths → LogInsured\n")
  plot(irf(res_cs3$model, impulse="SumDeaths", response="LogInsured", n.ahead=5, boot=TRUE))
  cat("IRF CentroSur América: choque en LogAffected → LogInsured\n")
  plot(irf(res_cs3$model, impulse="LogAffected", response="LogInsured", n.ahead=5, boot=TRUE))
}
## IRF CentroSur América: choque en SumDeaths → LogInsured

## IRF CentroSur América: choque en LogAffected → LogInsured

# 7. Pronóstico final por región
# Función para pronosticar y reconstruir niveles cuando el modelo es VAR en diferencias
reconstruir_forecast_VAR_diff <- function(var_model, ts_data, h = 5, region_name) {
  # var_model: la salida VAR(...) ajustada sobre diff(ts_data)
  # ts_data: la serie ts multivariante en niveles (columnas: LogInsured, SumDeaths, LogAffected)
  # h: horizonte de pronóstico en períodos (años)
  cat("\n=== Pronóstico VAR_diff para", region_name, "===\n")
  
  fc <- predict(var_model, n.ahead = h)
  # fc$fcst es una lista con un elemento por cada variable endógena
  # Cada fc$fcst$<var> es matriz con columnas fcst, lower, upper
  
  # Extraer pronósticos de diferencias para cada variable:
  # Aseguramos que los nombres coincidan con colnames(ts_data)
  vars <- colnames(ts_data)
  # Inicializamos lista para pronósticos de niveles
  last_levels <- as.numeric(tail(ts_data, 1))  # vector de último nivel para cada var
  # Construir pronóstico de diferencias para cada variable
  diffs_fc <- lapply(vars, function(v) {
    if (!is.null(fc$fcst[[v]])) {
      fc$fcst[[v]][, "fcst"]
    } else {
      stop("No se encontró pronóstico de diferencias para ", v)
    }
  })
  names(diffs_fc) <- vars
  # Reconstruir niveles pronosticados: cumular la última observación
  levels_fc <- matrix(NA, nrow = h, ncol = length(vars), dimnames = list(NULL, vars))
  for (i in seq_len(h)) {
    for (j in seq_along(vars)) {
      v <- vars[j]
      if (i == 1) {
        levels_fc[i, j] <- last_levels[j] + diffs_fc[[v]][i]
      } else {
        levels_fc[i, j] <- levels_fc[i-1, j] + diffs_fc[[v]][i]
      }
    }
  }
  # Construir data.frame de pronóstico:
  # Determinamos años: último año de ts_data + 1 hasta +h
  end_time <- end(ts_data)  # c(año, subperiodo), frecuencia=1 → end_time[1] es último año
  start_year <- end_time[1]
  años_fc <- (start_year + 1):(start_year + h)
  df_fc <- as.data.frame(levels_fc)
  df_fc <- tibble::add_column(df_fc, Year = años_fc, .before = 1)
  
  # Reconstruir “InsuredDamage” en niveles: exp(LogInsured) - 1
  if ("LogInsured" %in% vars) {
    df_fc <- df_fc %>%
      mutate(InsuredDamage_fc = exp(LogInsured) - 1)
  }
  # Opcional: reconstruir TotalAffected si tienes LogAffected
  if ("LogAffected" %in% vars) {
    df_fc <- df_fc %>%
      mutate(TotalAffected_fc = exp(LogAffected) - 1)
  }
  cat("Pronóstico completado para", region_name, "\n")
  return(df_fc)
}

# Función para pronosticar cuando el modelo es VAR en niveles o producto de vec2var (VECM->VAR)
forecast_VAR_levels <- function(var_model, ts_data, h = 5, region_name) {
  # var_model: salida VAR(...) ajustada sobre ts_data en niveles, o vec2var
  # ts_data: serie ts multivariante en niveles (columnas: LogInsured, SumDeaths, LogAffected)
  # h: horizonte
  cat("\n=== Pronóstico VAR_levels/VECM->VAR para", region_name, "===\n")
  fc <- predict(var_model, n.ahead = h)
  # fc$fcst[[v]][, "fcst"] es el pronóstico de nivel directamente
  vars <- colnames(ts_data)
  # Construir data.frame:
  end_time <- end(ts_data)
  start_year <- end_time[1]
  años_fc <- (start_year + 1):(start_year + h)
  # Extraer pronósticos
  pred_list <- lapply(vars, function(v) {
    if (!is.null(fc$fcst[[v]])) {
      fc$fcst[[v]][, "fcst"]
    } else {
      warning("No se encontró componente pronosticada para ", v)
      rep(NA_real_, h)
    }
  })
  names(pred_list) <- vars
  df_fc <- as.data.frame(pred_list)
  df_fc <- tibble::add_column(df_fc, Year = años_fc, .before = 1)
  # Reconstruir variables en niveles:
  if ("LogInsured" %in% vars) {
    df_fc <- df_fc %>%
      mutate(InsuredDamage_fc = exp(LogInsured) - 1)
  }
  if ("LogAffected" %in% vars) {
    df_fc <- df_fc %>%
      mutate(TotalAffected_fc = exp(LogAffected) - 1)
  }
  cat("Pronóstico completado para", region_name, "\n")
  return(df_fc)
}

# 8. Aplicar pronóstico para cada región según el tipo de modelo detectado
horizonte <- 5  # años a pronosticar

# NorteAmérica
if (!is.null(res_norte3$model)) {
  tipo_norte <- res_norte3$type
  cat("\n### Región NorteAmérica: tipo de modelo =", tipo_norte, "###\n")
  if (tipo_norte == "VAR_diff") {
    tabla_fc_norte <- reconstruir_forecast_VAR_diff(res_norte3$model, ts_norte3, h = horizonte, region_name = "NorteAmerica")
  } else if (tipo_norte %in% c("VAR_levels", "VECM->VAR")) {
    tabla_fc_norte <- forecast_VAR_levels(res_norte3$model, ts_norte3, h = horizonte, region_name = "NorteAmerica")
  } else {
    stop("Tipo de modelo no reconocido para NorteAmérica: ", tipo_norte)
  }
  print(tabla_fc_norte)
}
## 
## ### Región NorteAmérica: tipo de modelo = VECM->VAR ###
## 
## === Pronóstico VAR_levels/VECM->VAR para NorteAmerica ===
## Pronóstico completado para NorteAmerica 
##   Year LogInsured SumDeaths LogAffected InsuredDamage_fc TotalAffected_fc
## 1 2026   2.224050  258.1194    3.285848         8.244696         25.73163
## 2 2027   2.830140  449.0361    6.486500        15.947839        655.22235
## 3 2028   2.254831  486.4349    6.170034         8.533678        477.20233
## 4 2029   2.043634  446.1956    6.278893         6.718608        532.19791
## 5 2030   1.906193  459.3021    6.445142         5.727427        628.63594
# CentroSurAmérica
if (!is.null(res_cs3$model)) {
  tipo_cs <- res_cs3$type
  cat("\n### Región CentroSurAmérica: tipo de modelo =", tipo_cs, "###\n")
  if (tipo_cs == "VAR_diff") {
    tabla_fc_cs <- reconstruir_forecast_VAR_diff(res_cs3$model, ts_cs3, h = horizonte, region_name = "CentroSurAmerica")
  } else if (tipo_cs %in% c("VAR_levels", "VECM->VAR")) {
    tabla_fc_cs <- forecast_VAR_levels(res_cs3$model, ts_cs3, h = horizonte, region_name = "CentroSurAmerica")
  } else {
    stop("Tipo de modelo no reconocido para CentroSurAmérica: ", tipo_cs)
  }
  print(tabla_fc_cs)
}
## 
## ### Región CentroSurAmérica: tipo de modelo = VECM->VAR ###
## 
## === Pronóstico VAR_levels/VECM->VAR para CentroSurAmerica ===
## Pronóstico completado para CentroSurAmerica 
##   Year LogInsured SumDeaths LogAffected InsuredDamage_fc TotalAffected_fc
## 1 2026   4.956550  7572.671    15.45298        141.10264          5142140
## 2 2027   4.677449 -4316.213    14.98439        106.49551          3218383
## 3 2028   4.606385  2450.008    15.12208         99.12158          3693490
## 4 2029   4.618113  2441.824    15.12664        100.30274          3710342
## 5 2030   4.630577  2081.529    15.13497        101.57321          3741412