1. Preparación del Entorno y Carga de Datos

1.1 Carga de Datos

# Load the original database. file.choose() asks the user to pick the file
# interactively and read_xlsx() reads it into `datos`.
# NOTE(review): read_xlsx is presumably readxl::read_xlsx (no library() call is
# visible in this part of the file) — confirm. The interactive picker also
# means this step cannot run unattended; consider a fixed, documented path.
datos <- read_xlsx(file.choose())

# Show the structure of the loaded data (column names, types, first values)
str(datos)
## tibble [4,315 × 47] (S3: tbl_df/tbl/data.frame)
##  $ DisNo.                                   : chr [1:4315] "1900-0003-USA" "1900-0006-JAM" "1900-0007-JAM" "1902-0003-GTM" ...
##  $ Historic                                 : chr [1:4315] "Yes" "Yes" "Yes" "Yes" ...
##  $ Classification Key                       : chr [1:4315] "nat-met-sto-tro" "nat-hyd-flo-flo" "nat-bio-epi-vir" "nat-geo-vol-ash" ...
##  $ Disaster Group                           : chr [1:4315] "Natural" "Natural" "Natural" "Natural" ...
##  $ Disaster Subgroup                        : chr [1:4315] "Meteorological" "Hydrological" "Biological" "Geophysical" ...
##  $ Disaster Type                            : chr [1:4315] "Storm" "Flood" "Epidemic" "Volcanic activity" ...
##  $ Disaster Subtype                         : chr [1:4315] "Tropical cyclone" "Flood (General)" "Viral disease" "Ash fall" ...
##  $ External IDs                             : logi [1:4315] NA NA NA NA NA NA ...
##  $ Event Name                               : chr [1:4315] NA NA "Gastroenteritis" "Santa Maria" ...
##  $ ISO                                      : chr [1:4315] "USA" "JAM" "JAM" "GTM" ...
##  $ Country                                  : chr [1:4315] "United States of America" "Jamaica" "Jamaica" "Guatemala" ...
##  $ Subregion                                : chr [1:4315] "Northern America" "Latin America and the Caribbean" "Latin America and the Caribbean" "Latin America and the Caribbean" ...
##  $ Region                                   : chr [1:4315] "Americas" "Americas" "Americas" "Americas" ...
##  $ Location                                 : chr [1:4315] "Galveston (Texas)" "Saint James" "Porus" NA ...
##  $ Origin                                   : chr [1:4315] NA NA NA NA ...
##  $ Associated Types                         : chr [1:4315] "Avalanche (Snow, Debris)" NA NA NA ...
##  $ OFDA/BHA Response                        : chr [1:4315] "No" "No" "No" "No" ...
##  $ Appeal                                   : chr [1:4315] "No" "No" "No" "No" ...
##  $ Declaration                              : chr [1:4315] "No" "No" "No" "No" ...
##  $ AID Contribution ('000 US$)              : logi [1:4315] NA NA NA NA NA NA ...
##  $ Magnitude                                : num [1:4315] 220 NA NA NA NA NA NA 7.5 NA NA ...
##  $ Magnitude Scale                          : chr [1:4315] "Kph" "Km2" "Vaccinated" NA ...
##  $ Latitude                                 : num [1:4315] NA NA NA NA NA NA NA 14 NA NA ...
##  $ Longitude                                : num [1:4315] NA NA NA NA NA NA NA -91 NA NA ...
##  $ River Basin                              : logi [1:4315] NA NA NA NA NA NA ...
##  $ Start Year                               : num [1:4315] 1900 1900 1900 1902 1902 ...
##  $ Start Month                              : num [1:4315] 9 1 1 4 5 5 10 4 NA NA ...
##  $ Start Day                                : num [1:4315] 8 6 13 8 8 7 24 18 NA NA ...
##  $ End Year                                 : num [1:4315] 1900 1900 1900 1902 1902 ...
##  $ End Month                                : num [1:4315] 9 1 1 4 5 5 10 4 NA NA ...
##  $ End Day                                  : num [1:4315] 8 6 13 8 8 7 24 18 NA NA ...
##  $ Total Deaths                             : num [1:4315] 6000 300 30 1000 30000 ...
##  $ No. Injured                              : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ No. Affected                             : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ No. Homeless                             : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Affected                           : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Reconstruction Costs ('000 US$)          : logi [1:4315] NA NA NA NA NA NA ...
##  $ Reconstruction Costs, Adjusted ('000 US$): logi [1:4315] NA NA NA NA NA NA ...
##  $ Insured Damage ('000 US$)                : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Insured Damage, Adjusted ('000 US$)      : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Damage ('000 US$)                  : num [1:4315] 30000 NA NA NA NA NA NA 25000 480000 NA ...
##  $ Total Damage, Adjusted ('000 US$)        : num [1:4315] 1098720 NA NA NA NA ...
##  $ CPI                                      : num [1:4315] 2.73 2.73 2.73 2.84 2.84 ...
##  $ Admin Units                              : logi [1:4315] NA NA NA NA NA NA ...
##  $ Entry Date                               : chr [1:4315] "2004-10-18" "2003-07-01" "2003-07-01" "2003-07-01" ...
##  $ Last Update                              : chr [1:4315] "2023-10-17" "2023-09-25" "2023-09-25" "2023-09-25" ...
##  $ Temperature                              : num [1:4315] 13.5 27 27 20 26 28 20 20 13.5 13.5 ...
# Compact preview of `datos`: one line per column with its type and first values
glimpse(datos)
## Rows: 4,315
## Columns: 47
## $ DisNo.                                      <chr> "1900-0003-USA", "1900-000…
## $ Historic                                    <chr> "Yes", "Yes", "Yes", "Yes"…
## $ `Classification Key`                        <chr> "nat-met-sto-tro", "nat-hy…
## $ `Disaster Group`                            <chr> "Natural", "Natural", "Nat…
## $ `Disaster Subgroup`                         <chr> "Meteorological", "Hydrolo…
## $ `Disaster Type`                             <chr> "Storm", "Flood", "Epidemi…
## $ `Disaster Subtype`                          <chr> "Tropical cyclone", "Flood…
## $ `External IDs`                              <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Event Name`                                <chr> NA, NA, "Gastroenteritis",…
## $ ISO                                         <chr> "USA", "JAM", "JAM", "GTM"…
## $ Country                                     <chr> "United States of America"…
## $ Subregion                                   <chr> "Northern America", "Latin…
## $ Region                                      <chr> "Americas", "Americas", "A…
## $ Location                                    <chr> "Galveston (Texas)", "Sain…
## $ Origin                                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ `Associated Types`                          <chr> "Avalanche (Snow, Debris)"…
## $ `OFDA/BHA Response`                         <chr> "No", "No", "No", "No", "N…
## $ Appeal                                      <chr> "No", "No", "No", "No", "N…
## $ Declaration                                 <chr> "No", "No", "No", "No", "N…
## $ `AID Contribution ('000 US$)`               <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ Magnitude                                   <dbl> 220.0, NA, NA, NA, NA, NA,…
## $ `Magnitude Scale`                           <chr> "Kph", "Km2", "Vaccinated"…
## $ Latitude                                    <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ Longitude                                   <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `River Basin`                               <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Start Year`                                <dbl> 1900, 1900, 1900, 1902, 19…
## $ `Start Month`                               <dbl> 9, 1, 1, 4, 5, 5, 10, 4, N…
## $ `Start Day`                                 <dbl> 8, 6, 13, 8, 8, 7, 24, 18,…
## $ `End Year`                                  <dbl> 1900, 1900, 1900, 1902, 19…
## $ `End Month`                                 <dbl> 9, 1, 1, 4, 5, 5, 10, 4, N…
## $ `End Day`                                   <dbl> 8, 6, 13, 8, 8, 7, 24, 18,…
## $ `Total Deaths`                              <dbl> 6000, 300, 30, 1000, 30000…
## $ `No. Injured`                               <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `No. Affected`                              <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `No. Homeless`                              <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Total Affected`                            <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Reconstruction Costs ('000 US$)`           <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Reconstruction Costs, Adjusted ('000 US$)` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Insured Damage ('000 US$)`                 <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Insured Damage, Adjusted ('000 US$)`       <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Total Damage ('000 US$)`                   <dbl> 30000, NA, NA, NA, NA, NA,…
## $ `Total Damage, Adjusted ('000 US$)`         <dbl> 1098720, NA, NA, NA, NA, N…
## $ CPI                                         <dbl> 2.730451, 2.730451, 2.7304…
## $ `Admin Units`                               <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Entry Date`                                <chr> "2004-10-18", "2003-07-01"…
## $ `Last Update`                               <chr> "2023-10-17", "2023-09-25"…
## $ Temperature                                 <dbl> 13.5, 27.0, 27.0, 20.0, 26…
# Per-column summary of `datos` (quartiles and NA counts for numeric columns)
summary(datos)
##     DisNo.            Historic         Classification Key Disaster Group    
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Disaster Subgroup  Disaster Type      Disaster Subtype   External IDs  
##  Length:4315        Length:4315        Length:4315        Mode:logical  
##  Class :character   Class :character   Class :character   NA's:4315     
##  Mode  :character   Mode  :character   Mode  :character                 
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##   Event Name            ISO              Country           Subregion        
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     Region            Location            Origin          Associated Types  
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  OFDA/BHA Response     Appeal          Declaration       
##  Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  AID Contribution ('000 US$)   Magnitude         Magnitude Scale   
##  Mode:logical                Min.   :    -50.0   Length:4315       
##  TRUE:179                    1st Qu.:      7.7   Class :character  
##  NA's:4136                   Median :    165.0   Mode  :character  
##                              Mean   :  39808.8                     
##                              3rd Qu.:   3527.5                     
##                              Max.   :1768303.0                     
##                              NA's   :3111                          
##     Latitude         Longitude       River Basin      Start Year  
##  Min.   :-72.640   Min.   :-162.83   Mode:logical   Min.   :1900  
##  1st Qu.: -9.424   1st Qu.: -90.57   NA's:4315      1st Qu.:1989  
##  Median : 10.980   Median : -77.46                  Median :2002  
##  Mean   :  7.099   Mean   : -76.58                  Mean   :1998  
##  3rd Qu.: 19.107   3rd Qu.: -70.56                  3rd Qu.:2014  
##  Max.   : 67.930   Max.   : 124.42                  Max.   :2025  
##  NA's   :3689      NA's   :3689                                   
##   Start Month       Start Day        End Year      End Month     
##  Min.   : 1.000   Min.   : 1.00   Min.   :1900   Min.   : 1.000  
##  1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.:1989   1st Qu.: 4.000  
##  Median : 7.000   Median :15.00   Median :2002   Median : 7.000  
##  Mean   : 6.468   Mean   :15.43   Mean   :1998   Mean   : 6.639  
##  3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:2014   3rd Qu.: 9.000  
##  Max.   :12.000   Max.   :31.00   Max.   :2025   Max.   :12.000  
##  NA's   :69       NA's   :857                    NA's   :141     
##     End Day       Total Deaths       No. Injured         No. Affected     
##  Min.   : 1.00   Min.   :     1.0   Min.   :      1.0   Min.   :       3  
##  1st Qu.: 8.00   1st Qu.:     4.0   1st Qu.:     12.0   1st Qu.:    1020  
##  Median :16.00   Median :    12.0   Median :     37.0   Median :    6780  
##  Mean   :15.84   Mean   :   286.9   Mean   :   3817.9   Mean   :  195474  
##  3rd Qu.:23.00   3rd Qu.:    40.0   3rd Qu.:    138.5   3rd Qu.:   45000  
##  Max.   :31.00   Max.   :222570.0   Max.   :1800000.0   Max.   :85000000  
##  NA's   :834     NA's   :1398       NA's   :3400        NA's   :1884      
##   No. Homeless     Total Affected     Reconstruction Costs ('000 US$)
##  Min.   :      5   Min.   :       1   Mode:logical                   
##  1st Qu.:    300   1st Qu.:     600   TRUE:14                        
##  Median :   1533   Median :    4248   NA's:4301                      
##  Mean   :  19975   Mean   :  165323                                  
##  3rd Qu.:   8770   3rd Qu.:   30004                                  
##  Max.   :1166000   Max.   :85000012                                  
##  NA's   :3690      NA's   :1344                                      
##  Reconstruction Costs, Adjusted ('000 US$) Insured Damage ('000 US$)
##  Mode:logical                              Min.   :     162         
##  TRUE:14                                   1st Qu.:   62500         
##  NA's:4301                                 Median :  200000         
##                                            Mean   : 1299304         
##                                            3rd Qu.:  755000         
##                                            Max.   :60000000         
##                                            NA's   :3710             
##  Insured Damage, Adjusted ('000 US$) Total Damage ('000 US$)
##  Min.   :     201                    Min.   :        3      
##  1st Qu.:  118839                    1st Qu.:    15500      
##  Median :  339640                    Median :   112300      
##  Mean   : 1731304                    Mean   :  1251189      
##  3rd Qu.: 1046379                    3rd Qu.:   729350      
##  Max.   :93614347                    Max.   :125000000      
##  NA's   :3721                        NA's   :2605           
##  Total Damage, Adjusted ('000 US$)      CPI         Admin Units   
##  Min.   :        5                 Min.   :  2.73   Mode:logical  
##  1st Qu.:    37512                 1st Qu.: 38.81   NA's:4315     
##  Median :   237094                 Median : 58.11                 
##  Mean   :  1871285                 Mean   : 55.38                 
##  3rd Qu.:  1291495                 3rd Qu.: 75.35                 
##  Max.   :195029889                 Max.   :100.00                 
##  NA's   :2655                      NA's   :130                    
##   Entry Date        Last Update         Temperature   
##  Length:4315        Length:4315        Min.   : 5.00  
##  Class :character   Class :character   1st Qu.:15.00  
##  Mode  :character   Mode  :character   Median :20.00  
##                                        Mean   :19.84  
##                                        3rd Qu.:25.00  
##                                        Max.   :29.00  
## 

1.2 Filtrado de Variables

# Core variables used by the central analysis
variables_principales <- c(
  "Disaster Subgroup",
  "Disaster Type",
  "Disaster Subtype",
  "ISO",
  "Country",
  "Subregion",
  "Total Deaths",
  "No. Injured",
  "No. Affected",
  "Total Affected",
  "Insured Damage ('000 US$)",
  "Insured Damage, Adjusted ('000 US$)",
  "Total Damage ('000 US$)",
  "Total Damage, Adjusted ('000 US$)",
  "Temperature"
)

# Supporting variables used by the complementary analysis
variables_secundarias <- c(
  "Event Name",
  "Magnitude",
  "Magnitude Scale",
  "Start Year",
  "No. Homeless",
  "Region"
)

# Build the working subset: core columns first, then the supporting ones.
# all_of() makes the selection fail loudly if any listed column is missing.
datos_subset <- select(
  datos,
  all_of(variables_principales),
  all_of(variables_secundarias)
)

# Check the structure of the resulting subset
str(datos_subset)
## tibble [4,315 × 21] (S3: tbl_df/tbl/data.frame)
##  $ Disaster Subgroup                  : chr [1:4315] "Meteorological" "Hydrological" "Biological" "Geophysical" ...
##  $ Disaster Type                      : chr [1:4315] "Storm" "Flood" "Epidemic" "Volcanic activity" ...
##  $ Disaster Subtype                   : chr [1:4315] "Tropical cyclone" "Flood (General)" "Viral disease" "Ash fall" ...
##  $ ISO                                : chr [1:4315] "USA" "JAM" "JAM" "GTM" ...
##  $ Country                            : chr [1:4315] "United States of America" "Jamaica" "Jamaica" "Guatemala" ...
##  $ Subregion                          : chr [1:4315] "Northern America" "Latin America and the Caribbean" "Latin America and the Caribbean" "Latin America and the Caribbean" ...
##  $ Total Deaths                       : num [1:4315] 6000 300 30 1000 30000 ...
##  $ No. Injured                        : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ No. Affected                       : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Affected                     : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Insured Damage ('000 US$)          : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Insured Damage, Adjusted ('000 US$): num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Total Damage ('000 US$)            : num [1:4315] 30000 NA NA NA NA NA NA 25000 480000 NA ...
##  $ Total Damage, Adjusted ('000 US$)  : num [1:4315] 1098720 NA NA NA NA ...
##  $ Temperature                        : num [1:4315] 13.5 27 27 20 26 28 20 20 13.5 13.5 ...
##  $ Event Name                         : chr [1:4315] NA NA "Gastroenteritis" "Santa Maria" ...
##  $ Magnitude                          : num [1:4315] 220 NA NA NA NA NA NA 7.5 NA NA ...
##  $ Magnitude Scale                    : chr [1:4315] "Kph" "Km2" "Vaccinated" NA ...
##  $ Start Year                         : num [1:4315] 1900 1900 1900 1902 1902 ...
##  $ No. Homeless                       : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
##  $ Region                             : chr [1:4315] "Americas" "Americas" "Americas" "Americas" ...
# Per-column summary of the working subset
summary(datos_subset)
##  Disaster Subgroup  Disaster Type      Disaster Subtype       ISO           
##  Length:4315        Length:4315        Length:4315        Length:4315       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Country           Subregion          Total Deaths       No. Injured       
##  Length:4315        Length:4315        Min.   :     1.0   Min.   :      1.0  
##  Class :character   Class :character   1st Qu.:     4.0   1st Qu.:     12.0  
##  Mode  :character   Mode  :character   Median :    12.0   Median :     37.0  
##                                        Mean   :   286.9   Mean   :   3817.9  
##                                        3rd Qu.:    40.0   3rd Qu.:    138.5  
##                                        Max.   :222570.0   Max.   :1800000.0  
##                                        NA's   :1398       NA's   :3400       
##   No. Affected      Total Affected     Insured Damage ('000 US$)
##  Min.   :       3   Min.   :       1   Min.   :     162         
##  1st Qu.:    1020   1st Qu.:     600   1st Qu.:   62500         
##  Median :    6780   Median :    4248   Median :  200000         
##  Mean   :  195474   Mean   :  165323   Mean   : 1299304         
##  3rd Qu.:   45000   3rd Qu.:   30004   3rd Qu.:  755000         
##  Max.   :85000000   Max.   :85000012   Max.   :60000000         
##  NA's   :1884       NA's   :1344       NA's   :3710             
##  Insured Damage, Adjusted ('000 US$) Total Damage ('000 US$)
##  Min.   :     201                    Min.   :        3      
##  1st Qu.:  118839                    1st Qu.:    15500      
##  Median :  339640                    Median :   112300      
##  Mean   : 1731304                    Mean   :  1251189      
##  3rd Qu.: 1046379                    3rd Qu.:   729350      
##  Max.   :93614347                    Max.   :125000000      
##  NA's   :3721                        NA's   :2605           
##  Total Damage, Adjusted ('000 US$)  Temperature     Event Name       
##  Min.   :        5                 Min.   : 5.00   Length:4315       
##  1st Qu.:    37512                 1st Qu.:15.00   Class :character  
##  Median :   237094                 Median :20.00   Mode  :character  
##  Mean   :  1871285                 Mean   :19.84                     
##  3rd Qu.:  1291495                 3rd Qu.:25.00                     
##  Max.   :195029889                 Max.   :29.00                     
##  NA's   :2655                                                        
##    Magnitude         Magnitude Scale      Start Year    No. Homeless    
##  Min.   :    -50.0   Length:4315        Min.   :1900   Min.   :      5  
##  1st Qu.:      7.7   Class :character   1st Qu.:1989   1st Qu.:    300  
##  Median :    165.0   Mode  :character   Median :2002   Median :   1533  
##  Mean   :  39808.8                      Mean   :1998   Mean   :  19975  
##  3rd Qu.:   3527.5                      3rd Qu.:2014   3rd Qu.:   8770  
##  Max.   :1768303.0                      Max.   :2025   Max.   :1166000  
##  NA's   :3111                                          NA's   :3690     
##     Region         
##  Length:4315       
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

2. Análisis Exploratorio de Datos

2.1 Revisión de Datos Faltantes y Valores Atípicos

# Count every missing cell across the whole subset and report it
total_na <- datos_subset %>%
  is.na() %>%
  sum()
print(paste("Total NA:", total_na))
## [1] "Total NA: 31179"
# Share of missing values in each column, expressed as a proportion in [0, 1]
# (not multiplied by 100): NA count per column divided by the number of rows
na_por_columna <- vapply(
  datos_subset,
  function(columna) sum(is.na(columna)),
  numeric(1)
) / nrow(datos_subset)
print(na_por_columna)
##                   Disaster Subgroup                       Disaster Type 
##                          0.00000000                          0.00000000 
##                    Disaster Subtype                                 ISO 
##                          0.00000000                          0.00000000 
##                             Country                           Subregion 
##                          0.00000000                          0.00000000 
##                        Total Deaths                         No. Injured 
##                          0.32398610                          0.78794902 
##                        No. Affected                      Total Affected 
##                          0.43661645                          0.31147161 
##           Insured Damage ('000 US$) Insured Damage, Adjusted ('000 US$) 
##                          0.85979143                          0.86234067 
##             Total Damage ('000 US$)   Total Damage, Adjusted ('000 US$) 
##                          0.60370800                          0.61529548 
##                         Temperature                          Event Name 
##                          0.00000000                          0.76152955 
##                           Magnitude                     Magnitude Scale 
##                          0.72097335                          0.08690614 
##                          Start Year                        No. Homeless 
##                          0.00000000                          0.85515643 
##                              Region 
##                          0.00000000
# Boxplot of total damage to expose the spread and the extreme values
datos_subset %>%
  ggplot(mapping = aes(y = `Total Damage ('000 US$)`)) +
  geom_boxplot() +
  ggtitle("Boxplot de Daños Totales") +
  ylab("Total Damage ('000 US$)") +
  theme_minimal()

2.2 Estadísticas Descriptivas Básicas

# Central tendency and spread of the death counts, ignoring missing values
media_muertes <- mean(datos_subset[["Total Deaths"]], na.rm = TRUE)
mediana_muertes <- median(datos_subset[["Total Deaths"]], na.rm = TRUE)
desv_estandar_muertes <- sd(datos_subset[["Total Deaths"]], na.rm = TRUE)

# Statistical mode: the most frequent value of a vector.
#
# Arguments:
#   x      Atomic vector (numeric, character, logical or factor).
#   na.rm  If TRUE (default), missing values are dropped before counting, in
#          line with the na.rm = TRUE used for the mean/median/sd in this
#          analysis. If FALSE, NA is counted like any other value and can
#          itself be returned as the mode.
#
# Returns:
#   A length-one vector of the same type as `x`. When several values tie for
#   the highest frequency, the one that appears first in `x` is returned.
#   If nothing is left to count (empty input, or all NA with na.rm = TRUE),
#   an NA of the same type is returned.
moda <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (length(x) == 0L) {
    # Indexing with NA yields a typed NA instead of a zero-length result
    return(x[NA_integer_])
  }
  ux <- unique(x)
  # match() maps each element to its position in `ux`; tabulate() counts them
  ux[which.max(tabulate(match(x, ux)))]
}
# Most frequent value of `Total Deaths`, obtained with moda()
moda_muertes <- moda(datos_subset$`Total Deaths`)

# Print the death statistics computed above, one labelled line per statistic
print(paste("Media de muertes:", media_muertes))
## [1] "Media de muertes: 286.88104216661"
print(paste("Mediana de muertes:", mediana_muertes))
## [1] "Mediana de muertes: 12"
print(paste("Desviación estándar:", desv_estandar_muertes))
## [1] "Desviación estándar: 4649.08031281979"
print(paste("Moda de muertes:", moda_muertes))
## [1] "Moda de muertes: NA"

3. Visualización de Datos

3.1 Gráficos de Frecuencia para Variables Categóricas

# Bar chart: number of records in each disaster subgroup
datos_subset %>%
  ggplot(mapping = aes(x = `Disaster Subgroup`)) +
  geom_bar(fill = "dodgerblue") +
  ggtitle("Frecuencia por Subgrupo de Desastre") +
  xlab("Subgrupo de Desastre") +
  ylab("Cantidad") +
  theme_minimal()

# Bar chart: number of records for each disaster type
datos_subset %>%
  ggplot(mapping = aes(x = `Disaster Type`)) +
  geom_bar(fill = "red") +
  ggtitle("Frecuencia por Tipo de Desastre") +
  xlab("Tipo de Desastre") +
  ylab("Cantidad") +
  theme_minimal()

# Five most frequent disaster types: count, order by descending frequency,
# keep the first five rows
top5_disaster_type <- datos_subset %>%
  count(`Disaster Type`) %>%
  arrange(desc(n)) %>%
  slice_head(n = 5)
print(top5_disaster_type)
## # A tibble: 5 × 2
##   `Disaster Type`         n
##   <chr>               <int>
## 1 Storm                1580
## 2 Flood                1407
## 3 Earthquake            326
## 4 Mass movement (wet)   208
## 5 Wildfire              208
# Bar chart: number of records for each disaster subtype
datos_subset %>%
  ggplot(mapping = aes(x = `Disaster Subtype`)) +
  geom_bar(fill = "seagreen") +
  ggtitle("Frecuencia por Subtipo de Desastre") +
  xlab("Subtipo de Desastre") +
  ylab("Cantidad") +
  theme_minimal()

# Five most frequent disaster subtypes: count, order by descending frequency,
# keep the first five rows
top5_disaster_subtype <- datos_subset %>%
  count(`Disaster Subtype`) %>%
  arrange(desc(n)) %>%
  slice_head(n = 5)
print(top5_disaster_subtype)
## # A tibble: 5 × 2
##   `Disaster Subtype`     n
##   <chr>              <int>
## 1 Tropical cyclone     795
## 2 Riverine flood       633
## 3 Flood (General)      630
## 4 Ground movement      314
## 5 Storm (General)      279

3.2 Gráficos para Variables Numéricas

# Keep only the rows with a usable total-damage figure. is.finite() is FALSE
# for NA, NaN and +/-Inf, so it also covers the missing-value check.
datos_filtrados <- filter(
  datos_subset,
  is.finite(`Total Damage ('000 US$)`)
)

# Histogram of total damage.
# The values span several orders of magnitude (the summary above reports a
# range from 3 to 125,000,000 thousand US$), so a fixed binwidth of 1000 would
# create on the order of 10^5 bins and collapse the plot into a single spike.
# A log10 x-axis with a fixed number of bins keeps the distribution readable.
# All recorded values are strictly positive, so the log transform is safe.
ggplot(datos_filtrados, aes(x = `Total Damage ('000 US$)`)) +
  geom_histogram(bins = 30, fill = "orange", color = "black") +
  scale_x_log10() +
  theme_minimal() +
  labs(title = "Histograma de Daños Totales",
       x = "Daños Totales ('000 US$, escala log10)", y = "Frecuencia")

# Scatter plot of deaths against injured; semi-transparent points make
# overlapping observations visible
datos_subset %>%
  ggplot(mapping = aes(x = `Total Deaths`, y = `No. Injured`)) +
  geom_point(alpha = 0.6) +
  ggtitle("Relación entre Muertes y Heridos") +
  xlab("Total Deaths") +
  ylab("No. Injured") +
  theme_minimal()

4. Análisis de Relaciones y Tendencias

4.1 Impacto por Región y Tiempo

# Yearly human impact per region: total deaths and total injured, with
# missing values ignored in the sums
impacto_region <- summarise(
  group_by(datos_subset, Region, `Start Year`),
  Total_Muertes = sum(`Total Deaths`, na.rm = TRUE),
  Total_Heridos = sum(`No. Injured`, na.rm = TRUE),
  .groups = "drop"
)

# Line chart of yearly deaths, one coloured line per region
impacto_region %>%
  ggplot(mapping = aes(x = `Start Year`, y = Total_Muertes, color = Region)) +
  geom_line() +
  ggtitle("Evolución de Muertes por Región a lo Largo de los Años") +
  xlab("Año de Inicio") +
  ylab("Total Muertes") +
  theme_minimal()

5. Visualización Avanzada: Mapa de Calor Interactivo

# Load the world-map shapefile: file.choose() asks the user to pick the file
# interactively and st_read() reads it into `mapa_mundi`.
# NOTE(review): the interactive picker prevents unattended runs; consider a
# fixed, documented path.
mapa_mundi <- st_read(file.choose())
## Reading layer `ne_110m_admin_0_countries' from data source 
##   `C:\Users\taroj\Documents\Actuaria_Octavo Semestre\COMPUTO CIENTIFICO\Proyecto\Mapas\ne_110m_admin_0_countries\ne_110m_admin_0_countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 177 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## Geodetic CRS:  WGS 84
# Peek at the first values of the SOV_A3 column, later used as the join key
head(mapa_mundi$SOV_A3)
## [1] "FJI" "TZA" "SAH" "CAN" "US1" "KA1"
# Country-level disaster summary: number of records per ISO code and a
# comma-separated list of the distinct disaster types seen in that country
desastres_por_pais <- summarise(
  group_by(datos_subset, ISO),
  Total_Desastres = n(),
  Desastres_Tipo = toString(unique(`Disaster Type`)),
  .groups = "drop"
)

# The shapefile labels the United States as "US1"; rewrite it to "USA" so it
# matches the ISO code used in the disaster data.
# NOTE(review): the SOV_A3 preview also shows other non-ISO codes (e.g. "SAH",
# "KA1") that are not remapped here — confirm whether SOV_A3 is the right join
# key or whether a different code column of the shapefile should be used.
mapa_mundi <- mutate(
  mapa_mundi,
  SOV_A3 = if_else(SOV_A3 == "US1", "USA", SOV_A3)
)

# Attach the per-country disaster summary to every map feature; features
# without a matching ISO code keep NA in the joined columns
mapa_datos <- left_join(
  mapa_mundi,
  desastres_por_pais,
  by = c("SOV_A3" = "ISO")
)

# Countries without any matching disaster record get a count of 0 so that the
# tooltip reads "0" instead of "NA"
mapa_datos$Total_Desastres[is.na(mapa_datos$Total_Desastres)] <- 0

# Base choropleth built with ggplot2.
# For the fill only, zero counts are mapped back to NA so that countries
# without data are drawn with na.value ("grey") instead of the low end of the
# gradient. The tooltip text replaces a missing type list with "Sin datos".
mapa_base <- ggplot() +
  geom_sf(
    data = mapa_datos,
    aes(
      fill = na_if(Total_Desastres, 0),
      text = paste(
        "País:", SOV_A3,
        "<br>Total Desastres:", Total_Desastres,
        "<br>Tipos:", coalesce(Desastres_Tipo, "Sin datos")
      )
    )
  ) +
  scale_fill_gradient(low = "lightblue", high = "darkred", na.value = "grey") +
  theme_minimal() +
  labs(title = "Mapa Interactivo de Desastres por País", fill = "Total Desastres")

# Convert the static map into an interactive plotly widget whose hover
# tooltip shows only the custom text built above
mapa_interactivo <- ggplotly(mapa_base, tooltip = "text")
mapa_interactivo

6. Análisis de la Variable Temperature

# Histogram of the Temperature variable using one-unit-wide bins
datos_subset %>%
  ggplot(mapping = aes(x = Temperature)) +
  geom_histogram(binwidth = 1, fill = "lightgreen", color = "black") +
  ggtitle("Histograma de Temperatura") +
  xlab("Temperature (°C)") +
  ylab("Frecuencia") +
  theme_minimal()

# Scatter plot of total damage against temperature
datos_subset %>%
  ggplot(mapping = aes(x = Temperature, y = `Total Damage ('000 US$)`)) +
  geom_point(alpha = 0.6, color = "purple") +
  ggtitle("Relación entre Temperatura y Daños Totales") +
  xlab("Temperatura (°C)") +
  ylab("Daños Totales ('000 US$)") +
  theme_minimal()

# Scatter plot of total affected people against temperature
datos_subset %>%
  ggplot(mapping = aes(x = Temperature, y = `Total Affected`)) +
  geom_point(alpha = 0.6, color = "brown") +
  ggtitle("Relación entre Temperatura y Total Afectados") +
  xlab("Temperatura (°C)") +
  ylab("Total Afectados") +
  theme_minimal()

7. Análisis de Desastres y Temperatura a lo Largo del Tiempo

# For every country / disaster type / start year combination: number of
# records and mean temperature (missing temperatures ignored)
temp_analysis <- summarise(
  group_by(datos_subset, Country, `Disaster Type`, `Start Year`),
  Num_Desastres = n(),
  Prom_Temperature = mean(Temperature, na.rm = TRUE),
  .groups = "drop"
)

# Yearly number of disasters per country, one panel per disaster type.
# NOTE(review): the title mentions temperature, but only the year, the count
# and the country are mapped; Prom_Temperature is not used in this plot.
temp_analysis %>%
  ggplot(mapping = aes(x = `Start Year`, y = Num_Desastres, color = Country)) +
  geom_line() +
  facet_wrap(vars(`Disaster Type`)) +
  ggtitle("Evolución de Desastres por Tipo y País en Función de la Temperatura") +
  xlab("Año de Inicio") +
  ylab("Cantidad de Desastres") +
  theme_minimal()

# Global yearly trend: number of disaster records and mean temperature per
# start year (missing temperatures ignored)
global_temp_trend <- summarise(
  group_by(datos_subset, `Start Year`),
  Num_Desastres = n(),
  Avg_Temperature = mean(Temperature, na.rm = TRUE),
  .groups = "drop"
)

# Global trend plot: yearly number of disasters (solid blue) together with the
# yearly mean temperature (dashed red).
# The temperature series is multiplied by 10 so both lines share one panel; a
# secondary axis applies the inverse transform (/ 10) so the temperature can
# be read in its own units instead of on the disaster-count scale.
# `linewidth` replaces the `size` argument, which is deprecated for lines.
ggplot(global_temp_trend, aes(x = `Start Year`)) +
  geom_line(aes(y = Num_Desastres), color = "blue", linewidth = 1) +
  geom_line(aes(y = Avg_Temperature * 10), color = "red",
            linetype = "dashed", linewidth = 1) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 10, name = "Temperatura promedio (°C)")
  ) +
  theme_minimal() +
  labs(title = "Tendencia Global: Número de Desastres y Temperatura",
       x = "Año de Inicio",
       y = "Cantidad de Desastres")