# Load the raw dataset: the workbook is picked interactively in a file dialog
# and then read with read_xlsx().
datos <- file.choose() %>%
  read_xlsx()
# Inspect the structure of the loaded table.
str(datos)
## tibble [4,315 × 47] (S3: tbl_df/tbl/data.frame)
## $ DisNo. : chr [1:4315] "1900-0003-USA" "1900-0006-JAM" "1900-0007-JAM" "1902-0003-GTM" ...
## $ Historic : chr [1:4315] "Yes" "Yes" "Yes" "Yes" ...
## $ Classification Key : chr [1:4315] "nat-met-sto-tro" "nat-hyd-flo-flo" "nat-bio-epi-vir" "nat-geo-vol-ash" ...
## $ Disaster Group : chr [1:4315] "Natural" "Natural" "Natural" "Natural" ...
## $ Disaster Subgroup : chr [1:4315] "Meteorological" "Hydrological" "Biological" "Geophysical" ...
## $ Disaster Type : chr [1:4315] "Storm" "Flood" "Epidemic" "Volcanic activity" ...
## $ Disaster Subtype : chr [1:4315] "Tropical cyclone" "Flood (General)" "Viral disease" "Ash fall" ...
## $ External IDs : logi [1:4315] NA NA NA NA NA NA ...
## $ Event Name : chr [1:4315] NA NA "Gastroenteritis" "Santa Maria" ...
## $ ISO : chr [1:4315] "USA" "JAM" "JAM" "GTM" ...
## $ Country : chr [1:4315] "United States of America" "Jamaica" "Jamaica" "Guatemala" ...
## $ Subregion : chr [1:4315] "Northern America" "Latin America and the Caribbean" "Latin America and the Caribbean" "Latin America and the Caribbean" ...
## $ Region : chr [1:4315] "Americas" "Americas" "Americas" "Americas" ...
## $ Location : chr [1:4315] "Galveston (Texas)" "Saint James" "Porus" NA ...
## $ Origin : chr [1:4315] NA NA NA NA ...
## $ Associated Types : chr [1:4315] "Avalanche (Snow, Debris)" NA NA NA ...
## $ OFDA/BHA Response : chr [1:4315] "No" "No" "No" "No" ...
## $ Appeal : chr [1:4315] "No" "No" "No" "No" ...
## $ Declaration : chr [1:4315] "No" "No" "No" "No" ...
## $ AID Contribution ('000 US$) : logi [1:4315] NA NA NA NA NA NA ...
## $ Magnitude : num [1:4315] 220 NA NA NA NA NA NA 7.5 NA NA ...
## $ Magnitude Scale : chr [1:4315] "Kph" "Km2" "Vaccinated" NA ...
## $ Latitude : num [1:4315] NA NA NA NA NA NA NA 14 NA NA ...
## $ Longitude : num [1:4315] NA NA NA NA NA NA NA -91 NA NA ...
## $ River Basin : logi [1:4315] NA NA NA NA NA NA ...
## $ Start Year : num [1:4315] 1900 1900 1900 1902 1902 ...
## $ Start Month : num [1:4315] 9 1 1 4 5 5 10 4 NA NA ...
## $ Start Day : num [1:4315] 8 6 13 8 8 7 24 18 NA NA ...
## $ End Year : num [1:4315] 1900 1900 1900 1902 1902 ...
## $ End Month : num [1:4315] 9 1 1 4 5 5 10 4 NA NA ...
## $ End Day : num [1:4315] 8 6 13 8 8 7 24 18 NA NA ...
## $ Total Deaths : num [1:4315] 6000 300 30 1000 30000 ...
## $ No. Injured : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ No. Affected : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ No. Homeless : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Total Affected : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Reconstruction Costs ('000 US$) : logi [1:4315] NA NA NA NA NA NA ...
## $ Reconstruction Costs, Adjusted ('000 US$): logi [1:4315] NA NA NA NA NA NA ...
## $ Insured Damage ('000 US$) : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Insured Damage, Adjusted ('000 US$) : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Total Damage ('000 US$) : num [1:4315] 30000 NA NA NA NA NA NA 25000 480000 NA ...
## $ Total Damage, Adjusted ('000 US$) : num [1:4315] 1098720 NA NA NA NA ...
## $ CPI : num [1:4315] 2.73 2.73 2.73 2.84 2.84 ...
## $ Admin Units : logi [1:4315] NA NA NA NA NA NA ...
## $ Entry Date : chr [1:4315] "2004-10-18" "2003-07-01" "2003-07-01" "2003-07-01" ...
## $ Last Update : chr [1:4315] "2023-10-17" "2023-09-25" "2023-09-25" "2023-09-25" ...
## $ Temperature : num [1:4315] 13.5 27 27 20 26 28 20 20 13.5 13.5 ...
## Rows: 4,315
## Columns: 47
## $ DisNo. <chr> "1900-0003-USA", "1900-000…
## $ Historic <chr> "Yes", "Yes", "Yes", "Yes"…
## $ `Classification Key` <chr> "nat-met-sto-tro", "nat-hy…
## $ `Disaster Group` <chr> "Natural", "Natural", "Nat…
## $ `Disaster Subgroup` <chr> "Meteorological", "Hydrolo…
## $ `Disaster Type` <chr> "Storm", "Flood", "Epidemi…
## $ `Disaster Subtype` <chr> "Tropical cyclone", "Flood…
## $ `External IDs` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Event Name` <chr> NA, NA, "Gastroenteritis",…
## $ ISO <chr> "USA", "JAM", "JAM", "GTM"…
## $ Country <chr> "United States of America"…
## $ Subregion <chr> "Northern America", "Latin…
## $ Region <chr> "Americas", "Americas", "A…
## $ Location <chr> "Galveston (Texas)", "Sain…
## $ Origin <chr> NA, NA, NA, NA, NA, NA, NA…
## $ `Associated Types` <chr> "Avalanche (Snow, Debris)"…
## $ `OFDA/BHA Response` <chr> "No", "No", "No", "No", "N…
## $ Appeal <chr> "No", "No", "No", "No", "N…
## $ Declaration <chr> "No", "No", "No", "No", "N…
## $ `AID Contribution ('000 US$)` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ Magnitude <dbl> 220.0, NA, NA, NA, NA, NA,…
## $ `Magnitude Scale` <chr> "Kph", "Km2", "Vaccinated"…
## $ Latitude <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ Longitude <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `River Basin` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Start Year` <dbl> 1900, 1900, 1900, 1902, 19…
## $ `Start Month` <dbl> 9, 1, 1, 4, 5, 5, 10, 4, N…
## $ `Start Day` <dbl> 8, 6, 13, 8, 8, 7, 24, 18,…
## $ `End Year` <dbl> 1900, 1900, 1900, 1902, 19…
## $ `End Month` <dbl> 9, 1, 1, 4, 5, 5, 10, 4, N…
## $ `End Day` <dbl> 8, 6, 13, 8, 8, 7, 24, 18,…
## $ `Total Deaths` <dbl> 6000, 300, 30, 1000, 30000…
## $ `No. Injured` <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `No. Affected` <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `No. Homeless` <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Total Affected` <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Reconstruction Costs ('000 US$)` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Reconstruction Costs, Adjusted ('000 US$)` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Insured Damage ('000 US$)` <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Insured Damage, Adjusted ('000 US$)` <dbl> NA, NA, NA, NA, NA, NA, NA…
## $ `Total Damage ('000 US$)` <dbl> 30000, NA, NA, NA, NA, NA,…
## $ `Total Damage, Adjusted ('000 US$)` <dbl> 1098720, NA, NA, NA, NA, N…
## $ CPI <dbl> 2.730451, 2.730451, 2.7304…
## $ `Admin Units` <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ `Entry Date` <chr> "2004-10-18", "2003-07-01"…
## $ `Last Update` <chr> "2023-10-17", "2023-09-25"…
## $ Temperature <dbl> 13.5, 27.0, 27.0, 20.0, 26…
## DisNo. Historic Classification Key Disaster Group
## Length:4315 Length:4315 Length:4315 Length:4315
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Disaster Subgroup Disaster Type Disaster Subtype External IDs
## Length:4315 Length:4315 Length:4315 Mode:logical
## Class :character Class :character Class :character NA's:4315
## Mode :character Mode :character Mode :character
##
##
##
##
## Event Name ISO Country Subregion
## Length:4315 Length:4315 Length:4315 Length:4315
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Region Location Origin Associated Types
## Length:4315 Length:4315 Length:4315 Length:4315
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## OFDA/BHA Response Appeal Declaration
## Length:4315 Length:4315 Length:4315
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## AID Contribution ('000 US$) Magnitude Magnitude Scale
## Mode:logical Min. : -50.0 Length:4315
## TRUE:179 1st Qu.: 7.7 Class :character
## NA's:4136 Median : 165.0 Mode :character
## Mean : 39808.8
## 3rd Qu.: 3527.5
## Max. :1768303.0
## NA's :3111
## Latitude Longitude River Basin Start Year
## Min. :-72.640 Min. :-162.83 Mode:logical Min. :1900
## 1st Qu.: -9.424 1st Qu.: -90.57 NA's:4315 1st Qu.:1989
## Median : 10.980 Median : -77.46 Median :2002
## Mean : 7.099 Mean : -76.58 Mean :1998
## 3rd Qu.: 19.107 3rd Qu.: -70.56 3rd Qu.:2014
## Max. : 67.930 Max. : 124.42 Max. :2025
## NA's :3689 NA's :3689
## Start Month Start Day End Year End Month
## Min. : 1.000 Min. : 1.00 Min. :1900 Min. : 1.000
## 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.:1989 1st Qu.: 4.000
## Median : 7.000 Median :15.00 Median :2002 Median : 7.000
## Mean : 6.468 Mean :15.43 Mean :1998 Mean : 6.639
## 3rd Qu.: 9.000 3rd Qu.:23.00 3rd Qu.:2014 3rd Qu.: 9.000
## Max. :12.000 Max. :31.00 Max. :2025 Max. :12.000
## NA's :69 NA's :857 NA's :141
## End Day Total Deaths No. Injured No. Affected
## Min. : 1.00 Min. : 1.0 Min. : 1.0 Min. : 3
## 1st Qu.: 8.00 1st Qu.: 4.0 1st Qu.: 12.0 1st Qu.: 1020
## Median :16.00 Median : 12.0 Median : 37.0 Median : 6780
## Mean :15.84 Mean : 286.9 Mean : 3817.9 Mean : 195474
## 3rd Qu.:23.00 3rd Qu.: 40.0 3rd Qu.: 138.5 3rd Qu.: 45000
## Max. :31.00 Max. :222570.0 Max. :1800000.0 Max. :85000000
## NA's :834 NA's :1398 NA's :3400 NA's :1884
## No. Homeless Total Affected Reconstruction Costs ('000 US$)
## Min. : 5 Min. : 1 Mode:logical
## 1st Qu.: 300 1st Qu.: 600 TRUE:14
## Median : 1533 Median : 4248 NA's:4301
## Mean : 19975 Mean : 165323
## 3rd Qu.: 8770 3rd Qu.: 30004
## Max. :1166000 Max. :85000012
## NA's :3690 NA's :1344
## Reconstruction Costs, Adjusted ('000 US$) Insured Damage ('000 US$)
## Mode:logical Min. : 162
## TRUE:14 1st Qu.: 62500
## NA's:4301 Median : 200000
## Mean : 1299304
## 3rd Qu.: 755000
## Max. :60000000
## NA's :3710
## Insured Damage, Adjusted ('000 US$) Total Damage ('000 US$)
## Min. : 201 Min. : 3
## 1st Qu.: 118839 1st Qu.: 15500
## Median : 339640 Median : 112300
## Mean : 1731304 Mean : 1251189
## 3rd Qu.: 1046379 3rd Qu.: 729350
## Max. :93614347 Max. :125000000
## NA's :3721 NA's :2605
## Total Damage, Adjusted ('000 US$) CPI Admin Units
## Min. : 5 Min. : 2.73 Mode:logical
## 1st Qu.: 37512 1st Qu.: 38.81 NA's:4315
## Median : 237094 Median : 58.11
## Mean : 1871285 Mean : 55.38
## 3rd Qu.: 1291495 3rd Qu.: 75.35
## Max. :195029889 Max. :100.00
## NA's :2655 NA's :130
## Entry Date Last Update Temperature
## Length:4315 Length:4315 Min. : 5.00
## Class :character Class :character 1st Qu.:15.00
## Mode :character Mode :character Median :20.00
## Mean :19.84
## 3rd Qu.:25.00
## Max. :29.00
##
# Core variables (used for the central analysis).
variables_principales <- c(
  "Disaster Subgroup", "Disaster Type", "Disaster Subtype",
  "ISO", "Country", "Subregion",
  "Total Deaths", "No. Injured", "No. Affected", "Total Affected",
  "Insured Damage ('000 US$)", "Insured Damage, Adjusted ('000 US$)",
  "Total Damage ('000 US$)", "Total Damage, Adjusted ('000 US$)",
  "Temperature"
)
# Secondary variables (used for the complementary analysis).
variables_secundarias <- c(
  "Event Name", "Magnitude", "Magnitude Scale",
  "Start Year", "No. Homeless", "Region"
)
# Build the working subset with the selected columns, core variables first.
datos_subset <- select(
  datos,
  all_of(c(variables_principales, variables_secundarias))
)
# Check the structure of the subset.
str(datos_subset)
## tibble [4,315 × 21] (S3: tbl_df/tbl/data.frame)
## $ Disaster Subgroup : chr [1:4315] "Meteorological" "Hydrological" "Biological" "Geophysical" ...
## $ Disaster Type : chr [1:4315] "Storm" "Flood" "Epidemic" "Volcanic activity" ...
## $ Disaster Subtype : chr [1:4315] "Tropical cyclone" "Flood (General)" "Viral disease" "Ash fall" ...
## $ ISO : chr [1:4315] "USA" "JAM" "JAM" "GTM" ...
## $ Country : chr [1:4315] "United States of America" "Jamaica" "Jamaica" "Guatemala" ...
## $ Subregion : chr [1:4315] "Northern America" "Latin America and the Caribbean" "Latin America and the Caribbean" "Latin America and the Caribbean" ...
## $ Total Deaths : num [1:4315] 6000 300 30 1000 30000 ...
## $ No. Injured : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ No. Affected : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Total Affected : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Insured Damage ('000 US$) : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Insured Damage, Adjusted ('000 US$): num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Total Damage ('000 US$) : num [1:4315] 30000 NA NA NA NA NA NA 25000 480000 NA ...
## $ Total Damage, Adjusted ('000 US$) : num [1:4315] 1098720 NA NA NA NA ...
## $ Temperature : num [1:4315] 13.5 27 27 20 26 28 20 20 13.5 13.5 ...
## $ Event Name : chr [1:4315] NA NA "Gastroenteritis" "Santa Maria" ...
## $ Magnitude : num [1:4315] 220 NA NA NA NA NA NA 7.5 NA NA ...
## $ Magnitude Scale : chr [1:4315] "Kph" "Km2" "Vaccinated" NA ...
## $ Start Year : num [1:4315] 1900 1900 1900 1902 1902 ...
## $ No. Homeless : num [1:4315] NA NA NA NA NA NA NA NA NA NA ...
## $ Region : chr [1:4315] "Americas" "Americas" "Americas" "Americas" ...
## Disaster Subgroup Disaster Type Disaster Subtype ISO
## Length:4315 Length:4315 Length:4315 Length:4315
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Country Subregion Total Deaths No. Injured
## Length:4315 Length:4315 Min. : 1.0 Min. : 1.0
## Class :character Class :character 1st Qu.: 4.0 1st Qu.: 12.0
## Mode :character Mode :character Median : 12.0 Median : 37.0
## Mean : 286.9 Mean : 3817.9
## 3rd Qu.: 40.0 3rd Qu.: 138.5
## Max. :222570.0 Max. :1800000.0
## NA's :1398 NA's :3400
## No. Affected Total Affected Insured Damage ('000 US$)
## Min. : 3 Min. : 1 Min. : 162
## 1st Qu.: 1020 1st Qu.: 600 1st Qu.: 62500
## Median : 6780 Median : 4248 Median : 200000
## Mean : 195474 Mean : 165323 Mean : 1299304
## 3rd Qu.: 45000 3rd Qu.: 30004 3rd Qu.: 755000
## Max. :85000000 Max. :85000012 Max. :60000000
## NA's :1884 NA's :1344 NA's :3710
## Insured Damage, Adjusted ('000 US$) Total Damage ('000 US$)
## Min. : 201 Min. : 3
## 1st Qu.: 118839 1st Qu.: 15500
## Median : 339640 Median : 112300
## Mean : 1731304 Mean : 1251189
## 3rd Qu.: 1046379 3rd Qu.: 729350
## Max. :93614347 Max. :125000000
## NA's :3721 NA's :2605
## Total Damage, Adjusted ('000 US$) Temperature Event Name
## Min. : 5 Min. : 5.00 Length:4315
## 1st Qu.: 37512 1st Qu.:15.00 Class :character
## Median : 237094 Median :20.00 Mode :character
## Mean : 1871285 Mean :19.84
## 3rd Qu.: 1291495 3rd Qu.:25.00
## Max. :195029889 Max. :29.00
## NA's :2655
## Magnitude Magnitude Scale Start Year No. Homeless
## Min. : -50.0 Length:4315 Min. :1900 Min. : 5
## 1st Qu.: 7.7 Class :character 1st Qu.:1989 1st Qu.: 300
## Median : 165.0 Mode :character Median :2002 Median : 1533
## Mean : 39808.8 Mean :1998 Mean : 19975
## 3rd Qu.: 3527.5 3rd Qu.:2014 3rd Qu.: 8770
## Max. :1768303.0 Max. :2025 Max. :1166000
## NA's :3111 NA's :3690
## Region
## Length:4315
## Class :character
## Mode :character
##
##
##
##
# Total number of missing cells in the subset.
total_na <- datos_subset %>%
  is.na() %>%
  sum()
print(paste("Total NA:", total_na))
## [1] "Total NA: 31179"
# Share of missing values per column, expressed as a proportion between 0
# and 1 (NA count divided by the number of rows; it is not multiplied by 100).
na_por_columna <- (datos_subset %>% is.na() %>% colSums()) / nrow(datos_subset)
print(na_por_columna)
## Disaster Subgroup Disaster Type
## 0.00000000 0.00000000
## Disaster Subtype ISO
## 0.00000000 0.00000000
## Country Subregion
## 0.00000000 0.00000000
## Total Deaths No. Injured
## 0.32398610 0.78794902
## No. Affected Total Affected
## 0.43661645 0.31147161
## Insured Damage ('000 US$) Insured Damage, Adjusted ('000 US$)
## 0.85979143 0.86234067
## Total Damage ('000 US$) Total Damage, Adjusted ('000 US$)
## 0.60370800 0.61529548
## Temperature Event Name
## 0.00000000 0.76152955
## Magnitude Magnitude Scale
## 0.72097335 0.08690614
## Start Year No. Homeless
## 0.00000000 0.85515643
## Region
## 0.00000000
# Boxplot of 'Total Damage' (column is expressed in thousands of US$).
datos_subset %>%
  ggplot(mapping = aes(y = `Total Damage ('000 US$)`)) +
  geom_boxplot() +
  labs(
    title = "Boxplot de Daños Totales",
    y = "Total Damage ('000 US$)"
  ) +
  theme_minimal()
# Compute death statistics
# na.rm = TRUE: 'Total Deaths' contains missing values, which would otherwise
# turn each of these statistics into NA.
media_muertes <- mean(datos_subset[["Total Deaths"]], na.rm = TRUE)
mediana_muertes <- median(datos_subset[["Total Deaths"]], na.rm = TRUE)
desv_estandar_muertes <- sd(datos_subset[["Total Deaths"]], na.rm = TRUE)
# Compute the statistical mode (most frequent value) of a vector.
#
# Args:
#   x:     Vector of values.
#   na.rm: If TRUE (default), missing values are dropped before counting, so
#          NA is never reported as the mode merely because it is the most
#          common entry. Pass FALSE to count NA like any other value.
#
# Returns:
#   The most frequent value of x. On ties, the value that appears first in x
#   wins. Returns NA when no values are left to count.
moda <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  ux <- unique(x)
  # match() maps every element to its position in ux; tabulate() counts the
  # occurrences per position and which.max() picks the first largest count.
  ux[which.max(tabulate(match(x, ux)))]
}
moda_muertes <- moda(datos_subset$`Total Deaths`)
# Print all four statistics. Only the mean was being printed even though the
# median, standard deviation and mode are computed; the labels used here are
# the ones that appear in the recorded output that follows.
print(paste("Media de muertes:", media_muertes))
print(paste("Mediana de muertes:", mediana_muertes))
print(paste("Desviación estándar:", desv_estandar_muertes))
print(paste("Moda de muertes:", moda_muertes))
## [1] "Media de muertes: 286.88104216661"
## [1] "Mediana de muertes: 12"
## [1] "Desviación estándar: 4649.08031281979"
## [1] "Moda de muertes: NA"
# Frequency by "Disaster Subgroup"
ggplot(data = datos_subset, mapping = aes(x = `Disaster Subgroup`)) +
  geom_bar(fill = "dodgerblue") +
  labs(
    title = "Frecuencia por Subgrupo de Desastre",
    x = "Subgrupo de Desastre",
    y = "Cantidad"
  ) +
  theme_minimal()
# Frequency by "Disaster Type"
# One bar per disaster type; bar height is the number of records of that type.
datos_subset %>%
  ggplot(mapping = aes(x = `Disaster Type`)) +
  geom_bar(fill = "red") +
  labs(
    title = "Frecuencia por Tipo de Desastre",
    x = "Tipo de Desastre",
    y = "Cantidad"
  ) +
  theme_minimal()
# Top 5 Disaster Type
# Count records per disaster type, most frequent first, and keep the first 5.
top5_disaster_type <- head(
  count(datos_subset, `Disaster Type`, sort = TRUE),
  n = 5
)
print(top5_disaster_type)
## # A tibble: 5 × 2
## `Disaster Type` n
## <chr> <int>
## 1 Storm 1580
## 2 Flood 1407
## 3 Earthquake 326
## 4 Mass movement (wet) 208
## 5 Wildfire 208
# Frequency by "Disaster Subtype"
ggplot(data = datos_subset, mapping = aes(x = `Disaster Subtype`)) +
  geom_bar(fill = "seagreen") +
  labs(
    title = "Frecuencia por Subtipo de Desastre",
    x = "Subtipo de Desastre",
    y = "Cantidad"
  ) +
  theme_minimal()
# Top 5 Disaster Subtype
# Count records per disaster subtype, most frequent first, and keep the
# first 5.
top5_disaster_subtype <- head(
  count(datos_subset, `Disaster Subtype`, sort = TRUE),
  n = 5
)
print(top5_disaster_subtype)
## # A tibble: 5 × 2
## `Disaster Subtype` n
## <chr> <int>
## 1 Tropical cyclone 795
## 2 Riverine flood 633
## 3 Flood (General) 630
## 4 Ground movement 314
## 5 Storm (General) 279
# Keep only rows with a usable Total Damage value. is.finite() is already
# FALSE for NA, NaN and +/-Inf, so a separate is.na() check is not needed.
datos_filtrados <- datos_subset %>%
  filter(is.finite(`Total Damage ('000 US$)`))
# Histogram of total damage.
# The recorded summary shows values ranging from 3 to 125,000,000, so a fixed
# binwidth of 1000 would produce on the order of 125,000 bins, almost all of
# them empty or one pixel wide. A log10 x axis with a fixed number of bins
# keeps the heavily right-skewed distribution readable.
ggplot(datos_filtrados, aes(x = `Total Damage ('000 US$)`)) +
  geom_histogram(bins = 30, fill = "orange", color = "black") +
  scale_x_log10() +
  theme_minimal() +
  labs(title = "Histograma de Daños Totales",
       x = "Daños Totales ('000 US$, escala log10)", y = "Frecuencia")
# Scatter plot: deaths vs. injured
# One point per record; alpha below 1 lets overlapping points show through.
datos_subset %>%
  ggplot(mapping = aes(x = `Total Deaths`, y = `No. Injured`)) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Relación entre Muertes y Heridos",
    x = "Total Deaths",
    y = "No. Injured"
  ) +
  theme_minimal()
# Impact analysis by region
# Yearly totals of deaths and injured for each region. Missing counts are
# skipped (na.rm = TRUE), so a group with only NA values sums to 0.
impacto_region <- summarise(
  group_by(datos_subset, Region, `Start Year`),
  Total_Muertes = sum(`Total Deaths`, na.rm = TRUE),
  Total_Heridos = sum(`No. Injured`, na.rm = TRUE),
  .groups = "drop"
)
# Plot the evolution of deaths over the years, one line per region.
impacto_region %>%
  ggplot(mapping = aes(x = `Start Year`, y = Total_Muertes, color = Region)) +
  geom_line() +
  labs(
    title = "Evolución de Muertes por Región a lo Largo de los Años",
    x = "Año de Inicio",
    y = "Total Muertes"
  ) +
  theme_minimal()
## Reading layer `ne_110m_admin_0_countries' from data source
## `C:\Users\taroj\Documents\Actuaria_Octavo Semestre\COMPUTO CIENTIFICO\Proyecto\Mapas\ne_110m_admin_0_countries\ne_110m_admin_0_countries.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 177 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 83.64513
## Geodetic CRS: WGS 84
## [1] "FJI" "TZA" "SAH" "CAN" "US1" "KA1"
# Country-level disaster information: number of records per ISO code and the
# distinct disaster types recorded for it, collapsed into one string.
desastres_por_pais <- datos_subset %>%
  group_by(ISO) %>%
  summarise(Total_Desastres = n(),
            Desastres_Tipo = paste(unique(`Disaster Type`), collapse = ", "),
            .groups = "drop")
# NOTE(review): `mapa_mundi` is not created in the visible code; presumably it
# is the layer whose "Reading layer" output is recorded above — confirm.
# Fix the United States code in the shapefile so it matches the ISO column.
# NOTE(review): the recorded SOV_A3 sample also contains "KA1", so other
# codes may fail to match ISO as well — verify whether more recodes (or a
# different key column) are needed.
mapa_mundi <- mapa_mundi %>%
  mutate(SOV_A3 = ifelse(SOV_A3 == "US1", "USA", SOV_A3))
# Join the per-country summary onto the corrected shapefile.
mapa_datos <- mapa_mundi %>%
  left_join(desastres_por_pais, by = c("SOV_A3" = "ISO"))
# Countries without data must be drawn in grey. Zero-filling the count before
# plotting made them "lightblue" (the low end of the gradient) and left
# na.value = "grey" unused, so the fill uses a copy that keeps NA, while
# Total_Desastres itself is still zero-filled for the tooltip text.
mapa_datos$Relleno_Desastres <- mapa_datos$Total_Desastres
mapa_datos$Total_Desastres[is.na(mapa_datos$Total_Desastres)] <- 0
# Build the base map with ggplot2. `text` is only carried along so that
# ggplotly() can use it as the tooltip below.
mapa_base <- ggplot() +
  geom_sf(data = mapa_datos,
          aes(fill = Relleno_Desastres,
              text = paste("País:", SOV_A3,
                           "<br>Total Desastres:", Total_Desastres,
                           "<br>Tipos:", Desastres_Tipo))) +
  scale_fill_gradient(low = "lightblue", high = "darkred", na.value = "grey") +
  theme_minimal() +
  labs(title = "Mapa Interactivo de Desastres por País", fill = "Total Desastres")
# Convert to an interactive plot and display it.
mapa_interactivo <- ggplotly(mapa_base, tooltip = "text")
mapa_interactivo
# Distribution of Temperature
# Histogram of Temperature using bins one unit wide.
ggplot(data = datos_subset, mapping = aes(x = Temperature)) +
  geom_histogram(binwidth = 1, fill = "lightgreen", color = "black") +
  labs(
    title = "Histograma de Temperatura",
    x = "Temperature (°C)",
    y = "Frecuencia"
  ) +
  theme_minimal()
# Relationship between Temperature and Total Damage
# One point per record: temperature on x, total damage on y.
datos_subset %>%
  ggplot(mapping = aes(x = Temperature, y = `Total Damage ('000 US$)`)) +
  geom_point(alpha = 0.6, color = "purple") +
  labs(
    title = "Relación entre Temperatura y Daños Totales",
    x = "Temperatura (°C)",
    y = "Daños Totales ('000 US$)"
  ) +
  theme_minimal()
# Relationship between Temperature and Total Affected
# One point per record: temperature on x, total affected on y.
datos_subset %>%
  ggplot(mapping = aes(x = Temperature, y = `Total Affected`)) +
  geom_point(alpha = 0.6, color = "brown") +
  labs(
    title = "Relación entre Temperatura y Total Afectados",
    x = "Temperatura (°C)",
    y = "Total Afectados"
  ) +
  theme_minimal()
# Group by Country, Disaster Type and Start Year
# For every country, disaster type and year: number of records and mean
# temperature (missing temperatures are ignored).
temp_analysis <- summarise(
  group_by(datos_subset, Country, `Disaster Type`, `Start Year`),
  Num_Desastres = n(),
  Prom_Temperature = mean(Temperature, na.rm = TRUE),
  .groups = "drop"
)
# Plot the evolution of disaster counts: one panel per disaster type and one
# coloured line per country.
# NOTE(review): the title mentions temperature, but Prom_Temperature is not
# mapped to anything in this plot — confirm the intent.
temp_analysis %>%
  ggplot(mapping = aes(x = `Start Year`, y = Num_Desastres, color = Country)) +
  geom_line() +
  facet_wrap(~ `Disaster Type`) +
  labs(
    title = "Evolución de Desastres por Tipo y País en Función de la Temperatura",
    x = "Año de Inicio",
    y = "Cantidad de Desastres"
  ) +
  theme_minimal()
# Global trend of temperature and number of disasters
# Per-year number of records and mean temperature across the whole subset.
global_temp_trend <- datos_subset %>%
  group_by(`Start Year`) %>%
  summarise(Num_Desastres = n(),
            Avg_Temperature = mean(Temperature, na.rm = TRUE),
            .groups = "drop")
# Both series share one panel: the disaster count is drawn as-is (solid blue)
# and the mean temperature is multiplied by 10 (dashed red) so it is visible
# on the count scale. The secondary axis divides by the same factor of 10, so
# the temperature line can be read in its original units instead of being an
# unlabelled, rescaled curve.
# NOTE(review): `size` for line width is deprecated in favour of `linewidth`
# in recent ggplot2 releases — switch once the installed version is confirmed.
ggplot(global_temp_trend, aes(x = `Start Year`)) +
  geom_line(aes(y = Num_Desastres), color = "blue", size = 1) +
  geom_line(aes(y = Avg_Temperature * 10), color = "red", linetype = "dashed", size = 1) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 10, name = "Temperatura promedio (°C, línea roja)")
  ) +
  theme_minimal() +
  labs(title = "Tendencia Global: Número de Desastres y Temperatura",
       x = "Año de Inicio",
       y = "Cantidad de Desastres (línea azul)")