Data Processing
library(data.table)
library(tidyverse)
library(R.utils)
library(knitr)
library(cowplot)
library(scales)
library(dplyr)
library(ggplot2)
library(knitr)
library(cowplot)
# Load the raw storm data set into `storm`. The file name indicates a
# bzip2-compressed CSV; NOTE(review): fread() is documented to read .bz2 input
# only when the R.utils package is installed (it is attached above) -- confirm
# for the data.table version in use.
storm <- fread("repdata_data_StormData.csv.bz2")
# Show the number of rows and columns that were read.
dim(storm)
## [1] 902297 37
# List the available column names; the analysis below uses EVTYPE, FATALITIES,
# INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP.
names(storm)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# --- Data cleaning and preparation ---
# Translate damage-exponent codes into numeric multipliers.
#
# e: vector of exponent codes.
# Returns a numeric vector with one multiplier per element of `e`:
#   "H"/"h" -> 1e2, "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9,
#   and 1 for every other value (NA, empty strings, digits, symbols).
exp_values <- function(e) {
  codes <- c("H", "h", "K", "k", "M", "m", "B", "b")
  multipliers <- c(1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9)
  result <- multipliers[match(e, codes)]
  # Codes missing from the lookup leave the damage figure unscaled.
  result[is.na(result)] <- 1
  result
}
# Turn the exponent codes into multipliers once, store them, and use them to
# compute the property and crop damage values.
prop_multiplier <- exp_values(storm$PROPDMGEXP)
crop_multiplier <- exp_values(storm$CROPDMGEXP)
storm$PROPDMGEXP2 <- prop_multiplier
storm$CROPDMGEXP2 <- crop_multiplier
storm$PROPDMGVAL <- storm$PROPDMG * prop_multiplier
storm$CROPDMGVAL <- storm$CROPDMG * crop_multiplier

# Normalise event type labels: trim whitespace, upper-case, then apply the
# replacements below in order. Order matters: "TSTM WIND" is expanded before
# the plural "THUNDERSTORM WINDS" is collapsed.
evtype_fixes <- c(
  "TSTM WIND" = "THUNDERSTORM WIND",
  "THUNDERSTORM WINDS" = "THUNDERSTORM WIND",
  "HURRICANE/TYPHOON" = "HURRICANE",
  "WILD/FOREST FIRE" = "WILDFIRE",
  "URBAN/SML STREAM FLD" = "FLOOD"
)
event_names <- toupper(trimws(storm$EVTYPE))
for (pattern in names(evtype_fixes)) {
  # gsub() replaces the pattern wherever it occurs inside a label.
  event_names <- gsub(pattern, evtype_fixes[[pattern]], event_names)
}
storm$EVTYPE <- event_names
# --- Health impact (fatalities and injuries) ---
# Sum fatalities and injuries per event type, add their combined total, and
# sort so the event types with the most casualties come first.
health_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    across(c(FATALITIES, INJURIES), ~ sum(.x, na.rm = TRUE))
  ) %>%
  mutate(TOTAL_HEALTH_IMPACT = FATALITIES + INJURIES) %>%
  arrange(desc(TOTAL_HEALTH_IMPACT))

# Keep the five highest-ranked event types and render them as a table.
top5_health <- health_impact %>% head(n = 5)
kable(top5_health, caption = "Top 5 events by fatalities and injuries")
Top 5 events by fatalities and injuries
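| EVTYPE | FATALITIES | INJURIES | TOTAL_HEALTH_IMPACT |
|:------------------|-----------:|---------:|--------------------:|
| TORNADO | 5633 | 91346 | 96979 |
| THUNDERSTORM WIND | 701 | 9353 | 10054 |
| EXCESSIVE HEAT | 1903 | 6525 | 8428 |
| FLOOD | 498 | 6868 | 7366 |
| LIGHTNING | 816 | 5230 | 6046 |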
# --- Economic impact (property and crop damage) ---
# Sum property and crop damage per event type, add their combined total, and
# sort so the costliest event types come first.
economic_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPERTY_DAMAGE = sum(PROPDMGVAL, na.rm = TRUE),
    CROP_DAMAGE = sum(CROPDMGVAL, na.rm = TRUE),
    # Refers to the two per-event sums defined just above.
    TOTAL_ECONOMIC_IMPACT = PROPERTY_DAMAGE + CROP_DAMAGE
  ) %>%
  arrange(desc(TOTAL_ECONOMIC_IMPACT))

# Keep the five highest-ranked event types and render them as a table.
top5_economic <- economic_impact %>% head(n = 5)
kable(top5_economic, caption = "Top 5 events by economic damage")
Top 5 events by economic damage
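| EVTYPE | PROPERTY_DAMAGE | CROP_DAMAGE | TOTAL_ECONOMIC_IMPACT |
|:------------|----------------:|------------:|----------------------:|
| FLOOD | 144716019457 | 5670456550 | 150386476007 |
| HURRICANE | 81174159010 | 5349782800 | 86523941810 |
| TORNADO | 56937160779 | 414953270 | 57352114049 |
| STORM SURGE | 43323536000 | 5000 | 43323541000 |
| HAIL | 15732267543 | 3025954473 | 18758222016 |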
# --- Visualization of results ---
# Horizontal bar chart of the five event types with the most casualties,
# ordered by the combined fatalities + injuries total.
p1 <- ggplot(
  top5_health,
  aes(x = reorder(EVTYPE, TOTAL_HEALTH_IMPACT), y = TOTAL_HEALTH_IMPACT)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  # Thousands separators keep the casualty counts readable.
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Top 5 Events: Health Impact",
    x = "Event Type",
    y = "Fatalities + Injuries"
  )

# Horizontal bar chart of the five costliest event types, ordered by the
# combined property + crop damage total.
p2 <- ggplot(
  top5_economic,
  aes(x = reorder(EVTYPE, TOTAL_ECONOMIC_IMPACT), y = TOTAL_ECONOMIC_IMPACT)
) +
  geom_col(fill = "darkred") +
  coord_flip() +
  # Totals reach the 1e11 range; label ticks in billions of dollars (for
  # example "$150B") instead of the default number formatting.
  scale_y_continuous(
    labels = scales::label_dollar(scale = 1e-9, suffix = "B")
  ) +
  labs(
    title = "Top 5 Events: Economic Damage",
    x = "Event Type",
    y = "Total Damage ($)"
  )

# Stack the two charts vertically in a single figure.
plot_grid(p1, p2, ncol = 1)
