This analysis examines the NOAA Storm Database to determine which types of weather events cause the greatest harm to population health and the greatest economic damage across the United States. The dataset records the fatalities, injuries, property damage, and crop damage associated with severe weather events. The analysis aggregates these impacts by event type to identify the most severe categories. Results show the event types responsible for the highest numbers of fatalities and injuries, as well as those producing the greatest financial losses. Understanding these patterns supports improved disaster preparedness and resource allocation.
# Attach the packages the rest of this script relies on: dplyr supplies
# `%>%`, select(), group_by(), summarise() and arrange(); ggplot2 supplies
# the plotting functions. Loading them here keeps the analysis runnable
# from a fresh R session.
library(dplyr)
library(ggplot2)

# Read the raw storm events file from the working directory into a data frame.
storm <- read.csv("repdata_data_StormData.csv")

# Report the number of rows and columns that were read.
dim(storm)
## [1] 902297 37

# Show the name, type, and first few values of every column.
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type plus the four raw impact measures used by the
# per-event summaries below.
impact_columns <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
storm_data <- storm[, impact_columns, drop = FALSE]
# Per event type: sum the reported fatalities and injuries, add the two
# into a combined `total`, and rank event types from highest to lowest total.
health <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES),
    injuries = sum(INJURIES)
  ) %>%
  mutate(total = fatalities + injuries) %>%
  arrange(desc(total))

# The ten highest-ranked event types, displayed as a table.
top_health <- health %>%
  head(n = 10)
top_health
## # A tibble: 10 × 4
## EVTYPE fatalities injuries total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Convert damage exponent codes into numeric multipliers.
#
# PROPDMG / CROPDMG hold only the leading figure of a damage estimate; the
# companion PROPDMGEXP / CROPDMGEXP column holds its magnitude code (for
# example 25 with "K" means 25,000). Summing the raw figures without scaling
# adds thousands, millions and billions as if they were the same unit.
#
# code: character vector of exponent codes (case-insensitive).
# Returns a numeric vector of the same length: 1e2 for "H", 1e3 for "K",
# 1e6 for "M", 1e9 for "B", and 1 for anything else.
# NOTE(review): blank, numeric and symbol codes are left unscaled here;
# confirm the intended handling against the NWS Storm Data documentation.
damage_multiplier <- function(code) {
  known_scales <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known_scales[toupper(trimws(as.character(code)))])
  # Codes that are blank, missing, or not in the lookup come back as NA.
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Per event type: total property and crop damage after applying the
# magnitude codes, ranked by combined damage. This reads from `storm`
# because the exponent columns are needed alongside the raw figures.
economic <- storm %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
  mutate(
    property_scaled = PROPDMG * damage_multiplier(PROPDMGEXP),
    crop_scaled = CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(
    property = sum(property_scaled),
    crop = sum(crop_scaled),
    total = property + crop
  ) %>%
  arrange(desc(total))

# The ten costliest event types, displayed as a table.
# NOTE: any previously rendered copy of this table was computed from the
# unscaled figures; re-knit the report to refresh it.
top_economic <- head(economic, 10)
top_economic
## # A tibble: 10 × 4
## EVTYPE property crop total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 3212258. 100019. 3312277.
## 2 FLASH FLOOD 1420125. 179200. 1599325.
## 3 TSTM WIND 1335966. 109203. 1445168.
## 4 HAIL 688693. 579596. 1268290.
## 5 FLOOD 899938. 168038. 1067976.
## 6 THUNDERSTORM WIND 876844. 66791. 943636.
## 7 LIGHTNING 603352. 3581. 606932.
## 8 THUNDERSTORM WINDS 446293. 18685. 464978.
## 9 HIGH WIND 324732. 17283. 342015.
## 10 WINTER STORM 132721. 1979. 134700.
# Horizontal bar chart of the combined fatalities-plus-injuries count for
# each event type in `top_health`; reorder() sorts the bars by `total`.
ggplot(
  data = top_health,
  mapping = aes(x = reorder(EVTYPE, total), y = total)
) +
  geom_col() +
  labs(
    y = "Fatalities + Injuries",
    x = "Event Type",
    title = "Top Weather Events Causing Health Impact"
  ) +
  coord_flip()
# Horizontal bar chart of the combined property-plus-crop damage for each
# event type in `top_economic`; reorder() sorts the bars by `total`.
ggplot(
  data = top_economic,
  mapping = aes(x = reorder(EVTYPE, total), y = total)
) +
  geom_col() +
  labs(
    y = "Total Damage",
    x = "Event Type",
    title = "Top Weather Events Causing Economic Damage"
  ) +
  coord_flip()
# Reshape `top_health` to long form: one row per event type and measure,
# with the measure name in `type` and its value in `count`. The combined
# `total` column is left out so only the two components remain.
top_health_long <- tidyr::pivot_longer(
  top_health[, c("EVTYPE", "fatalities", "injuries")],
  cols = c(fatalities, injuries),
  names_to = "type",
  values_to = "count"
)
# Horizontal stacked bar chart splitting each event type's health impact
# into fatalities and injuries, with the fill colour taken from `type`.
ggplot(
  data = top_health_long,
  mapping = aes(x = reorder(EVTYPE, count), y = count, fill = type)
) +
  geom_col() +
  labs(
    y = "Number of Cases",
    x = "Event Type",
    title = "Fatalities vs Injuries by Event Type"
  ) +
  coord_flip()