This report analyzes the NOAA Storm Database to determine which types of weather events are most harmful to population health and which have the greatest economic consequences across the United States. The analysis uses the raw storm data recorded from 1950 to 2011 and summarizes fatalities, injuries, property damage, and crop damage by event type to identify the most harmful events.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Download the compressed storm data once; later runs reuse the local copy.
if (!file.exists("stormdata.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata/data/StormData.csv.bz2",
    destfile = "stormdata.csv.bz2",
    # The archive is binary: request a binary transfer explicitly so the
    # file is never written in text mode on platforms that distinguish them.
    mode = "wb"
  )
}

# read.csv() is pointed straight at the bz2-compressed file. Text columns
# are kept as character (not factor) regardless of the R version's default,
# because the exponent codes are later looked up by their text value.
storm <- read.csv("stormdata.csv.bz2", stringsAsFactors = FALSE)
# Keep only the columns used by the health and economic summaries.
df <- storm %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  )

# Dollar multipliers for the damage exponent codes recognised here
# (thousands, millions, billions; upper or lower case).
exp_map <- c(
  "K" = 1000, "M" = 1e6, "B" = 1e9,
  "k" = 1000, "m" = 1e6, "b" = 1e9
)

# Look the codes up by their text value. as.character() guards against the
# columns being factors, where `[` would index by integer level code and
# silently return the wrong multiplier. unname() drops the per-element names
# that a named-vector lookup attaches to its result.
df$PROPDMGEXP <- unname(exp_map[as.character(df$PROPDMGEXP)])
df$CROPDMGEXP <- unname(exp_map[as.character(df$CROPDMGEXP)])

# Any code that is not in exp_map (including a blank one) comes back as NA
# and is treated as a multiplier of 1, i.e. the damage figure is used as-is.
# NOTE(review): if the data contains other codes (e.g. "H" or digits), they
# also fall into this case -- confirm that is the intended treatment.
df$PROPDMGEXP[is.na(df$PROPDMGEXP)] <- 1
df$CROPDMGEXP[is.na(df$CROPDMGEXP)] <- 1

# Damage in dollars per record: reported figure times its multiplier.
df$PROP_TOTAL <- df$PROPDMG * df$PROPDMGEXP
df$CROP_TOTAL <- df$CROPDMG * df$CROPDMGEXP
df$ECONOMIC_TOTAL <- df$PROP_TOTAL + df$CROP_TOTAL
# Per event type: total fatalities, total injuries, and their sum.
# Only the ten event types with the largest combined count are kept.
health <- df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  # Combined casualty count, derived from the two per-event totals above.
  mutate(total_health = total_fatalities + total_injuries) %>%
  arrange(desc(total_health)) %>%
  head(10)

health
## # A tibble: 10 × 4
## EVTYPE total_fatalities total_injuries total_health
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Horizontal bar chart of the ten event types in `health`, with bars ordered
# by combined fatalities and injuries.
ggplot(
  data = health,
  mapping = aes(x = reorder(EVTYPE, total_health), y = total_health)
) +
  geom_col(fill = "red") +
  coord_flip() +
  ggtitle("Top 10 Most Harmful Events to Population Health") +
  xlab("Event Type") +
  ylab("Fatalities + Injuries")
# Per event type: summed property damage, crop damage, and combined damage.
# Only the ten event types with the largest combined damage are kept.
econ <- df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_prop = sum(PROP_TOTAL, na.rm = TRUE),
    total_crop = sum(CROP_TOTAL, na.rm = TRUE),
    total_econ = sum(ECONOMIC_TOTAL, na.rm = TRUE)
  ) %>%
  arrange(desc(total_econ)) %>%
  head(10)

econ
## # A tibble: 10 × 4
## EVTYPE total_prop total_crop total_econ
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 5661968450 150319678257
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 56937160779. 414953270 57352114049.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15732267048. 3025954473 18758221521.
## 6 FLASH FLOOD 16140812067. 1421317100 17562129167.
## 7 DROUGHT 1046106000 13972566000 15018672000
## 8 HURRICANE 11868319010 2741910000 14610229010
## 9 RIVER FLOOD 5118945500 5029459000 10148404500
## 10 ICE STORM 3944927860 5022113500 8967041360
# Horizontal bar chart of the ten event types in `econ`, with bars ordered
# by combined property and crop damage.
ggplot(
  data = econ,
  mapping = aes(x = reorder(EVTYPE, total_econ), y = total_econ)
) +
  geom_col(fill = "darkblue") +
  coord_flip() +
  ggtitle("Top 10 Events with Greatest Economic Consequences") +
  xlab("Event Type") +
  ylab("Economic Damage (USD)")