In this report, I aim to address two basic questions about severe weather events based on the data from U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database:
1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?
I specificaly obatined data for the period spanning from 1950 to November 2011. From the data, I found that tornado caused the most injuries and fatalities in the US from 1950 to 2011 while heat/ecessive heat caused the most junries and fatalities per event among the top 10 most injury/fatality reported events. On the other hand, I also found that flood is the event that has the greatest economic consequences (i.e. highest crop and property damages combined) in the US from 1950 to 2011.
The data come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size.
StormData <- read_csv("source_data/repdata_data_StormData.csv.bz2")
## Parsed with column specification:
## cols(
## .default = col_double(),
## BGN_DATE = col_character(),
## BGN_TIME = col_character(),
## TIME_ZONE = col_character(),
## COUNTYNAME = col_character(),
## STATE = col_character(),
## EVTYPE = col_character(),
## BGN_AZI = col_logical(),
## BGN_LOCATI = col_logical(),
## END_DATE = col_logical(),
## END_TIME = col_logical(),
## COUNTYENDN = col_logical(),
## END_AZI = col_logical(),
## END_LOCATI = col_logical(),
## PROPDMGEXP = col_character(),
## CROPDMGEXP = col_logical(),
## WFO = col_logical(),
## STATEOFFIC = col_logical(),
## ZONENAMES = col_logical(),
## REMARKS = col_logical()
## )
## See spec(...) for full column specifications.
I first look into the injuries caused by/reported under each types of storm event.
StormData %>%
select(EVTYPE, INJURIES) %>%
group_by(EVTYPE) %>%
summarise(Events_Reported = n(), Injuries_Reported = sum(INJURIES), Injuries_per_Report = Injuries_Reported/Events_Reported) -> InjuryData
The top 10 events with the highest accumulative injuries reported are:
InjuryData %>%
arrange(desc(Injuries_Reported)) %>%
head(10) -> InjuryData_filtered
print(InjuryData_filtered)
## # A tibble: 10 x 4
## EVTYPE Events_Reported Injuries_Reported Injuries_per_Report
## <chr> <int> <dbl> <dbl>
## 1 TORNADO 60652 91346 1.51
## 2 TSTM WIND 219944 6957 0.0316
## 3 FLOOD 25326 6789 0.268
## 4 EXCESSIVE HEAT 1678 6525 3.89
## 5 LIGHTNING 15755 5230 0.332
## 6 HEAT 767 2100 2.74
## 7 ICE STORM 2006 1975 0.985
## 8 FLASH FLOOD 54278 1777 0.0327
## 9 THUNDERSTORM WIND 82563 1488 0.0180
## 10 HAIL 288661 1361 0.00471
Next, we look at the relationship between fatality and event by performing a similar analysis.
StormData %>%
select(EVTYPE, FATALITIES) %>%
group_by(EVTYPE) %>%
summarise(Events_Reported = n(), Fatalities_Reported = sum(FATALITIES), Fatalities_per_Report = Fatalities_Reported/Events_Reported) %>%
arrange(desc(Fatalities_Reported)) %>%
head(10)->FatalityData
The top 10 events with the highest accumulative fatalities reported are:
print(FatalityData)
## # A tibble: 10 x 4
## EVTYPE Events_Reported Fatalities_Reported Fatalities_per_Report
## <chr> <int> <dbl> <dbl>
## 1 TORNADO 60652 5633 0.0929
## 2 EXCESSIVE HEAT 1678 1903 1.13
## 3 FLASH FLOOD 54278 978 0.0180
## 4 HEAT 767 937 1.22
## 5 LIGHTNING 15755 816 0.0518
## 6 TSTM WIND 219944 504 0.00229
## 7 FLOOD 25326 470 0.0186
## 8 RIP CURRENT 470 368 0.783
## 9 HIGH WIND 20212 248 0.0123
## 10 AVALANCHE 386 224 0.580
We first take a look at the PROPDMGEXP and CROPDMGEXP columns which denote the unit of the damage values in the PROPDMG and CROPDMG columns.
Prop_Symbol <- sort(unique(StormData$PROPDMGEXP))
Crop_Symbol <- sort(unique(StormData$CROPDMGEXP))
EXPsymbol <- sort(unique(c(Prop_Symbol,Crop_Symbol)))
EXPsymbol
## [1] "-" "?" "+" "0" "1" "2" "3" "4"
## [9] "5" "6" "7" "8" "B" "FALSE" "h" "H"
## [17] "K" "m" "M"
Next, based on the analysis performed by flydisk, we assisgn the values to those EXP symbols.
EXPvalue <- c(0,0,1,10,10,10,10,10,10,10,10,10,10^9,0,10^2,10^2,10^3,10^6,10^6)
EXPtable <- as_tibble(cbind(EXPsymbol, EXPvalue))
EXPtable$EXPvalue = as.numeric(EXPtable$EXPvalue)
EXPtable
## # A tibble: 19 x 2
## EXPsymbol EXPvalue
## <chr> <dbl>
## 1 - 0
## 2 ? 0
## 3 + 1
## 4 0 10
## 5 1 10
## 6 2 10
## 7 3 10
## 8 4 10
## 9 5 10
## 10 6 10
## 11 7 10
## 12 8 10
## 13 B 1000000000
## 14 FALSE 0
## 15 h 100
## 16 H 100
## 17 K 1000
## 18 m 1000000
## 19 M 1000000
Finally we calculate the total damage caused by each event by summing the product of the DMG value and its EXP symbol
StormData %>%
select(EVTYPE,CROPDMG,CROPDMGEXP,PROPDMG,PROPDMGEXP) %>%
merge(., EXPtable, by.x = "CROPDMGEXP", by.y = "EXPsymbol",all.x = TRUE) %>%
mutate(Crop.dmg = ifelse(is.na(EXPvalue),0,CROPDMG * EXPvalue)) %>%
select(-EXPvalue) %>%
merge(., EXPtable, by.x = "PROPDMGEXP", by.y = "EXPsymbol",all.x = TRUE) %>%
mutate(Prop.dmg = ifelse(is.na(EXPvalue),0,PROPDMG * EXPvalue)) %>%
mutate(Total.dmg = Prop.dmg + Crop.dmg) %>%
select(-EXPvalue) %>%
group_by(EVTYPE) %>%
summarise(Total_Dmg = sum(Total.dmg)) %>%
arrange(desc(Total_Dmg)) %>%
head(10)->EconData
print(EconData)
## # A tibble: 10 x 2
## EVTYPE Total_Dmg
## <chr> <dbl>
## 1 FLOOD 144657709800
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56937162897
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16140865011
## 6 HAIL 15732269877
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
## 9 WINTER STORM 6688497260
## 10 HIGH WIND 5270046280
InjuryData_filtered %>%
ggplot(aes(x = reorder(EVTYPE,-Injuries_Reported), y = Injuries_Reported))+
geom_bar(stat = "identity", fill = "gold")+
theme_minimal()+theme(axis.text.x = element_text(angle = 45, hjust =1))+
labs(title = "Top 10 Events by Total Injuries Caused", x = "Event Type", y = "Total Injuries", caption = "Tornado is the most injury causing event cumulatively")+
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.caption = element_text(color = "gold", face = "italic"))
FatalityData %>%
ggplot(aes(x = reorder(EVTYPE,-Fatalities_Reported), y = Fatalities_Reported))+
geom_bar(stat = "identity", fill = "cyan")+
theme_minimal()+theme(axis.text.x = element_text(angle = 45, hjust =1))+
labs(title = "Top 10 Events by Total Fatalities Caused", x = "Event Type", y = "Total Fatalities",caption = "Tornado is the most fatal event cumulatively")+
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.caption = element_text(color = "cyan", face = "italic"))
EconData %>%
ggplot(aes(x = reorder(EVTYPE, -Total_Dmg), y = Total_Dmg))+
geom_bar(stat = "identity", fill = "maroon1")+
theme_minimal()+theme(axis.text.x = element_text(angle = 45, hjust =1))+
labs(title = "Top 10 Events by Total Crop and Property Damages", x = "Event Type", y = "Total Damages", caption = "Flood has the highest economic consequences")+
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.caption = element_text(color = "maroon1", face = "italic"))