In this report we aim to analyze the effects of severe weather events in the United States. Every year, tornadoes, hurricanes, and forest fires cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
# Read the bzip2-compressed storm CSV and pass it straight through
# clean_names(); the rest of the analysis refers to the columns by the
# resulting names (evtype, fatalities, injuries, propdmg, propdmgexp).
data <- clean_names(
  read.csv("repdata_data_StormData.csv.bz2")
)
First, we read the data from the CSV file. There are over 900,000 observations.
# Show the size of the loaded data set as (rows, columns).
dim(data)
## [1] 902297 37
# Across the United States, which types of events are most harmful with respect
# to population health?
#
# Total fatalities and injuries per event type, sorted by fatalities.
# "HEAT" and "EXCESSIVE HEAT" are merged into one "HEAT RELATED" category
# *before* aggregating, so the combined totals are computed from the data
# instead of being typed in by hand, and the combined row also receives an
# injury total rather than NA.
heat_events <- c("HEAT", "EXCESSIVE HEAT")

pop_health <- data %>%
  select(evtype, fatalities, injuries) %>%
  mutate(
    # as.character() keeps the recode working if evtype was read as a factor.
    evtype = as.character(evtype),
    evtype = if_else(evtype %in% heat_events, "HEAT RELATED", evtype)
  ) %>%
  group_by(evtype) %>%
  summarise(
    total_fatalities = sum(fatalities),
    total_injuries = sum(injuries)
  ) %>%
  arrange(desc(total_fatalities))

# head() returns at most five rows and never pads the result with NA rows.
top5_fatalities <- head(pop_health, 5)
Many events did not have tangible costs, and for some the damage magnitude is missing or unusable. Observations with zero property damage, or with a blank or unrecognized damage exponent ("-", "?", "+"), have been removed.
# Across the United States, which types of events have the greatest economic
# consequences?
#
# Keep only observations with a non-zero property damage figure and a usable
# exponent code; blank, "-", "?" and "+" codes are discarded.
economic_cons <- data %>%
  select(evtype, propdmg, propdmgexp) %>%
  filter(
    propdmg != 0,
    !is.na(propdmgexp),
    !propdmgexp %in% c("", "-", "?", "+")
  )

# Scale propdmg to dollars with one vectorised lookup instead of a row-by-row
# loop (which was slow and failed on an empty table). Codes are matched
# case-insensitively so that "k", "m", "h" and "b" are scaled like their
# upper-case forms. Any other code is left unscaled (multiplier 1).
# NOTE(review): digit codes are therefore treated as 1 - confirm intended.
exp_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
exp_code <- toupper(as.character(economic_cons$propdmgexp))
damage_scale <- unname(exp_multiplier[exp_code])
damage_scale[is.na(damage_scale)] <- 1
economic_cons$propdmg <- economic_cons$propdmg * damage_scale

# Total property damage per event type, in billions of dollars. "FLOOD" and
# "FLASH FLOOD" are merged into "FLOOD RELATED" before aggregating so the
# combined total is derived from the data rather than a hand-typed constant.
flood_events <- c("FLOOD", "FLASH FLOOD")

economic_conse <- economic_cons %>%
  mutate(
    evtype = as.character(evtype),
    evtype = if_else(evtype %in% flood_events, "FLOOD RELATED", evtype)
  ) %>%
  group_by(evtype) %>%
  summarise(total_property_damage = sum(propdmg) / 1e9) %>%
  arrange(desc(total_property_damage))

# head() returns at most five rows and never pads the result with NA rows.
top5_propdm <- head(economic_conse, 5)
For analysis purposes, I combined the values for “HEAT” and “EXCESSIVE HEAT”. Tornadoes cause by far the greatest number of fatalities, almost double the number of heat-related deaths.
# Bar chart of the five event types with the highest fatality totals,
# with bars ordered from most to fewest fatalities.
fatalities <- ggplot(
  top5_fatalities,
  aes(x = reorder(evtype, -total_fatalities), y = total_fatalities)
) +
  geom_col(aes(fill = evtype)) +
  labs(
    title = "Top 5 Causes of Meteorological Fatalities in the United States",
    subtitle = "Number of Fatalities from 1950 to 2011",
    x = "Event",
    y = "Total Fatalities"
  ) +
  # The fill colour only repeats the x axis, so the legend is hidden.
  theme(legend.position = "none")
fatalities
For this analysis, I combined “FLOOD” and “FLASH FLOOD” into a single category. Floods and hurricanes are the most expensive natural disasters.
# Heavily penalise scientific notation so numbers are printed in fixed
# notation when the plot is rendered.
options(scipen = 999)

# Horizontal bar chart of the five event types with the highest total
# property damage (in billions of dollars).
# The former `size = 10` argument was dropped: the bars have no outline
# colour, so it had no visible effect, and it raises a deprecation warning
# in ggplot2 >= 3.4.0 (where `size` for lines became `linewidth`).
property_damage <- ggplot(
  top5_propdm,
  aes(x = reorder(evtype, -total_property_damage), y = total_property_damage)
) +
  geom_bar(stat = "identity", aes(fill = evtype)) +
  labs(
    title = "Top 5 Most Costly Meteorological Events in the United States",
    subtitle = "Total cost natural disasters from 1950 to 2011",
    x = "Event",
    y = "Total Cost ($Billions)"
  ) +
  # The fill colour only repeats the event axis, so the legend is hidden.
  theme(legend.position = "none") +
  coord_flip()
property_damage