The basic goal of this report is to explore the NOAA Storm Database and answer some basic questions about severe weather events.
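The two main questions addressed are the following: (1) Which types of severe weather events are most harmful to population health, measured by fatalities and injuries? (2) Which types of events have the greatest economic consequences, measured by property and crop damage?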
Here is a summary of the results from this data analysis: 1. Tornadoes appear to cause the most fatalities, followed by excessive heat and flash flood events. Tornadoes are also the leading cause of injuries. 2. Floods appear to be the largest contributor to economic damage, followed by hurricane/typhoon and tornado events.
Read the csv file and create a Data Frame aggregating the health impact by Event Type
# Load the raw storm data; the CSV is read relative to the working directory.
stormdata <- read.csv("repdata%2Fdata%2FStormData.csv")
# Total fatalities and injuries per event type (one row per EVTYPE value,
# with the event type as the row name).
health_impact_by_evtype <- data.frame(
  fatalities = tapply(stormdata$FATALITIES, stormdata$EVTYPE, sum),
  injuries = tapply(stormdata$INJURIES, stormdata$EVTYPE, sum)
)
# Keep every event type that caused at least one casualty. Filtering on
# injuries alone would discard fatal-only event types before the fatality
# ranking is computed.
has_casualties <- health_impact_by_evtype$injuries != 0 |
  health_impact_by_evtype$fatalities != 0
health_impact_by_evtype_filtered <-
  health_impact_by_evtype[has_casualties, ]
# Order by injuries, breaking ties on fatalities (largest first)
injury_rank <- order(
  health_impact_by_evtype_filtered$injuries,
  health_impact_by_evtype_filtered$fatalities,
  decreasing = TRUE
)
health_impact_by_evtype_filtered_inj_desc <-
  health_impact_by_evtype_filtered[injury_rank, ]
#Add type column and melt by injury or fatality
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.3.3
# Copy the event name (the row name) into a regular column so it survives
# the reshape.
health_impact_by_evtype_filtered$type <-
  row.names(health_impact_by_evtype_filtered)
# Long format: one row per (event type, measure) pair, where the measure
# ("fatalities" or "injuries") lands in `variable` and its total in `count`.
health_impact_by_evtype_filtered_melted <- melt(
  health_impact_by_evtype_filtered,
  id.vars = "type",
  value.name = "count"
)
# For each measure, take the ten event types with the highest count, then
# stack the two tables (fatalities first, injuries second).
health_impact_by_evtype_filtered_melted_top_10 <- do.call(
  rbind,
  lapply(c("fatalities", "injuries"), function(measure) {
    measure_rows <- health_impact_by_evtype_filtered_melted[
      health_impact_by_evtype_filtered_melted$variable == measure,
    ]
    head(measure_rows[order(measure_rows$count, decreasing = TRUE), ], 10)
  })
)
# Show the top 10 injuries and fatalities
health_impact_by_evtype_filtered_melted_top_10
## type variable count
## 129 TORNADO fatalities 5633
## 20 EXCESSIVE HEAT fatalities 1903
## 28 FLASH FLOOD fatalities 978
## 47 HEAT fatalities 937
## 85 LIGHTNING fatalities 816
## 135 TSTM WIND fatalities 504
## 30 FLOOD fatalities 470
## 101 RIP CURRENT fatalities 368
## 63 HIGH WIND fatalities 248
## 1 AVALANCHE fatalities 224
## 287 TORNADO injuries 91346
## 293 TSTM WIND injuries 6957
## 188 FLOOD injuries 6789
## 178 EXCESSIVE HEAT injuries 6525
## 243 LIGHTNING injuries 5230
## 205 HEAT injuries 2100
## 237 ICE STORM injuries 1975
## 186 FLASH FLOOD injuries 1777
## 279 THUNDERSTORM WIND injuries 1488
## 203 HAIL injuries 1361
In the graph below, we can see that tornadoes cause the most fatalities, followed by excessive heat and flash flood events. Tornadoes are also the leading cause of injuries.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
# Bar chart of the top-10 event types, one panel per measure; each panel gets
# its own y scale because the two measures differ in magnitude.
ggplot(
  data = health_impact_by_evtype_filtered_melted_top_10,
  mapping = aes(x = type, y = count)
) +
  geom_bar(stat = "identity") +
  facet_grid(variable ~ ., scales = "free_y") +
  theme_bw() +
  # Rotate the event names so the long labels do not overlap
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Graph of Top 10 Events causing Fatalities and Injuries")
Calculate property damage columns by converting H, K, M, B units
# Convert PROPDMG to an absolute amount using the magnitude code stored in
# PROPDMGEXP: H = 10^2, K = 10^3, M = 10^6, B = 10^9.
# Codes are matched case-insensitively, so "k" is treated like "K" instead of
# being silently counted as zero. Any other code (blank, NA, digits, symbols)
# yields a damage of 0.
prop_unit_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
# as.character() makes this work whether the column was read as a factor or
# as character; match() returns NA for unrecognised codes.
prop_unit_index <- match(
  toupper(as.character(stormdata$PROPDMGEXP)),
  names(prop_unit_multiplier)
)
stormdata$pd <- ifelse(
  is.na(prop_unit_index),
  0,
  stormdata$PROPDMG * prop_unit_multiplier[prop_unit_index]
)
Calculate crop damage columns by converting H, K, M, B units
# Convert CROPDMG to an absolute amount using the magnitude code stored in
# CROPDMGEXP: H = 10^2, K = 10^3, M = 10^6, B = 10^9.
# Codes are matched case-insensitively, so "k" is treated like "K" instead of
# being silently counted as zero. Any other code (blank, NA, digits, symbols)
# yields a damage of 0.
crop_unit_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
# as.character() makes this work whether the column was read as a factor or
# as character; match() returns NA for unrecognised codes.
crop_unit_index <- match(
  toupper(as.character(stormdata$CROPDMGEXP)),
  names(crop_unit_multiplier)
)
stormdata$cd <- ifelse(
  is.na(crop_unit_index),
  0,
  stormdata$CROPDMG * crop_unit_multiplier[crop_unit_index]
)
# Sum the converted property (pd) and crop (cd) damage per event type; the
# event type becomes the row name.
event_dmg <- data.frame(
  Property = tapply(stormdata$pd, stormdata$EVTYPE, sum),
  Crop = tapply(stormdata$cd, stormdata$EVTYPE, sum)
)
# Combined damage, plus the event type as a regular column for plotting
event_dmg$Total <- event_dmg$Property + event_dmg$Crop
event_dmg$Event_Type <- row.names(event_dmg)
# Ten event types with the largest combined damage, largest first
damage_rank <- order(event_dmg$Total, decreasing = TRUE)
event_dmg_top10 <- head(event_dmg[damage_rank, ], 10)
event_dmg_top10
## Property Crop Total Event_Type
## FLOOD 144657709800 5661968450 150319678250 FLOOD
## HURRICANE/TYPHOON 69305840000 2607872800 71913712800 HURRICANE/TYPHOON
## TORNADO 56925660480 414953110 57340613590 TORNADO
## STORM SURGE 43323536000 5000 43323541000 STORM SURGE
## HAIL 15727367220 3025537450 18752904670 HAIL
## FLASH FLOOD 16140811510 1421317100 17562128610 FLASH FLOOD
## DROUGHT 1046106000 13972566000 15018672000 DROUGHT
## HURRICANE 11868319010 2741910000 14610229010 HURRICANE
## RIVER FLOOD 5118945500 5029459000 10148404500 RIVER FLOOD
## ICE STORM 3944927810 5022113500 8967041310 ICE STORM
In the graph below, we can see that floods appear to be the largest contributor to economic damage, followed by hurricane/typhoon and tornado events.
# Bar chart of total (property + crop) damage for the ten costliest event
# types.
ggplot(data = event_dmg_top10, mapping = aes(x = Event_Type, y = Total)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(
    # Rotate the event names so the long labels do not overlap
    axis.text.x = element_text(angle = 90),
    # Centre the title above the panel
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    y = "Damages in US $",
    title = "Graph of Top 10 Events causing Economical Damage"
  )