================================================================================
This report analyzes which event types in the NOAA Storm Data cause the most human fatalities and injuries across the United States. It also determines which event types cause the greatest economic losses, again summed across all regions of the U.S. Tables and graphs are provided in this report to communicate the findings. The results show that the top three event types that caused the most fatalities were, in order: tornadoes, excessive heat and flash flooding. The top three event types that caused the most injuries were, in order: tornadoes, thunderstorm (TSTM) wind and flooding. Therefore, it can be concluded that tornadoes caused the most harm to population health. The top three event types that had the greatest economic consequences (considering both property and crop damage) were, in order: flooding, hurricanes/typhoons and tornadoes. Therefore, flooding caused the greatest economic consequences.
Download the NOAA Storm Data from the internet and inspect the structure of the resulting data frame:
# Fetch the bz2-compressed Storm Data CSV into a temporary file, load it into
# `stormData`, and delete the temporary file once it has been read.
webURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
temp <- tempfile()
download.file(url = webURL, destfile = temp)
stormData <- read.csv(file = temp)
unlink(temp)

# Show each column's name, type and first few values
str(stormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
Create a data frame of the number of total U.S. (summed across all regions) fatalities and injuries according to unique event types:
Please note: given more information about how events were recorded, many event-type categories could be merged (e.g., THUNDERSTORM WINDS G and THUNDERSTORM WIND (G40)). For this project, the event types are kept separate.
# Distinct event types, in order of first appearance in the data
eventTypes <- unique(stormData$EVTYPE)

# Per-event-type totals; tapply() labels each total with its event type
totalFatalities <- tapply(stormData$FATALITIES, stormData$EVTYPE, sum)
totalInjuries <- tapply(stormData$INJURIES, stormData$EVTYPE, sum)

# Look each event type up in the totals so the values line up with eventTypes,
# then drop the labels to leave plain vectors.
totalFatalities_ord <- as.vector(
  totalFatalities[match(eventTypes, names(totalFatalities))]
)
totalInjuries_ord <- as.vector(
  totalInjuries[match(eventTypes, names(totalInjuries))]
)

# One row per event type with its fatality and injury totals
USPopHarm <- data.frame(
  Event_Type = eventTypes,
  Fatalities_Total = totalFatalities_ord,
  Injuries_Total = totalInjuries_ord
)
List the possible exponent characters for property damage and crop damage, so that a map can be created between each exponent character and its numeric value:
# Distinct exponent codes that occur in the property- and crop-damage columns
exp_PropType <- unique(stormData[["PROPDMGEXP"]])
exp_CropType <- unique(stormData[["CROPDMGEXP"]])
The exponent values for property damage are: K, M, (blank), B, m, +, 0, 5, 6, ?, 4, 2, 3, h, 7, H, -, 1, 8. The exponent values for crop damage are: (blank), M, K, m, B, ?, 0, k, 2.
Create new columns in stormData to reflect total numeric property and crop damage monetary amounts using a map of exponents, and sum property damage and crop damage values together to obtain the total economic impact:
# Multiplier for each damage-exponent code. Letters are magnitude suffixes in
# either case (H = hundreds, K = thousands, M = millions, B = billions); the
# digits 1-8 are read as powers of ten; "-", "+", "?" and "0" zero the amount.
# "H"/"h" occur in PROPDMGEXP and were previously absent from the map, so they
# silently received the fallback multiplier of 1 instead of 100.
# NOTE(review): H/h = 100 follows the common reading of this dataset - confirm
# against the Storm Data documentation.
exp_map <- c(
  "B" = 10^9, "b" = 10^9,
  "M" = 10^6, "m" = 10^6,
  "K" = 10^3, "k" = 10^3,
  "H" = 10^2, "h" = 10^2,
  "-" = 0, "+" = 0, "0" = 0, " " = 1,
  "1" = 10, "2" = 100, "3" = 10^3, "4" = 10^4,
  "5" = 10^5, "6" = 10^6, "7" = 10^7, "8" = 10^8,
  "?" = 0
)

# Look up the multiplier for every row. unname() stops the lookup from attaching
# one element name per row to the new columns.
stormData$PropMult <- unname(exp_map[as.character(stormData$PROPDMGEXP)])
stormData$CropMult <- unname(exp_map[as.character(stormData$CROPDMGEXP)])

# Codes with no entry in the map (the empty string cannot be matched by name)
# come back as NA; treat them as "no scaling".
stormData$PropMult[is.na(stormData$PropMult)] <- 1
stormData$CropMult[is.na(stormData$CropMult)] <- 1

# Dollar amounts per record, and their sum as the total economic damage
stormData$PropVal <- stormData$PROPDMG * stormData$PropMult
stormData$CropVal <- stormData$CROPDMG * stormData$CropMult
stormData$TotalDmg <- stormData$PropVal + stormData$CropVal
Create a data frame of the monetary value of total U.S. (summed across all regions) property and crop damage losses according to unique event types:
# Total economic damage per event type; tapply() labels each total with its
# event type.
totalDmg <- tapply(stormData$TotalDmg, stormData$EVTYPE, sum)

# Look each event type up in the totals so the values line up with eventTypes,
# then drop the labels to leave a plain vector.
totalDmg_ord <- as.vector(totalDmg[match(eventTypes, names(totalDmg))])

USTotalDmg <- data.frame(
  Event_Type = eventTypes,
  Damage_Total = totalDmg_ord
)
Select the top 10 event types with the most fatalities and create a table to display both the number of fatalities and the number of injuries, ordered by fatalities. A category “OTHER” is included to account for the sum of the remaining events outside of the top 10:
Please note: Fatalities and injuries are kept separate from each other, as the impacts on population health between fatalities and injuries are very different.
# Rank event types by total fatalities, highest first
fatalities_ordered <- USPopHarm[order(-USPopHarm$Fatalities_Total), ]

# head()/tail() instead of 1:10 and 11:nrow(): with 10 or fewer event types the
# index forms would pad the table with NA rows and count rows 10-11 backwards.
fatalities_ordered_trunc <- head(fatalities_ordered, 10)
other_fatalities <- sum(
  tail(fatalities_ordered$Fatalities_Total, -10),
  na.rm = TRUE
)
other_injuries <- sum(
  tail(fatalities_ordered$Injuries_Total, -10),
  na.rm = TRUE
)

# Collapse everything outside the top 10 into a single "OTHER" row
other_row_fatalities <- data.frame(
  Event_Type = "OTHER",
  Fatalities_Total = other_fatalities,
  Injuries_Total = other_injuries
)
USPopHarm_fatalities <- rbind(fatalities_ordered_trunc, other_row_fatalities)

# Renumber rows 1..n so the printed index matches the rank
rownames(USPopHarm_fatalities) <- NULL
USPopHarm_fatalities
## Event_Type Fatalities_Total Injuries_Total
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
## 11 OTHER 3064 18265
Select the top 10 event types with the most injuries and create a table to display both the number of fatalities and the number of injuries, ordered by injuries. A category “OTHER” is included to account for the sum of the remaining events outside of the top 10:
Please note: Fatalities and injuries are kept separate from each other, as the impacts on population health between fatalities and injuries are very different.
# Rank event types by total injuries, highest first
injuries_ordered <- USPopHarm[order(-USPopHarm$Injuries_Total), ]

# head()/tail() instead of 1:10 and 11:nrow(): with 10 or fewer event types the
# index forms would pad the table with NA rows and count rows 10-11 backwards.
injuries_ordered_trunc <- head(injuries_ordered, 10)
other_fatalities_inj <- sum(
  tail(injuries_ordered$Fatalities_Total, -10),
  na.rm = TRUE
)
other_injuries_inj <- sum(
  tail(injuries_ordered$Injuries_Total, -10),
  na.rm = TRUE
)

# Collapse everything outside the top 10 into a single "OTHER" row
other_row_fatalities_inj <- data.frame(
  Event_Type = "OTHER",
  Fatalities_Total = other_fatalities_inj,
  Injuries_Total = other_injuries_inj
)
USPopHarm_injuries <- rbind(injuries_ordered_trunc, other_row_fatalities_inj)

# Renumber rows 1..n so the printed index matches the rank
rownames(USPopHarm_injuries) <- NULL
USPopHarm_injuries
## Event_Type Fatalities_Total Injuries_Total
## 1 TORNADO 5633 91346
## 2 TSTM WIND 504 6957
## 3 FLOOD 470 6789
## 4 EXCESSIVE HEAT 1903 6525
## 5 LIGHTNING 816 5230
## 6 HEAT 937 2100
## 7 ICE STORM 89 1975
## 8 FLASH FLOOD 978 1777
## 9 THUNDERSTORM WIND 133 1488
## 10 HAIL 15 1361
## 11 OTHER 3667 14980
Plot of the Top 10 event types that impact population health, with one plot ranked by fatalities and another plot ranked by injuries:
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.3
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.3
# Reshape both top-10 tables to long form: one row per event type and measure
USPopHarm_fatalities_reshaped <- melt(USPopHarm_fatalities, id.vars = "Event_Type")
USPopHarm_injuries_reshaped <- melt(USPopHarm_injuries, id.vars = "Event_Type")

# Build a dodged bar chart with the fatality and injury bars side by side for
# each event type, each bar labelled with its count.
#   long_data    - melted table with Event_Type, variable and value columns
#   title_text   - plot title
#   caption_text - caption printed below the plot
harm_bar_chart <- function(long_data, title_text, caption_text) {
  ggplot(long_data, aes(x = Event_Type, y = value, fill = factor(variable))) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_fill_discrete(name = "Fatality/Injury", labels = c("Fatality", "Injury")) +
    xlab("Event Type") +
    ylab("Number of fatalities/injuries") +
    theme(
      text = element_text(size = 20),
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
      plot.caption = element_text(hjust = 0, face = "italic")
    ) +
    geom_text(
      aes(label = value),
      size = 5,
      position = position_dodge(width = 0.9),
      vjust = -0.25
    ) +
    ggtitle(title_text) +
    labs(caption = caption_text)
}

# Use the table's row order as the factor levels so the bars keep that order
# (ranked top 10, then OTHER).
USPopHarm_fatalities_reshaped$Event_Type <- factor(
  USPopHarm_fatalities_reshaped$Event_Type,
  levels = USPopHarm_fatalities$Event_Type
)
g1 <- harm_bar_chart(
  USPopHarm_fatalities_reshaped,
  "Top 10 Event Types Ranked by Fatalities",
  "Graph showing the top 10 event types that caused the most fatalities across the United States. Injuries for \n each event type are also shown."
)

USPopHarm_injuries_reshaped$Event_Type <- factor(
  USPopHarm_injuries_reshaped$Event_Type,
  levels = USPopHarm_injuries$Event_Type
)
g2 <- harm_bar_chart(
  USPopHarm_injuries_reshaped,
  "Top 10 Event Types Ranked by Injuries",
  "Graph showing the top 10 event types that caused the most injuries across the United States. Fatalities for \n each event type are also shown."
)

# Stack the two charts in a single column
grid.arrange(g1, g2, ncol = 1)
The event types that cause the most fatalities are tornadoes, excessive heat, and flash flooding. The event types that cause the most injuries are tornadoes, TSTM wind, and flooding. Considering the numbers of both fatalities and injuries, tornadoes cause the most harm to public health.
Select the top 10 event types with the highest total economic damage, where total damage refers to the sum of property and crop damage. A category “OTHER” is included to account for the sum of the economic damage caused by remaining events outside of the top 10:
# Rank event types by total economic damage, highest first
dmg_ordered <- USTotalDmg[order(-USTotalDmg$Damage_Total), ]

# head()/tail() instead of 1:10 and 11:nrow(): with 10 or fewer event types the
# index forms would pad the table with NA rows and count rows 10-11 backwards.
dmg_ordered_trunc <- head(dmg_ordered, 10)
other_dmg <- sum(tail(dmg_ordered$Damage_Total, -10), na.rm = TRUE)

# Collapse everything outside the top 10 into a single "OTHER" row.
# Note: this overwrites the full per-event table held in USTotalDmg with the
# 11-row summary.
other_row_dmg <- data.frame(Event_Type = "OTHER", Damage_Total = other_dmg)
USTotalDmg <- rbind(dmg_ordered_trunc, other_row_dmg)
rownames(USTotalDmg) <- NULL

# Display copy with the totals formatted as text; USTotalDmg keeps the numeric
# values.
USTotalDmgSci <- USTotalDmg
USTotalDmgSci$Damage_Total <- format(
  USTotalDmgSci$Damage_Total,
  digits = 3,
  scientific = 2
)
USTotalDmgSci
## Event_Type Damage_Total
## 1 FLOOD 1.50e+11
## 2 HURRICANE/TYPHOON 7.19e+10
## 3 TORNADO 5.74e+10
## 4 STORM SURGE 4.33e+10
## 5 HAIL 1.88e+10
## 6 FLASH FLOOD 1.82e+10
## 7 DROUGHT 1.50e+10
## 8 HURRICANE 1.46e+10
## 9 RIVER FLOOD 1.01e+10
## 10 ICE STORM 8.97e+09
## 11 OTHER 6.87e+10
Plot of the Top 10 event types that caused the most total economic damage:
# Long form of the top-10 damage table: one row per event type
USTotalDmg_reshaped <- melt(USTotalDmg, id.vars = "Event_Type")

# Use the table's row order as the factor levels so the bars keep that order
# (ranked top 10, then OTHER).
USTotalDmg_reshaped$Event_Type <- factor(
  USTotalDmg_reshaped$Event_Type,
  levels = USTotalDmg$Event_Type
)

# Bar chart of total damage per event type, with scientific-notation axis and
# bar labels and no legend.
ggplot(
  USTotalDmg_reshaped,
  aes(x = Event_Type, y = value, fill = factor(variable))
) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Event Type") +
  ylab("Total Dollar Amount of Economic Damage ($)") +
  theme(
    text = element_text(size = 20),
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    plot.caption = element_text(hjust = 0, face = "italic")
  ) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  geom_text(
    aes(label = format(value, digits = 3, scientific = TRUE)),
    size = 5,
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  ggtitle("Top 10 Event Types Ranked by Total Economic Damage, including \n both Property and Crop Damage") +
  labs(caption = "Graph showing the top 10 event types that caused the most economic damage across the United States. Economic damage is \n calculated as the sum of property and crop loss.")
Floods have the highest economic impacts, followed by hurricanes/typhoons and tornadoes.