Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage. Preventing such outcomes to the extent possible is a key concern. This analysis tries to answer the following questions:
Set global knitr parameters for further processing.
# Make knitr echo the source code of chunks by default.
knitr::opts_chunk$set(echo = TRUE)
This section will load the relevant dataset if not already loaded.
# Download the compressed storm data once and keep it as a local file.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
projectDataBz2 <- "./stormData.bz2"
if (!file.exists(projectDataBz2)) {
  # Request a binary transfer explicitly: the archive is bzip2-compressed,
  # and a text-mode transfer would corrupt it on Windows.
  download.file(url, projectDataBz2, mode = "wb")
}
# Skip re-reading the file when projectData already exists in the session.
if (!exists("projectData")) {
  projectData <- read.csv(bzfile(projectDataBz2), header = TRUE)
}
names(projectData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Capture the number of observations and attributes from a single dim() call.
dataDims <- dim(projectData)
rows <- dataDims[1]
observedAttrs <- dataDims[2]
The file contains 902297 observations and 37 observed attributes.
These are the relevant columns to process:
Calculate the top 10 event types by human impact, for fatalities and injuries.
# Sum fatalities per event type, then keep the ten event types with the
# highest totals (largest first).
agrfatalities <- aggregate(FATALITIES ~ EVTYPE, data = projectData, FUN = sum)
fatalityRank <- order(-agrfatalities$FATALITIES)
fatalities <- agrfatalities[fatalityRank, ][1:10, ]
fatalities
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
# Sum injuries per event type, then keep the ten event types with the
# highest totals (largest first).
agrinjuries <- aggregate(INJURIES ~ EVTYPE, data = projectData, FUN = sum)
injuryRank <- order(-agrinjuries$INJURIES)
injuries <- agrinjuries[injuryRank, ][1:10, ]
injuries
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
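PROPDMGEXP and CROPDMGEXP contain magnitude codes (for example H, K, M and B) that must be converted to numeric base-10 exponents. PROPDMG and CROPDMG are then multiplied by 10 raised to the corresponding exponent to get the total impact values to work with.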
# Convert the PROPDMGEXP magnitude codes into base-10 exponents.
# Each name is a regex and each value is the exponent digit that replaces it;
# the substitutions run in the order listed.
propExpCodes <- c(
  "\\-|\\+|\\?" = "0",
  "B|b" = "9",
  "M|m" = "6",
  "K|k" = "3",
  "H|h" = "2"
)
propExp <- as.character(projectData$PROPDMGEXP)
for (codePattern in names(propExpCodes)) {
  propExp <- gsub(codePattern, propExpCodes[[codePattern]], propExp)
}
propExp <- as.numeric(propExp)
# Values that could not be parsed as a number fall back to exponent 0.
propExp[is.na(propExp)] <- 0
projectData$PROPDMGEXP <- propExp

# Actual property damage = reported amount * 10^exponent.
projectData$ActPropDamage <- projectData$PROPDMG * 10^projectData$PROPDMGEXP

# Total per event type, ranked from largest to smallest; the top ten are
# rescaled by 10^9 for reporting.
propDamage <- aggregate(ActPropDamage ~ EVTYPE, data = projectData, FUN = sum)
propDamage_reorder <- propDamage[order(-propDamage$ActPropDamage), ]
PropDamage10 <- propDamage_reorder[1:10, ]
PropDamage10$ActPropDamage <- PropDamage10$ActPropDamage / 10^9
PropDamage10
## EVTYPE ActPropDamage
## 170 FLOOD 144.657710
## 411 HURRICANE/TYPHOON 69.305840
## 834 TORNADO 56.947381
## 670 STORM SURGE 43.323536
## 153 FLASH FLOOD 16.822674
## 244 HAIL 15.735268
## 402 HURRICANE 11.868319
## 848 TROPICAL STORM 7.703891
## 972 WINTER STORM 6.688497
## 359 HIGH WIND 5.270046
# Convert the CROPDMGEXP magnitude codes into base-10 exponents.
# Each name is a regex and each value is the exponent digit that replaces it;
# the substitutions run in the order listed.
cropExpCodes <- c(
  "\\-|\\+|\\?" = "0",
  "B|b" = "9",
  "M|m" = "6",
  "K|k" = "3",
  "H|h" = "2"
)
cropExp <- as.character(projectData$CROPDMGEXP)
for (codePattern in names(cropExpCodes)) {
  cropExp <- gsub(codePattern, cropExpCodes[[codePattern]], cropExp)
}
cropExp <- as.numeric(cropExp)
# Values that could not be parsed as a number fall back to exponent 0.
cropExp[is.na(cropExp)] <- 0
projectData$CROPDMGEXP <- cropExp

# Actual crop damage = reported amount * 10^exponent.
projectData$ActCropDamage <- projectData$CROPDMG * 10^projectData$CROPDMGEXP

# Total per event type, ranked from largest to smallest; the top ten are
# rescaled by 10^9 for reporting.
cropDamage <- aggregate(ActCropDamage ~ EVTYPE, data = projectData, FUN = sum)
cropDamage_reorder <- cropDamage[order(-cropDamage$ActCropDamage), ]
CropDamage10 <- cropDamage_reorder[1:10, ]
CropDamage10$ActCropDamage <- CropDamage10$ActCropDamage / 10^9
CropDamage10
## EVTYPE ActCropDamage
## 95 DROUGHT 13.972566
## 170 FLOOD 5.661968
## 590 RIVER FLOOD 5.029459
## 427 ICE STORM 5.022113
## 244 HAIL 3.025954
## 402 HURRICANE 2.741910
## 411 HURRICANE/TYPHOON 2.607873
## 153 FLASH FLOOD 1.421317
## 140 EXTREME COLD 1.292973
## 212 FROST/FREEZE 1.094086
# Sum property plus crop damage per event type and label the summed
# column "total".
damageSums <- aggregate(
  ActPropDamage + ActCropDamage ~ EVTYPE,
  data = projectData,
  FUN = sum
)
totalDamage <- setNames(damageSums, replace(names(damageSums), 2, "total"))

# Rank from largest to smallest and rescale the top ten by 10^9.
totalRank <- order(-totalDamage$total)
totalDamage_reorder <- totalDamage[totalRank, ]
totalDamage10 <- totalDamage_reorder[1:10, ]
totalDamage10$total <- totalDamage10$total / 10^9
totalDamage10
## EVTYPE total
## 170 FLOOD 150.319678
## 411 HURRICANE/TYPHOON 71.913713
## 834 TORNADO 57.362334
## 670 STORM SURGE 43.323541
## 244 HAIL 18.761222
## 153 FLASH FLOOD 18.243991
## 95 DROUGHT 15.018672
## 402 HURRICANE 14.610229
## 590 RIVER FLOOD 10.148404
## 427 ICE STORM 8.967041
In this section we show the top 10 event types by human impact and by economic damage.
# Attach the plotting packages. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script run on.
library(gridExtra)
library(ggplot2)
Plot the top 10 of human impact caused by event types for fatalities and injuries.
# Bar charts of the top 10 event types by injuries (p1) and fatalities (p2),
# with bars ordered from the highest to the lowest count.
# ggplot() receives only the data and the aesthetic mapping; axis limits are
# left to the ggplot2 defaults.
p1 <- ggplot(injuries, aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
  geom_bar(stat = "identity", fill = "darkred", colour = "black") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  ylab("Number of Injuries") +
  xlab("Kind of event")
p2 <- ggplot(fatalities, aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_bar(stat = "identity", fill = "maroon4", colour = "black") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  ylab("Number of Fatalities") +
  xlab("Kind of event")
# Show both charts side by side under a shared title.
grid.arrange(p1, p2, top = "Human impact caused by Event Types", ncol = 2)
Plot top 10 of economic impact for Properties, Crop and Totals in Billions of USD
# Bar charts of the top 10 event types by property damage (p1), crop damage
# (p2) and their sum (p3), with bars ordered from highest to lowest value.
# ggplot() receives only the data and the aesthetic mapping; axis limits are
# left to the ggplot2 defaults.
p1 <- ggplot(
  PropDamage10,
  aes(x = reorder(EVTYPE, -ActPropDamage), y = ActPropDamage)
) +
  geom_bar(stat = "identity", fill = "darkred", colour = "black") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  ylab("Economic Impact on properties") +
  xlab("Event Type")
p2 <- ggplot(
  CropDamage10,
  aes(x = reorder(EVTYPE, -ActCropDamage), y = ActCropDamage)
) +
  geom_bar(stat = "identity", fill = "chartreuse4", colour = "black") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  ylab("Economic Impact on crop") +
  xlab("Event Type")
p3 <- ggplot(totalDamage10, aes(x = reorder(EVTYPE, -total), y = total)) +
  geom_bar(stat = "identity", fill = "maroon4", colour = "black") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  ylab("Sum of Economic Impact") +
  xlab("Event Type")
# Show the three charts side by side under a shared title.
grid.arrange(
  p1, p2, p3,
  top = "Economic impact caused by Event Types in Billion USD",
  ncol = 3
)
From these plots we can conclude, across the United States: