This assignment explores and analyzes the US NOAA Storm Database, which tracks major storms and weather events, to address the following two questions:
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
This section covers reading in the data, extracting only the data relevant to the questions, and cleaning the EVTYPE variable.
# Read the compressed storm data; text columns stay character.
storm <- read.csv("repdata-data-StormData.csv.bz2", stringsAsFactors = FALSE)

# Strip the " 0:00:00" time suffix from BGN_DATE, then parse it as m/d/Y.
storm$BGN_DATE <- gsub(" 0:00:00", "", storm$BGN_DATE)
storm$BGN_DATE <- as.Date(storm$BGN_DATE, "%m/%d/%Y")

# Keep only events that began on or after 1996-01-01.
storm_96 <- subset(storm, BGN_DATE >= "1996-01-01")

# Keep only the columns needed for the health and economic questions.
#
# Fix: PROPDMGEXP used to be listed twice here. data.frame() de-duplicated the
# second copy to PROPDMGEXP.1, so crop damage was scaled with the *property*
# exponent. The crop exponent now comes from CROPDMGEXP; the legacy column name
# PROPDMGEXP.1 is kept because the crop-damage code reads it under that name.
storm_relevant <- with(
  storm_96,
  data.frame(
    BGN_DATE, EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, PROPDMGEXP.1 = CROPDMGEXP,
    stringsAsFactors = FALSE
  )
)
# Keep only events that harmed population health (FATALITIES or INJURIES > 0)
# or had economic consequences (PROPDMG or CROPDMG > 0).
# which() drops rows where the condition is NA, as subset() does.
has_casualties <- storm_relevant$FATALITIES > 0 | storm_relevant$INJURIES > 0
has_damage <- storm_relevant$PROPDMG > 0 | storm_relevant$CROPDMG > 0
storm_harmful <- storm_relevant[which(has_casualties | has_damage), ]
# Add a variable PROP to storm_harmful: the property damage amount, computed
# as PROPDMG scaled by the PROPDMGEXP code (K = 1e3, M = 1e6, B = 1e9).
# Any other code, including a blank or missing value, means a multiplier of 1.
#
# Vectorised lookup instead of a row-by-row loop: it is safe on zero rows,
# does not error on an NA exponent, and avoids re-copying the data frame on
# every iteration.
prop_multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
prop_scale <- unname(
  prop_multipliers[match(storm_harmful$PROPDMGEXP, names(prop_multipliers))]
)
prop_scale[is.na(prop_scale)] <- 1
storm_harmful$PROP <- storm_harmful$PROPDMG * prop_scale
# Add a variable CROP to storm_harmful: the crop damage amount, computed as
# CROPDMG scaled by the exponent code stored in the PROPDMGEXP.1 column
# (K = 1e3, M = 1e6, B = 1e9). Any other code, including a blank or missing
# value, means a multiplier of 1.
# NOTE(review): confirm that PROPDMGEXP.1 really holds the crop exponent
# (CROPDMGEXP) and is not just a duplicate of PROPDMGEXP.
#
# Vectorised lookup instead of a row-by-row loop: it is safe on zero rows,
# does not error on an NA exponent, and avoids re-copying the data frame on
# every iteration.
crop_multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
crop_scale <- unname(
  crop_multipliers[match(storm_harmful$PROPDMGEXP.1, names(crop_multipliers))]
)
crop_scale[is.na(crop_scale)] <- 1
storm_harmful$CROP <- storm_harmful$CROPDMG * crop_scale
# Clean EVTYPE. Checking with length() and unique() finds 222 event types in
# the EVTYPE variable, far more than the 48 event types listed in the Storm
# Data Documentation, which suggests that typos and other errors are causing
# redundancy.
length(unique(storm_harmful[["EVTYPE"]]))
## [1] 222
Compare the 222 event types in storm_harmful with the 2.1.1 Storm Data Event Table in the Storm Data Documentation, and reduce the number of event types step by step. Only the code for this cleaning process is shown here; for more detail on the logic behind the cleaning, please check out this page.
# Normalise EVTYPE: upper-case and trim it, then collapse variant spellings
# into a smaller set of labels with an ordered list of pattern rules.
storm_harmful$EVTYPE <- toupper(storm_harmful$EVTYPE)
library(stringr)
storm_harmful$EVTYPE <- str_trim(storm_harmful$EVTYPE)

# Relabel every element of `evtype` that `matcher(pattern, evtype)` selects.
#   evtype  - character vector of event types
#   pattern - pattern handed to `matcher`
#   label   - replacement event type
#   matcher - grep (regular expression, default) or agrep (approximate match)
# Returns the updated vector. Assigning by index on the vector is a no-op when
# nothing matches, whereas `df[grep(...), ]$EVTYPE = label` stops with
# "replacement has 1 row, data has 0" in that case.
relabel_evtype <- function(evtype, pattern, label, matcher = grep) {
  hits <- matcher(pattern, evtype)
  evtype[hits] <- label
  evtype
}

# The rules run strictly in this order: later patterns rely on what earlier
# ones have already relabelled (e.g. "GUSTY WIND/HAIL" before "GUSTY WIND").
evtype <- storm_harmful$EVTYPE
evtype <- relabel_evtype(evtype, "COASTAL", "COASTAL FLOOD")
evtype <- relabel_evtype(evtype, "^COLD|HYPOTHERMIA", "COLD/WIND CHILL")
evtype <- relabel_evtype(evtype, " COLD|WINDCHILL", "EXTREME COLD/WIND CHILL")
evtype <- relabel_evtype(evtype, "SLIDE|SLUMP", "DEBRIS FLOW")
evtype <- relabel_evtype(evtype, "BLOWING DUST", "DUST STORM")
evtype <- relabel_evtype(evtype, "HYPERTHERMIA|WARM", "HEAT")
evtype <- relabel_evtype(evtype, "HEAT WAVE|RECORD HEAT", "EXCESSIVE HEAT")
evtype <- relabel_evtype(evtype, "FLASH|DAM BREAK|HIGH WATER", "FLASH FLOOD")
evtype <- relabel_evtype(evtype, "RIVER", "FLOOD")
evtype <- relabel_evtype(evtype, "CSTL|TIDAL FLOODING", "COASTAL FLOOD")
evtype <- relabel_evtype(evtype, "ICE JAM", "FLASH FLOOD")
evtype <- relabel_evtype(evtype, "FROST|FREEZE", "FROST/FREEZE")
evtype <- relabel_evtype(evtype, "SMALL HAIL", "HAIL")
evtype <- relabel_evtype(evtype, "GUSTY WIND/HAIL", "HAIL")
evtype <- relabel_evtype(
  evtype, "GUSTY WIND/HVY RAIN|GUSTY WIND/RAIN", "HEAVY RAIN"
)
evtype <- relabel_evtype(evtype, "GUSTY WIND", "STRONG WIND")
evtype <- relabel_evtype(
  evtype, "FREEZING RAIN|RAIN/SNOW|FREEZING DRIZZLE", "WINTER WEATHER"
)
evtype <- relabel_evtype(evtype, "RAIN", "HEAVY RAIN")
evtype <- relabel_evtype(
  evtype, "EXCESSIVE SNOW|LATE SEASON SNOW|HEAVY SNOW SHOWER", "HEAVY SNOW"
)
# Approximate match, so spelling variants of "LAKE-EFFECT" are caught too.
evtype <- relabel_evtype(
  evtype, "LAKE-EFFECT", "LAKE-EFFECT SNOW", matcher = agrep
)
evtype <- relabel_evtype(
  evtype,
  "BLOWING SNOW|LIGHT SNOW|SNOW SQUALL|FALLING SNOW/ICE|SNOW AND ICE",
  "WINTER WEATHER"
)
# Exact comparison: only a bare "SNOW" entry becomes "HEAVY SNOW".
evtype[which(evtype == "SNOW")] <- "HEAVY SNOW"
evtype <- relabel_evtype(evtype, "SURF", "HIGH SURF")
evtype <- relabel_evtype(evtype, "^HIGH WIND", "HIGH WIND")
evtype <- relabel_evtype(evtype, "HURRICANE|TYPHOON", "HURRICANE(TYPHOON)")
evtype <- relabel_evtype(evtype, "RIP|GRADIENT", "RIP CURRENT")
evtype <- relabel_evtype(evtype, "STORM SURGE", "STORM SURGE/TIDE")
evtype <- relabel_evtype(evtype, "ASTRONOMICAL HIGH", "COASTAL FLOOD")
evtype <- relabel_evtype(evtype, "^STRONG WIND", "STRONG WIND")
evtype <- relabel_evtype(
  evtype, "^THUNDERSTORM|^TSTM|BURST", "THUNDERSTORM WIND"
)
evtype <- relabel_evtype(evtype, "MARINE TSTM", "MARINE THUNDERSTORM WIND")
# Approximate match, so spelling variants of "NON-TSTM WIND" are caught too.
evtype <- relabel_evtype(evtype, "NON-TSTM WIND", "OTHER", matcher = agrep)
evtype <- relabel_evtype(evtype, "TORNADO|LANDSPOUT", "TORNADO")
evtype <- relabel_evtype(evtype, "FIRE", "WILDFIRE")
evtype <- relabel_evtype(
  evtype,
  "WINTER WEATHER|BLACK ICE|GLAZE|ROAD|MIXED PRECIP|WINTRY|FREEZING SPRAY",
  "WINTER WEATHER"
)
evtype <- relabel_evtype(evtype, "^WIND|WHIRLWIND|WIND DAMAGE", "OTHER")
evtype <- relabel_evtype(evtype, "URBAN", "HEAVY RAIN")
evtype <- relabel_evtype(
  evtype, "DROWNING|MARINE ACCIDENT|BEACH EROSION", "OTHER"
)
evtype <- relabel_evtype(evtype, "SWELLS|ROGUE|SEAS", "HIGH SURF")
storm_harmful$EVTYPE <- evtype
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? Top 10 event types causing the most fatalities.
library(plyr)

# Total fatalities per event type, ordered from most to fewest.
fatalities_by_type <- aggregate(
  FATALITIES ~ EVTYPE,
  data = storm_harmful,
  FUN = sum
)
fatalities <- arrange(fatalities_by_type, FATALITIES, decreasing = TRUE)

# Show the ten deadliest event types.
head(fatalities, 10)
## EVTYPE FATALITIES
## 1 EXCESSIVE HEAT 1799
## 2 TORNADO 1511
## 3 FLASH FLOOD 890
## 4 LIGHTNING 651
## 5 RIP CURRENT 542
## 6 FLOOD 416
## 7 THUNDERSTORM WIND 382
## 8 EXTREME COLD/WIND CHILL 258
## 9 HEAT 238
## 10 HIGH WIND 235
When considering only fatalities, excessive heat is the most harmful event type, followed by tornado and flash flood.
Top 10 event types causing most injuries.
# Total injuries per event type, ordered from most to fewest.
injuries_by_type <- aggregate(
  INJURIES ~ EVTYPE,
  data = storm_harmful,
  FUN = sum
)
injuries <- arrange(injuries_by_type, INJURIES, decreasing = TRUE)

# Show the ten event types with the most injuries.
head(injuries, 10)
## EVTYPE INJURIES
## 1 TORNADO 20667
## 2 FLOOD 6759
## 3 EXCESSIVE HEAT 6461
## 4 THUNDERSTORM WIND 5154
## 5 LIGHTNING 4141
## 6 FLASH FLOOD 1674
## 7 WILDFIRE 1458
## 8 HURRICANE(TYPHOON) 1328
## 9 WINTER STORM 1292
## 10 HEAT 1241
When considering only injuries, the most harmful event type is tornado, followed by flood and excessive heat. Plot the top 10 events causing fatalities and the top 10 causing injuries in one stacked bar plot.
# Give both rankings the same column names so they can be stacked.
colnames(fatalities) <- c("EVTYPE", "NUMBER")
colnames(injuries) <- c("EVTYPE", "NUMBER")

# Take the top ten of each ranking and tag the rows with their source in a
# `factor` column. The tag length follows the actual row count rather than a
# hard-coded 10, so this still works when fewer than ten event types exist.
top_fatalities <- head(fatalities, 10)
top_fatalities$factor <- rep("FATALITIES", nrow(top_fatalities))
top_injuries <- head(injuries, 10)
top_injuries$factor <- rep("INJURIES", nrow(top_injuries))
top10_harmful <- rbind(top_fatalities, top_injuries)

library(ggplot2)

# Stacked bars: one bar per event type, split into fatalities and injuries.
ggplot(top10_harmful, aes(x = EVTYPE, y = NUMBER, fill = factor)) +
  geom_bar(position = "stack", stat = "identity") +
  labs(
    title = "Event Types Most Harmful to Public Health",
    fill = "",
    x = "Event Type",
    y = "Number of Fatalities/Injuries"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
From this plot, we can see that when considering both the numbers of fatalities and injuries, tornado is the most harmful event type, followed by excessive heat and flood.
Top 10 event types having greatest economic consequences.
# Combined property and crop damage per event, expressed in billions of USD.
storm_harmful$DAMAGE_IN_BILLION_USD <-
  (storm_harmful$PROP + storm_harmful$CROP) / 1000000000

# Total damage per event type.
damage <- aggregate(
  DAMAGE_IN_BILLION_USD ~ EVTYPE,
  data = storm_harmful,
  FUN = sum
)

# Rank from largest to smallest and keep the ten costliest event types.
damage_ranked <- arrange(damage, DAMAGE_IN_BILLION_USD, decreasing = TRUE)
top10_damage <- head(damage_ranked, 10)
top10_damage
## EVTYPE DAMAGE_IN_BILLION_USD
## 1 HURRICANE(TYPHOON) 1617.42
## 2 FLOOD 227.49
## 3 TORNADO 49.92
## 4 FLASH FLOOD 48.67
## 5 STORM SURGE/TIDE 47.84
## 6 HAIL 27.56
## 7 THUNDERSTORM WIND 21.00
## 8 WILDFIRE 15.03
## 9 HIGH WIND 12.42
## 10 TROPICAL STORM 9.46
Plot top 10 event types having greatest economic consequences.
# Bar chart of the ten costliest event types; x labels are tilted 30 degrees.
ggplot(top10_damage, aes(EVTYPE, DAMAGE_IN_BILLION_USD)) +
  geom_bar(stat = "identity", width = 0.75, fill = "#009E73") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(
    title = "Event Types with Greatest Economic Consequences",
    x = "Event Type",
    y = "Damage (in Billion USD)"
  )
From the table and plot above, we can see that hurricane(typhoon) is the event type having greatest economic consequences, followed by flood and tornado. Also, hurricane(typhoon) outnumbers flood in economic consequences by about 7 to 1.