This report answers the questions of which types of weather phenomena are most harmful to population health and which have the largest economic impact. It utilizes the NOAA Storm Database to visualize such events. During the period from January 1950 to November 2008, tornadoes by far have caused the most injuries and deaths; flood has caused the highest amount of property damages and economic consequences overall while drought has caused the higest amount of crop damages.
Download Storm Data from the NOAA Storm Database and read it into storm data.frame
download.file(url='https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2',
destfile = 'StormData.csv.bz2')
storm<-read.csv(bzfile("StormData.csv.bz2"))
Examine data summary and parts of the data set.
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels ""," Christiansburg",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels ""," CANTON"," TULIA",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","%SD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","\t","\t\t",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
Subset the data for only what we need: types of events, fatalities, injuires, property damages and their magnitudes, and crop damages and their magnitudes.
storm<- subset(storm, select=c('EVTYPE','FATALITIES','INJURIES','PROPDMG','PROPDMGEXP','CROPDMG','CROPDMGEXP'))
Notice that PROPDMGEXP and CROPDMGEXP serve as magnitude variables. Therefore, we must transform these variables to get the proper property and crop damages in USD.
Examine the unique levels of magnitudes.
unique(storm$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
Create PROPEXP for numerical representation of PROPDMGEXP
storm$PROPEXP[storm$PROPDMGEXP == "K"] <- 1000
storm$PROPEXP[storm$PROPDMGEXP == "M"] <- 1e+06
storm$PROPEXP[storm$PROPDMGEXP == ""] <- 1
storm$PROPEXP[storm$PROPDMGEXP == "B"] <- 1e+09
storm$PROPEXP[storm$PROPDMGEXP == "m"] <- 1e+06
storm$PROPEXP[storm$PROPDMGEXP == "0"] <- 1
storm$PROPEXP[storm$PROPDMGEXP == "5"] <- 1e+05
storm$PROPEXP[storm$PROPDMGEXP == "6"] <- 1e+06
storm$PROPEXP[storm$PROPDMGEXP == "4"] <- 10000
storm$PROPEXP[storm$PROPDMGEXP == "2"] <- 100
storm$PROPEXP[storm$PROPDMGEXP == "3"] <- 1000
storm$PROPEXP[storm$PROPDMGEXP == "h"] <- 100
storm$PROPEXP[storm$PROPDMGEXP == "7"] <- 1e+07
storm$PROPEXP[storm$PROPDMGEXP == "H"] <- 100
storm$PROPEXP[storm$PROPDMGEXP == "1"] <- 10
storm$PROPEXP[storm$PROPDMGEXP == "8"] <- 1e+08
# Invalid entries
storm$PROPEXP[storm$PROPDMGEXP == "+"] <- 0
storm$PROPEXP[storm$PROPDMGEXP == "-"] <- 0
storm$PROPEXP[storm$PROPDMGEXP == "?"] <- 0
Create PROP variable by multiplying property damages with their magnitudes
storm$PROP<- storm$PROPDMG*storm$PROPEXP
Examine the unique levels of magnitudes.
unique(storm$CROPDMGEXP)
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
Create CROPEXP for numerical representation of CROPDMGEXP
storm$CROPEXP[storm$CROPDMGEXP == "M"] <- 1e+06
storm$CROPEXP[storm$CROPDMGEXP == "K"] <- 1000
storm$CROPEXP[storm$CROPDMGEXP == "m"] <- 1e+06
storm$CROPEXP[storm$CROPDMGEXP == "B"] <- 1e+09
storm$CROPEXP[storm$CROPDMGEXP == "0"] <- 1
storm$CROPEXP[storm$CROPDMGEXP == "k"] <- 1000
storm$CROPEXP[storm$CROPDMGEXP == "2"] <- 100
storm$CROPEXP[storm$CROPDMGEXP == ""] <- 1
# Invalid Entries
storm$CROPEXP[storm$CROPDMGEXP == "?"] <- 0
Create CROP variable by multiplying crop damages with their magnitudes
storm$CROP<- storm$CROPDMG*storm$CROPEXP
fatalities<-aggregate(FATALITIES~EVTYPE,data=storm,FUN=sum)
injuries<-aggregate(INJURIES~EVTYPE,data=storm,FUN=sum)
prop<-aggregate(PROP~EVTYPE,data=storm,FUN=sum)
crop<-aggregate(CROP~EVTYPE,data=storm,FUN=sum)
#Change unit to billion USD
prop$PROP<-prop$PROP/1e+09
crop$CROP<-crop$CROP/1e+09
#overall economic damages
overall<-merge(prop,crop,by.x='EVTYPE',by.y='EVTYPE')
overall$OVERALL <- overall$PROP+overall$CROP
Get top 10 events with fatalities and injuries
fa10<-fatalities[order(-fatalities$FATALITIES), ][1:10, ]
in10<-injuries[order(-injuries$INJURIES), ][1:10, ]
Plot fatalities and injuries according to event types. Tornadoes top both categories.
require(ggplot2)
## Loading required package: ggplot2
require(gridExtra)
## Loading required package: gridExtra
#For fatalities
g1<-ggplot(fa10, aes(x=reorder(EVTYPE,-FATALITIES),y=FATALITIES,fill=EVTYPE))
g1<-g1+geom_bar(stat='identity')
g1<-g1+scale_y_continuous(limits=c(0,6000),breaks=seq(0, 6000, 500))
g1<-g1+labs(x='Event Types',y='Fatalities',title='10 Most Deadly Storm Events')
g1<-g1+theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position='none')
#For injuries
g2<-ggplot(in10, aes(x=reorder(EVTYPE,-INJURIES),y=INJURIES,fill=EVTYPE))
g2<-g2+geom_bar(stat='identity')
g2<-g2+scale_y_continuous(limits=c(0,95000),breaks=seq(0, 95000, 10000))
g2<-g2+labs(x='Event Types',y='Injuries',title='10 Most Injuring Storm Events')
g2<-g2+theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position='none')
#Plot fatalities and injuries
grid.arrange(g1,g2,ncol=2)
Get top 10 events with property,crop and overall damages.
pr10<-prop[order(-prop$PROP), ][1:10, ]
cr10<-crop[order(-crop$CROP), ][1:10, ]
ov10<-overall[order(-overall$OVERALL), ][1:10, ]
Plot property, crop, and overall damages according to event types. Flood tops property and overall damages whereas drought tops crop damages.
require(ggplot2)
require(gridExtra)
#For property damages
g3<-ggplot(pr10, aes(x=reorder(EVTYPE,-PROP),y=PROP,fill=EVTYPE))
g3<-g3+geom_bar(stat='identity')
g3<-g3+scale_y_continuous(limits=c(0,150),breaks=seq(0, 150, 50))
g3<-g3+labs(x='Event Types',y='Damages (billion USD)',title='10 Worst Property Damagers')
g3<-g3+theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position='none')
#For crop damages
g4<-ggplot(cr10, aes(x=reorder(EVTYPE,-CROP),y=CROP,fill=EVTYPE))
g4<-g4+geom_bar(stat='identity')
g4<-g4+scale_y_continuous(limits=c(0,15),breaks=seq(0, 15, 5))
g4<-g4+labs(x='Event Types',y='Damages (billion USD)',title='10 Worst Crop Damagers')
g4<-g4+theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position='none')
#For overall damages in stacked barplot
require(reshape2)
## Loading required package: reshape2
#Reshape ov10
mov10<-melt(ov10)
## Using EVTYPE as id variables
mov10<-mov10[mov10$variable!='OVERALL',]
#Sort EVTYPE according to overall damage
l <-ov10$EVTYPE
mov10$EVTYPE<-factor(mov10$EVTYPE,levels=l)
g5<-ggplot(mov10, aes(x=EVTYPE,y=value,fill=variable))
g5<-g5+geom_bar(stat='identity')
g5<-g5+scale_y_continuous(limits=c(0,160),breaks=seq(0, 160, 50))
g5<-g5+labs(x='Event Types',y='Damages (billion USD)',title='10 Most Costly Storm Events')
g5<-g5+theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.title=element_blank())
#Plot prop vs crop and overall
grid.arrange(g3,g4,ncol=2)
g5