The Storm Types Assessment Based On The Heaviest Casualties And The Greatest Economic Consequences

SYNOPSIS

This project involves exploring the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.

By analyzing the database, this project identifies the types of major storms and weather events, which have the most population health effects (casualties) and greatest economic consequences in the United States.

DATA PROCESSING

1.Load the Storm data, and remove unused variables

setwd("~/Desktop/Coursera/Reproducible Research/Peer Assessment 2")
Data<-read.csv(bzfile("repdata-data-StormData.csv.bz2"))
StormData<-subset(Data,select=c("EVTYPE","FATALITIES","INJURIES","PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP"))
row.names(StormData)<-NULL

2.Trim the leading and trailing spaces in the EVTYPE variable, and uppercase the EVTYPE variable

StormData$EVTYPE<-sub("(^ +)|( +$)","",StormData$EVTYPE)
StormData$EVTYPE<-toupper(StormData$EVTYPE)

3.Add a new variable Type by summarizing the StormData EVTYPE, so the event types are reduced to 49 including NA type. (NA type event needs more detail information to analyze.)

Keywords<-list('AVALANC', 
               'HURRICANE', 
               'TYPHOON', 
               'WIND', 
               'WND',
               'HAIL', 
               'RAIN', 
               'SHOW',
               'FREEZ',
               'LOW',               
               'COLD', 
               'CHILL', 
               'WINTRY',  
               'WINTER',
               'COOL', 
               'THERMIA', 
               'SNOW', 
               'THUNDERSTORM', 
               'STORM', 
               'FLOOD', 
               'FLD',
               'STREAM', 
               'RIP CURRENT',  
               'SURF',
               'WAVE',
               'SURGE',
               'TORN', 
               'LIG', 
               'FUNNEL',   
               'WALL CLOUD', 
               'WATERSPO', 
               'WAYTER', 
               'WATER SPO',
               'BLIZZARD',
               'TIDE', 
               'HIGH', 
               'WARM', 
               'HEAT', 
               'HOT', 
               'MARINE MISHAP', 
               'MARINE ACCIDENT', 
               'DUST', 
               'MUD', 
               'SLIDE', 
               'FROST', 
               'SEICHE', 
               'EROS', 
               'ASH', 
               'GUSTNADO', 
               'PRECIP', 
               'SLEET', 
               'FOG',  
               'DRY',
               'DRIE',
               'DROUGHT',
               'DOWNBURST', 
               'WET', 
               'DRIZZLE', 
               'GLAZE', 
               'TROPICAL DEPRESSION', 
               'VOLCANIC ERUPTION', 
               'VOG', 
               'SEAS', 
               'RED FLAG CRITERIA', 
               'SMOKE', 
               'LANDSLUMP', 
               'LANDSPOUT',
               'ICE',
               'ICY',
               'FIRE',
               'MICROBURST',
               'TURBULENCE',
               'TSUNAMI',
               'DAM ',
               'DROWN'
)
No<-list(1,2,2,3,3,4,5,5,6,6,
         6,6,6,6,6,6,7,8,8,9,
         9,9,10,10,10,10,11,12,13,13,
         14,14,14,15,16,17,17,17,17,18,
         18,19,20,20,21,22,23,24,25,26,
         27,28,29,29,29,30,31,32,33,34,
         35,36,37,38,39,40,41,42,42,43,
         44,45,46,47,48
)
TypeTable<-data.frame(cbind(Keywords,No))

StormData$Type<-0
for (i in 1:length(TypeTable$No)) {
        location<-grep(TypeTable$Keywords[[i]],StormData$EVTYPE)
        StormData[location,8]<-TypeTable$No[[i]]
}
StormData<-StormData[order(StormData$Type),]

3.Count the Casualties by summing the fatalities and injuries

StormData$CASUALTIES<-StormData$FATALITIES+StormData$INJURIES

4.Create a new dataset CasData by summing the Casualties by each event type

StormData$Type<-as.factor(StormData$Type)
TotCASUALTIES<-tapply(StormData$CASUALTIES,StormData$Type,sum)
EVENTNAMES<-list('NA',
                 'AVALANCHE',
                 'HURRICANE or TYPHOON',
                 'WIND',
                 'HAIL',
                 'RAIN',
                 'FREEZE or COLD or CHILL or WINTRY or WINTER or COOL or HYPOTHERMIA',
                 'SNOW',
                 'THUNDERSTORM or STORM',
                 'FLOOD or STREAM',
                 'RIP CURRENT or SURF',
                 'TORNADO',
                 'LIGHTNING',
                 'FUNNEL CLOUD or WALL CLOUD',
                 'WATERSPOUT',
                 'BLIZZARD',
                 'TIDE',
                 'HIGH or WARM or HEAT or HOT',
                 'MARINE MISHAP or MARINE ACCIDENT',
                 'DUST',
                 'MUD SLIDE',
                 'FROST',
                 'SEICHE',
                 'EROSION',
                 'ASH',
                 'GUSTNADO',
                 'PRECIPITATION',
                 'SLEET',
                 'FOG',
                 'DRY',
                 'DOWNBURST',
                 'WET',
                 'DRIZZLE',
                 'GLAZE',
                 'TROPICAL DEPRESSION',
                 'VOLCANIC ERUPTION',
                 'VOG',
                 'SEAS',
                 'RED FLAG CRITERIA',
                 'SMOKE',
                 'LANDSLUMP',
                 'LANDSPOUT',
                 'ICE or ICY',
                 'FIRE',
                 'MICROBURST',
                 'TURBULENCE',
                 'TSUNAMI',
                 'DAM BREAK or DAM FAILURE',
                 'DROWNING'

)

CasData<-data.frame(cbind(EVENTNAMES,TotCASUALTIES))
colnames(CasData)<-c('EVENTS','CASUALTIES')
row.names(CasData)<-NULL
CasData$EVENTS<-as.character(CasData$EVENTS)
CasData$CASUALTIES<-as.numeric(CasData$CASUALTIES)

5.Sort the CasData by the decreasing order of the Casualties

CasData<-CasData[order(CasData$CASUALTIES,decreasing =T,na.last=T),]
row.names(CasData)<-NULL

6.Subset the CasData into a Top5CasData including the top Five event types

Top5CasData<-CasData[1:5,]
Top5CasData$CASUALTIES<-round(Top5CasData$CASUALTIES/100,1)

7.Trim the leading and trailing spaces in the PROPDMGEXP and CROPDMGEXP

StormData$PROPDMGEXP<-sub("(^ +)|( +$)","",StormData$PROPDMGEXP)
StormData$CROPDMGEXP<-sub("(^ +)|( +$)","",StormData$CROPDMGEXP)

8.Create CMultiplier and PMultiplier to reflect the real economic damage in numerical values

StormData$PMultiplier<-0
StormData$CMultiplier<-0

StormData$PMultiplier[StormData$PROPDMGEXP %in% c("B","b")]<-1000000000
StormData$PMultiplier[StormData$PROPDMGEXP %in% c("M","m")]<-1000000
StormData$PMultiplier[StormData$PROPDMGEXP %in% c("K","K")]<-1000
StormData$PMultiplier[StormData$PROPDMGEXP %in% c("H","h")]<-100
StormData$PMultiplier[StormData$PROPDMGEXP %in% 1]<-10
StormData$PMultiplier[StormData$PROPDMGEXP %in% 2]<-100
StormData$PMultiplier[StormData$PROPDMGEXP %in% 3]<-1000
StormData$PMultiplier[StormData$PROPDMGEXP %in% 4]<-10000
StormData$PMultiplier[StormData$PROPDMGEXP %in% 5]<-100000
StormData$PMultiplier[StormData$PROPDMGEXP %in% 6]<-1000000
StormData$PMultiplier[StormData$PROPDMGEXP %in% 7]<-10000000
StormData$PMultiplier[StormData$PROPDMGEXP %in% 8]<-100000000
StormData$PMultiplier[StormData$PROPDMGEXP %in% c("-","+","?",0)]<-0

StormData$CMultiplier[StormData$CROPDMGEXP %in% c("B","b")]<-1000000000
StormData$CMultiplier[StormData$CROPDMGEXP %in% c("M","m")]<-1000000
StormData$CMultiplier[StormData$CROPDMGEXP %in% c("K","k")]<-1000
StormData$CMultiplier[StormData$CROPDMGEXP %in% 2]<-100
StormData$CMultiplier[StormData$CROPDMGEXP %in% c("-","+","?",0)]<-0

9.Multiply the base DMG by the multiplier, sum the PROPDMG and CROPDMG by each event type, and create a new dataset DMGData including the event types and total economic DMG

StormData$PROPERDMG<-as.numeric(StormData$PROPDMG)*StormData$PMultiplier
TotPROPDMG<-tapply(StormData$PROPERDMG,StormData$Type,sum)
StormData$CROPDMGEXP<-as.numeric(StormData$CROPDMG)*StormData$CMultiplier
TotCROPDMG<-tapply(StormData$PROPERDMG,StormData$Type,sum)
TotDMG<-TotPROPDMG+TotCROPDMG

DMGData<-data.frame(cbind(EVENTNAMES,TotDMG))
colnames(DMGData)<-c('EVENTS','ECODMG')
row.names(DMGData)<-NULL
DMGData$EVENTS<-as.character(DMGData$EVENTS)
DMGData$ECODMG<-as.numeric(DMGData$ECODMG)

10.Sort the DMGData by the decreasing order of total economic damage

DMGData<-DMGData[order(DMGData$ECODMG,decreasing =T,na.last=T),]
row.names(DMGData)<-NULL

11.Subset the DMGData into Top5DMGData including the top Five event types

Top5DMGData<-DMGData[1:5,]
Top5DMGData$ECODMG<-round(Top5DMGData$ECODMG/1000000000,3)

RESULTS

1.The top 5 most harmful with respect to population health types of event across the United States:

Top5CasData
##                        EVENTS CASUALTIES
## 1                     TORNADO      970.2
## 2 HIGH or WARM or HEAT or HOT      144.9
## 3                        WIND       80.7
## 4             FLOOD or STREAM       73.9
## 5                   LIGHTNING       60.5

Plot histogram of the Top5CasData

library(ggplot2)
gg1 <- ggplot(Top5CasData, aes(x=as.character(Top5CasData$EVENTS)))
gg1 <- gg1 + geom_bar(aes(weight=as.numeric(Top5CasData$CASUALTIES)))
gg1 <- gg1 + labs(list(x="Event Types", y="Total Casulties (in hundred)", title="Top 5 Event Types of the Most Casualties"))
gg1 <- gg1 +theme(axis.text.x = element_text(angle = 70, hjust = 1))
gg1

plot of chunk Histogram Top5CasData

2.The top 5 greatest economic consequences types of events across the United States:

Top5DMGData
##                  EVENTS ECODMG
## 1       FLOOD or STREAM 301.24
## 2  HURRICANE or TYPHOON 170.51
## 3               TORNADO 117.10
## 4   RIP CURRENT or SURF  86.65
## 5 THUNDERSTORM or STORM  42.57

Plot histogram of the Top5DMGData

gg2 <- ggplot(Top5DMGData, aes(x=as.character(Top5DMGData$EVENTS)))
gg2 <- gg2 + geom_bar(aes(weight=as.numeric(Top5DMGData$ECODMG)))
gg2 <- gg2 + labs(list(x="Event Types", y="Total Economic Damage (in $ Billion)", title="Top 5 Event Types of the Greatest Economic Consequences (Damages)"))
gg2 <- gg2 +theme(axis.text.x = element_text(angle = 70, hjust = 1))
gg2

plot of chunk Histogram Top5DMGData