#Peer Graded Assignment: Course Project 2 ##Population Health and Economic Effects of Weather Events Summary: An analysis of the NOAA Storm Database for the effects of event types on population health and economic outcomes was conducted. The exploration of the data looked at fatalities, injuries, and total damage as a result of each weather event in the database. The analysis looks at total fatalities, injuries, and damage for each event type and also looks at the mean effect of a single event for each event type. Objectives: The basic goal of this analysis is to explore the NOAA Storm Database and answer two questions concerning severe weather events: - Which severe weather events (EVTYPE) are most harmful with respect to population health? - Which severe weather events have the greatest economic consequences? Step-by-step method
# Read the raw storm data.
# read.csv() is used rather than read.table(sep = ",") because its defaults
# fit CSV input: only the double quote delimits strings, "#" is not a comment
# character, and rows with fewer fields are padded instead of raising an error.
dmg <- read.csv("D:/knitR/R/repdata_data_StormData.csv", header = TRUE)
str(dmg)
summary(dmg)

# Convert the begin date to a Date using the month/day/year layout.
dmg$BGN_DATE <- as.Date(dmg$BGN_DATE, format = "%m/%d/%Y")

# Only events that began on or after January 1, 1966 are used in this analysis.
# subset() also drops rows whose date could not be parsed (NA comparison).
dmg <- subset(dmg, BGN_DATE >= as.Date("1966-01-01"))

# Columns kept for the analysis.
neededCols <- c("BGN_DATE",
                "EVTYPE",
                "FATALITIES",
                "INJURIES",
                "PROPDMG",
                "PROPDMGEXP",
                "CROPDMG",
                "CROPDMGEXP")

# Keep only the needed columns and drop the summary rows, i.e. rows whose
# EVTYPE contains "summary" in any letter case. The pattern is a plain regular
# expression; "*Summary*" would be shell-glob syntax, which grepl() does not use.
data <- dmg[!grepl("summary", dmg$EVTYPE, ignore.case = TRUE), neededCols]
Adjustment of Several Event Type Names: According to the Storm Data Documentation (page 6), event type names must be written out in full, because some event types appear under more than one name, such as “TSTM WIND” and “THUNDERSTORM WIND”.
# Map alternative spellings of an event type (names) to the single name used
# in the rest of the analysis (values).
evtypeAliases <- c(
  "TSTM WIND"          = "THUNDERSTORM WIND",
  "THUNDERSTORM WINDS" = "THUNDERSTORM WIND",
  "RIVER FLOOD"        = "FLOOD",
  "HURRICANE/TYPHOON"  = "HURRICANE-TYPHOON",
  "HURRICANE"          = "HURRICANE-TYPHOON"
)

# Work on character data so a replacement name can never be an unknown factor
# level (which would silently become NA).
data$EVTYPE <- as.character(data$EVTYPE)

# %in% never yields NA, and indexing the column itself is simply a no-op when
# no row matches (data[cond, ]$EVTYPE <- value raises an error in that case).
hasAlias <- data$EVTYPE %in% names(evtypeAliases)
data$EVTYPE[hasAlias] <- unname(evtypeAliases[data$EVTYPE[hasAlias]])
# Lookup table translating a damage exponent code (DMGEXP) into the numeric
# multiplier (EXP) that is applied to the recorded damage amount.
#   "", "-", "?", "+", "0"  -> 1
#   "1" .. "8"              -> 10^1 .. 10^8
#   "B", "H", "K", "M"      -> 1e9, 1e2, 1e3, 1e6
exponentLookup <- data.frame(
  DMGEXP = c("", "-", "?", "+", as.character(0:8), "B", "H", "K", "M"),
  EXP = c(rep(1, 5),
          1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
          1e9, 1e2, 1e3, 1e6),
  stringsAsFactors = FALSE
)
# Exponent codes occur in both letter cases; normalise them to upper case so
# they match the codes in exponentLookup.
data$PROPDMGEXP <- toupper(data$PROPDMGEXP)
data$CROPDMGEXP <- toupper(data$CROPDMGEXP)

# Look up the multiplier for every row. match() keeps the rows of `data` in
# their current order; a code that is not in exponentLookup yields NA.
data$PROPEXP <- exponentLookup$EXP[match(data$PROPDMGEXP, exponentLookup$DMGEXP)]
data$CROPEXP <- exponentLookup$EXP[match(data$CROPDMGEXP, exponentLookup$DMGEXP)]

# Scale the recorded amounts by their multipliers. The full column name
# PROPEXP is used; an abbreviated name would only work via `$` partial matching.
data$PropDamageAmount <- data$PROPDMG * data$PROPEXP
data$CropDamageAmount <- data$CROPDMG * data$CROPEXP
# Normalise event type names so that spelling variants compare equal.
#
# x: character vector of event type names (anything toupper() accepts).
#
# Returns a character vector of the same length as x in which every name is
# upper-cased, every backslash, hyphen and slash is replaced by a space, runs
# of whitespace are collapsed to one space, and leading/trailing whitespace is
# removed.
clean <- function(x) {
  x <- toupper(x)                        # use consistent casing
  # gsub() rather than sub(): all separators must go, not only the first one.
  x <- gsub("\\", " ", x, fixed = TRUE)  # eliminate \'s
  x <- gsub("-", " ", x, fixed = TRUE)   # eliminate -'s
  x <- gsub("/", " ", x, fixed = TRUE)   # eliminate /'s
  x <- gsub("\\s+", " ", x)              # compress multiple spaces
  trimws(x)                              # trim and return visibly
}
# Normalise every event type name. clean() is vectorised, so it is called on
# the column directly; going through apply() on a one-column data frame would
# store a one-column character matrix in EVTYPE instead of a plain vector.
data$EVTYPE <- clean(data$EVTYPE)

# Top 5 severe weather events by total fatalities:
# sum FATALITIES per event type, then keep the five largest totals.
fatalityTop5 <- aggregate(list(Freq = data$FATALITIES),
                          list(EventType = data$EVTYPE),
                          sum)
fatalityTop5 <- head(fatalityTop5[order(-fatalityTop5$Freq), ], n = 5)

# Top 5 severe weather events by total injuries, computed the same way.
injuryTop5 <- aggregate(list(Freq = data$INJURIES),
                        list(EventType = data$EVTYPE),
                        sum)
injuryTop5 <- head(injuryTop5[order(-injuryTop5$Freq), ], n = 5)
library(ggplot2)
# Horizontal bar chart of a top-5 table.
#
# top5:  data frame with the columns EventType and Freq.
# title: plot title.
#
# Returns the ggplot object; bars are ordered by Freq.
plotTop5Counts <- function(top5, title) {
  ggplot(data = top5, aes(x = reorder(EventType, Freq), y = Freq)) +
    geom_bar(stat = "identity") +
    labs(title = title, x = "Event Type", y = "Count") +
    coord_flip()
}

plotTop5Counts(
  fatalityTop5,
  "Top 5 Severe Weather Events with the Highest Total Fatalities"
)
plotTop5Counts(
  injuryTop5,
  "Top 5 Severe Weather Events with the Highest Total Injuries"
)
Tornadoes cause the highest number of both fatalities and injuries.
library(reshape)
# Calculate the total cost of property damage and of crop damage per event type.
PropertyDamageTop5 <- aggregate(
  list(PropDamageAmount = data$PropDamageAmount,
       CropDamageAmount = data$CropDamageAmount),
  list(EVTYPE = data$EVTYPE),
  sum)

# Combined damage per event type; used only to rank the event types.
PropertyDamageTop5$TotalDamageAmount <-
  PropertyDamageTop5$PropDamageAmount + PropertyDamageTop5$CropDamageAmount

# Keep the 5 event types with the highest total economic damage.
PropertyDamageTop5 <- head(
  PropertyDamageTop5[order(-PropertyDamageTop5$TotalDamageAmount), ],
  n = 5)

# Reshape to long format with the 3 columns EVTYPE, variable, value.
# The columns are selected by name (not by position) and the id variable is
# stated explicitly, so the result does not depend on column order or on
# melt() guessing which columns are identifiers.
PropertyDamageTop5 <- melt(
  PropertyDamageTop5[, c("EVTYPE", "PropDamageAmount", "CropDamageAmount")],
  id.vars = "EVTYPE")
names(PropertyDamageTop5)[names(PropertyDamageTop5) == "variable"] <- "TypOfDamage"
names(PropertyDamageTop5)[names(PropertyDamageTop5) == "value"] <- "Amount"

# Give the two damage types readable labels for the plot legend.
levels(PropertyDamageTop5$TypOfDamage)[levels(PropertyDamageTop5$TypOfDamage) == "PropDamageAmount"] <- "Property Damage Amount"
levels(PropertyDamageTop5$TypOfDamage)[levels(PropertyDamageTop5$TypOfDamage) == "CropDamageAmount"] <- "Crop Damage Amount"
library('scales')
# Stacked horizontal bars: one bar per event type, split by type of damage,
# greyscale fill, dollar-formatted amount axis, untitled legend at the bottom.
ggplot(
  data = PropertyDamageTop5,
  aes(x = reorder(EVTYPE, Amount), y = Amount, fill = TypOfDamage)
) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_grey(start = 0, end = 0.9) +
  scale_y_continuous(labels = dollar) +
  coord_flip() +
  labs(
    title = "Top 5 Severe Weather Events with the Highest Economic Damage",
    x = "Event Type",
    y = "Amount of Damage in USD"
  ) +
  guides(fill = guide_legend(title = NULL)) +
  theme(legend.position = "bottom")
Floods are the largest contributor to economic loss; their total is double that of hurricanes/typhoons, which rank second. Tornadoes, which are the largest contributor to fatalities and injuries, rank only third for economic damage. For these severe weather event types, the cost of property damage is much higher than the cost of crop damage.