This report analyzes the impact on population health and the economic consequences of major weather events in the United States.
The analysis is based on storm data collected by the U.S. National Oceanic and Atmospheric Administration (NOAA) from 1950 to 2011. The raw data can be downloaded from: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
This section explains how raw data is obtained and prepared for analysis.
# Load required libraries
library(ggplot2)
library(gridExtra)
# Download the source file once; skip the download when a local copy exists
sourceurl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
targetfile <- "repdata-data-StormData.csv.bz2"
if (!file.exists(targetfile)) {
  # Binary mode so the bzip2 archive is not altered by text-mode translation
  download.file(sourceurl, targetfile, mode = "wb")
}
# Read file content unless `stormdata` is already present in this environment
if (!exists("stormdata", inherits = FALSE)) {
  # The connection is passed unopened: read.csv() then opens it and closes it
  # again itself, so it is released even when parsing fails part-way through
  stormdata <- read.csv(
    bzfile(targetfile),
    header = TRUE,
    stringsAsFactors = FALSE
  )
}
# Show data structure
str(stormdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Convert damage amounts and their exponent codes into total values.
#
# Exponent codes are matched case-insensitively:
#   "1".."9"            multiply by 10, 100, ..., 1e9 respectively
#   "H", "K", "M", "B"  multiply by 1e2, 1e3, 1e6 and 1e9
# Any other code (for example "" or "0") leaves the amount unchanged.
#
# amount:   numeric vector of damage amounts
# exponent: character vector of exponent codes, same length as `amount`
# Returns a numeric vector of scaled amounts, same length as `amount`.
damage_value <- function(amount, exponent) {
  multipliers <- c(
    "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4, "5" = 1e5,
    "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9,
    "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
  )
  multiplier <- unname(multipliers[toupper(exponent)])
  # Codes without a table entry come back as NA; keep the raw amount for them
  multiplier[is.na(multiplier)] <- 1
  amount * multiplier
}

# Add new column [PROPDMGVAL] holding the total value for PROPDMG
stormdata$PROPDMGVAL <- damage_value(stormdata$PROPDMG, stormdata$PROPDMGEXP)
# Add new column [CROPDMGVAL] holding the total value for CROPDMG
stormdata$CROPDMGVAL <- damage_value(stormdata$CROPDMG, stormdata$CROPDMGEXP)
This section presents the results of the data analysis.
Top 10 Events resulting in the highest number of FATALITIES
# Determine total FATALITIES for each EVTYPE
summary_fatalities <- aggregate(FATALITIES ~ EVTYPE, data = stormdata, FUN = sum)
# Sort event types from highest to lowest FATALITIES
fatalities_rank <- order(summary_fatalities$FATALITIES, decreasing = TRUE)
summary_fatalities <- summary_fatalities[fatalities_rank, ]
# Rows are sorted at this point, so the row order is used directly as the
# factor level order (no second order() call needed)
summary_fatalities$EVTYPE <- factor(
  summary_fatalities$EVTYPE,
  levels = summary_fatalities$EVTYPE
)
# Keep only the Top 10; head() returns fewer rows instead of NA-padded rows
# when fewer than 10 event types exist
summary_fatalities <- head(summary_fatalities, 10)
# Print results
print(summary_fatalities, row.names = FALSE)
## EVTYPE FATALITIES
## TORNADO 5633
## EXCESSIVE HEAT 1903
## FLASH FLOOD 978
## HEAT 937
## LIGHTNING 816
## TSTM WIND 504
## FLOOD 470
## RIP CURRENT 368
## HIGH WIND 248
## AVALANCHE 224
Top 10 Events resulting in the highest number of INJURIES
# Determine total INJURIES for each EVTYPE
summary_injuries <- aggregate(INJURIES ~ EVTYPE, data = stormdata, FUN = sum)
# Sort event types from highest to lowest INJURIES
injuries_rank <- order(summary_injuries$INJURIES, decreasing = TRUE)
summary_injuries <- summary_injuries[injuries_rank, ]
# Rows are sorted at this point, so the row order is used directly as the
# factor level order (no second order() call needed)
summary_injuries$EVTYPE <- factor(
  summary_injuries$EVTYPE,
  levels = summary_injuries$EVTYPE
)
# Keep only the Top 10; head() returns fewer rows instead of NA-padded rows
# when fewer than 10 event types exist
summary_injuries <- head(summary_injuries, 10)
# Print results
print(summary_injuries, row.names = FALSE)
## EVTYPE INJURIES
## TORNADO 91346
## TSTM WIND 6957
## FLOOD 6789
## EXCESSIVE HEAT 6525
## LIGHTNING 5230
## HEAT 2100
## ICE STORM 1975
## FLASH FLOOD 1777
## THUNDERSTORM WIND 1488
## HAIL 1361
Graphical representation of the Top 10 Events resulting in the highest number of FATALITIES and INJURIES
# Bar chart of total fatalities for the Top 10 event types.
# Each event type has exactly one summary row, so the bar height is taken
# straight from the FATALITIES column (stat = "identity").
plot1 <- ggplot(summary_fatalities, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Number of Fatalities") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Event Type") +
  ggtitle("Fatalities by Event Type")
# Bar chart of total injuries for the Top 10 event types
plot2 <- ggplot(summary_injuries, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Number of Injuries") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Event Type") +
  ggtitle("Injuries by Event Type")
# Show both charts side by side in equally wide columns
grid.arrange(plot1, plot2, ncol = 2, widths = c(4, 4))
Top 10 Events resulting in the highest PROPERTY DAMAGES
# Determine total PROPDMGVAL for each EVTYPE
summary_propdmg <- aggregate(PROPDMGVAL ~ EVTYPE, data = stormdata, FUN = sum)
# Sort event types from highest to lowest PROPDMGVAL
propdmg_rank <- order(summary_propdmg$PROPDMGVAL, decreasing = TRUE)
summary_propdmg <- summary_propdmg[propdmg_rank, ]
# Rows are sorted at this point, so the row order is used directly as the
# factor level order (no second order() call needed)
summary_propdmg$EVTYPE <- factor(
  summary_propdmg$EVTYPE,
  levels = summary_propdmg$EVTYPE
)
# Keep only the Top 10; head() returns fewer rows instead of NA-padded rows
# when fewer than 10 event types exist
summary_propdmg <- head(summary_propdmg, 10)
# Print results
print(summary_propdmg, row.names = FALSE)
## EVTYPE PROPDMGVAL
## FLOOD 144657709807
## HURRICANE/TYPHOON 69305840000
## TORNADO 56947380677
## STORM SURGE 43323536000
## FLASH FLOOD 16822673979
## HAIL 15735267513
## HURRICANE 11868319010
## TROPICAL STORM 7703890550
## WINTER STORM 6688497251
## HIGH WIND 5270046295
Top 10 Events resulting in the highest CROP DAMAGES
# Determine total CROPDMGVAL for each EVTYPE
summary_cropdmg <- aggregate(CROPDMGVAL ~ EVTYPE, data = stormdata, FUN = sum)
# Sort event types from highest to lowest CROPDMGVAL
cropdmg_rank <- order(summary_cropdmg$CROPDMGVAL, decreasing = TRUE)
summary_cropdmg <- summary_cropdmg[cropdmg_rank, ]
# Rows are sorted at this point, so the row order is used directly as the
# factor level order (no second order() call needed)
summary_cropdmg$EVTYPE <- factor(
  summary_cropdmg$EVTYPE,
  levels = summary_cropdmg$EVTYPE
)
# Keep only the Top 10; head() returns fewer rows instead of NA-padded rows
# when fewer than 10 event types exist
summary_cropdmg <- head(summary_cropdmg, 10)
# Print results
print(summary_cropdmg, row.names = FALSE)
## EVTYPE CROPDMGVAL
## DROUGHT 13972566000
## FLOOD 5661968450
## RIVER FLOOD 5029459000
## ICE STORM 5022113500
## HAIL 3025954473
## HURRICANE 2741910000
## HURRICANE/TYPHOON 2607872800
## FLASH FLOOD 1421317100
## EXTREME COLD 1292973000
## FROST/FREEZE 1094086000
Graphical representation of the Top 10 Events resulting in the highest PROPERTY and CROP DAMAGES
# Bar chart of total property damage for the Top 10 event types.
# Each event type has exactly one summary row, so the bar height is taken
# straight from the PROPDMGVAL column (stat = "identity").
plot3 <- ggplot(summary_propdmg, aes(x = EVTYPE, y = PROPDMGVAL)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Economic Damages (USD)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Event Type") +
  ggtitle("Property Damages by Event Type")
# Bar chart of total crop damage for the Top 10 event types
plot4 <- ggplot(summary_cropdmg, aes(x = EVTYPE, y = CROPDMGVAL)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Economic Damages (USD)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Event Type") +
  ggtitle("Crop Damages by Event Type")
# Show both charts side by side in equally wide columns
grid.arrange(plot3, plot4, ncol = 2, widths = c(4, 4))