Synopsis

This report analyzes the population health impact and the economic consequences of major weather events in the United States.

The analysis is based on the storm data collected by the U.S. National Oceanic and Atmospheric Administration (NOAA) from 1950 to 2011. The raw data can be downloaded from: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

Data Processing

This section explains how raw data is obtained and prepared for analysis.

Obtain source data

# Load required libraries
library(ggplot2)
library(gridExtra)

# Download the compressed source file once; later runs reuse the local copy
sourceurl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
targetfile <- "repdata-data-StormData.csv.bz2"

if (!file.exists(targetfile)) {
  # The archive is binary, so request a binary transfer explicitly instead of
  # relying on download.file() choosing the mode from the file extension.
  download.file(sourceurl, targetfile, mode = "wb")
}

# Read file content (skipped when `stormdata` already exists in the workspace,
# so re-running the script does not parse the archive again)
if (!("stormdata" %in% ls())) {
  # Hand read.csv() an unopened connection: it opens the connection itself and
  # closes it when it returns, so the connection is not leaked if parsing fails.
  stormdata <- read.csv(bzfile(targetfile), header = TRUE,
                        stringsAsFactors = FALSE)
}

Examine the data

# Display each column's name, type and leading values
str(object = stormdata)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Prepare data for analysis

# Convert a vector of damage-exponent codes into numeric multipliers.
#
# A digit "1" to "9" is read as a power of ten, and the letters H, K, M and B
# (matched case-insensitively) as hundred, thousand, million and billion.
# Any code that is not listed (for example the empty string or "0") maps to 1,
# which leaves the damage figure unscaled.
#
# exp_code: character vector of exponent codes.
# Returns:  numeric vector of multipliers, the same length as exp_code.
damage_multiplier <- function(exp_code) {
  multipliers <- c(
    "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4, "5" = 1e5,
    "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9,
    "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
  )
  # toupper() leaves digits untouched, so a single upper-cased lookup covers
  # both the digit codes and the mixed-case letter codes.
  idx <- match(toupper(exp_code), names(multipliers))
  result <- unname(multipliers[idx])
  result[is.na(idx)] <- 1
  result
}

# Add new column [PROPDMGVAL] to calculate total value for PROPDMG
stormdata$PROPDMGVAL <- stormdata$PROPDMG *
  damage_multiplier(stormdata$PROPDMGEXP)

# Add new column [CROPDMGVAL] to calculate total value for CROPDMG
stormdata$CROPDMGVAL <- stormdata$CROPDMG *
  damage_multiplier(stormdata$CROPDMGEXP)

Results

This section presents the results of the data analysis.

1. The following shows which types of events are most harmful with respect to population health:

Top 10 Events resulting in the highest number of FATALITIES

# Determine total FATALITIES for each EVTYPE
summary_fatalities <- aggregate(FATALITIES ~ EVTYPE, data = stormdata, FUN = sum)

# Keep the 10 event types with the most fatalities, largest first. head()
# returns fewer rows when fewer than 10 event types exist, whereas indexing
# with 1:10 would pad the table with NA rows.
summary_fatalities <- head(
  summary_fatalities[order(summary_fatalities$FATALITIES, decreasing = TRUE), ],
  10
)

# Store EVTYPE as a factor whose levels follow the ranking, so a plot of EVTYPE
# keeps this order; building it after truncation keeps only the levels shown.
summary_fatalities$EVTYPE <- factor(summary_fatalities$EVTYPE,
                                    levels = summary_fatalities$EVTYPE)

# Print results
print(summary_fatalities, row.names = FALSE)
##          EVTYPE FATALITIES
##         TORNADO       5633
##  EXCESSIVE HEAT       1903
##     FLASH FLOOD        978
##            HEAT        937
##       LIGHTNING        816
##       TSTM WIND        504
##           FLOOD        470
##     RIP CURRENT        368
##       HIGH WIND        248
##       AVALANCHE        224

Top 10 Events resulting in the highest number of INJURIES

# Determine total INJURIES for each EVTYPE
summary_injuries <- aggregate(INJURIES ~ EVTYPE, data = stormdata, FUN = sum)

# Keep the 10 event types with the most injuries, largest first. head()
# returns fewer rows when fewer than 10 event types exist, whereas indexing
# with 1:10 would pad the table with NA rows.
summary_injuries <- head(
  summary_injuries[order(summary_injuries$INJURIES, decreasing = TRUE), ],
  10
)

# Store EVTYPE as a factor whose levels follow the ranking, so a plot of EVTYPE
# keeps this order; building it after truncation keeps only the levels shown.
summary_injuries$EVTYPE <- factor(summary_injuries$EVTYPE,
                                  levels = summary_injuries$EVTYPE)

# Print results
print(summary_injuries, row.names = FALSE)
##             EVTYPE INJURIES
##            TORNADO    91346
##          TSTM WIND     6957
##              FLOOD     6789
##     EXCESSIVE HEAT     6525
##          LIGHTNING     5230
##               HEAT     2100
##          ICE STORM     1975
##        FLASH FLOOD     1777
##  THUNDERSTORM WIND     1488
##               HAIL     1361

Graphical representation of the Top 10 Events resulting in the highest numbers of FATALITIES and INJURIES

# Bar charts of the two top-10 health tables; bar heights are the precomputed
# totals. ggplot() + geom_bar(stat = "identity") is used instead of qplot(),
# which is deprecated in current ggplot2, and the binwidth argument is dropped
# because it has no meaning for a bar chart over a discrete axis.
plot1 <- ggplot(summary_fatalities, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Number of Fatalities") +
  xlab("Event Type") +
  # Rotate the event names so the long labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Fatalities by Event Type")

plot2 <- ggplot(summary_injuries, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Number of Injuries") +
  xlab("Event Type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Injuries by Event Type")

# Show both charts side by side in equal-width columns
grid.arrange(plot1, plot2, ncol = 2, widths = c(4, 4))

2. The following shows which types of events have the greatest economic consequences:

Top 10 Events resulting in the highest PROPERTY DAMAGES

# Determine total PROPDMGVAL for each EVTYPE
summary_propdmg <- aggregate(PROPDMGVAL ~ EVTYPE, data = stormdata, FUN = sum)

# Keep the 10 event types with the highest property damage, largest first.
# head() returns fewer rows when fewer than 10 event types exist, whereas
# indexing with 1:10 would pad the table with NA rows.
summary_propdmg <- head(
  summary_propdmg[order(summary_propdmg$PROPDMGVAL, decreasing = TRUE), ],
  10
)

# Store EVTYPE as a factor whose levels follow the ranking, so a plot of EVTYPE
# keeps this order; building it after truncation keeps only the levels shown.
summary_propdmg$EVTYPE <- factor(summary_propdmg$EVTYPE,
                                 levels = summary_propdmg$EVTYPE)

# Print results
print(summary_propdmg, row.names = FALSE)
##             EVTYPE   PROPDMGVAL
##              FLOOD 144657709807
##  HURRICANE/TYPHOON  69305840000
##            TORNADO  56947380677
##        STORM SURGE  43323536000
##        FLASH FLOOD  16822673979
##               HAIL  15735267513
##          HURRICANE  11868319010
##     TROPICAL STORM   7703890550
##       WINTER STORM   6688497251
##          HIGH WIND   5270046295

Top 10 Events resulting in the highest CROP DAMAGES

# Determine total CROPDMGVAL for each EVTYPE
summary_cropdmg <- aggregate(CROPDMGVAL ~ EVTYPE, data = stormdata, FUN = sum)

# Keep the 10 event types with the highest crop damage, largest first.
# head() returns fewer rows when fewer than 10 event types exist, whereas
# indexing with 1:10 would pad the table with NA rows.
summary_cropdmg <- head(
  summary_cropdmg[order(summary_cropdmg$CROPDMGVAL, decreasing = TRUE), ],
  10
)

# Store EVTYPE as a factor whose levels follow the ranking, so a plot of EVTYPE
# keeps this order; building it after truncation keeps only the levels shown.
summary_cropdmg$EVTYPE <- factor(summary_cropdmg$EVTYPE,
                                 levels = summary_cropdmg$EVTYPE)

# Print results
print(summary_cropdmg, row.names = FALSE)
##             EVTYPE  CROPDMGVAL
##            DROUGHT 13972566000
##              FLOOD  5661968450
##        RIVER FLOOD  5029459000
##          ICE STORM  5022113500
##               HAIL  3025954473
##          HURRICANE  2741910000
##  HURRICANE/TYPHOON  2607872800
##        FLASH FLOOD  1421317100
##       EXTREME COLD  1292973000
##       FROST/FREEZE  1094086000

Graphical representation of the Top 10 Events resulting in the highest PROPERTY and CROP DAMAGES

# Bar charts of the two top-10 damage tables; bar heights are the precomputed
# totals. ggplot() + geom_bar(stat = "identity") is used instead of qplot(),
# which is deprecated in current ggplot2, and the binwidth argument is dropped
# because it has no meaning for a bar chart over a discrete axis.
plot3 <- ggplot(summary_propdmg, aes(x = EVTYPE, y = PROPDMGVAL)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Economic Damages (USD)") +
  xlab("Event Type") +
  # Rotate the event names so the long labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Property Damages by Event Type")

plot4 <- ggplot(summary_cropdmg, aes(x = EVTYPE, y = CROPDMGVAL)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Economic Damages (USD)") +
  xlab("Event Type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Crop Damages by Event Type")

# Show both charts side by side in equal-width columns
grid.arrange(plot3, plot4, ncol = 2, widths = c(4, 4))