Analysis of Adverse Health & Economic Impacts of US Storms

Synopsis:

The assignment is to use the NOAA Storm Database to explore the effects of severe weather events on both population and economy. The database includes the time period between 1950 and November 2011.

The analysis investigates what types of severe weather events are most harmful on:

  • Health (injuries and fatalities)
  • Property and crops (economic consequences)

The data for this assignment can be downloaded from the course web site:

Data Processing:

2.1: Download and save data into a data.table

library("data.table")
library("ggplot2")

# Downloading file
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(fileUrl, destfile="/home/dugi/coursera/Course5_week4/repdata_data_StormData.csv.bz2")

# Reading in csv file into a data.frame
stormFile <- read.csv("/home/dugi/coursera/Course5_week4/repdata_data_StormData.csv.bz2")

# Converting data.table
stormData <- as.data.table(stormFile)

2.2: Inspecting Data

colnames(stormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
head(stormData)
##    STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1:       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2:       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3:       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4:       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5:       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6:       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##     EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1: TORNADO         0                                               0
## 2: TORNADO         0                                               0
## 3: TORNADO         0                                               0
## 4: TORNADO         0                                               0
## 5: TORNADO         0                                               0
## 6: TORNADO         0                                               0
##    COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1:         NA         0                      14.0   100 3   0          0
## 2:         NA         0                       2.0   150 2   0          0
## 3:         NA         0                       0.1   123 2   0          0
## 4:         NA         0                       0.0   100 2   0          0
## 5:         NA         0                       0.0   150 2   0          0
## 6:         NA         0                       1.5   177 2   0          0
##    INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1:       15    25.0          K       0                                    
## 2:        0     2.5          K       0                                    
## 3:        2    25.0          K       0                                    
## 4:        2     2.5          K       0                                    
## 5:        2     2.5          K       0                                    
## 6:        6     2.5          K       0                                    
##    LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1:     3040      8812       3051       8806              1
## 2:     3042      8755          0          0              2
## 3:     3340      8742          0          0              3
## 4:     3458      8626          0          0              4
## 5:     3412      8642          0          0              5
## 6:     3450      8748          0          0              6

2.3: Subset Data

Subset dataset for parameters we want and we remove the columns we do not need.

# Setting vector colsRemove to columns to remove
colsRemove <- colnames(stormData[, !c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")])

# Removing columns
stormData[, c(colsRemove) := NULL]

# Need only data where fatalities, injuries, property damage or crop damage are > 0  
stormData <- stormData[(EVTYPE != "?" &
             (FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)), 
             c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

2.4 Fix columns into proper format for presenting in dollar amounts.

Need to fix PROPDMGEXP & CROPDMGEXP columns so they can be used to calculate in dollar amounts.

# Change all damage exponents to uppercase.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
stormData[,  (cols) := c(lapply(.SD, toupper)), .SDcols = cols]

# Verifying values in PROPDMGEXP and CROPDMGEXP
table(stormData$PROPDMGEXP)
## 
##             -      +      0      2      3      4      5      6      7 
##  11585      1      5    210      1      1      4     18      3      3 
##      B      H      K      M 
##     40      7 231427  11327
table(stormData$CROPDMGEXP)
## 
##             ?      0      B      K      M 
## 152663      6     17      7  99953   1986
# Map property damage alphanumeric exponents to numeric values.
propDmgKey <-  c("\"\"" = 10^0,
                 "-"    = 10^0, 
                 "+" = 10^0,
                 "0" = 10^0,
                 "1" = 10^1,
                 "2" = 10^2,
                 "3" = 10^3,
                 "4" = 10^4,
                 "5" = 10^5,
                 "6" = 10^6,
                 "7" = 10^7,
                 "8" = 10^8,
                 "9" = 10^9,
                 "H" = 10^2,
                 "K" = 10^3,
                 "M" = 10^6,
                 "B" = 10^9)

# Map crop damage alphanumeric exponents to numeric values
cropDmgKey <-  c("\"\"" = 10^0,
                "?" = 10^0, 
                "0" = 10^0,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)

stormData[, PROPDMGEXP := propDmgKey[as.character(stormData[,PROPDMGEXP])]]
stormData[is.na(PROPDMGEXP), PROPDMGEXP := 10^0 ]

stormData[, CROPDMGEXP := cropDmgKey[as.character(stormData[,CROPDMGEXP])] ]
stormData[is.na(CROPDMGEXP), CROPDMGEXP := 10^0 ]

#stormData$PROPCOST <- stormData$PROPDMG %*% stormData$PROPDMGEXP

stormData <- stormData[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)]

2.5: Calcuating Total Property and Crop Cost by Event

totalCostData <- stormData[, .(propCost=sum(propCost), cropCost=sum(cropCost), totalCost=sum(propCost) + sum(cropCost)), by = .(EVTYPE)]

totalCostData <- totalCostData[order(-totalCost), ]

totalCostData <- totalCostData[1:10, ]

totalCostData
##                EVTYPE     propCost    cropCost    totalCost
##  1:             FLOOD 144657709807  5661968450 150319678257
##  2: HURRICANE/TYPHOON  69305840000  2607872800  71913712800
##  3:           TORNADO  56947380676   414953270  57362333946
##  4:       STORM SURGE  43323536000        5000  43323541000
##  5:              HAIL  15735267513  3025954473  18761221986
##  6:       FLASH FLOOD  16822673978  1421317100  18243991078
##  7:           DROUGHT   1046106000 13972566000  15018672000
##  8:         HURRICANE  11868319010  2741910000  14610229010
##  9:       RIVER FLOOD   5118945500  5029459000  10148404500
## 10:         ICE STORM   3944927860  5022113500   8967041360

2.6: Calcuating Total Fatalities and Injuries by Event

totalInjuryData <- stormData[, .(FATALITIES=sum(FATALITIES), INJURIES=sum(INJURIES), totalInjury=sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)]

totalInjuryData <- totalInjuryData[order(-FATALITIES), ]

totalInjuryData <- totalInjuryData[1:10, ]

totalInjuryData
##             EVTYPE FATALITIES INJURIES totalInjury
##  1:        TORNADO       5633    91346       96979
##  2: EXCESSIVE HEAT       1903     6525        8428
##  3:    FLASH FLOOD        978     1777        2755
##  4:           HEAT        937     2100        3037
##  5:      LIGHTNING        816     5230        6046
##  6:      TSTM WIND        504     6957        7461
##  7:          FLOOD        470     6789        7259
##  8:    RIP CURRENT        368      232         600
##  9:      HIGH WIND        248     1137        1385
## 10:      AVALANCHE        224      170         394

Results

3.1: Events that are the Most Harmful to Population

# Transpose data.table to put into histogram
harmPopData <- melt(totalInjuryData, id.vars="EVTYPE", variable.name = "badOutcome")
head(harmPopData)
##            EVTYPE badOutcome value
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816
## 6:      TSTM WIND FATALITIES   504
# Create Histogram
harmPopHist <- ggplot(harmPopData, aes(x=reorder(EVTYPE, -value), y=value)) +
               geom_bar(stat="identity", aes(fill=badOutcome), position="dodge") +
               ylab("Frequency") + xlab("Event") + ggtitle("Top 10 Harmful Storm Events in U.S.") +
               theme(axis.text.x = element_text(angle=45, hjust=1)) +
               theme(plot.title = element_text(hjust = 0.5))

harmPopHist

3.2: Events that are the Most Harmful to the Economy

# Transpose data.table to put into histogram
badEconomyData <- melt(totalCostData, id.vars="EVTYPE", variable.name = "damageType")
head(badEconomyData)
##               EVTYPE damageType        value
## 1:             FLOOD   propCost 144657709807
## 2: HURRICANE/TYPHOON   propCost  69305840000
## 3:           TORNADO   propCost  56947380676
## 4:       STORM SURGE   propCost  43323536000
## 5:              HAIL   propCost  15735267513
## 6:       FLASH FLOOD   propCost  16822673978
# Create Histogram
harmEconHist <- ggplot(badEconomyData, aes(x=reorder(EVTYPE, -value), y=value)) +
                geom_bar(stat="identity", aes(fill=damageType), position="dodge") +
                ylab("Cost (dollars)") + xlab("Event") + ggtitle("Top 10 Harmful Economic Storm Events in U.S.") +
                theme(axis.text.x = element_text(angle=45, hjust=1)) +
                theme(plot.title = element_text(hjust = 0.5))

harmEconHist