library(hashmap)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1. Synopsis

The goal of this assignment is to explore the NOAA Storm Database and answer the following basic questions about severe weather events.
1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?

The analysis concludes that tornadoes caused the most fatalities and injuries, whereas floods caused the greatest property damage and drought caused the greatest crop damage.

2. Data Processing

2.1 Loading data

  • Refer to the section ‘Storm Data Preparation’ in ref #2, which indicates that only a few fields are relevant to this project's goal
  • Hence, read only the first 2 rows to identify the column names
# Peek at just the first two data rows: enough for read.csv() to parse the
# header without loading the whole file.
stormPartial <- read.csv(file = "./StormData.csv", nrows = 2)

# Header names of the raw file, in file order; printed for reference.
cols <- names(stormPartial)
print(cols)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

The columns relevant to the analysis of population health and economic consequences are “EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP”.

# Build the `colClasses` vector for read.csv(): the string "NULL" makes
# read.csv() skip a column, while NA lets it infer that column's type.
relevantNames <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG",
                   "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
relevantCols <- rep("NULL", length(cols))

# Use %in% for set membership. `cols == c(...)` recycles the 7 wanted names
# across all header names and compares them position by position, so it only
# selected the right columns by an accident of column order (and emitted a
# "longer object length is not a multiple of shorter object length" warning).
relevantCols[cols %in% relevantNames] <- NA

# Fail early if the file does not contain every column the analysis needs.
stopifnot(sum(is.na(relevantCols)) == length(relevantNames))

# Read the full data set, keeping only the selected columns.
stormData <- read.csv("./StormData.csv", colClasses = relevantCols)
dim(stormData)
## [1] 902297      7

2.2 Pre-processing data

# Distinct exponent codes that accompany the property-damage figures
unique(stormData[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct exponent codes that accompany the crop-damage figures
unique(stormData[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
# Lookup table from damage-exponent code to numeric multiplier.
# expCodes[i] pairs with multVals[i], so both vectors must stay in step.
expCodes <- c(
  "-", "?", "+",   # invalid exponents -> multiplier 0
  "", "0", "1",    # 10^0, 10^0, 10^1
  "2", "h", "H",   # hundreds
  "3", "k", "K",   # thousands
  "4", "5",
  "6", "m", "M",   # millions
  "7", "8",
  "b", "B"         # billions
)
multVals <- c(
  rep(0, 3),
  10^c(0, 0, 1),
  rep(10^2, 3),
  rep(10^3, 3),
  10^4, 10^5,
  rep(10^6, 3),
  10^7, 10^8,
  rep(10^9, 2)
)
exp2MultMap <- hashmap(expCodes, multVals)

# Scale the raw damage figures in place by the multiplier their exponent code
# maps to. as.character() makes the lookup use the code's text label.
stormData[["PROPDMG"]] <-
  stormData[["PROPDMG"]] * exp2MultMap[[as.character(stormData[["PROPDMGEXP"]])]]
#head(stormData$PROPDMG, 100)
stormData[["CROPDMG"]] <-
  stormData[["CROPDMG"]] * exp2MultMap[[as.character(stormData[["CROPDMGEXP"]])]]
#head(stormData$CROPDMG, 100)

# Show the structure of the processed data set
str(stormData)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...

2.3 Calculate harmful effects on population & economics

# Sum every measure over all records that share the same event type.
# Each result is a data frame with one row per EVTYPE and one total column.

# Population health
fatal  <- aggregate(FATALITIES ~ EVTYPE, data = stormData, FUN = sum)
injury <- aggregate(INJURIES ~ EVTYPE, data = stormData, FUN = sum)

# Economic consequences (damage values already scaled by their exponent)
propdmg <- aggregate(PROPDMG ~ EVTYPE, data = stormData, FUN = sum)
cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = stormData, FUN = sum)

# Share of all fatalities attributable to each event type, largest first
fatalSum <- sum(fatal[["FATALITIES"]])
fatal <- fatal %>%
  mutate(pct = FATALITIES * 100 / fatalSum) %>%
  arrange(desc(pct))
head(fatal, n = 10)
##            EVTYPE FATALITIES       pct
## 1         TORNADO       5633 37.193793
## 2  EXCESSIVE HEAT       1903 12.565203
## 3     FLASH FLOOD        978  6.457577
## 4            HEAT        937  6.186860
## 5       LIGHTNING        816  5.387917
## 6       TSTM WIND        504  3.327831
## 7           FLOOD        470  3.103334
## 8     RIP CURRENT        368  2.429845
## 9       HIGH WIND        248  1.637504
## 10      AVALANCHE        224  1.479036
# Share of all injuries attributable to each event type, largest first
injurySum <- sum(injury[["INJURIES"]])
injury <- injury %>%
  mutate(pct = INJURIES * 100 / injurySum) %>%
  arrange(desc(pct))
head(injury, n = 10)
##               EVTYPE INJURIES        pct
## 1            TORNADO    91346 65.0019925
## 2          TSTM WIND     6957  4.9506148
## 3              FLOOD     6789  4.8310657
## 4     EXCESSIVE HEAT     6525  4.6432028
## 5          LIGHTNING     5230  3.7216782
## 6               HEAT     2100  1.4943641
## 7          ICE STORM     1975  1.4054139
## 8        FLASH FLOOD     1777  1.2645167
## 9  THUNDERSTORM WIND     1488  1.0588637
## 10              HAIL     1361  0.9684903
# Share of total property damage attributable to each event type, largest first
propdmgSum <- sum(propdmg[["PROPDMG"]])
propdmg <- propdmg %>%
  mutate(pct = PROPDMG * 100 / propdmgSum) %>%
  arrange(desc(pct))
head(propdmg, n = 10)
##               EVTYPE      PROPDMG       pct
## 1              FLOOD 144657709807 33.780782
## 2  HURRICANE/TYPHOON  69305840000 16.184450
## 3            TORNADO  56947380617 13.298476
## 4        STORM SURGE  43323536000 10.117006
## 5        FLASH FLOOD  16822673979  3.928467
## 6               HAIL  15735267513  3.674534
## 7          HURRICANE  11868319010  2.771516
## 8     TROPICAL STORM   7703890550  1.799029
## 9       WINTER STORM   6688497251  1.561912
## 10         HIGH WIND   5270046260  1.230673
# Share of total crop damage attributable to each event type, largest first
cropdmgSum <- sum(cropdmg[["CROPDMG"]])
cropdmg <- cropdmg %>%
  mutate(pct = CROPDMG * 100 / cropdmgSum) %>%
  arrange(desc(pct))
head(cropdmg, n = 10)
##               EVTYPE     CROPDMG       pct
## 1            DROUGHT 13972566000 28.454935
## 2              FLOOD  5661968450 11.530519
## 3        RIVER FLOOD  5029459000 10.242423
## 4          ICE STORM  5022113500 10.227464
## 5               HAIL  3025954473  6.162314
## 6          HURRICANE  2741910000  5.583861
## 7  HURRICANE/TYPHOON  2607872800  5.310896
## 8        FLASH FLOOD  1421317100  2.894492
## 9       EXTREME COLD  1292973000  2.633121
## 10      FROST/FREEZE  1094086000  2.228091
# Two panels side by side. The deep bottom margin leaves room for the
# event-type labels, which las = 3 draws vertically.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)

# `fatal` and `injury` are already sorted in descending order, so the first
# ten rows are the ten most harmful event types.
topTen <- seq_len(10)
barplot(
  height = fatal$FATALITIES[topTen],
  names.arg = fatal$EVTYPE[topTen],
  main = "Events with Highest Fatalities",
  ylab = "Number of fatalities",
  col = "light blue",
  las = 3
)
barplot(
  height = injury$INJURIES[topTen],
  names.arg = injury$EVTYPE[topTen],
  main = "Events with Highest Injuries",
  ylab = "Number of injuries",
  col = "light blue",
  las = 3
)

# Side-by-side bar charts of the ten event types with the largest total
# property damage (left) and crop damage (right). `propdmg` and `cropdmg` are
# already sorted in descending order, so rows 1:10 are the top ten.
# The deep bottom margin leaves room for the vertical (las = 3) labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(propdmg$PROPDMG[1:10], las = 3, names.arg = propdmg$EVTYPE[1:10], main = "Events with Highest Property Damages", ylab = "Amount of damages", col = "light blue")
# Crop damage is a monetary amount, not a count, so the axis label matches the
# property-damage panel.
barplot(cropdmg$CROPDMG[1:10], las = 3, names.arg = cropdmg$EVTYPE[1:10], main = "Events with Highest Crop Damages", ylab = "Amount of damages", col = "light blue")

3. Results

Tornadoes caused the most fatalities and injuries, whereas floods caused the greatest property damage and drought caused the greatest crop damage.

References

  1. Storm Data https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
  2. National Weather Service Storm Data Documentation https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf