1: Synopsis

The goal of the assignment is to explore the NOAA Storm Database and investigate the effects of severe weather events on both population and economy. The database covers the time period between 1950 and November 2011.

The following analysis investigates which types of severe weather events are most harmful to:

  1. Health (injuries and fatalities)
  2. Property and crops (economic consequences)

2: Data Processing

2.1: Data downloading

library("data.table")
library("ggplot2")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("reshape2")
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
# Fetch the compressed storm data archive into the working directory.
# The download is skipped when the file is already present so that re-running
# the report does not pull the archive again.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destFile <- file.path(getwd(), "repdata%2Fdata%2FStormData.csv.bz2")
if (!file.exists(destFile)) {
  # mode = "wb" keeps the bz2 archive byte-exact on platforms that would
  # otherwise translate line endings; the default method avoids requiring an
  # external curl executable on the PATH.
  download.file(fileUrl, destfile = destFile, mode = "wb")
}

2.2: Reading the data and examining the column names

# Load the bz2-compressed CSV directly (read.csv decompresses it on the fly)
# and list the available columns.
data <- read.csv(file = "repdata%2Fdata%2FStormData.csv.bz2")
colnames(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

2.2: Subsetting and arranging the data

# Keep only the damage, casualty and event-type columns, then sort the rows
# by event type and preview the first few.
data1 <- data[, c("PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP",
                  "FATALITIES", "INJURIES", "EVTYPE"), drop = FALSE] %>%
  arrange(EVTYPE)
head(data1, n = 5)
##   PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP FATALITIES INJURIES
## 1     200          K       0                     0        0
## 2       0                  0                     0        0
## 3      50          K       0                     0        0
## 4       0                  0                     0        0
## 5       0                  0                     0        0
##                  EVTYPE
## 1    HIGH SURF ADVISORY
## 2         COASTAL FLOOD
## 3           FLASH FLOOD
## 4             LIGHTNING
## 5             TSTM WIND

2.3: Removing records that are not valid (unknown event type or no recorded harm)

# Keep rows with a known event type that caused at least one injury, one
# fatality, or some property or crop damage. which() drops rows where the
# condition is NA, and integer indexing keeps the original row names.
data1 <- data1[which(
  data1$EVTYPE != "?" &
    (data1$INJURIES > 0 | data1$FATALITIES > 0 |
       data1$PROPDMG > 0 | data1$CROPDMG > 0)
), ]
head(data1, n = 5)
##   PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP FATALITIES INJURIES
## 1     200          K       0                     0        0
## 3      50          K       0                     0        0
## 7     100          K       0                     0        0
## 8       8          M       0                     0        0
## 9       8          K       0                     0        0
##                  EVTYPE
## 1    HIGH SURF ADVISORY
## 3           FLASH FLOOD
## 7             TSTM WIND
## 8             TSTM WIND
## 9       TSTM WIND (G45)

2.4: Converting Exponent Columns into Actual Exponents instead of ( M, K, etc)

# Convert to a data.table so the exponent columns can be recoded by reference.
data1 <- data.table(data1)
cols <- c("PROPDMGEXP", "CROPDMGEXP")
# Upper-case the exponent codes so that e.g. "k" and "K" share one multiplier.
data1[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Map property damage alphanumeric exponents to numeric multipliers.
# "B" (billions) must be present: without it every billion-dollar record falls
# through to the default multiplier of 1 and the totals are badly understated.
# Blank codes and any code not listed here are handled by the NA fallback
# below, which assigns a multiplier of 1.
propDmgKey <- c("K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)

# Map crop damage alphanumeric exponents to numeric multipliers.
cropDmgKey <- c("?" = 10^0,
                "0" = 10^0,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)

# Look each code up in its key; codes without an entry come back as NA and are
# then given the neutral multiplier 1.
data1[, PROPDMGEXP := unname(propDmgKey[as.character(PROPDMGEXP)])]
data1[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]

data1[, CROPDMGEXP := unname(cropDmgKey[as.character(CROPDMGEXP)])]
data1[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]
head(data1, n = 10)
##     PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP FATALITIES INJURIES
##  1:     200      1e+03    0.00      1e+00          0        0
##  2:      50      1e+03    0.00      1e+00          0        0
##  3:     100      1e+03    0.00      1e+00          0        0
##  4:       8      1e+06    0.00      1e+00          0        0
##  5:       8      1e+03    0.00      1e+00          0        0
##  6:       0      1e+00    1.48      1e+06          0        0
##  7:       0      1e+00   17.96      1e+06          0        0
##  8:       0      1e+00    9.38      1e+06          0        0
##  9:       5      1e+03    0.00      1e+00          0        0
## 10:       1      1e+06    0.00      1e+00          0        0
##                     EVTYPE
##  1:     HIGH SURF ADVISORY
##  2:            FLASH FLOOD
##  3:              TSTM WIND
##  4:              TSTM WIND
##  5:        TSTM WIND (G45)
##  6:    AGRICULTURAL FREEZE
##  7:    AGRICULTURAL FREEZE
##  8:    AGRICULTURAL FREEZE
##  9:          APACHE COUNTY
## 10: ASTRONOMICAL HIGH TIDE

2.5: Making Economic Cost Columns

# Turn each (amount, multiplier) pair into a dollar cost column.
data1 <- data1 %>%
  mutate(
    propCost = PROPDMG * PROPDMGEXP,
    cropCost = CROPDMG * CROPDMGEXP
  )

2.6: Calculating Total Property and Crop Cost

# Sum property and crop costs per event type, rank the event types by their
# combined cost (largest first) and keep the first ten rows.
data1 <- data.table(data1)
total_c <- data1[, .(
  propCost = sum(propCost),
  cropCost = sum(cropCost),
  Total_Cost = sum(propCost) + sum(cropCost)
), by = "EVTYPE"][order(-Total_Cost)][1:10]
head(total_c, n = 5)
##         EVTYPE    propCost    cropCost  Total_Cost
## 1:     TORNADO 51637160784   414953270 52052114054
## 2:       FLOOD 22157709930  5661968450 27819678380
## 3:        HAIL 13932267050  3025954473 16958221523
## 4: FLASH FLOOD 15140812068  1421317100 16562129168
## 5:     DROUGHT  1046106000 12472566002 13518672002

2.7: Calculating Total Fatalities and Injuries

# Sum fatalities and injuries per event type, rank the event types by
# fatalities (largest first) and keep the first ten rows.
data1 <- data.table(data1)
total_i <- data1[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  totals = sum(FATALITIES) + sum(INJURIES)
), by = "EVTYPE"][order(-FATALITIES)][1:10]
head(total_i, n = 5)
##            EVTYPE FATALITIES INJURIES totals
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755
## 4:           HEAT        937     2100   3037
## 5:      LIGHTNING        816     5230   6046

2.8: Melting the data of total fatalities and injuries

# Reshape the casualty totals to long form: one row per event type and
# measure, with the measure name in THINGS and its value in VALUE.
pop_health <- melt(
  total_i,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "totals"),
  variable.name = "THINGS",
  value.name = "VALUE"
)
head(pop_health, n = 5)
##            EVTYPE     THINGS VALUE
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816

2.9: Melting the data of total property and crop cost

# Reshape the cost totals to long form: one row per event type and cost
# measure, with the measure name in THINGS and its value in VALUE.
econ_con <- melt(
  total_c,
  id.vars = "EVTYPE",
  measure.vars = c("propCost", "cropCost", "Total_Cost"),
  variable.name = "THINGS",
  value.name = "VALUE"
)
head(econ_con, n = 5)
##         EVTYPE   THINGS       VALUE
## 1:     TORNADO propCost 51637160784
## 2:       FLOOD propCost 22157709930
## 3:        HAIL propCost 13932267050
## 4: FLASH FLOOD propCost 15140812068
## 5:     DROUGHT propCost  1046106000

3: RESULTS

3.1: Events that are Most Harmful to Population Health

# Dodged bar chart of fatalities, injuries and their total for the selected
# event types, with tilted x labels and a centred title.
health <- ggplot(pop_health, aes(x = reorder(EVTYPE, -VALUE), y = VALUE)) +
  geom_bar(aes(fill = THINGS), stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Frequency Count",
    title = "Top 10 US Killers"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
health

3.2: Events that have the Greatest Economic Consequences

# Dodged bar chart of property, crop and total cost for the selected event
# types, with tilted x labels and a centred title.
econ <- ggplot(econ_con, aes(x = reorder(EVTYPE, -VALUE), y = VALUE)) +
  geom_bar(aes(fill = THINGS), stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Cost (dollars)",
    title = "Top 10 US Storm Events causing economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
econ