1: Description

The purpose of this assignment is to examine the NOAA storm database and to study the impact of severe weather events on both the population and the economy. The database covers the period from 1950 to November 2011.

This analysis shows which types of severe weather events are most dangerous for the population and the economy. Two kinds of impact are considered: (1) health effects, measured as injuries and deaths; and (2) economic consequences, measured as damage to property and crops.

2: Data Processing

Download the raw data file.

library("data.table")
library("ggplot2")

# Source archive for the NOAA storm database (bzip2-compressed CSV).
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Store the archive relative to the working directory so the script runs on
# any machine instead of depending on a machine-specific absolute path.
storm_file <- "repdata_data_StormData.csv.bz2"

# Download only when the archive is not already present. mode = "wb" requests
# a binary transfer so the compressed file is not altered on Windows.
if (!file.exists(storm_file)) {
  download.file(fileUrl, destfile = storm_file, mode = "wb")
}

# read.csv() reads the .bz2 archive directly; no manual decompression needed.
storm_df <- read.csv(storm_file)
storm_dt <- as.data.table(storm_df)
colnames(storm_dt)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Subset the dataset on the parameters of interest. Basically, we remove the columns we don’t need for clarity.

# Every column except the seven used in the health and economic analysis
# is marked for removal.
cols_to_remove <- setdiff(
  colnames(storm_dt),
  c(
    "EVTYPE",
    "FATALITIES",
    "INJURIES",
    "PROPDMG",
    "PROPDMGEXP",
    "CROPDMG",
    "CROPDMGEXP"
  )
)

# Delete the unneeded columns by reference.
storm_dt[, (cols_to_remove) := NULL]

# Keep rows whose event type is not "?" and that report at least one
# injury, fatality, or positive property/crop damage figure.
storm_dt <- storm_dt[
  EVTYPE != "?" &
    (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
]

Making the PROPDMGEXP and CROPDMGEXP columns cleaner so they can be used to calculate property and crop cost.

# Upper-case both exponent columns so lower- and upper-case codes
# (e.g. "k" and "K") resolve to the same lookup entry.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
storm_dt[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Lookup from PROPDMGEXP code to numeric multiplier.
# Note: the first name is the literal two-character string of quote marks,
# not the empty string, so it does not match empty codes. Empty codes and
# any other unlisted code come back as NA and are set to 1 further down.
prop_dmg_key <- c(
  "\"\"" = 10^0,
  "-" = 10^0,
  "+" = 10^0,
  setNames(10^(0:9), 0:9), # digit codes "0".."9" -> 10^0 .. 10^9
  "H" = 10^2,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9
)

# Lookup from CROPDMGEXP code to numeric multiplier (same NA fallback applies).
crop_dmg_key <- c(
  "\"\"" = 10^0,
  "?" = 10^0,
  "0" = 10^0,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9
)

# Replace each property code with its multiplier; unmatched codes become 1.
storm_dt[, PROPDMGEXP := prop_dmg_key[as.character(PROPDMGEXP)]]
storm_dt[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]

# Replace each crop code with its multiplier; unmatched codes become 1.
storm_dt[, CROPDMGEXP := crop_dmg_key[as.character(CROPDMGEXP)]]
storm_dt[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]

Making Economic Cost Columns

# Rebuild the table with two derived cost columns: each damage figure
# multiplied by its numeric multiplier column.
storm_dt <- storm_dt[, .(
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  propCost = PROPDMG * PROPDMGEXP,
  CROPDMG,
  CROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]

Total Property and Crop Cost

# Sum property and crop cost per event type, sort by combined cost
# (largest first), and keep rows 1 to 10.
total_cost_dt <- storm_dt[, .(
  propCost = sum(propCost),
  cropCost = sum(cropCost),
  Total_Cost = sum(propCost) + sum(cropCost)
), by = .(EVTYPE)][order(-Total_Cost)][1:10]

head(total_cost_dt, 5)
##               EVTYPE     propCost   cropCost   Total_Cost
##               <char>        <num>      <num>        <num>
## 1:             FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
## 3:           TORNADO  56947380677  414953270  57362333947
## 4:       STORM SURGE  43323536000       5000  43323541000
## 5:              HAIL  15735267513 3025954473  18761221986

Total Fatalities and Injuries

# Sum fatalities and injuries per event type, sort by fatalities
# (largest first), and keep rows 1 to 10.
total_injuries_dt <- storm_dt[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  totals = sum(FATALITIES) + sum(INJURIES)
), by = .(EVTYPE)][order(-FATALITIES)][1:10]

head(total_injuries_dt, 5)
##            EVTYPE FATALITIES INJURIES totals
##            <char>      <num>    <num>  <num>
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755
## 4:           HEAT        937     2100   3037
## 5:      LIGHTNING        816     5230   6046

3: Results

Melting data.table so that it is easier to put in bar graph format

# Reshape to long format: one row per (event type, measure) pair, with the
# measure name in `bad_thing` and its number in `value`.
bad_stuff <- melt(
  total_injuries_dt,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "totals"),
  variable.name = "bad_thing"
)
head(bad_stuff, 5)
##            EVTYPE  bad_thing value
##            <char>     <fctr> <num>
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816

Plot 1: Fatalities and injuries by event type

# Dodged bar chart of fatalities, injuries and their total per event type,
# with event types ordered along the x axis by descending value.
health_chart <- ggplot(bad_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_col(aes(fill = bad_thing), position = "dodge") +
  labs(
    x = "Event type",
    y = "Frequency (count)",
    title = "Top 10 killers"
  ) +
  theme(
    axis.text.x = element_text(angle = 50, hjust = 1), # slanted labels
    plot.title = element_text(hjust = 0.3)
  )

health_chart

# Reshape to long format: one row per (event type, cost measure) pair, with
# the measure name in `Damage_Type` and its amount in `value`.
econ_consequences <- melt(
  total_cost_dt,
  id.vars = "EVTYPE",
  measure.vars = c("propCost", "cropCost", "Total_Cost"),
  variable.name = "Damage_Type"
)
head(econ_consequences, 5)
##               EVTYPE Damage_Type        value
##               <char>      <fctr>        <num>
## 1:             FLOOD    propCost 144657709807
## 2: HURRICANE/TYPHOON    propCost  69305840000
## 3:           TORNADO    propCost  56947380677
## 4:       STORM SURGE    propCost  43323536000
## 5:              HAIL    propCost  15735267513

Plot 2: Property and crop damage by event type

# Dodged bar chart of property, crop and total cost per event type, with
# event types ordered along the x axis by descending value.
econ_chart <- ggplot(econ_consequences, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_col(aes(fill = Damage_Type), position = "dodge") +
  labs(
    x = "Event type",
    y = "Cost, dollars",
    title = "Top 10 Storm Events causing Economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 50, hjust = 1), # slanted labels
    plot.title = element_text(hjust = 0.5) # centered title
  )

econ_chart