1: Synopsis

This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The goal is to identify which types of events have the greatest economic consequences and which are most harmful to population health.

Information on the Data: Documentation

2: Data Processing

2.1: Loading the data

The downloaded raw data file, which was copied into the working directory, is read into a data frame with read.csv and then converted to a data.table.

library("data.table")
## Warning: package 'data.table' was built under R version 4.0.5
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.0.5
# Read the compressed raw CSV from the working directory into a data frame,
# then convert that data frame to a data.table for the rest of the analysis.
storm <- read.csv(file = "repdata_data_StormData.csv.bz2")
stormDT <- data.table::as.data.table(x = storm)

2.2: Extracting the column Names

# List every column available in the raw data.
names(stormDT)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

2.3: Subsetting

Subset the dataset on the parameters of interest. Here, we remove the columns we don’t need.

# Columns required for the population-health and economic analyses.
colsToKeep <- c(
  "EVTYPE",
  "FATALITIES",
  "INJURIES",
  "PROPDMG",
  "PROPDMGEXP",
  "CROPDMG",
  "CROPDMGEXP"
)
# Finding columns to remove: every column that is not in the keep list.
# setdiff() works on the column names alone, so no throw-away copy of the
# table is built just to read its names.
cols2Remove <- setdiff(colnames(stormDT), colsToKeep)
# Removing columns (by reference).
stormDT[, (cols2Remove) := NULL]
# Keep only events with a known type ("?" is excluded) that caused at least
# one injury or fatality, or any property or crop damage.
stormDT <- stormDT[
  EVTYPE != "?" &
    (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  colsToKeep,
  with = FALSE
]

2.4: Converting Exponent Columns into Actual Exponents

Convert the PROPDMGEXP and CROPDMGEXP columns from their alphanumeric exponent codes (for example K, M and B) into the numeric multipliers they stand for.

# Change all damage exponents to uppercase so that e.g. "k" and "K" share
# one lookup entry.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
stormDT[, (cols) := lapply(.SD, toupper), .SDcols = cols]
# Map property damage alphanumeric exponents to numeric values.
# NOTE: the first key is the two-character string `""`, not the empty string;
# blank exponent codes are handled by the fallback in expToMultiplier().
propDmgKey <- c("\"\"" = 10^0,
                "-" = 10^0,
                "+" = 10^0,
                "0" = 10^0,
                "1" = 10^1,
                "2" = 10^2,
                "3" = 10^3,
                "4" = 10^4,
                "5" = 10^5,
                "6" = 10^6,
                "7" = 10^7,
                "8" = 10^8,
                "9" = 10^9,
                "H" = 10^2,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)
# Map crop damage alphanumeric exponents to numeric values.
cropDmgKey <- c("\"\"" = 10^0,
                "?" = 10^0,
                "0" = 10^0,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)

# Translate a vector of exponent codes into numeric multipliers.
#   codes: exponent codes (anything as.character() accepts).
#   key:   named numeric vector mapping a code to its multiplier.
# Returns an unnamed numeric vector the same length as `codes`. Codes that
# are missing from `key` (including blank and NA codes) get multiplier 1,
# so the damage figure is taken at face value.
expToMultiplier <- function(codes, key) {
  multiplier <- unname(key[as.character(codes)])
  multiplier[is.na(multiplier)] <- 10^0
  multiplier
}

# Replace each code column with its multiplier; assigning the whole column
# lets it change type from character to numeric.
stormDT[, PROPDMGEXP := expToMultiplier(PROPDMGEXP, propDmgKey)]
stormDT[, CROPDMGEXP := expToMultiplier(CROPDMGEXP, cropDmgKey)]

2.5: Making Economic Cost Columns

Compute the property cost and crop cost of each event by multiplying the recorded damage amount by its numeric multiplier.

# Rebuild the table with two derived columns: each cost is the recorded
# damage amount multiplied by its numeric multiplier. Each cost column is
# placed directly after the pair of columns it is computed from.
stormDT <- stormDT[, list(
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  propCost = PROPDMG * PROPDMGEXP,
  CROPDMG,
  CROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]

2.6: Calculating Total Property and Crop Cost

Computation of total property and crop cost for the purpose of visual representation

# Sum property cost, crop cost and their combined total per event type.
totalCostDT <- stormDT[
  ,
  .(
    propCost = sum(propCost),
    cropCost = sum(cropCost),
    Total_Cost = sum(propCost) + sum(cropCost)
  ),
  by = .(EVTYPE)
]
# Rank event types from the most to the least costly overall.
totalCostDT <- totalCostDT[order(-Total_Cost), ]
# Keep the ten costliest event types. head() returns fewer rows when fewer
# than ten event types exist, whereas indexing with 1:10 would pad the
# result with all-NA rows.
totalCostDT <- head(totalCostDT, 10)
head(totalCostDT, 5)
##               EVTYPE     propCost   cropCost   Total_Cost
## 1:             FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
## 3:           TORNADO  56947380677  414953270  57362333947
## 4:       STORM SURGE  43323536000       5000  43323541000
## 5:              HAIL  15735267513 3025954473  18761221986

2.7: Calculating Total Fatalities and Injuries

Computation of total fatalities and injuries for the purpose of visual representation

# Sum fatalities, injuries and their combined total per event type.
totalInjuriesDT <- stormDT[
  ,
  .(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    totals = sum(FATALITIES) + sum(INJURIES)
  ),
  by = .(EVTYPE)
]
# Rank event types by fatalities (not by the combined total), deadliest first.
totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ]
# Keep the ten deadliest event types. head() returns fewer rows when fewer
# than ten event types exist, whereas indexing with 1:10 would pad the
# result with all-NA rows.
totalInjuriesDT <- head(totalInjuriesDT, 10)
head(totalInjuriesDT, 5)
##            EVTYPE FATALITIES INJURIES totals
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755
## 4:           HEAT        937     2100   3037
## 5:      LIGHTNING        816     5230   6046

3: Results

3.1: Events that are Most Harmful to Population Health

Melting data.table so that it is easier to put in bar graph format

# Reshape to long format: one row per (event type, measure) pair, with the
# measure name stored in `new_ev` and its count in `value`.
total_injuries_melt <- melt(
  totalInjuriesDT,
  id.vars = "EVTYPE",
  measure.vars = setdiff(names(totalInjuriesDT), "EVTYPE"),
  variable.name = "new_ev"
)
head(total_injuries_melt, n = 5)
##            EVTYPE     new_ev value
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816
# Grouped bar chart of the health measures per event type:
#   - x axis: event types, reordered by descending value
#   - bars:   one per measure (fill = new_ev), drawn side by side
#   - x tick labels rotated 45 degrees, chart title centred
g <- ggplot(total_injuries_melt, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = new_ev), stat = "identity", position = "dodge") +
  labs(x = "Event Type", y = "Frequency Count", title = "Top 10 US Killers") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
g

3.2: Events that have the Greatest Economic Consequences

Melting data.table so that it is easier to put in bar graph format

# Reshape to long format: one row per (event type, cost measure) pair, with
# the measure name stored in `Damage` and its amount in `value`.
econ_consequences <- melt(
  totalCostDT,
  id.vars = "EVTYPE",
  measure.vars = setdiff(names(totalCostDT), "EVTYPE"),
  variable.name = "Damage"
)
head(econ_consequences, n = 5)
##               EVTYPE   Damage        value
## 1:             FLOOD propCost 144657709807
## 2: HURRICANE/TYPHOON propCost  69305840000
## 3:           TORNADO propCost  56947380677
## 4:       STORM SURGE propCost  43323536000
## 5:              HAIL propCost  15735267513
# Grouped bar chart of the cost measures per event type:
#   - x axis: event types, reordered by descending value
#   - bars:   one per cost measure (fill = Damage), drawn side by side
#   - x tick labels rotated 45 degrees, chart title centred
g_2 <- ggplot(econ_consequences, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = Damage), stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Cost (dollars)",
    title = "Top 10 US Storm Events causing Economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
g_2