Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. In this study, I explore the NOAA Storm Database to answer the following two questions. 1. Across the United States, which types of events are most harmful with respect to population health? 2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

In this section, I describe (in words and code) how the data were loaded into R and processed for analysis.

Global Settings

First, let’s set up the environment for the R Markdown document.

# Document-wide knitr chunk defaults: figure size, figure path prefix,
# echo the code, and keep warnings and messages out of the rendered output.
knitr::opts_chunk$set(
    fig.width = 10,
    fig.height = 10,
    fig.path = 'Figs/',
    echo = TRUE,
    warning = FALSE,
    message = FALSE
)
library(knitr)
library(ggplot2)
library(gridExtra)

Load data

Note: the working directory must be set to the folder that contains the data file.

# Load the raw storm data from the working directory into `rawdata`.
#
# read.csv() is used instead of read.table(sep = ","): by default
# read.table() also treats ' as a quote character and # as a comment
# marker, which can corrupt parsing when free-text fields contain them.
# read.csv() defaults to quote = "\"" and comment.char = "", and reads the
# bzip2-compressed file directly.
storm_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_file)) {
    # Fail here with a clear message rather than later with
    # "object 'rawdata' not found".
    stop("Data file not found in the working directory: ", storm_file,
         call. = FALSE)
}
rawdata <- read.csv(storm_file, header = TRUE)

Events Study in terms of population health

Now, with the data loaded, let’s find out which weather events are most harmful with respect to the total number of fatalities and injuries. In this study, I list the top 10 events.

# Summary table: total number of fatalities and injuries for every event type
summaryData1 <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE,
                          data = rawdata, FUN = sum, na.rm = TRUE)

# Combined health impact: fatalities plus injuries
summaryData1$HEALTH <- summaryData1$FATALITIES + summaryData1$INJURIES

# Top 10 event types by each of the three measures, largest first.
# head() is used instead of [1:10, ] so that a table with fewer than 10
# event types is returned as-is rather than padded with all-NA rows.
byFat <- head(summaryData1[order(-summaryData1$FATALITIES), ], 10)
byInj <- head(summaryData1[order(-summaryData1$INJURIES), ], 10)
byHea <- head(summaryData1[order(-summaryData1$HEALTH), ], 10)

# Create the three horizontal bar charts (fatalities, injuries, combined)
# and stack them in one figure.
#
# All three panels share the same value-axis range so they can be compared
# directly. The range is applied through coord_flip(ylim = ...), which only
# zooms the view. The previous ylim() call set scale limits instead, so any
# bar exceeding the limit would have been dropped from the plot, and the
# accompanying warning is hidden by the document's warning = FALSE setting.
health_axis_max <- 100000

fat <- ggplot(byFat, aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)) +
    geom_bar(stat = "identity", fill = "steelblue4") +
    labs(title = "Top 10 Fatalities by Event Types",
         x = "Event Type", y = "Total Fatalities Number") +
    coord_flip(ylim = c(0, health_axis_max))

inj <- ggplot(byInj, aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)) +
    geom_bar(stat = "identity", fill = "tomato1") +
    labs(title = "Top 10 Injuries by Event Types",
         x = "Event Type", y = "Total Injuries Number") +
    coord_flip(ylim = c(0, health_axis_max))

hea <- ggplot(byHea, aes(x = reorder(EVTYPE, HEALTH), y = HEALTH)) +
    geom_bar(stat = "identity", fill = "grey50") +
    labs(title = "Top 10 Fatalities and Injuries by Event Types",
         x = "Event Type", y = "Total Fatalities and Injuries Number") +
    coord_flip(ylim = c(0, health_axis_max))

grid.arrange(fat, inj, hea, ncol = 1, nrow = 3)

Events Study in terms of economic consequence

Next, let’s find out which weather events are most harmful with respect to property damage and crop damage. In this study, I list the top 10 events.

According to the instructions, the alphabetical characters used to signify magnitude are “K” for thousands, “M” for millions, and “B” for billions.

# Convert the estimates in columns "PROPDMG" and "CROPDMG" into absolute
# numbers by applying the magnitude code stored in the matching *EXP column.

# Map a vector of magnitude codes to numeric multipliers.
#   exp_code: character or factor vector of codes.
#   returns:  numeric vector of the same length: 1e3 for K, 1e6 for M,
#             1e9 for B, and 1 for anything else (blank, NA, other symbols).
# Matching is case-insensitive, so lowercase variants such as "k" or "m"
# are scaled too instead of silently being counted at face value.
damage_multiplier <- function(exp_code) {
    scale_by_code <- c(K = 1e3, M = 1e6, B = 1e9)
    # as.character() matters for factors: indexing a named vector with a
    # factor would use its integer codes rather than its labels.
    multiplier <- unname(scale_by_code[toupper(as.character(exp_code))])
    # Unrecognised or missing codes leave the estimate unscaled.
    multiplier[is.na(multiplier)] <- 1
    multiplier
}

propDamgeMag <- damage_multiplier(rawdata$PROPDMGEXP)
rawdata$REAL_PROPDMG <- rawdata$PROPDMG * propDamgeMag

cropDamgeMag <- damage_multiplier(rawdata$CROPDMGEXP)
rawdata$REAL_CROPDMG <- rawdata$CROPDMG * cropDamgeMag

# Summary table: total estimated property and crop damage per event type
summaryData2 <- aggregate(cbind(REAL_PROPDMG, REAL_CROPDMG) ~ EVTYPE,
                          data = rawdata, FUN = sum, na.rm = TRUE)

# Combined economic impact: property damage plus crop damage
summaryData2$ECOM <- summaryData2$REAL_PROPDMG + summaryData2$REAL_CROPDMG

# Top 10 event types by each of the three measures, largest first.
# head() is used instead of [1:10, ] so that a table with fewer than 10
# event types is returned as-is rather than padded with all-NA rows.
byProd <- head(summaryData2[order(-summaryData2$REAL_PROPDMG), ], 10)
byCrop <- head(summaryData2[order(-summaryData2$REAL_CROPDMG), ], 10)
byEcom <- head(summaryData2[order(-summaryData2$ECOM), ], 10)

# Create the three horizontal bar charts (property, crop, combined damage)
# and stack them in one figure.
#
# All three panels share the same value-axis range so they can be compared
# directly. The range is applied through coord_flip(ylim = ...), which only
# zooms the view. The previous ylim() call set scale limits instead, so any
# bar exceeding the limit would have been dropped from the plot, and the
# accompanying warning is hidden by the document's warning = FALSE setting.
damage_axis_max <- 151000000000

prod <- ggplot(byProd, aes(x = reorder(EVTYPE, REAL_PROPDMG), y = REAL_PROPDMG)) +
    geom_bar(stat = "identity", fill = "steelblue4") +
    labs(title = "Top 10 Property Damage by Event Types",
         x = "Event Type", y = "Total Property Damage") +
    coord_flip(ylim = c(0, damage_axis_max))

crop <- ggplot(byCrop, aes(x = reorder(EVTYPE, REAL_CROPDMG), y = REAL_CROPDMG)) +
    geom_bar(stat = "identity", fill = "tomato1") +
    labs(title = "Top 10 Crop Damage by Event Types",
         x = "Event Type", y = "Total Crop Damage") +
    coord_flip(ylim = c(0, damage_axis_max))

ecom <- ggplot(byEcom, aes(x = reorder(EVTYPE, ECOM), y = ECOM)) +
    geom_bar(stat = "identity", fill = "grey50") +
    labs(title = "Top 10 Property and Crop Damage by Event Types",
         x = "Event Type", y = "Total Property and Crop Damage") +
    coord_flip(ylim = c(0, damage_axis_max))

grid.arrange(prod, crop, ecom, ncol = 1, nrow = 3)

Results