Synopsis

Using the NOAA Storm Database for the US, this analysis examines the human and economic costs of severe weather events.

Data Processing

The data comes from the following URL, as a bzip2-compressed CSV file that is read directly with read.csv: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

library(ggplot2)
# Fetch the compressed NOAA storm data if it is not already present, then
# load it. The archive is read as-is; it is never unpacked on disk.
storm_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_file)) {
  storm_url <- paste0(
    "https://d396qusza40orc.cloudfront.net/",
    "repdata%2Fdata%2FStormData.csv.bz2"
  )
  # mode = "wb" keeps the binary archive intact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
storm <- read.csv(storm_file)

Which types of events are most harmful to population health?

The plots below show that tornadoes are the most harmful events, causing the most injuries and deaths.

# Sum `values` within each event type and return the totals sorted from the
# largest to the smallest. The result has two columns: EVTYPE and x (the sum).
sum_by_event <- function(values, event_type) {
  totals <- aggregate(values, by = list(EVTYPE = event_type), FUN = sum)
  ranking <- order(totals$x, decreasing = TRUE)
  totals[ranking, ]
}

# Health impact per event type, worst first.
injuries <- sum_by_event(storm$INJURIES, storm$EVTYPE)
fatalities <- sum_by_event(storm$FATALITIES, storm$EVTYPE)

Results

# Bar chart of the first eight rows of `injuries` (event type vs. total).
ggplot(data = injuries[seq_len(8), ], mapping = aes(x = EVTYPE, y = x)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(
    x = "Event",
    y = "Number of injuries",
    title = "Injuries from different events"
  ) +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Bar chart of the first eight rows of `fatalities` (event type vs. total).
ggplot(data = fatalities[seq_len(8), ], mapping = aes(x = EVTYPE, y = x)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(
    x = "Event",
    y = "Number of fatalities",
    title = "Fatalities from different events"
  ) +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Which types of events have the greatest economic consequences?

The plot below shows that floods have the greatest economic consequences.

# Estimate the total (property + crop) damage per event type.
# Each damage figure is stored as an amount plus an exponent code, and the
# value used here is amount * 10^exponent.
economic_data <- storm[
  ,
  c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]

# Convert damage-exponent codes into powers of ten.
#
# codes: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length. Digits "0"-"9" map to
# themselves and H/K/M/B (any case) map to 2/3/6/9. Everything else -- blank,
# "+", "-", "?", NA or an unrecognised code -- maps to 0, i.e. a multiplier
# of 1, so a stray code can never turn a per-event sum into NA.
exponent_power <- function(codes) {
  known_powers <- c(setNames(0:9, 0:9), H = 2, K = 3, M = 6, B = 9)
  # Lookup by name yields NA for anything that is not a known code.
  powers <- unname(known_powers[toupper(as.character(codes))])
  powers[is.na(powers)] <- 0
  powers
}

# Missing damage amounts contribute nothing to the totals.
economic_data$PROPDMG[is.na(economic_data$PROPDMG)] <- 0
economic_data$CROPDMG[is.na(economic_data$CROPDMG)] <- 0

# Replace the exponent codes with their numeric powers of ten.
economic_data$PROPDMGEXP <- exponent_power(economic_data$PROPDMGEXP)
economic_data$CROPDMGEXP <- exponent_power(economic_data$CROPDMGEXP)

# Sum damage total cost
economic_data$TOTALDMG <- with(
  economic_data,
  PROPDMG * 10^PROPDMGEXP + CROPDMG * 10^CROPDMGEXP
)

# Total damage per event type (columns EVTYPE and x), most costly first.
damage <- aggregate(
  economic_data$TOTALDMG,
  by = list(EVTYPE = economic_data$EVTYPE),
  FUN = sum
)
damage <- damage[order(damage$x, decreasing = TRUE), ]

# head() returns fewer rows instead of padding with NA rows when there are
# fewer than eight event types.
DamageSubset <- head(damage, 8)

Results

# Bar chart of total damage for the event types in `DamageSubset`.
ggplot(DamageSubset, aes(x = EVTYPE, y = x)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  xlab("Event Type") +
  # The original label had an unbalanced parenthesis: "Damage (in Dollars ($)".
  ylab("Damage (in dollars, $)") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Economic cost per event type")