Synopsis:

Data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database was analyzed to understand the impact of different weather events on human health and their economic consequences.

Excessive heat and tornadoes cause the most fatalities. However, tornadoes cause significantly more injuries than any other event type. Flooding causes the highest property and crop damage (in USD).

Data Processing

#load libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read and pre-process the data
# Download the compressed storm data only once; later runs reuse the local copy.
Url_data <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
File_data <- "StormData.csv.bz2"
if (!file.exists(File_data)) {
    download.file(Url_data, File_data, mode = "wb")
}

rawdata <- read.csv(file = File_data, header = TRUE, sep = ",")

# Parse the leading "month/day/year" part of BGN_DATE into a Date. A Date
# column (rather than the POSIXlt that strptime() returns) is safe to store in
# a data frame and makes the comparison independent of the local time zone.
# Any trailing time-of-day text is ignored by the format.
rawdata$BGN_DATE <- as.Date(rawdata$BGN_DATE, format = "%m/%d/%Y")

# Keep only events that began after 1995-12-31, i.e. from 1996 onwards.
maindata <- subset(rawdata, BGN_DATE > as.Date("1995-12-31"))
rm(rawdata)

# Keep only the columns needed for the health and economic analyses.
maindata <- subset(maindata, select = c(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP))

# Upper-case the event type so that spellings differing only in case are
# aggregated together.
maindata$EVTYPE <- toupper(maindata$EVTYPE)

# Drop records that report no fatalities, no injuries and no damage at all.
maindata <- maindata[maindata$FATALITIES != 0 |
                         maindata$INJURIES != 0 |
                         maindata$PROPDMG != 0 |
                         maindata$CROPDMG != 0, ]

Results

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

To understand this, we will separately look at injuries and fatalities for each event type.

# Sum fatalities and injuries by event type
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = maindata, sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = maindata, sum)

# Sort by descending total (ties broken alphabetically by event type) and keep
# the ten worst event types. head() is used instead of [1:10, ] so that no
# all-NA rows are produced if fewer than ten event types are present.
fatalities <- head(arrange(fatalities, desc(FATALITIES), EVTYPE), 10)
injuries <- head(arrange(injuries, desc(INJURIES), EVTYPE), 10)

#fatalities
#injuries

Plot showing the number of fatalities caused by events:

# Fatalities per event type. EVTYPE is a character column, so without
# reorder() the bars would be drawn alphabetically; reorder() sorts them from
# the highest to the lowest fatality count so the ranking is visible.
ggplot(fatalities, aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
    geom_bar(stat = "identity", fill = "lightblue") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Event Type") + ylab("Fatalities")

Plot showing the number of injuries caused by events:

# Injuries per event type. EVTYPE is a character column, so without reorder()
# the bars would be drawn alphabetically; reorder() sorts them from the
# highest to the lowest injury count so the ranking is visible.
ggplot(injuries, aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
    geom_bar(stat = "identity", fill = "red3") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Event Type") + ylab("Injuries")

Question 2: Across the United States, which types of events have the greatest economic consequences?

# Process data to understand economic consequences

# Convert the damage exponent codes of a PROPDMGEXP / CROPDMGEXP column into
# numeric powers of ten.
#   H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9, "+" -> 1, "?", "-" and " " -> 0
# Digits already present are kept as they are. Anything that still cannot be
# read as a number (for example an empty string) becomes 0, i.e. a
# multiplier of 1.
exponent_to_power <- function(code) {
    replacements <- c("[Hh]" = "2",
                      "[Kk]" = "3",
                      "[Mm]" = "6",
                      "[Bb]" = "9",
                      "\\+" = "1",
                      "\\?|\\-|\\ " = "0")
    for (pattern in names(replacements)) {
        code <- gsub(pattern, replacements[[pattern]], code)
    }
    power <- as.numeric(code)
    power[is.na(power)] <- 0
    power
}

maindata$PROPDMGEXP <- exponent_to_power(maindata$PROPDMGEXP)
maindata$CROPDMGEXP <- exponent_to_power(maindata$CROPDMGEXP)

# Damage in USD = reported mantissa * 10 ^ exponent
maindata <- mutate(maindata,
                   PROPDMGTOTAL = PROPDMG * (10 ^ PROPDMGEXP),
                   CROPDMGTOTAL = CROPDMG * (10 ^ CROPDMGEXP))

# Sum property and crop damage per event type, add them up to a total economic
# loss and rank the event types by that total.
Economic_data <- aggregate(cbind(PROPDMGTOTAL, CROPDMGTOTAL) ~ EVTYPE, data = maindata, FUN = sum)
Economic_data$ECONOMIC_LOSS <- Economic_data$PROPDMGTOTAL + Economic_data$CROPDMGTOTAL
Economic_data <- Economic_data[order(Economic_data$ECONOMIC_LOSS, decreasing = TRUE), ]

# Ten costliest event types; columns are selected by name rather than position
# and head() avoids all-NA rows if fewer than ten event types are present.
worsteconomicevents <- head(Economic_data[, c("EVTYPE", "ECONOMIC_LOSS")], 10)
worsteconomicevents
##                EVTYPE ECONOMIC_LOSS
## 48              FLOOD  148919611950
## 88  HURRICANE/TYPHOON   71913712800
## 141       STORM SURGE   43193541000
## 149           TORNADO   24900370720
## 66               HAIL   17071172870
## 46        FLASH FLOOD   16557105610
## 86          HURRICANE   14554229010
## 32            DROUGHT   14413667000
## 152    TROPICAL STORM    8320186550
## 83          HIGH WIND    5881421660
# Loss per event type. EVTYPE is a character column, so without reorder() the
# bars would be drawn alphabetically; reorder() sorts them from the highest to
# the lowest total loss so the ranking is visible.
ggplot(worsteconomicevents, aes(x = reorder(EVTYPE, -ECONOMIC_LOSS), y = ECONOMIC_LOSS)) +
    geom_bar(stat = "identity", fill = "magenta") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Event Type") + ylab("Total Prop & Crop Damages (USD)") +
    ggtitle("Total economic loss in the US in the period 1996 - 2011 by weather event")