Synopsis

In this report we explore the different natural disasters that have occurred in the US and determine which have been the most catastrophic. We measure how catastrophic a disaster is by the number of casualties (fatalities plus injuries) and by the total cost of the damage it caused. The data set records, for each event, when it started and ended, the injuries and fatalities, and the property and crop damage. For this report we only need the event type, injuries, fatalities, crop damage, crop damage exponent, property damage, and property damage exponent.

Processing and Reading the Data

# NOTE: downloading and parsing the full data set takes a long time.
# Files are read from and written to the current working directory. Set it
# outside this script (for example by opening the project folder) instead of
# hard-coding a machine-specific path here.

# Download the compressed data set only if it is not already present
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data_file <- "storm_data.csv.bz2"
if (!file.exists(data_file)) {
  # mode = "wb": the archive is binary and must not be transferred as text
  download.file(fileURL, destfile = data_file, mode = "wb")
}

# Read the data; bzfile() decompresses the archive while reading.
# All columns are kept in storm_data. The analysis below only uses EVTYPE,
# CROPDMG, CROPDMGEXP, PROPDMG, PROPDMGEXP, INJURIES and FATALITIES.
storm_data <- read.csv(bzfile(data_file))
dim(storm_data)
## [1] 902297     37
head(storm_data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6
# Total casualties (fatalities + injuries) for each event type
total_injuries <- tapply(storm_data$FATALITIES + storm_data$INJURIES, storm_data$EVTYPE, sum)

# ---- Economic consequences ----

# Convert a damage-exponent code into a numeric power of ten.
#   code: vector of exponent codes (character or factor), e.g. "K", "m", "5".
# Returns a numeric vector of the same length:
#   "?", "+", "-"      -> 0
#   "H"/"K"/"M"/"B"    -> 2 / 3 / 6 / 9 (case-insensitive)
#   digit strings      -> the digit itself
#   blank codes        -> NA (no exponent recorded)
exponent_to_power <- function(code) {
  code <- toupper(as.character(code))
  code[code %in% c("?", "+", "-")] <- "0"
  letter_powers <- c(H = "2", K = "3", M = "6", B = "9")
  is_letter <- code %in% names(letter_powers)
  code[is_letter] <- unname(letter_powers[code[is_letter]])
  as.numeric(code)
}

storm_data$PROPDMGEXP <- exponent_to_power(storm_data$PROPDMGEXP)
storm_data$CROPDMGEXP <- exponent_to_power(storm_data$CROPDMGEXP)

# Damage in dollars. Rows without a recorded exponent yield NA here.
storm_data$PROPDAMAGE <- storm_data$PROPDMG * 10^storm_data$PROPDMGEXP
storm_data$CROPDAMAGE <- storm_data$CROPDMG * 10^storm_data$CROPDMGEXP
storm_data$TOTALDAMAGE <- rowSums(storm_data[, c("PROPDAMAGE", "CROPDAMAGE")], na.rm = TRUE)

# Worst events by kind of damage.
# na.rm = TRUE is required: without it a single NA row turns the whole
# event-type total into NA, and sort() later drops that event type from the
# ranking. Ignoring NA also matches how TOTALDAMAGE is computed above.
property_damage <- tapply(storm_data$PROPDAMAGE, storm_data$EVTYPE, sum, na.rm = TRUE)
crop_damage <- tapply(storm_data$CROPDAMAGE, storm_data$EVTYPE, sum, na.rm = TRUE)
total_damage <- tapply(storm_data$TOTALDAMAGE, storm_data$EVTYPE, sum)

Results

Injuries

# Six event types with the highest combined fatalities and injuries
# (sort() is ascending, so tail() picks the largest totals)
barplot(
  tail(sort(total_injuries), n = 6L),
  cex.names = 0.6,
  xlab = "Event Type",
  ylab = "Total Injuries (Fatalities + Injuries)",
  main = "Total Injuries by Event Type"
)

This graph shows the six event types that caused the most harm to population health, measured as fatalities plus injuries.

Economic

# Highest economic consequence: three stacked panels in a single figure.
# par() returns the previous settings, which are restored afterwards so that
# later plots are not forced into the 3 x 1 layout.
old_par <- par(mfrow = c(3, 1))
barplot(tail(sort(property_damage)), main = "Property Damage", ylab = "Dollars", xlab = "Event Type", cex.names = 0.6)
barplot(tail(sort(crop_damage)), main = "Crop Damage", ylab = "Dollars", xlab = "Event Type", cex.names = 0.6)
barplot(tail(sort(total_damage)), main = "Total Damage", ylab = "Dollars", xlab = "Event Type", cex.names = 0.6)
# Overall figure title, drawn in the outer margin across all panels
title("Economic Consequences", outer = TRUE, line = -1)
par(old_par)

These three graphs show the worst disasters economically in three categories: property damage, crop damage, and total damage (property + crop damage).

Note: the three graphs above form a single multi-panel figure, created with `par(mfrow = c(3,1))` in the code.