In this report we explore the natural disasters that have occurred in the US and identify which have been the most catastrophic. We measure how catastrophic a disaster is by its number of casualties (injuries plus fatalities) and by the total cost of the damage it caused. The data set records natural disasters that have happened in the US; for each event it gives, among other fields, when the event started and ended, the injuries and fatalities it caused, and the property and crop damage. For this report we only need the event type, injuries, fatalities, crop damage, crop damage exponent, property damage, and property damage exponent.
# NOTE: downloading and parsing the raw file takes a long time.
# Run this script from the directory where the data file should live. The
# script deliberately does not call setwd(), so it is not tied to the folder
# layout of one particular machine.

# Download the compressed storm data once and reuse the local copy afterwards.
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data_file <- "storm_data.csv.bz2"
if (!file.exists(data_file)) {
  # mode = "wb" requests a binary transfer so the archive is stored unchanged.
  download.file(fileURL, destfile = data_file, mode = "wb")
}

# Read the data straight from the bzip2 archive.
storm_data <- read.csv(bzfile(data_file))

# Columns the rest of the analysis reads. The full table is kept (rather than
# subset) so the inspection below still shows every column; here we only make
# sure the required ones are present and fail early if they are not.
needed_columns <- c(
  "EVTYPE", "CROPDMG", "CROPDMGEXP", "PROPDMG", "PROPDMGEXP",
  "INJURIES", "FATALITIES"
)
stopifnot(all(needed_columns %in% names(storm_data)))

dim(storm_data)
## [1] 902297 37
head(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Casualties per event type: fatalities and injuries are added row by row and
# the result is summed within each EVTYPE group.
total_injuries <- with(
  storm_data,
  tapply(FATALITIES + INJURIES, EVTYPE, sum)
)
# Calculate the total economic consequences.
#
# PROPDMG / CROPDMG hold a magnitude and PROPDMGEXP / CROPDMGEXP hold a code
# for the power of ten it is multiplied by. This section turns the codes into
# numeric powers and adds three dollar-amount columns to storm_data:
# PROPDAMAGE, CROPDAMAGE and TOTALDAMAGE.

# Translate exponent codes into numeric powers of ten.
#   "?", "+", "-"   -> 0
#   "h" / "H"       -> 2
#   "k" / "K"       -> 3
#   "m" / "M"       -> 6
#   "b" / "B"       -> 9
#   digits          -> used as the power itself
# Any other code, including a blank one, has no numeric form and becomes NA.
# The same table is applied to both columns so they are handled consistently.
exponent_to_power <- function(codes) {
  codes <- as.character(codes)
  codes <- gsub("[?+-]", "0", codes)
  codes <- gsub("[hH]", "2", codes)
  codes <- gsub("[kK]", "3", codes)
  codes <- gsub("[mM]", "6", codes)
  codes <- gsub("[bB]", "9", codes)
  as.numeric(codes)
}

# Dollar amount = magnitude * 10^power.
# An amount that cannot be computed (for example because the exponent code was
# blank) is recorded as 0. TOTALDAMAGE already counted such rows as 0 through
# na.rm, so the per-kind columns now agree with it, and a single NA can no
# longer turn a whole per-event sum into NA.
damage_in_dollars <- function(magnitude, power) {
  dollars <- magnitude * 10^power
  dollars[is.na(dollars)] <- 0
  dollars
}

# Replace the exponent codes with their numeric powers.
storm_data$PROPDMGEXP <- exponent_to_power(storm_data$PROPDMGEXP)
storm_data$CROPDMGEXP <- exponent_to_power(storm_data$CROPDMGEXP)

# Create the new damage columns.
storm_data$PROPDAMAGE <- damage_in_dollars(
  storm_data$PROPDMG, storm_data$PROPDMGEXP
)
storm_data$CROPDAMAGE <- damage_in_dollars(
  storm_data$CROPDMG, storm_data$CROPDMGEXP
)
storm_data$TOTALDAMAGE <- rowSums(
  storm_data[, c("PROPDAMAGE", "CROPDAMAGE")],
  na.rm = TRUE
)
# Find the worst events for each kind of damage by summing the damage columns
# within each event type.
# na.rm = TRUE is forwarded to sum(): a row whose damage is NA is skipped
# instead of turning that event type's whole total into NA. An NA total would
# later be dropped by sort(), silently removing the event type from the
# rankings.
property_damage <- tapply(
  storm_data$PROPDAMAGE, storm_data$EVTYPE, sum, na.rm = TRUE
)
crop_damage <- tapply(
  storm_data$CROPDAMAGE, storm_data$EVTYPE, sum, na.rm = TRUE
)
total_damage <- tapply(
  storm_data$TOTALDAMAGE, storm_data$EVTYPE, sum, na.rm = TRUE
)
# Most casualties: sort() orders the totals ascending and tail() keeps the
# last six, i.e. the six event types with the highest counts.
barplot(
  tail(sort(total_injuries), n = 6L),
  main = "Total Injuries by Event Type",
  xlab = "Event Type",
  ylab = "Total Injuries (Fatalities + Injuries)",
  cex.names = 0.6
)
# Highest economic consequence: three stacked panels (property, crop, total),
# each showing the six event types with the largest summed damage.
# par() returns the previous settings, which are kept so the layout can be
# restored once the figure is complete.
old_par <- par(mfrow = c(3, 1))
barplot(
  tail(sort(property_damage)),
  main = "Property Damage", ylab = "Dollars", xlab = "Event Type",
  cex.names = 0.6
)
barplot(
  tail(sort(crop_damage)),
  main = "Crop Damage", ylab = "Dollars", xlab = "Event Type",
  cex.names = 0.6
)
barplot(
  tail(sort(total_damage)),
  main = "Total Damage", ylab = "Dollars", xlab = "Event Type",
  cex.names = 0.6
)
# Overall heading for the figure, drawn relative to the outer margin.
title("Economic Consequences", outer = TRUE, line = -1)
# Put the layout back so later plots are not forced into the 3-row grid.
par(old_par)