library(lubridate)
library(data.table)
library(dtplyr)
library(ggplot2)
library(grid)
library(knitr)
library(markdown)
library(R.utils)
library(quantmod) # used if going to look at time value of cost impacts
library(gridExtra)
### Download the raw storm data once and cache it locally as "storm.csv".
if (!file.exists("storm.csv")) {
  # Fetch and decompress the archive only when the extracted CSV is not
  # already on disk; this avoids re-downloading a large file for nothing.
  if (!file.exists("repdata-data-StormData.csv")) {
    download.file(
      "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
      "repdata-data-StormData.csv.bz2"
    )
    bunzip2("repdata-data-StormData.csv.bz2", overwrite = FALSE)
  }
  storm <- read.csv("repdata-data-StormData.csv", sep = ",")
  # write.csv() keeps its default of writing row names as a leading column.
  write.csv(storm, file = "storm.csv")
}
### The cache file's modification time is used as the download date, so the
### value is the same whether the file was just created or reused.
datedownload <- file.mtime("storm.csv")
### fill = TRUE pads rows with blank or missing fields; row.names = NULL forces
### plain row numbering. na.strings = "" means empty fields are read as NA.
data <- read.table(
  file = "storm.csv",
  header = TRUE,
  sep = ",",
  na.strings = "",
  fill = TRUE,
  row.names = NULL
)
### Keep only the columns needed for the health and economic impact analysis.
datasubset <- data[, c(
  "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
### Lookup table for the damage exponent codes: column 1 is the code as it
### appears in the data (upper-cased), column 2 is the power of ten it denotes.
### cbind() of a character and a numeric vector yields a character matrix.
### NOTE(review): "-" and "" map to exponent 1 (a factor of 10). If a plain
### multiplier of 1 was intended these should be 0 -- confirm. Also note that
### "" can never match here because the data is read with na.strings = "".
costs <- cbind(
  c("K", "M", "B", "0", "5", "6", "4", "2", "3", "H", "7", "1", "8", "-", ""),
  c(3, 6, 9, 0, 5, 6, 4, 2, 3, 2, 7, 1, 8, 1, 1)
)
exp_codes <- costs[, 1]
exp_power <- as.numeric(costs[, 2])
### Translate each row's own exponent code into its power of ten.
### match() looks every code up individually. The previous
### ifelse(code %in% codes, costs[, 2], "") recycled the 15 table values down
### the rows, so a row's exponent depended on its position, not on its code.
### Codes missing from the table (including NA) give NA, as before.
datasubset$PROPDMGEXP <- exp_power[
  match(toupper(as.character(datasubset$PROPDMGEXP)), exp_codes)
]
datasubset$CROPDMGEXP <- exp_power[
  match(toupper(as.character(datasubset$CROPDMGEXP)), exp_codes)
]
### Convert to single units - dollars
datasubset$PROPDMG <- datasubset$PROPDMG * 10^datasubset$PROPDMGEXP
datasubset$CROPDMG <- datasubset$CROPDMG * 10^datasubset$CROPDMGEXP
### Death plot: bars are ordered by descending fatality count by rebuilding
### EVTYPE as a factor whose levels follow that order.
deaths <- ggplot() +
  geom_bar(
    data = healthdatafat,
    aes(
      x = factor(EVTYPE, levels = EVTYPE[order(FATALITIES, decreasing = TRUE)]),
      y = FATALITIES,
      fill = interaction(FATALITIES, EVTYPE)
    ),
    stat = "identity",
    show.legend = FALSE
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Harmful Events") +
  ylab("No. of Fatalities") +
  ggtitle("Top 10 weather events causing Fatalities")
### Injury plot: same construction, ordered by descending injury count.
injuries <- ggplot() +
  geom_bar(
    data = healthdatainj,
    aes(
      x = factor(EVTYPE, levels = EVTYPE[order(INJURIES, decreasing = TRUE)]),
      y = INJURIES,
      fill = interaction(INJURIES, EVTYPE)
    ),
    stat = "identity",
    show.legend = FALSE
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Harmful Events") +
  ylab("No. of Injuries") +
  ggtitle("Top 10 weather events causing Injuries")
### Draw both plots side by side.
grid.arrange(deaths, injuries, ncol = 2)
### Fig #1 Deaths and Injuries Plots
##
## Total deaths are: 15145 , and the total injuries are: 140528 .
# Print the table behind the fatalities plot; the `##` lines that follow are
# the captured console output.
print(healthdatafat)
## EVTYPE FATALITIES
## 68 TORNADO 5227
## 10 EXCESSIVE HEAT 402
## 48 LIGHTNING 283
## 71 TSTM WIND 199
## 15 FLASH FLOOD 171
## 16 FLOOD 104
## 36 HIGH WIND 102
## 80 WINTER STORM 85
## 27 HEAT 73
## 78 WILDFIRE 55
# Print the table behind the injuries plot; the `##` lines that follow are
# the captured console output.
print(healthdatainj)
## EVTYPE INJURIES
## 68 TORNADO 60187
## 10 EXCESSIVE HEAT 4791
## 16 FLOOD 2679
## 43 ICE STORM 1720
## 27 HEAT 1420
## 41 HURRICANE/TYPHOON 1219
## 3 BLIZZARD 718
## 48 LIGHTNING 649
## 71 TSTM WIND 646
## 15 FLASH FLOOD 641
### Property damage plot: bars are ordered by descending damage by rebuilding
### EVTYPE as a factor whose levels follow that order.
costprop <- ggplot() +
  geom_bar(
    data = propdata,
    aes(
      x = factor(EVTYPE, levels = EVTYPE[order(PROPDMG, decreasing = TRUE)]),
      y = PROPDMG,
      fill = interaction(PROPDMG, EVTYPE)
    ),
    stat = "identity",
    show.legend = FALSE
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Costly Events") +
  ylab("Cost of Property Damage - Dollars($)") +
  ggtitle("Top 10 weather events causing Property Damage")
### Crop damage plot: same construction, ordered by descending crop damage.
costcrop <- ggplot() +
  geom_bar(
    data = cropdata,
    aes(
      x = factor(EVTYPE, levels = EVTYPE[order(CROPDMG, decreasing = TRUE)]),
      y = CROPDMG,
      fill = interaction(CROPDMG, EVTYPE)
    ),
    stat = "identity",
    show.legend = FALSE
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Costly Events") +
  ylab("Cost of Crop Damage - Dollars($)") +
  ggtitle("Top 10 weather events causing Crop Damage")
### Draw both plots side by side.
grid.arrange(costprop, costcrop, ncol = 2)
### Fig #2 Property & Crop Plots
##
## Total Prorperty damage is: 8.40926e+13 , and the total crop damage is: 7.344694e+13 for a total damage cost of 1.575395e+14 .
# Print the table behind the property damage plot; the `##` lines that follow
# are the captured console output.
print(propdata)
## EVTYPE PROPDMG
## 10 FLASH FLOOD 1.721257e+13
## 24 HAIL 1.593794e+13
## 14 FLOOD 1.205047e+13
## 72 TORNADO 9.175890e+12
## 79 TSTM WIND 8.253993e+12
## 63 THUNDERSTORM WINDS 5.626033e+12
## 62 THUNDERSTORM WIND 3.965385e+12
## 37 HIGH WIND 3.685047e+12
## 12 FLASH FLOODING 1.324823e+12
## 88 WILDFIRE 7.931438e+11
# Print the table behind the crop damage plot; the `##` lines that follow are
# the captured console output.
print(cropdata)
## EVTYPE CROPDMG
## 24 HAIL 2.679696e+13
## 10 FLASH FLOOD 1.070681e+13
## 14 FLOOD 9.528190e+12
## 72 TORNADO 7.741043e+12
## 79 TSTM WIND 5.578355e+12
## 62 THUNDERSTORM WIND 4.075668e+12
## 37 HIGH WIND 2.162796e+12
## 75 TROPICAL STORM 9.344577e+11
## 63 THUNDERSTORM WINDS 8.039181e+11
## 58 STORM SURGE/TIDE 7.501000e+11