library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Download and unzip the data file ----
# The archive is fetched only when it is not already present in the working
# directory, so re-running the analysis does not repeat the download.
# Delete the local file to force a fresh download.
fileurl <- 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
datafile <- 'repdata%2Fdata%2FStormData.csv.bz2'
if (!file.exists(datafile)) {
  # mode = "wb" requests a binary transfer so the compressed archive is
  # written byte-for-byte.
  download.file(fileurl, destfile = datafile, mode = "wb")
}
# read.csv() opens the path through file(), which reads bzip2-compressed
# files directly, so no separate decompression step is needed.
data <- read.csv(datafile)
# remove unimportant columns
Next, we decide which columns to retain for the analysis and drop all the others. The exponent codes attached to the damage figures are then converted from factors into numeric multipliers so that the damage costs can be calculated properly.
names(data) # list every column name, then select the ones that matter
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep the event type plus the casualty and damage columns, drop rows whose
# event type is '?', and keep only rows that report at least one fatality,
# injury, property-damage amount or crop-damage amount.
data <- data %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
  filter(
    EVTYPE != '?',
    FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
  )
# Convert exponent codes into numeric multipliers ----
# PROPDMGEXP and CROPDMGEXP hold a code for the magnitude of the matching
# PROPDMG / CROPDMG figure.
#
# toupper() folds lower-case codes into upper case and returns character
# vectors, so the lookups below match on the code itself (by name) and never
# on a factor's integer level.
data$PROPDMGEXP <- toupper(data$PROPDMGEXP)
data$CROPDMGEXP <- toupper(data$CROPDMGEXP)

# Code -> multiplier table. "?", "-" and "+" count as 1, a digit n as 10^n,
# and H / K / M / B as hundred / thousand / million / billion.
# There is deliberately no entry for the empty code: an empty string never
# matches a name in character subsetting, so empty (and any other unlisted)
# codes are handled by the fallback in exp_multiplier().
propDmgKey <- c(
  "?" = 10^0,
  "-" = 10^0,
  "+" = 10^0,
  setNames(10^(0:9), as.character(0:9)),
  "H" = 10^2,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9
)
# Crop damage shares the same table so that a given code means the same
# multiplier in both columns.
cropDmgKey <- propDmgKey

# Look up the multiplier for each code in `code` using the named numeric
# vector `key`. Codes without an entry (including empty and NA codes) get a
# multiplier of 1. Returns an unnamed numeric vector as long as `code`.
exp_multiplier <- function(code, key) {
  mult <- unname(key[code])
  mult[is.na(mult)] <- 1
  mult
}

# Transform the codes into numeric values
data$propdmgexp.n <- exp_multiplier(data$PROPDMGEXP, propDmgKey)
data$CROPDMGEXP.n <- exp_multiplier(data$CROPDMGEXP, cropDmgKey)

# Damage cost = reported figure x multiplier
data$propcost <- data$PROPDMG * data$propdmgexp.n
data$cropcost <- data$CROPDMG * data$CROPDMGEXP.n
# Total property, crop and combined damage cost per event type, sorted from
# the most to the least costly.
# tot_cost is computed in a separate mutate() from the two per-event totals,
# rather than by calling sum() on columns that summarise() has already
# replaced with their totals.
totalcost <- data %>%
  group_by(EVTYPE) %>%
  summarise(propcost = sum(propcost), cropcost = sum(cropcost)) %>%
  mutate(tot_cost = propcost + cropcost) %>%
  arrange(desc(tot_cost))
head(totalcost)
## # A tibble: 6 x 4
## EVTYPE propcost cropcost tot_cost
## <fct> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 5661968450 150319678257
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 56947380676. 414953270 57362333946.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15735267513. 3025954473 18761221986.
## 6 FLASH FLOOD 16822673978. 1421317100 18243991078.
# Keep the ten costliest event types. head() returns fewer rows when fewer
# than ten event types exist, whereas indexing rows 1:10 does not truncate.
top10dmg <- head(totalcost, 10)
# Fatalities, injuries and their combined count per event type, ranked by the
# number of fatalities.
# The combined count is added in a separate mutate() from the two per-event
# totals, rather than by calling sum() on columns that summarise() has
# already replaced with their totals.
totalinjuries <- data %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES), injuries = sum(INJURIES)) %>%
  mutate(total = injuries + fatalities) %>%
  arrange(desc(fatalities))
# Keep the ten deadliest event types. head() returns fewer rows when fewer
# than ten event types exist, whereas indexing rows 1:10 does not truncate.
top10injuries <- head(totalinjuries, 10)
# Reshape to long format: one row per event type and casualty measure, with
# the measure name in 'Casualty' and its count in 'value'.
top10injuries <- as.data.table(top10injuries)
top10injuries <- melt(top10injuries, id.vars = 'EVTYPE', variable.name = 'Casualty')
# Dodged bars: one bar per casualty measure for each event type.
ggplot(top10injuries, aes(x = EVTYPE, y = value)) +
  geom_bar(stat = 'identity', aes(fill = Casualty), position = 'dodge') +
  labs(x = 'Top 10 Most Dangerous', y = 'Number') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# 3.2: Events with the most economic damage
# Reshape to long format: one row per event type and damage measure, with the
# measure name in 'Type.of.dmg' and its amount in 'value'.
top10dmg <- melt(
  as.data.table(top10dmg),
  id.vars = 'EVTYPE',
  variable.name = 'Type.of.dmg'
)
# Dodged bars: one bar per damage measure for each event type.
ggplot(top10dmg, aes(x = EVTYPE, y = value, fill = Type.of.dmg)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  labs(x = 'Top 10 destroyers', y = 'Cost in $US') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Results write-up: As can be seen from the charts, the greatest economic damage comes from floods, followed by hurricanes, while the greatest causes of injury or death are tornadoes, followed by excessive heat. The vast majority of the economic damage caused by severe weather can also be seen to be damage to property rather than agricultural loss.