The dataset “repdata_data_StormData.csv” has been loaded and cleaned in R throughout the steps that follow.
The results of the analysis show that the weather events most harmful to population health are: 1. Tornado 2. Wind 3. Heat
Furthermore, the most destructive weather events in terms of economic damage are: 1. Flood 2. Multiple Event 3. Tornado
Load the dataset into R and view basic data information
# Read the bzip2-compressed storm CSV into a data frame.
StormData <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Report how many rows and columns were loaded.
dim(StormData)
## [1] 902297 37
# List the column names of the raw data frame.
names(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
The same event often appears under several different names. Clean the data so that each event has a single, unified name.
# Normalise the free-text EVTYPE labels into a small set of unified categories.
StormData$EVTYPE <- as.character(StormData$EVTYPE)

# Ordered relabelling rules: each entry is c(<regex>, <unified label>).
# Patterns are matched case-insensitively. Rules run top to bottom against the
# progressively relabelled column, so their order is significant (for example
# "THUNDERSTORM WIND" is relabelled "Wind" before the "thunder" rule is tried).
# The word "and" is anchored with \b so that only the standalone word marks a
# combined event; names such as "LANDSLIDE" fall through to the "slide" rule.
event_rules <- list(
  c("/|&|\\band\\b",                   "Multiple Event"),
  c("volc",                            "Volcano"),
  c("wind|wnd",                        "Wind"),
  c("funnel|tornado",                  "Tornado"),
  c("glaze",                           "Glaze"),
  c("hail",                            "Hail"),
  c("dust",                            "Dust"),
  c("flood",                           "Flood"),
  c("ic(e|y)",                         "Ice"),
  c("fire|smoke",                      "Fire"),
  c("thunder",                         "Thunder Storm"),
  c("slide|eros",                      "Erosion"),
  c("rain",                            "Rain"),
  c("freez|cold|snow|chill|winter",    "Cold Weather"),
  c("TROPICAL.STORM",                  "Tropical Storm"),
  c("heat",                            "Heat"),
  c("(hurri|opal)",                    "Hurricane")
)

for (rule in event_rules) {
  matched <- grepl(rule[1], StormData$EVTYPE, ignore.case = TRUE)
  StormData$EVTYPE[matched] <- rule[2]
}
Create two unique sub-datasets for health and economy
# Health subset: event type plus the fatality and injury counts.
StormData_Health <- StormData[, c("EVTYPE", "FATALITIES", "INJURIES")]
# Economy subset: event type plus property/crop damage and their exponent codes.
StormData_Economy <- StormData[
  , c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]
# Confirm which columns ended up in the health subset.
names(StormData_Health)
## [1] "EVTYPE" "FATALITIES" "INJURIES"
# Confirm which columns ended up in the economy subset.
names(StormData_Economy)
## [1] "EVTYPE" "PROPDMG" "PROPDMGEXP" "CROPDMG" "CROPDMGEXP"
Aggregate the health data by event type and keep the ten most harmful events
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total fatalities and injuries per event type, then sort so the events with
# the largest combined casualty count come first.
StormData_Health_Aggre <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = StormData_Health,
  FUN = sum,
  na.rm = TRUE
) %>%
  arrange(desc(FATALITIES + INJURIES))

# Keep the first ten rows of the sorted table.
StormData_Health_Aggre_top <- StormData_Health_Aggre[seq_len(10), ]
Clean economy data for analysis
# Convert a damage figure and its exponent code into a dollar amount.
#
# amount:   numeric vector of damage figures (PROPDMG / CROPDMG).
# exp_code: vector of exponent codes; "H", "K", "M" and "B" (either case) mean
#           hundreds, thousands, millions and billions.
# Returns a numeric vector the same length as `amount`: 0 where the amount is
# 0, amount * multiplier where the code is recognised, and NA otherwise.
damage_in_dollars <- function(amount, exp_code) {
  multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Unrecognised or empty codes are not names of `multiplier`, so they yield NA.
  dollars <- amount * unname(multiplier[toupper(as.character(exp_code))])
  # A zero amount is zero dollars whatever its exponent code says.
  dollars[amount == 0] <- 0
  dollars
}

StormData_Economy$PROPDMGCALC <- damage_in_dollars(
  StormData_Economy$PROPDMG, StormData_Economy$PROPDMGEXP
)
StormData_Economy$CROPDMGCALC <- damage_in_dollars(
  StormData_Economy$CROPDMG, StormData_Economy$CROPDMGEXP
)

# Total property and crop damage per event type. na.action = na.pass keeps rows
# where only one of the two damage columns is NA; the formula method's default
# (na.omit) would drop such rows entirely and lose the other column's valid
# value. na.rm = TRUE then skips the NA entries within each column's sum.
StormData_Economy_Aggre <- aggregate(
  cbind(PROPDMGCALC, CROPDMGCALC) ~ EVTYPE,
  data = StormData_Economy,
  FUN = sum,
  na.rm = TRUE,
  na.action = na.pass
)

# Sort by combined damage, largest first, and keep at most the top ten.
StormData_Economy_Aggre <- arrange(
  StormData_Economy_Aggre,
  desc(PROPDMGCALC + CROPDMGCALC)
)
StormData_Economy_Aggre_top <- head(StormData_Economy_Aggre, 10)
Visualize results for health data
Based on fatalities and injuries, tornadoes do the most health damage, followed by wind, heat, floods, lightning, and cold weather.
# Show the six event types with the highest combined casualties.
head(StormData_Health_Aggre_top, n = 6L)
## EVTYPE FATALITIES INJURIES
## 1 Tornado 5633 91368
## 2 Wind 1207 11299
## 3 Heat 3119 9224
## 4 Flood 1486 8582
## 5 LIGHTNING 816 5230
## 6 Cold Weather 605 3205
library(ggplot2)
# Scatter plot of combined fatalities and injuries for the top event types.
# Built with ggplot() + geom_point() because qplot() is deprecated in
# ggplot2 >= 3.4.0.
ggplot(
  StormData_Health_Aggre_top,
  aes(x = EVTYPE, y = FATALITIES + INJURIES)
) +
  geom_point() +
  ggtitle("Impact of Weather Events on Health Damage")
Visualize results for economic data
Based on property and crop damage, floods do the most economic damage, followed by multiple events, tornadoes, storm surges, wind, and hail.
# Show the six event types with the highest combined economic damage.
head(StormData_Economy_Aggre_top, n = 6L)
## EVTYPE PROPDMGCALC CROPDMGCALC
## 1 Flood 167004467270 12170542100
## 2 Multiple Event 80750771250 4202571910
## 3 Tornado 56942011330 364958360
## 4 STORM SURGE 43323536000 5000
## 5 Wind 17456763670 1963516550
## 6 Hail 15974043220 3021882450
# Scatter plot of combined property and crop damage for the top event types.
# Built with ggplot() + geom_point() because qplot() is deprecated in
# ggplot2 >= 3.4.0.
ggplot(
  StormData_Economy_Aggre_top,
  aes(x = EVTYPE, y = PROPDMGCALC + CROPDMGCALC)
) +
  geom_point() +
  ggtitle("Impact of Weather Events on Economic Damage")
Tornadoes cause the most health damage, while floods cause the most economic damage. Tornadoes are also in the top 6 for economic damage, and floods are in the top 6 for health damage.