Questions to answer

  1. Across the United States, which types of events are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

SYNOPSIS

The dataset “repdata_data_StormData.csv.bz2” is loaded and cleaned in R in the steps that follow.

The results of the analysis show that the weather events most harmful to population health are: 1. Tornado 2. Wind 3. Heat

Furthermore, the most destructive weather events in terms of economic damage are: 1. Flood 2. Multiple Event 3. Tornado

DATA PROCESSING

loading raw data

Load the dataset into R and view basic data information

# Read the bzip2-compressed CSV into a data frame.
StormData <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Number of rows and columns.
dim(StormData)
## [1] 902297     37
# Column names, used below to pick the fields needed for each question.
names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Cleaning data

The same event is recorded under multiple names. Clean the data so that each event has a single, unified name.

# Collapse the many spellings and variants found in EVTYPE into a small set of
# unified event names.
StormData$EVTYPE <- as.character(StormData$EVTYPE)

# Ordered lookup: regular expression (matched case-insensitively) -> label.
# Rules run top to bottom and every rule sees the labels written by the rules
# before it, so the order is significant and must not be shuffled.
#
# NOTE(review): the first pattern matches the letters "and" anywhere in a
# name, so a value such as "LANDSLIDE" would be labelled "Multiple Event"
# before the "slide|eros" rule is reached. Left as is so that the totals
# already reported stay valid; consider "\\band\\b" when the report is re-run.
evtype_rules <- c(
  "/|&|and"                       = "Multiple Event",
  "volc"                          = "Volcano",
  "wind|wnd"                      = "Wind",
  "funnel|tornado"                = "Tornado",
  "glaze"                         = "Glaze",
  "hail"                          = "Hail",
  "dust"                          = "Dust",
  "flood"                         = "Flood",
  "ic(e|y)"                       = "Ice",
  "fire|smoke"                    = "Fire",
  "thunder"                       = "Thunder Storm",
  "slide|eros"                    = "Erosion",
  "rain"                          = "Rain",
  "freez|cold|snow|chill|winter"  = "Cold Weather",
  # Label was misspelled "Tropical Store" in the original.
  "TROPICAL.STORM"                = "Tropical Storm",
  "heat"                          = "Heat",
  "(hurri|opal)"                  = "Hurricane"
)

for (pattern in names(evtype_rules)) {
  hit <- grepl(pattern, StormData$EVTYPE, ignore.case = TRUE)
  StormData$EVTYPE[hit] <- evtype_rules[[pattern]]
}

Isolate Needed Data

Create two unique sub-datasets for health and economy

# Health subset: event type plus the fatality and injury counts.
StormData_Health <- StormData[, c("EVTYPE", "FATALITIES", "INJURIES")]
# Economy subset: event type plus property/crop damage amounts and the
# PROPDMGEXP/CROPDMGEXP codes that scale them.
StormData_Economy <- StormData[, c("EVTYPE", "PROPDMG", "PROPDMGEXP",
                                   "CROPDMG", "CROPDMGEXP")]
names(StormData_Health)
## [1] "EVTYPE"     "FATALITIES" "INJURIES"
names(StormData_Economy)
## [1] "EVTYPE"     "PROPDMG"    "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP"

Focus on health data

Continue to clean data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total fatalities and injuries per event type, sorted so the event types with
# the largest combined casualty count come first.
StormData_Health_Aggre <- StormData_Health %>%
  aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE, data = ., FUN = sum,
            na.rm = TRUE) %>%
  arrange(desc(FATALITIES + INJURIES))
# Keep the first ten rows for reporting and plotting.
StormData_Health_Aggre_top <- StormData_Health_Aggre[1:10, ]

Focus on economy data

Clean economy data for analysis

# Turn a damage amount and its magnitude code into a plain number.
#   amount: numeric vector (PROPDMG or CROPDMG).
#   code:   matching vector of codes (PROPDMGEXP or CROPDMGEXP).
# Codes H/K/M/B, in either case, multiply the amount by 1e2/1e3/1e6/1e9.
# A zero amount always yields 0. A non-zero amount with any other code
# yields NA.
damage_value <- function(amount, code) {
  multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  scaled <- amount * unname(multiplier[toupper(as.character(code))])
  scaled[amount == 0] <- 0
  scaled
}

StormData_Economy$PROPDMGCALC <- damage_value(StormData_Economy$PROPDMG,
                                              StormData_Economy$PROPDMGEXP)
StormData_Economy$CROPDMGCALC <- damage_value(StormData_Economy$CROPDMG,
                                              StormData_Economy$CROPDMGEXP)

# Total property and crop damage per event type, largest combined damage first.
# NOTE(review): the formula method of aggregate() appears to drop, by default,
# every row where either *CALC column is NA, so records with an unrecognised
# code would be missing from BOTH totals -- confirm whether that is intended.
StormData_Economy_Aggre <- StormData_Economy %>%
  aggregate(cbind(PROPDMGCALC, CROPDMGCALC) ~ EVTYPE, data = ., FUN = sum,
            na.rm = TRUE) %>%
  arrange(desc(PROPDMGCALC + CROPDMGCALC))

# Keep the first ten rows for reporting and plotting.
StormData_Economy_Aggre_top <- StormData_Economy_Aggre[1:10, ]

RESULTS

Health data

Which types of events are most harmful to population health

Visualize results for health data

Based on fatalities and injuries, tornadoes do the most damage to health, followed by wind, heat, floods, lightning, and cold weather.

# Print the leading rows of the health ranking.
head(StormData_Health_Aggre_top)
##         EVTYPE FATALITIES INJURIES
## 1      Tornado       5633    91368
## 2         Wind       1207    11299
## 3         Heat       3119     9224
## 4        Flood       1486     8582
## 5    LIGHTNING        816     5230
## 6 Cold Weather        605     3205
library(ggplot2)
# Point plot of combined fatalities and injuries per event type.
# ggplot() + geom_point() replaces the deprecated qplot() shortcut, and the
# title typo ("Damange") is corrected.
ggplot(StormData_Health_Aggre_top,
       aes(x = EVTYPE, y = FATALITIES + INJURIES)) +
  geom_point() +
  ggtitle("Impact of Weather Events on Health Damage")

Economic data

which types of events have the greatest economic consequences

Visualize results for economic data

Based on property and crop damage, floods do the most economic damage, followed by multiple events, tornadoes, storm surges, wind, and hail.

# Print the leading rows of the economic-damage ranking.
head(StormData_Economy_Aggre_top)
##           EVTYPE  PROPDMGCALC CROPDMGCALC
## 1          Flood 167004467270 12170542100
## 2 Multiple Event  80750771250  4202571910
## 3        Tornado  56942011330   364958360
## 4    STORM SURGE  43323536000        5000
## 5           Wind  17456763670  1963516550
## 6           Hail  15974043220  3021882450
# Point plot of combined property and crop damage per event type.
# ggplot() + geom_point() replaces the deprecated qplot() shortcut, and the
# title typo ("Damange") is corrected. ggplot2 is loaded here so this chunk
# does not rely on an earlier chunk having attached it.
library(ggplot2)
ggplot(StormData_Economy_Aggre_top,
       aes(x = EVTYPE, y = PROPDMGCALC + CROPDMGCALC)) +
  geom_point() +
  ggtitle("Impact of Weather Events on Economic Damage")

CONCLUSIONS

Tornadoes cause the most health damage, while floods cause the most economic damage. Tornadoes are also in the top 6 for economic damage, and floods are in the top 6 for health damage.