This report takes the storm data from the NOAA database and examines the event type (EVTYPE), FATALITIES, INJURIES, property damage (PROPDMG), and crop damage (CROPDMG) fields to determine which weather events have the greatest impact on population health and on the economy. Crop damage and property damage are combined into a single DAMAGES total for the purposes of this analysis. Fatalities and injuries are likewise combined into a “public health” total (HEALTH) in order to assess the overall harm to population health.
The results were as follows:
* Tornadoes, excessive heat, and flash floods were the top 3 causes of fatalities.
* Tornadoes, thunderstorm wind, and floods were the top 3 causes of injuries.
* Tornadoes/Thunderstorm wind/hail, high winds/cold, and Hurricane Opal/High winds were the top 3 causes of total economic damages.
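The purpose of this report is to answer two main questions: (1) which types of weather events are most harmful to population health, and (2) which types of weather events have the greatest economic consequences. Each question is answered by totalling the relevant figures per event type and comparing the highest-ranked event types.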
Basic set up: Load libraries, Download and read the data.
library(dplyr)
library(knitr)
library(lubridate)
library(ggplot2)
# Fetch the compressed storm data only when no local copy is present yet.
if (!file.exists("stormdata.csv.bz2")) {
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Fstormdata.csv.bz2",
    destfile = "stormdata.csv.bz2"
  )
}
Now, extract, read and clean up data
# Read the bzip2-compressed CSV; bzfile() opens it as a decompressing connection.
rawdata <- read.csv(bzfile("stormdata.csv.bz2"))

# Keep only the columns used by this analysis. Columns are matched by exact
# name (not by substring), and the result is a new object so `rawdata` itself
# is left unmodified.
wanted_cols <- c(
  "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
stormdata <- rawdata[, names(rawdata) %in% wanted_cols, drop = FALSE]

# Upper-case the event types so that spellings differing only in letter case
# fall into the same group. toupper() returns a character vector, so EVTYPE is
# character from here on.
stormdata$EVTYPE <- toupper(stormdata$EVTYPE)
Fill in the blanks & format damages
# Translate damage-exponent codes into the power of ten they stand for.
#
# code: vector (character or factor) of exponent codes.
# Returns a numeric vector of the same length:
#   "H"/"h" -> 2, "K"/"k" -> 3, "M"/"m" -> 6, "B"/"b" -> 9,
#   a string of digits -> that number,
#   anything else (blank, NA, unrecognised symbols) -> 0, so the damage figure
#   is taken at face value.
#
# Unrecognised codes must become 0 rather than NA: a single NA exponent makes
# that row's damage NA, which makes the summed damage of its whole event type
# NA and pushes that event type to the bottom of the ranking.
damage_exponent <- function(code) {
  code <- toupper(as.character(code))
  letter_powers <- c(H = 2, K = 3, M = 6, B = 9)
  # Name lookup yields NA for every code that is not one of the letters above.
  power <- unname(letter_powers[code])
  is_digits <- grepl("^[0-9]+$", code)
  power[is_digits] <- as.numeric(code[is_digits])
  power[is.na(power)] <- 0
  power
}

# Property damage: numeric exponent, then the full dollar amount.
stormdata$PROPDMGEXP <- damage_exponent(stormdata$PROPDMGEXP)
stormdata$TOTALPROPDMG <- stormdata$PROPDMG * 10^stormdata$PROPDMGEXP

# Crop damage: same treatment.
stormdata$CROPDMGEXP <- damage_exponent(stormdata$CROPDMGEXP)
stormdata$TOTALCROPDMG <- stormdata$CROPDMG * 10^stormdata$CROPDMGEXP
Add new columns and summarize data
# DAMAGES: crop plus property damage for each record.
stormdata$DAMAGES <- with(stormdata, TOTALCROPDMG + TOTALPROPDMG)
# HEALTH: injuries plus fatalities for each record.
stormdata$HEALTH <- with(stormdata, INJURIES + FATALITIES)
Display the top 10 causes of Fatalities.
# Sum fatalities within each event type, rank the totals from highest to
# lowest, and keep the ten highest-ranked event types.
sum_fat <- arrange(
  with(stormdata, aggregate(FATALITIES, by = list(EVENTS = EVTYPE), FUN = sum)),
  desc(x)
)
p_fat <- head(sum_fat, n = 10)
p_fat
## EVENTS x
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
Display the top 10 causes of Injuries.
# Sum injuries within each event type, rank the totals from highest to
# lowest, and keep the ten highest-ranked event types.
sum_inj <- arrange(
  with(stormdata, aggregate(INJURIES, by = list(EVENTS = EVTYPE), FUN = sum)),
  desc(x)
)
p_inj <- head(sum_inj, n = 10)
p_inj
## EVENTS x
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
Display the top 10 causes of all Fatalities and Injuries.
# Sum the combined injuries-plus-fatalities figure within each event type,
# rank the totals from highest to lowest, and keep the ten highest.
sum_health <- arrange(
  with(stormdata, aggregate(HEALTH, by = list(EVENTS = EVTYPE), FUN = sum)),
  desc(x)
)
p_health <- head(sum_health, n = 10)
p_health
## EVENTS x
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
Display the top 10 causes of total economic damage (property and crop damage combined).
# Sum the combined crop-plus-property damage within each event type, rank the
# totals from highest to lowest, and keep the ten highest.
sum_dmg <- arrange(
  with(stormdata, aggregate(DAMAGES, by = list(EVENTS = EVTYPE), FUN = sum)),
  desc(x)
)
p_dmg <- head(sum_dmg, n = 10)
p_dmg
## EVENTS x
## 1 TORNADOES, TSTM WIND, HAIL 1602500000
## 2 HIGH WINDS/COLD 117500000
## 3 HURRICANE OPAL/HIGH WINDS 110000000
## 4 WINTER STORM HIGH WINDS 65000000
## 5 HEAVY RAIN/HIGH SURF 15000000
## 6 LAKESHORE FLOOD 7540000
## 7 HIGH WINDS HEAVY RAINS 7510000
## 8 FOREST FIRES 5500000
## 9 FLASH FLOODING/FLOOD 1925000
## 10 HEAVY SNOW/HIGH WINDS & FLOOD 1520000