The goal of this assignment is to explore the NOAA Storm Database and assess the effects of severe weather events on both population and economy. The database covers the period from 1950 to November 2011.
The following analysis investigates which types of severe weather events are most harmful to population health and which have the greatest economic consequences.
Load the data using the read.csv() function.
# Read the raw storm data. Strings are kept as character (not factor) so the
# text columns can be cleaned with string operations.
data <- read.csv(
  "repdata_data_StormData.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Keep only the columns needed for the health and economic analysis.
used <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
dataused <- data[, used]
head(dataused)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
The EVTYPE column contains values in a non-standard format. Clean it by converting the values to upper case and removing leading and trailing whitespace with a regular expression.
# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Normalize event names: upper-case first, then strip surrounding whitespace,
# so that spelling variants differing only in case or padding are grouped.
dataused$EVTYPE <- trim(toupper(dataused$EVTYPE))
Convert the PROPDMGEXP and CROPDMGEXP columns into numeric multipliers so that they can be used to calculate the property and crop damage costs.
# Translate damage-exponent codes into numeric multipliers.
#
# Rules are applied in this order:
#   NA                      -> 0
#   ""                      -> 1
#   contains "-", "+", "?"  -> 1
#   contains "H" or "h"     -> 100
#   contains "K" or "k"     -> 1000
#   contains "M" or "m"     -> 1e+06
#   contains "B" or "b"     -> 1e+09
# Whatever is left is parsed by as.numeric(), so a purely numeric code such
# as "5" becomes the multiplier 5.
# NOTE(review): confirm that using numeric codes as literal multipliers is
# the intended interpretation of the data.
#
# @param exp_code Vector of exponent codes (coerced to character).
# @return Numeric vector of multipliers, the same length as `exp_code`.
exp_to_multiplier <- function(exp_code) {
  exp_code <- as.character(exp_code)
  exp_code[is.na(exp_code)] <- "0"
  exp_code[exp_code == ""] <- "1"

  # Order matters: the "[-+?]" rule must run before the "M"/"B" rules,
  # because their replacement text ("1e+06", "1e+09") contains a "+".
  symbol_values <- c(
    "[-+?]" = "1",
    "[Hh]" = "100",
    "[Kk]" = "1000",
    "[Mm]" = "1e+06",
    "[Bb]" = "1e+09"
  )
  for (pattern in names(symbol_values)) {
    exp_code[grepl(pattern, exp_code)] <- symbol_values[[pattern]]
  }

  as.numeric(exp_code)
}

# Scale the recorded property damage by its multiplier.
dataused$PROPDMGEXP <- exp_to_multiplier(dataused$PROPDMGEXP)
dataused$PROPDMG <- dataused$PROPDMGEXP * dataused$PROPDMG

# Scale the recorded crop damage by its multiplier.
dataused$CROPDMGEXP <- exp_to_multiplier(dataused$CROPDMGEXP)
dataused$CROPDMG <- dataused$CROPDMGEXP * dataused$CROPDMG
# Sum each outcome measure per event type.
total_fatalities <- aggregate(FATALITIES ~ EVTYPE, data = dataused, FUN = sum)
total_injuries <- aggregate(INJURIES ~ EVTYPE, data = dataused, FUN = sum)
total_propdmg <- aggregate(PROPDMG ~ EVTYPE, data = dataused, FUN = sum)
total_cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = dataused, FUN = sum)

# Combine the per-event totals with a keyed join on EVTYPE. Binding the
# frames side by side would align rows purely by position and leave two
# EVTYPE columns in the result.
health <- merge(total_fatalities, total_injuries, by = "EVTYPE")
health <- health[, c("EVTYPE", "FATALITIES", "INJURIES")]
health$Total <- health$FATALITIES + health$INJURIES

economy <- merge(total_propdmg, total_cropdmg, by = "EVTYPE")
economy <- economy[, c("EVTYPE", "PROPDMG", "CROPDMG")]
economy$Total <- economy$PROPDMG + economy$CROPDMG
head(economy)
## EVTYPE PROPDMG CROPDMG Total
## 1 ? 5000 0 5000
## 2 ABNORMAL WARMTH 0 0 0
## 3 ABNORMALLY DRY 0 0 0
## 4 ABNORMALLY WET 0 0 0
## 5 ACCUMULATED SNOWFALL 0 0 0
## 6 AGRICULTURAL FREEZE 0 28820000 28820000
# Preview the first six rows of the combined health totals.
head(health, n = 6L)
## EVTYPE FATALITIES INJURIES Total
## 1 ? 0 0 0
## 2 ABNORMAL WARMTH 0 0 0
## 3 ABNORMALLY DRY 0 0 0
## 4 ABNORMALLY WET 0 0 0
## 5 ACCUMULATED SNOWFALL 0 0 0
## 6 AGRICULTURAL FREEZE 0 0 0
# Keep the ten event types with the largest totals: rank the rows by
# descending Total, take the first ten positions, then subset.
top_health_rows <- head(order(-health$Total), 10)
health <- health[top_health_rows, ]

top_economy_rows <- head(order(-economy$Total), 10)
economy <- economy[top_economy_rows, ]

health
## EVTYPE FATALITIES INJURIES Total
## 750 TORNADO 5633 91346 96979
## 108 EXCESSIVE HEAT 1903 6525 8428
## 771 TSTM WIND 504 6957 7461
## 146 FLOOD 470 6789 7259
## 410 LIGHTNING 816 5230 6046
## 235 HEAT 937 2100 3037
## 130 FLASH FLOOD 978 1777 2755
## 379 ICE STORM 89 1975 2064
## 677 THUNDERSTORM WIND 133 1488 1621
## 880 WINTER STORM 206 1321 1527
# Show the ten event types with the largest combined damage totals.
print(economy)
## EVTYPE PROPDMG CROPDMG Total
## 146 FLOOD 144657709807 5661968450 150319678257
## 364 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 750 TORNADO 56937161054 414953110 57352114164
## 591 STORM SURGE 43323536000 5000 43323541000
## 204 HAIL 15732267427 3025954453 18758221880
## 130 FLASH FLOOD 16140862294 1421317100 17562179394
## 76 DROUGHT 1046106000 13972566000 15018672000
## 355 HURRICANE 11868319010 2741910000 14610229010
## 521 RIVER FLOOD 5118945500 5029459000 10148404500
## 379 ICE STORM 3944927810 5022113500 8967041310
library(ggplot2)
# Bar chart of combined fatalities and injuries per event type.
# reorder() sorts the x axis by descending Total so the ranking can be read
# directly off the plot; a plain character x would be drawn alphabetically.
ggplot(health, aes(x = reorder(EVTYPE, -Total), y = Total)) +
  geom_bar(stat = "identity") +
  labs(x = "Event type", y = "Sum of Fatalities and Injuries") +
  ggtitle("Most Population Health Harm by various Events") +
  theme(axis.text.x = element_text(angle = 90))
As the plot shows, TORNADO is the event type most harmful to population health.
library(ggplot2)
# Bar chart of combined property and crop damage per event type.
# reorder() sorts the x axis by descending Total so the ranking can be read
# directly off the plot; a plain character x would be drawn alphabetically.
ggplot(economy, aes(x = reorder(EVTYPE, -Total), y = Total)) +
  geom_bar(stat = "identity") +
  labs(x = "Event type", y = "Sum of Property and Crop Damage") +
  ggtitle("Most Economic Harm by various Events") +
  theme(axis.text.x = element_text(angle = 90))
As the plot shows, FLOOD is the event type most harmful to the economy.