This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks weather events and major storms, to evaluate the most severe types of weather events in the USA. Specifically, I want to identify which types of catastrophic events caused the greatest harm to the human population (fatalities / injuries) and which caused the greatest economic loss (property / crop damage) between the years 1950 and 2011.
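This analysis has two major goals: (1) to identify which types of events are most harmful to population health (fatalities and injuries), and (2) to identify which types of events have the greatest economic consequences (property and crop damage).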
The data can be downloaded from here and read with the read.csv() function.
# Load the raw storm data straight from the compressed archive.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the header row is treated.
stormData <- read.csv("repdata_data_StormData.csv.bz2", header = TRUE)
Up next, the libraries needed for analysis are loaded.
library(lubridate)
library(plyr)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(magrittr)
Then, the column names are checked with the names() function.
# Print the column names of the raw data set to see which variables are available.
names(stormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
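Three things need to be modified for the analysis: the BGN_DATE column is converted to a date-time, only events recorded from 1996 onward are kept, and only the columns needed for the analysis are selected.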
# Parse the begin date from its month/day/year hour:minute:second text form.
stormData$BGN_DATE <- mdy_hms(stormData$BGN_DATE)
# Keep only events from 1996 onward, and only the columns used downstream.
# The cutoff is an explicit UTC instant: a bare string on the right-hand side
# would be parsed in the session's local time zone, while mdy_hms() yields UTC
# by default, so the boundary would move with the machine's time zone.
cutoffDate <- as.POSIXct("1996-01-01", tz = "UTC")
stormData_sub <- subset(
  stormData,
  BGN_DATE >= cutoffDate,
  select = c(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
)
The FATALITIES and INJURIES variables are aggregated separately by event type with the aggregate() function, using sum as its FUN parameter.
# Sum the health-impact counts per event type; each result has one row per
# EVTYPE and a single column holding the total.
totalFatality <- aggregate(
  FATALITIES ~ EVTYPE,
  data = stormData_sub,
  FUN = sum
)
totalInjury <- aggregate(
  INJURIES ~ EVTYPE,
  data = stormData_sub,
  FUN = sum
)
The aggregated data are sorted in descending order of the FATALITIES / INJURIES totals with the arrange() function (ties are broken by EVTYPE), and the top 10 event types are selected for analysis.
# Rank event types by total fatalities (ties broken alphabetically by EVTYPE)
# and keep the ten deadliest. head() returns at most ten rows, whereas
# `[1:10, ]` pads the result with all-NA rows when fewer than ten exist.
stormDataFatality <- head(arrange(totalFatality, desc(FATALITIES), EVTYPE), 10)
show(stormDataFatality)
## EVTYPE FATALITIES
## 1 EXCESSIVE HEAT 1797
## 2 TORNADO 1511
## 3 FLASH FLOOD 887
## 4 LIGHTNING 651
## 5 FLOOD 414
## 6 RIP CURRENT 340
## 7 TSTM WIND 241
## 8 HEAT 237
## 9 HIGH WIND 235
## 10 AVALANCHE 223
# Rank event types by total injuries (ties broken alphabetically by EVTYPE)
# and keep the ten worst. head() returns at most ten rows, whereas
# `[1:10, ]` pads the result with all-NA rows when fewer than ten exist.
stormDataInjury <- head(arrange(totalInjury, desc(INJURIES), EVTYPE), 10)
show(stormDataInjury)
## EVTYPE INJURIES
## 1 TORNADO 20667
## 2 FLOOD 6758
## 3 EXCESSIVE HEAT 6391
## 4 LIGHTNING 4141
## 5 TSTM WIND 3629
## 6 FLASH FLOOD 1674
## 7 THUNDERSTORM WIND 1400
## 8 WINTER STORM 1292
## 9 HURRICANE/TYPHOON 1275
## 10 HEAT 1222
Now, these data are plotted side by side with the ggplot() function, and the grid.arrange() function.
# Bar chart of the ten event types with the most fatalities; reorder() on the
# negated total places the tallest bar first.
plotFatality <- ggplot(
  stormDataFatality,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_col(fill = "red3") +
  labs(x = "", y = "Total number of fatalities") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))

# Same chart for the ten event types with the most injuries.
plotInjury <- ggplot(
  stormDataInjury,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_col(fill = "red3") +
  labs(x = "", y = "Total number of injuries") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))

# Show both charts next to each other under one shared title.
grid.arrange(
  plotFatality, plotInjury,
  ncol = 2,
  top = "Fatalities & Injuries from top 10 Weather Events - Public Health Impact"
)
First, unique values of the PROPDMGEXP and CROPDMGEXP are checked.
# List the distinct exponent codes attached to the property-damage figures.
unique(stormData_sub[["PROPDMGEXP"]])
## [1] "" "M" "K" "B" "0"
# Same check for the codes attached to the crop-damage figures.
unique(stormData_sub[["CROPDMGEXP"]])
## [1] "" "M" "K" "B"
The codes in the PROPDMGEXP and CROPDMGEXP variables need to be converted into numeric multipliers with the mapvalues() function; these are then multiplied by the values of PROPDMG and CROPDMG to get the total damage.
# Translate the exponent codes into numeric multipliers and scale the raw
# damage figures by them: "" leaves the value as is, while K, M and B scale it
# by 1e3, 1e6 and 1e9.
# The code "0" occurs in PROPDMGEXP. Left unmapped it is parsed as the number 0
# and wipes out the recorded damage, so it is mapped to 1 here (read as 10^0).
# NOTE(review): confirm this reading of "0" against the NOAA documentation.
# warn_missing = FALSE silences the message about codes that do not occur in a
# column; as.character() guards against factor input, where as.numeric() would
# return the level codes instead of the mapped values.
expCodes <- c("", "0", "K", "M", "B")
expMultipliers <- c(1, 1, 1e3, 1e6, 1e9)
stormData_sub$totalPROPDMG <- as.numeric(as.character(
  mapvalues(stormData_sub$PROPDMGEXP, from = expCodes, to = expMultipliers,
            warn_missing = FALSE)
)) * stormData_sub$PROPDMG
stormData_sub$totalCROPDMG <- as.numeric(as.character(
  mapvalues(stormData_sub$CROPDMGEXP, from = expCodes, to = expMultipliers,
            warn_missing = FALSE)
)) * stormData_sub$CROPDMG
Then, these totals are aggregated separately by event type with the aggregate() function, using sum as its FUN parameter.
# Sum the scaled damage figures per event type; each result has one row per
# EVTYPE and a single column holding the total.
totalPropDmg <- aggregate(
  totalPROPDMG ~ EVTYPE,
  data = stormData_sub,
  FUN = sum
)
totalCropDmg <- aggregate(
  totalCROPDMG ~ EVTYPE,
  data = stormData_sub,
  FUN = sum
)
The aggregated data are sorted in descending order of total damage (totalPROPDMG / totalCROPDMG) with the arrange() function (ties are broken by EVTYPE), and the top 10 event types are selected for analysis.
# Rank event types by total property damage (ties broken alphabetically by
# EVTYPE) and keep the ten costliest. head() returns at most ten rows, whereas
# `[1:10, ]` pads the result with all-NA rows when fewer than ten exist.
stormDataPropDmg <- head(arrange(totalPropDmg, desc(totalPROPDMG), EVTYPE), 10)
show(stormDataPropDmg)
## EVTYPE totalPROPDMG
## 1 FLOOD 143944833550
## 2 HURRICANE/TYPHOON 69305840000
## 3 STORM SURGE 43193536000
## 4 TORNADO 24616945710
## 5 FLASH FLOOD 15222203910
## 6 HAIL 14595143420
## 7 HURRICANE 11812819010
## 8 TROPICAL STORM 7642475550
## 9 HIGH WIND 5247860360
## 10 WILDFIRE 4758667000
# Rank event types by total crop damage (ties broken alphabetically by EVTYPE)
# and keep the ten costliest. head() returns at most ten rows, whereas
# `[1:10, ]` pads the result with all-NA rows when fewer than ten exist.
stormDataCropDmg <- head(arrange(totalCropDmg, desc(totalCROPDMG), EVTYPE), 10)
show(stormDataCropDmg)
## EVTYPE totalCROPDMG
## 1 DROUGHT 13367566000
## 2 FLOOD 4974778400
## 3 HURRICANE 2741410000
## 4 HURRICANE/TYPHOON 2607872800
## 5 HAIL 2476029450
## 6 FLASH FLOOD 1334901700
## 7 EXTREME COLD 1288973000
## 8 FROST/FREEZE 1094086000
## 9 HEAVY RAIN 728169800
## 10 TROPICAL STORM 677711000
Now, these data are plotted side by side with the ggplot() function, and the grid.arrange() function.
# Bar chart of the ten event types with the highest total property damage;
# reorder() on the negated total places the tallest bar first.
plotPropDmg <- ggplot(
  stormDataPropDmg,
  aes(x = reorder(EVTYPE, -totalPROPDMG), y = totalPROPDMG)
) +
  geom_col(fill = "red3") +
  labs(x = "", y = "Total Property Damage") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))

# Same chart for the ten event types with the highest total crop damage.
plotCropDmg <- ggplot(
  stormDataCropDmg,
  aes(x = reorder(EVTYPE, -totalCROPDMG), y = totalCROPDMG)
) +
  geom_col(fill = "red3") +
  labs(x = "", y = "Total Crop Damage") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))

# Show both charts next to each other under one shared title.
grid.arrange(
  plotPropDmg, plotCropDmg,
  ncol = 2,
  top = "Property & Crop Damage from top 10 Weather Events - Greatest Economic Consequences"
)