Introduction
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The analysis below identifies the storm event types that cause the most injuries and fatalities. It also examines which event types cause the highest property and crop damage.
Synopsis
The analysis of the storm event database revealed that tornadoes are the most dangerous weather event to the population's health. The second most dangerous event type is excessive heat. The economic impact of weather events was also analyzed. Flash floods and thunderstorm winds caused billions of dollars in property damage between 1950 and 2011. The largest damage to crops was caused by droughts, followed by floods and hail.
Loading the data
Load required libraries
library("ggplot2")
library("gridExtra")
library("R.utils")
library("data.table")
library("h2o")
Now read data
# Load the NOAA storm data once per session; an existing `stormData` object
# in the workspace is reused instead of re-reading the file.
if (!exists("stormData")) {
  csv_file <- "repdata_data_StormData.csv"
  bz2_file <- "repdata_data_StormData.csv.bz2"

  # Extract the archive only when the CSV is not already on disk
  if (!file.exists(csv_file) && file.exists(bz2_file)) {
    bunzip2(bz2_file, overwrite = FALSE)
  }

  # Fail early with an actionable message instead of a generic fread() error
  if (!file.exists(csv_file)) {
    stop(
      "Storm data not found: expected '", csv_file, "' or '", bz2_file,
      "' in ", getwd(),
      call. = FALSE
    )
  }

  # Read data into the variable called stormData
  stormData <- fread(csv_file, sep = ",")
}
Summary of Data
Take a quick look at the data available.
# Per-column summary of the raw data: type for character columns,
# quartiles and NA counts for numeric ones
summary(stormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0.0000
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0.0000
## Mode :character Median :0 Median : 0.0000
## Mean :0 Mean : 0.9862
## 3rd Qu.:0 3rd Qu.: 0.0000
## Max. :0 Max. :925.0000
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.2301 Mean : 7.503
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
##
## F MAG FATALITIES INJURIES
## Min. :0.0 Min. : 0.0 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:0.0 1st Qu.: 0.0 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :1.0 Median : 50.0 Median : 0.0000 Median : 0.0000
## Mean :0.9 Mean : 46.9 Mean : 0.0168 Mean : 0.1557
## 3rd Qu.:1.0 3rd Qu.: 75.0 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :5.0 Max. :22000.0 Max. :583.0000 Max. :1700.0000
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
# Number of missing values in every column, returned as a one-row table
stormData[, lapply(.SD, function(column) sum(is.na(column)))]
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
## 1: 0 0 0 0 0 0 0 0 0
## BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI
## 1: 0 0 0 0 0 902297 0 0
## END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP
## 1: 0 0 0 843563 0 0 0 0 0
## CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE LATITUDE_E
## 1: 0 0 0 0 0 47 0 40
## LONGITUDE_ REMARKS REFNUM
## 1: 0 0 0
Data Processing for Q1
# Total fatalities and injuries per event type, restricted to events that
# harmed at least one person.
health <- stormData[
  FATALITIES > 0 | INJURIES > 0,
  .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES)),
  by = EVTYPE
]

# Five worst event types for each measure. head() is used instead of
# [1:5, ] so that fewer than five event types never yields NA-padded rows.
top_injuries <- head(health[order(-INJURIES), .(EVTYPE, INJURIES)], 5L)
top_fatalities <- head(health[order(-FATALITIES), .(EVTYPE, FATALITIES)], 5L)
1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Bar chart of the five event types with the most fatalities, tallest first.
# geom_col() draws bars whose heights are the y values themselves.
ggplot(
  top_fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  scale_fill_brewer(palette = "Accent") +
  xlab("Event type") +
  ylab("No. of fatalities") +
  ggtitle("Top 5 weather events causing fatalities")

# Bar chart of the five event types with the most injuries, tallest first.
# geom_col() draws bars whose heights are the y values themselves.
ggplot(
  top_injuries,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  scale_fill_brewer(palette = "Accent") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event type") +
  ylab("No. of Injuries") +
  ggtitle("Top 5 weather events causing Injuries")

Data Processing for Q2
# Keep only events that caused property or crop damage, with the columns
# needed to turn the recorded amounts into dollar values.
economic <- stormData[
  PROPDMG > 0 | CROPDMG > 0,
  .(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
]

# Upper-case both exponent-code columns in place so that lower- and
# upper-case spellings of the same code coincide.
economic[,
  c("CROPDMGEXP", "PROPDMGEXP") := lapply(.SD, toupper),
  .SDcols = c("CROPDMGEXP", "PROPDMGEXP")
]
# Translate each CROPDMGEXP code into a numeric multiplier with a single
# lookup instead of one masked assignment per code. Codes that are not in
# the table get an NA factor.
crop_exp_codes <- c("", "?", "0", "2", "K", "M", "B")
crop_exp_factors <- 10^c(0, 0, 0, 2, 3, 6, 9)

# match() is used rather than a named-vector lookup because indexing by
# name can never match the empty-string code.
economic[
  , CROPDMGFACTOR := crop_exp_factors[match(CROPDMGEXP, crop_exp_codes)]
]
# Show each distinct exponent code next to the multiplier assigned to it
unique(economic[, c("CROPDMGEXP", "CROPDMGFACTOR")])
## CROPDMGEXP CROPDMGFACTOR
## 1: 1e+00
## 2: M 1e+06
## 3: K 1e+03
## 4: B 1e+09
## 5: ? 1e+00
## 6: 0 1e+00
# Translate each PROPDMGEXP code into a numeric multiplier with a single
# lookup instead of one masked assignment per code. Symbols and the blank
# code map to 1, digit codes to that power of ten, and H/K/M/B to
# 10^2/10^3/10^6/10^9. Codes that are not in the table get an NA factor.
prop_exp_codes <- c(
  "", "-", "?", "+",
  "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "H", "K", "M", "B"
)
prop_exp_factors <- 10^c(
  0, 0, 0, 0,
  0, 1, 2, 3, 4, 5, 6, 7, 8,
  2, 3, 6, 9
)

# match() is used rather than a named-vector lookup because indexing by
# name can never match the empty-string code.
economic[
  , PROPDMGFACTOR := prop_exp_factors[match(PROPDMGEXP, prop_exp_codes)]
]
# Dollar value of crop and property damage per record; the columns are added
# to `economic` by reference.
economic[
  , c("crop", "prop") := .(CROPDMG * CROPDMGFACTOR, PROPDMG * PROPDMGFACTOR)
]
economic[, DAMAGE := crop + prop]

# Combined damage per event type, largest first
damage_by_event <- economic[, .(DAMAGE_T = sum(DAMAGE)), by = EVTYPE]

# head() is used instead of [1:5] so that fewer than five event types never
# yields NA-padded rows.
total_damage <- head(damage_by_event[order(-DAMAGE_T)], 5L)
2. Across the United States, which types of events have the greatest economic consequences?
# Bar chart of the five event types with the highest combined property and
# crop damage, tallest first. geom_col() draws bars whose heights are the
# y values themselves.
ggplot(
  total_damage,
  aes(x = reorder(EVTYPE, -DAMAGE_T), y = DAMAGE_T, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event Type") +
  ylab("Total Damage") +
  ggtitle("Top 5 weather events causing damage to property and crop") +
  scale_fill_brewer(palette = "Accent")
