This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The goal is to identify which event types have the greatest economic consequences and which are most harmful to population health.
Information on the Data: Documentation
The raw data file, downloaded and copied into the working directory, was read into a data frame and then converted to a data.table.
library("data.table")
## Warning: package 'data.table' was built under R version 4.0.5
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.0.5
# Read the bzip2-compressed storm CSV from the working directory into a
# base data.frame.
storm <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Convert the data.frame to a data.table; `storm` itself is left untouched.
stormDT <- as.data.table(storm)
# Show the available column names.
names(stormDT)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Subset the dataset on the parameters of interest. Here, we remove the columns we don’t need.
# Columns needed for the population-health and economic-cost analyses.
# Defined once so the pruning step and the row filter cannot drift apart.
keepCols <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
# Finding columns to remove (everything that is not in keepCols)
cols2Remove <- setdiff(colnames(stormDT), keepCols)
# Removing columns (by reference, no copy of the table is made)
stormDT[, (cols2Remove) := NULL]
# Keep only rows whose event type is not "?" and that recorded at least one
# injury, one fatality, or a non-zero property or crop damage amount.
stormDT <- stormDT[
  EVTYPE != "?" &
    (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  keepCols,
  with = FALSE
]
Convert the PROPDMGEXP and CROPDMGEXP columns from their letter/digit codes (for example "K", "M", "B") to the numeric multipliers those codes stand for.
# Change all damage exponents to uppercase.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
stormDT[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Map property damage alphanumeric exponents to numeric multipliers.
# Blank codes are deliberately absent: R never matches an empty-string index
# against names, so blanks (and any other unlisted code) are handled by the
# fallback multiplier of 1 in dmgExpToMultiplier() below.
propDmgKey <- c(
  "-" = 10^0,
  "+" = 10^0,
  "0" = 10^0,
  "1" = 10^1,
  "2" = 10^2,
  "3" = 10^3,
  "4" = 10^4,
  "5" = 10^5,
  "6" = 10^6,
  "7" = 10^7,
  "8" = 10^8,
  "9" = 10^9,
  "H" = 10^2,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9
)

# Map crop damage alphanumeric exponents to numeric multipliers.
cropDmgKey <- c(
  "?" = 10^0,
  "0" = 10^0,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9
)

# Translate a vector of exponent codes into numeric multipliers.
#   codes: vector of exponent codes (coerced to character for the lookup).
#   key:   named numeric vector mapping code -> multiplier.
# Returns an unnamed numeric vector the same length as `codes`; codes that are
# blank, NA, or not present in `key` get a multiplier of 1.
dmgExpToMultiplier <- function(codes, key) {
  multiplier <- unname(key[as.character(codes)])
  multiplier[is.na(multiplier)] <- 10^0
  multiplier
}

# Replace the character codes with their numeric multipliers.
stormDT[, PROPDMGEXP := dmgExpToMultiplier(PROPDMGEXP, propDmgKey)]
stormDT[, CROPDMGEXP := dmgExpToMultiplier(CROPDMGEXP, cropDmgKey)]
Add two computed cost columns, propCost and cropCost (each damage amount multiplied by its multiplier), alongside the original columns.
# Rebuild the table with each damage amount followed by its multiplier and the
# resulting dollar cost (amount * multiplier).
stormDT <- stormDT[, list(
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  propCost = PROPDMG * PROPDMGEXP,
  CROPDMG,
  CROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]
Computation of total property and crop cost for the purpose of visual representation
# Sum property and crop cost per event type, plus their combined total.
totalCostDT <- stormDT[
  ,
  .(
    propCost = sum(propCost),
    cropCost = sum(cropCost),
    Total_Cost = sum(propCost) + sum(cropCost)
  ),
  by = .(EVTYPE)
]
# Rank by combined cost, most expensive first.
totalCostDT <- totalCostDT[order(-Total_Cost), ]
# Keep at most the ten costliest event types. head() is used instead of
# [1:10, ] because the latter pads the result with all-NA rows when fewer
# than ten event types are present.
totalCostDT <- head(totalCostDT, 10)
head(totalCostDT, 5)
## EVTYPE propCost cropCost Total_Cost
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380677 414953270 57362333947
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: HAIL 15735267513 3025954473 18761221986
Computation of total fatalities and injuries for the purpose of visual representation
# Sum fatalities and injuries per event type, plus their combined total.
totalInjuriesDT <- stormDT[
  ,
  .(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    totals = sum(FATALITIES) + sum(INJURIES)
  ),
  by = .(EVTYPE)
]
# Rank by number of fatalities, deadliest first.
totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ]
# Keep at most the ten deadliest event types. head() is used instead of
# [1:10, ] because the latter pads the result with all-NA rows when fewer
# than ten event types are present.
totalInjuriesDT <- head(totalInjuriesDT, 10)
head(totalInjuriesDT, 5)
## EVTYPE FATALITIES INJURIES totals
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
## 4: HEAT 937 2100 3037
## 5: LIGHTNING 816 5230 6046
Melting data.table so that it is easier to put in bar graph format
# Reshape to long format: one row per (event type, measure) pair, with the
# measure name stored in `new_ev` and its number in `value`.
total_injuries_melt <- melt(
  totalInjuriesDT,
  id.vars = "EVTYPE",
  variable.name = "new_ev"
)
head(total_injuries_melt, n = 5)
## EVTYPE new_ev value
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
## 4: HEAT FATALITIES 937
## 5: LIGHTNING FATALITIES 816
# Dodged bar chart of fatalities, injuries and their total for the top event
# types, with the x-axis ordered by descending value.
g <- ggplot(total_injuries_melt, aes(x = reorder(EVTYPE, -value), y = value)) +
  # One bar per measure, placed side by side within each event type
  geom_bar(aes(fill = new_ev), stat = "identity", position = "dodge") +
  # Axis labels and chart title
  labs(
    x = "Event Type",
    y = "Frequency Count",
    title = "Top 10 US Killers"
  ) +
  # Rotate x-axis tick labels and center the title
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
g
Melting data.table so that it is easier to put in bar graph format
# Reshape to long format: one row per (event type, cost measure) pair, with the
# measure name stored in `Damage` and its amount in `value`.
econ_consequences <- melt(
  totalCostDT,
  id.vars = "EVTYPE",
  variable.name = "Damage"
)
head(econ_consequences, n = 5)
## EVTYPE Damage value
## 1: FLOOD propCost 144657709807
## 2: HURRICANE/TYPHOON propCost 69305840000
## 3: TORNADO propCost 56947380677
## 4: STORM SURGE propCost 43323536000
## 5: HAIL propCost 15735267513
# Dodged bar chart of property, crop and total cost for the top event types,
# with the x-axis ordered by descending value.
g_2 <- ggplot(econ_consequences, aes(x = reorder(EVTYPE, -value), y = value)) +
  # One bar per cost measure, placed side by side within each event type
  geom_bar(aes(fill = Damage), stat = "identity", position = "dodge") +
  # Axis labels and chart title
  labs(
    x = "Event Type",
    y = "Cost (dollars)",
    title = "Top 10 US Storm Events causing Economic Consequences"
  ) +
  # Rotate x-axis tick labels and center the title
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
g_2