Effects of storms and other severe weather events on Human Health and Economics
Storms and other severe weather events can cause both public health and economic impact for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
Download the raw data file and extract the data into a dataframe.
library(R.utils, quietly = TRUE)
# All file names are relative to the current working directory: start R in
# (or knit from) the project folder. A setwd() call on a machine-specific
# path would make the report fail everywhere else.
archive_file <- "stormdata.csv.bz2"
csv_file <- "stormdata.csv"
data_url <- paste0("https://d396qusza40orc.cloudfront.net/",
                   "repdata%2Fdata%2FStormData.csv.bz2")
# Download only when neither the archive nor the extracted CSV is present.
if (!file.exists(csv_file) && !file.exists(archive_file)) {
  # mode = "wb" transfers the bzip2 archive as binary, which matters on Windows.
  download.file(data_url, destfile = archive_file, mode = "wb")
}
# Extract independently of the download step, so an existing archive with a
# missing CSV still gets unpacked. remove = FALSE keeps the archive on disk;
# with the default (remove = TRUE) the archive disappears and the next run
# would download again and then stop because the CSV already exists.
if (!file.exists(csv_file)) {
  bunzip2(archive_file, csv_file, remove = FALSE)
}
# Reuse a data frame that is already loaded; parsing the ~900k-row CSV is slow.
if (!exists("storm_data")) {
  storm_data <- read.csv(csv_file)
}
dim(storm_data)
## [1] 902297 37
str(storm_data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436774 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
Subset the dataset on the fields of interest.
# Normalise every column name to lowercase.
names(storm_data) <- tolower(names(storm_data))
# Columns carried forward into the health and economic analysis.
wanted_cols <- c("evtype", "fatalities", "injuries",
                 "propdmg", "propdmgexp", "cropdmg", "cropdmgexp")
# A row is kept when its event type is not "?" and it records at least one
# injury, fatality, property-damage or crop-damage amount above zero.
known_type <- storm_data$evtype != "?"
has_impact <- with(storm_data,
                   injuries > 0 | fatalities > 0 | propdmg > 0 | cropdmg > 0)
# which() discards rows whose condition is NA, so they are excluded rather
# than returned as all-NA rows.
data <- storm_data[which(known_type & has_impact), wanted_cols, drop = FALSE]
str(data)
## 'data.frame': 254632 obs. of 7 variables:
## $ evtype : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ fatalities: num 0 0 0 0 0 0 0 0 1 0 ...
## $ injuries : num 15 0 2 2 2 6 1 0 14 0 ...
## $ propdmg : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ propdmgexp: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ cropdmg : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cropdmgexp: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Distinct property-damage exponent codes present in the subset.
unique(data[["propdmgexp"]])
## [1] K M B m + 0 5 6 4 h 2 7 3 H -
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct crop-damage exponent codes present in the subset.
unique(data[["cropdmgexp"]])
## [1] M K m B ? 0 k
## Levels: ? 0 2 B k K m M
Map the property and crop damage exponent alphabetic multipliers to appropriate numeric values.
# Translate damage-exponent codes into numeric multipliers.
#
# code: vector (factor or character) of exponent codes.
# Returns an unnamed numeric vector of the same length:
#   "h" -> 1e2, "k" -> 1e3, "m" -> 1e6, "b" -> 1e9 (case-insensitive),
#   a digit d -> 10^d,
#   anything else ("", "-", "+", "?", NA) -> 1, i.e. the amount is used as is.
damage_multiplier <- function(code) {
  multiplier_key <- c("h" = 1e2, "k" = 1e3, "m" = 1e6, "b" = 1e9,
                      setNames(10^(0:9), 0:9))
  # Looking up a name that is not in the key (including the empty string)
  # yields NA, which is replaced by the neutral multiplier below.
  multiplier <- unname(multiplier_key[tolower(as.character(code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}
# One shared table serves both columns, so a digit code in cropdmgexp is
# interpreted exactly like the same digit in propdmgexp.
data$propdmgexp <- damage_multiplier(data$propdmgexp)
data$cropdmgexp <- damage_multiplier(data$cropdmgexp)
Select the applicable health columns from the dataset, then calculate the total number of fatalities and injuries per event type. Find the event types corresponding with the highest health impacts.
# Total fatalities and injuries for each event type.
healthData <- aggregate(cbind(fatalities, injuries) ~ evtype,
                        data = data, FUN = sum)
# Combined casualty count, used only for filtering and ranking.
healthData$combined <- healthData$fatalities + healthData$injuries
# Drop event types with no health impact.
healthData <- healthData[healthData$combined > 0, ]
# Rank by combined casualties, largest first. The column name is written out
# in full rather than relying on partial matching of `$`.
healthData <- healthData[order(healthData$combined, decreasing = TRUE), ]
# Renumber the rows 1..n so the row label equals the rank.
rownames(healthData) <- NULL
# Keep the ten highest-ranked event types. No "other" catch-all row is built;
# event types outside the top ten are simply left out. head() returns fewer
# rows instead of NA padding if fewer than ten event types remain.
healthDataTop <- head(healthData, 10)
# Reorder the columns for display and drop the helper `combined` column.
healthDataTop <- healthDataTop[, c("evtype", "injuries", "fatalities")]
healthDataTop
## evtype injuries fatalities
## 1 TORNADO 91346 5633
## 2 EXCESSIVE HEAT 6525 1903
## 3 TSTM WIND 6957 504
## 4 FLOOD 6789 470
## 5 LIGHTNING 5230 816
## 6 HEAT 2100 937
## 7 FLASH FLOOD 1777 978
## 8 ICE STORM 1975 89
## 9 THUNDERSTORM WIND 1488 133
## 10 WINTER STORM 1321 206
Combine the damage and damage exponent multiplier parameters into the single parameters propertyloss and croploss. Select the applicable economic columns from the dataset, then calculate the total amount of property loss and crop loss per event type. Find the event types corresponding with the highest economic impacts.
# Property loss = reported amount times its multiplier (propdmgexp must already
# hold numeric multipliers at this point).
data$propertyloss <- data$propdmg * data$propdmgexp
# Crop loss = reported amount times its multiplier.
data$croploss <- data$cropdmg * data$cropdmgexp
# Total property and crop loss for each event type.
economicData <- aggregate(cbind(propertyloss, croploss) ~ evtype,
                          data = data, FUN = sum)
# Combined loss, used only for filtering and ranking.
economicData$combined <- economicData$propertyloss + economicData$croploss
# Drop event types with no economic impact.
economicData <- economicData[economicData$combined > 0, ]
# Rank by combined loss, largest first.
economicData <- economicData[order(economicData$combined, decreasing = TRUE), ]
# Renumber the rows 1..n so the row label equals the rank. (Lower-casing the
# numeric row names, as done previously, left them unchanged.)
rownames(economicData) <- NULL
# Keep the ten highest-ranked event types; head() returns fewer rows instead
# of NA padding if fewer than ten event types remain.
economicDataTop <- head(economicData, 10)
# Reorder the columns for display and drop the helper `combined` column.
economicDataTop <- economicDataTop[, c("evtype", "propertyloss", "croploss")]
economicDataTop
## evtype propertyloss croploss
## 1 FLOOD 144657709807 5661968450
## 2 HURRICANE/TYPHOON 69305840000 2607872800
## 3 TORNADO 56947380677 414953270
## 4 STORM SURGE 43323536000 5000
## 5 HAIL 15735267513 3025954473
## 6 FLASH FLOOD 16822673979 1421317100
## 7 DROUGHT 1046106000 13972566000
## 8 HURRICANE 11868319010 2741910000
## 9 RIVER FLOOD 5118945500 5029459000
## 10 ICE STORM 3944927860 5022113500
Plot of the ten event types with the highest combined health impact (fatalities plus injuries), with each bar split into injuries and fatalities.
# Load necessary libraries
library(reshape2, quietly = TRUE)
library(ggplot2, quietly = TRUE)
# Long format: one row per (event type, injuries/fatalities) pair.
healthDataTopMelt <- melt(healthDataTop, id.vars = "evtype")
# Upper axis limit: the tallest stacked bar plus 5% headroom.
healthAxisMax <- 1.05 * max(healthDataTop$injuries + healthDataTop$fatalities)
# Stacked bars, event types ordered from highest to lowest impact.
healthChart <- ggplot(healthDataTopMelt,
                      aes(x = reorder(evtype, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = variable), position = "stack") +
  # Stack on a linear scale and apply the square root in the coordinate
  # system. A sqrt *scale* transforms each value before stacking, so bar tops
  # would sit at sqrt(a) + sqrt(b) and the axis would overstate the totals.
  # Explicit limits from zero with no expansion keep the axis range
  # non-negative, which the sqrt transformation requires.
  scale_y_continuous("Frequency Count",
                     limits = c(0, healthAxisMax), expand = c(0, 0)) +
  coord_trans(y = "sqrt") +
  xlab("Event Type") +
  # Rotate x-axis tick labels so the event names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Pareto Chart of Top 10 US Storm Health Impacts")
# Display the chart
print(healthChart)
Plot of the ten event types with the highest economic impacts.
# Long format: one row per (event type, propertyloss/croploss) pair.
economicDataTopMelt <- melt(economicDataTop, id.vars = "evtype")
# Upper axis limit: the tallest stacked bar plus 5% headroom.
economicAxisMax <- 1.05 *
  max(economicDataTop$propertyloss + economicDataTop$croploss)
# Stacked bars, event types ordered from highest to lowest impact.
economicChart <- ggplot(economicDataTopMelt,
                        aes(x = reorder(evtype, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = variable), position = "stack") +
  # Stack on a linear scale and apply the square root in the coordinate
  # system. A sqrt *scale* transforms each value before stacking, so bar tops
  # would sit at sqrt(a) + sqrt(b) and the axis would overstate the totals.
  # Explicit limits from zero with no expansion keep the axis range
  # non-negative, which the sqrt transformation requires.
  scale_y_continuous("Damage Impact [$]",
                     limits = c(0, economicAxisMax), expand = c(0, 0)) +
  coord_trans(y = "sqrt") +
  xlab("Event Type") +
  # Rotate x-axis tick labels so the event names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Pareto Chart of Top 10 US Storm Economic Impacts")
# Display the chart
print(economicChart)
5.1 - Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Tornadoes are responsible for the largest proportion of both deaths and injuries out of all event types.
5.2 - Across the United States, which types of events have the greatest economic consequences?
Flooding is responsible for the largest proportion of total economic impact out of all event types.