This report analyzes storm data published by the National Oceanic & Atmospheric Administration (NOAA). The data documents injuries, fatalities, property and crop damage by type of weather event beginning in January, 1950 through November, 2011. The data can be analyzed by type of weather event, date and geographic area or any combination of these attributes. Documentation of the data can be found on the NOAA website. There is also an FAQ document available.
Summarizing the data by type of weather event shows that the deadliest events include tornadoes (5,633 deaths), heat (3,138 deaths) and floods (1,525 deaths) (Figure 1). Weather events resulting in the most injuries include tornadoes (91,346 injuries), thunderstorms (9,544 injuries) and heat (9,224 injuries) (Figure 1). Weather events causing the most property damage include floods ($167.5 Billion), hurricanes ($84.7 Billion) and other storms ($66.6 Billion) (Figure 2). Weather events causing the most crop damage include droughts ($14 Billion), floods ($12.4 Billion) and other storms ($5.8 Billion) (Figure 2).
# Source URL of the bzip2-compressed NOAA storm data file
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Local path the download is written to.
# NOTE(review): this is an absolute, machine-specific path; the file stored
# there is the compressed download even though its name ends in ".csv".
destfile <- "C:/Users/maril/OneDrive/Documents/R Data Science/Course5/RepDATA_PeerAssessment2.storms_data.csv"
# Download only when no local copy exists, so re-running the report does not
# fetch the archive again (delete the file to force a fresh download).
# mode = "wb" writes the compressed bytes unchanged on every platform.
if (!file.exists(destfile)) {
  download.file(url, destfile, mode = "wb")
}
## 2.2 Data Formatting
# Load the downloaded storm data from destfile into a data frame
storms <- read.csv(
  destfile,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  na.strings = NA
)
# Determine the first and last event dates so the report text and the plot
# subtitles can state the period the data covers.
# Any " 0:00:00" time suffix is stripped from BGN_DATE before parsing as m/d/Y.
bdate <- as.Date(
  gsub(" 0:00:00", "", storms$BGN_DATE),
  format = "%m/%d/%Y"
)
min(bdate)
## [1] "1950-01-03"
max(bdate)
## [1] "2011-11-30"
Multiply the property and crop damage values by the appropriate multiplier, which is specified in the documentation.
# Convert the recorded damage figures into dollar amounts using the exponent
# codes stored next to them.
# Named lookup vector: exponent code -> multiplier
# (K = 1,000; M = 1,000,000; B = 1,000,000,000).
# "A" is a dummy code worth 0, used for every value in the data that has no
# documented multiplier.
m <- c(K = 1000, M = 1000000, B = 1000000000, A = 0)

# Normalise an exponent column.
# The codes are upper-cased BEFORE the membership test so that lower-case
# "k"/"m"/"b" keep their multiplier instead of being turned into the zero-valued
# dummy code; everything else without a documented multiplier becomes "A".
normalizeExp <- function(exp_code) {
  exp_code <- toupper(exp_code)
  exp_code[!exp_code %in% c("K", "M", "B")] <- "A"
  exp_code
}
storms$PROPDMGEXP <- normalizeExp(storms$PROPDMGEXP)
storms$CROPDMGEXP <- normalizeExp(storms$CROPDMGEXP)

# Calculate the damage amount from a DMG value and its EXP unit code.
#   val   numeric vector of damage values
#   units character vector of exponent codes (looked up case-insensitively in m)
# Returns val multiplied by the matching multiplier, element by element.
estDamage <- function(val, units) {
  val * unname(m[toupper(units)])
}

# Add columns for property damage value and crop damage value.
# estDamage() is vectorised, so no rowwise() pass over the data is needed.
storms <- storms %>%
  mutate(
    estPropDamage = estDamage(val = PROPDMG, units = PROPDMGEXP),
    estCropDamage = estDamage(val = CROPDMG, units = CROPDMGEXP)
  )
Group weather events that represent the same type of event but have slightly different descriptors.
# Group the values of EVTYPE that appear to represent the same type of event.
# Each entry is  "pattern" = "label": every EVTYPE matching the pattern
# (case-insensitively) is replaced by the label.
# The entries are applied in the order listed, and that order matters because
# later patterns are matched against labels assigned by earlier ones:
#  - thunderstorm events are first parked under "THUNDERSTRM" and dust storms
#    under "DUST", neither of which contains "STORM", so the generic "STORM"
#    rule does not absorb them;
#  - "THUNDERSTRM" is renamed to "THUNDERSTORM" only after the "STORM" rule ran.
evtype_groups <- c(
  "TSTM|THUNDERSTORM" = "THUNDERSTRM",
  "HURRICANE"         = "HURRICANE",
  "COLD"              = "COLD",
  "FLOOD"             = "FLOOD",
  "HEAT"              = "HEAT",
  "WIND"              = "WIND",
  "RIP CURRENT"       = "RIP CURRENT",
  "DUST STORM"        = "DUST",
  "STORM"             = "OTHER STORM",
  "THUNDERSTRM"       = "THUNDERSTORM",
  "RAIN"              = "RAIN",
  "SNOW"              = "SNOW",
  "LIGHTNING"         = "LIGHTNING",
  "DROUGHT"           = "DROUGHT"
)
for (pattern in names(evtype_groups)) {
  hits <- grepl(pattern, storms$EVTYPE, ignore.case = TRUE)
  storms$EVTYPE[hits] <- evtype_groups[[pattern]]
}
# Sum number of fatalities, number of injuries, property damage amount and
# crop damage amount by event type (missing values are ignored in each sum).
event_summaries <- storms %>%
  group_by(EVTYPE) %>%
  summarize(
    n_Deaths   = sum(FATALITIES, na.rm = TRUE),
    n_Injuries = sum(INJURIES, na.rm = TRUE),
    PropDamage = sum(estPropDamage, na.rm = TRUE),
    CropDamage = sum(estCropDamage, na.rm = TRUE)
  )
Across the United States, which types of events are most harmful with respect to population health?
#Health Events
# Transform the data to long format (one row per event type and outcome) to set
# up the "Deaths" and "Injuries" panels; names_prefix drops the leading "n_"
# from the former column names.
health_events <- event_summaries %>%
  select(EVTYPE, n_Deaths, n_Injuries) %>%
  pivot_longer(
    !EVTYPE,
    names_to = "outcome",
    values_to = "n_events",
    names_prefix = "n_"
  )
# Summarize health outcomes by event type for reporting
health_outcome <- health_events %>%
  group_by(EVTYPE, outcome) %>%
  summarize(eventCount = sum(n_events))
## `summarise()` has grouped output by 'EVTYPE'. You can override using the
## `.groups` argument.
# Select the 5 most frequent events for injuries and for deaths for reporting.
# Rows are sorted by outcome, then by descending count, so the first five rows
# of each outcome group are its five largest; slice_head() replaces the
# superseded do(head(., 5)) idiom and returns the same rows in the same order.
health_outcomes <- health_outcome %>%
  arrange(outcome, desc(eventCount)) %>%
  group_by(outcome) %>%
  slice_head(n = 5)
Across the United States, which types of events have the greatest economic consequences?
#Property Events
# Transform the data to long format (one row per event type and damage type) to
# set up the "Property Damage" and "Crop Damage" panels.
damage_events <- event_summaries %>%
  select(EVTYPE, PropDamage, CropDamage) %>%
  pivot_longer(!EVTYPE, names_to = "Damage_Type", values_to = "Est_Damage")
# Summarize property and crop damage amounts by event type for reporting.
# Divide the estimated damage amounts by 1,000,000,000 to report results in
# $ Billions.
econ_outcome <- damage_events %>%
  group_by(EVTYPE, Damage_Type) %>%
  summarize(Damage_Amt = sum(Est_Damage)) %>%
  mutate(Est_Damage_B = Damage_Amt / 1000000000)
## `summarise()` has grouped output by 'EVTYPE'. You can override using the
## `.groups` argument.
# Select the 5 costliest events for property damage and for crop damage.
# Rows are sorted by damage type, then by descending amount, so the first five
# rows of each damage-type group are its five largest; slice_head() replaces
# the superseded do(head(., 5)) idiom and returns the same rows in the same
# order.
econ_outcomes <- econ_outcome %>%
  arrange(Damage_Type, desc(Est_Damage_B)) %>%
  group_by(Damage_Type) %>%
  slice_head(n = 5)
# Facet label lookup for the damage-type panels.
# NOTE(review): the plot in this chunk facets on `outcome` without a labeller,
# so panel_names is defined here but not used by Figure 1.
panel_names <- as_labeller(c(
  "CropDamage" = "Crop Damage",
  "PropDamage" = "Property Damage"
))
# Figure 1: panel bar plot with one panel per health outcome, showing the five
# event types with the highest counts for that outcome. Bars are ordered by
# descending count and labelled with the comma-formatted count.
ggplot(
  health_outcomes,
  aes(x = reorder(EVTYPE, -eventCount), y = eventCount, fill = EVTYPE)
) +
  geom_col() +
  geom_text(
    aes(label = comma(eventCount)),
    vjust = .9, size = 3, color = "black"
  ) +
  facet_wrap(~ outcome, ncol = 1, scales = "free") +
  labs(
    title = "Figure 1: Population health outcomes of the five most harmful \n weather events resulting in injury or death",
    subtitle = "Event Dates: January, 1950 - November, 2011",
    caption = "Source: National Oceanic & Atmospheric Administration",
    x = "Event type",
    y = "Count"
  ) +
  # Small axis text, no legend, transparent backgrounds and no grid lines
  theme(
    axis.text.x = element_text(size = 7),
    legend.position = "none",
    panel.background = element_rect(fill = "transparent"),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
Tornadoes, heat and floods resulted in the most deaths. Tornadoes, thunderstorms and heat resulted in the most injuries.
# Facet label lookup: maps the Damage_Type values to readable panel titles
panel_names <- as_labeller(c(
  "CropDamage" = "Crop Damage",
  "PropDamage" = "Property Damage"
))
# Figure 2: panel bar plot with one panel per damage type, showing the five
# event types with the largest estimated damage (in $ Billions) for that type.
# Bars are ordered by descending amount and labelled with the dollar-formatted
# amount; x-axis labels are wrapped to a width of 8.
ggplot(
  econ_outcomes,
  aes(x = reorder(EVTYPE, -Est_Damage_B), y = Est_Damage_B, fill = EVTYPE)
) +
  geom_col() +
  geom_text(aes(label = dollar(Est_Damage_B)), vjust = 1, size = 2.7) +
  facet_wrap(~ Damage_Type, ncol = 1, scales = "free", labeller = panel_names) +
  scale_x_discrete(labels = wrap_format(8)) +
  labs(
    title = "Figure 2: Economic impact of the five costliest weather events \n resulting in property or crop damage",
    subtitle = "Event Dates: January, 1950 - November, 2011",
    caption = "Source: National Oceanic & Atmospheric Administration",
    x = "Event type",
    y = "USD $Billions"
  ) +
  # Small axis text, no legend, transparent backgrounds and no grid lines
  theme(
    axis.text.x = element_text(size = 7),
    legend.position = "none",
    panel.background = element_rect(fill = "transparent"),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
Droughts, floods and other storms resulted in the most crop damage. Floods, hurricanes and other storms resulted in the most property damage.