#load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Read the bzip2-compressed storm CSV into a data frame
storm_data <- read.csv(
  file = "/Users/itzelvalencia/Documents/Research/projects/datasciencecoursera/repdata_data_StormData.csv.bz2"
)
# Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
## Total fatalities and injuries per event type (EVTYPE); missing counts are
## ignored when summing
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  )
## Combined health impact = fatalities + injuries
health_impact2 <- health_impact %>%
  mutate(total_health_impact = total_fatalities + total_injuries)
## Identify the top 5, largest first. slice_max() replaces the superseded
## arrange(desc()) + top_n() pair; like top_n() it keeps tied rows, so a tie
## at the cut-off can yield more than 5 rows.
top_5 <- health_impact2 %>%
  slice_max(total_health_impact, n = 5)
## Bar chart of total_health_impact for the event types in top_5,
## tallest bar first
ggplot(
  data = top_5,
  mapping = aes(
    x = reorder(EVTYPE, -total_health_impact),
    y = total_health_impact
  )
) +
  geom_col(fill = "skyblue") +
  ggtitle("Top 5 Most Harmful Event Types") +
  xlab("Event Type") +
  ylab("Total Health Impact") +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Question 2: Across the United States, which types of events have the greatest economic consequences?
## Scale damage figures by their magnitude code.
##
## damage: numeric vector of damage magnitudes.
## exp:    vector of magnitude codes, one per element of damage. "K", "M" and
##         "B" scale by 1e3, 1e6 and 1e9. Matching ignores case and
##         surrounding whitespace, so "k" and "m" are scaled as well. Any
##         other code (including "" and NA) leaves the value unscaled.
## Returns a numeric vector: damage multiplied by the matching multiplier.
convert_damage <- function(damage, exp) {
  scale_by <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(trimws(as.character(exp)))
  multiplier <- unname(scale_by[code])
  # Codes with no entry in the lookup index to NA; treat them as "no scaling"
  multiplier[is.na(multiplier)] <- 1
  damage * multiplier
}
## Copy the raw data, then overwrite both damage columns with their scaled
## values (each computed from the original, unscaled column)
storm_data2 <- storm_data
storm_data2$PROPDMG <- convert_damage(
  storm_data$PROPDMG,
  storm_data$PROPDMGEXP
)
storm_data2$CROPDMG <- convert_damage(
  storm_data$CROPDMG,
  storm_data$CROPDMGEXP
)
## Property and crop damage totals per event type (EVTYPE), plus their sum
## as the total economic impact; missing values are ignored when summing
economic_impact <- storm_data2 %>%
  group_by(EVTYPE) %>%
  summarise(
    total_property_damage = sum(PROPDMG, na.rm = TRUE),
    total_crop_damage = sum(CROPDMG, na.rm = TRUE)
  ) %>%
  mutate(total_economic_impact = total_property_damage + total_crop_damage)
## Identify the top 5, largest first. slice_max() replaces the superseded
## arrange(desc()) + top_n() pair; like top_n() it keeps tied rows, so a tie
## at the cut-off can yield more than 5 rows.
top_5_economic <- economic_impact %>%
  slice_max(total_economic_impact, n = 5)
# Bar chart of total_economic_impact for the event types in top_5_economic,
# tallest bar first
ggplot(
  data = top_5_economic,
  mapping = aes(
    x = reorder(EVTYPE, -total_economic_impact),
    y = total_economic_impact
  )
) +
  geom_col(fill = "pink") +
  ggtitle("Top 5 Events with the Greatest Economic Impact") +
  xlab("Event Type") +
  ylab("Total Economic Impact") +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))