Data Processing

#load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

# Read the bz2-compressed storm CSV; read.csv() decompresses it on the fly.
storm_data_path <- "/Users/itzelvalencia/Documents/Research/projects/datasciencecoursera/repdata_data_StormData.csv.bz2"
storm_data <- read.csv(storm_data_path)

Results

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

## Total fatalities and total injuries recorded for each EVTYPE
## (missing counts are ignored when summing).
events_by_type <- group_by(storm_data, EVTYPE)
health_impact <- summarise(
  events_by_type,
  total_fatalities = sum(FATALITIES, na.rm = TRUE),
  total_injuries = sum(INJURIES, na.rm = TRUE)
)

## Combine fatalities and injuries into a single per-event-type casualty total
health_impact2 <- mutate(
  health_impact,
  total_health_impact = total_fatalities + total_injuries
)

## Identify the top 5 event types by combined fatalities and injuries.
## slice_max() replaces the superseded top_n(); it returns the rows sorted from
## largest to smallest, so a separate arrange() step is not needed. Ties at the
## cut-off are kept (with_ties = TRUE), matching what top_n() did.
top_5 <- health_impact2 %>%
  slice_max(total_health_impact, n = 5, with_ties = TRUE)

## Bar chart of the top 5 event types, bars ordered from the highest combined
## casualty total to the lowest.
ggplot(
  data = top_5,
  mapping = aes(
    x = reorder(EVTYPE, -total_health_impact),
    y = total_health_impact
  )
) +
  geom_col(fill = "skyblue") +
  ggtitle("Top 5 Most Harmful Event Types") +
  xlab("Event Type") +
  ylab("Total Health Impact") +
  # Slant the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Results continued

Question 2: Across the United States, which types of events have the greatest economic consequences?

## Convert a damage figure and its exponent code to a plain numeric amount.
##
## damage: numeric vector of damage values (e.g. PROPDMG or CROPDMG).
## exp:    vector of exponent codes (e.g. PROPDMGEXP or CROPDMGEXP), matched
##         case-insensitively: "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9.
##         Any other code (blank, NA, digits, symbols) leaves the value
##         unscaled, i.e. uses a multiplier of 1.
## Returns `damage` multiplied element-wise by the matching multiplier.
convert_damage <- function(damage, exp) {
  unit_multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  # toupper() makes the lookup case-insensitive, so lower-case codes such as
  # "k" or "m" are scaled instead of silently falling through to 1. It also
  # coerces factor input to character, so the lookup is by label, not by code.
  multiplier <- unname(unit_multipliers[toupper(exp)])

  # Codes without an entry in the table come back as NA; treat them as x1.
  multiplier[is.na(multiplier)] <- 1

  damage * multiplier
}

## Overwrite the damage columns with amounts scaled by their exponent codes
storm_data2 <- mutate(
  storm_data,
  PROPDMG = convert_damage(PROPDMG, PROPDMGEXP),
  CROPDMG = convert_damage(CROPDMG, CROPDMGEXP)
)

## Per-event-type totals of property damage and crop damage
## (missing values are ignored when summing).
damage_by_type <- group_by(storm_data2, EVTYPE)
economic_impact <- summarise(
  damage_by_type,
  total_property_damage = sum(PROPDMG, na.rm = TRUE),
  total_crop_damage = sum(CROPDMG, na.rm = TRUE)
)

## Add the combined property + crop damage for each event type
economic_impact <- mutate(
  economic_impact,
  total_economic_impact = total_property_damage + total_crop_damage
)

## Identify the top 5 event types by combined property and crop damage.
## slice_max() replaces the superseded top_n(); it returns the rows sorted from
## largest to smallest, so a separate arrange() step is not needed. Ties at the
## cut-off are kept (with_ties = TRUE), matching what top_n() did.
top_5_economic <- economic_impact %>%
  slice_max(total_economic_impact, n = 5, with_ties = TRUE)

# Bar chart of the top 5 event types, bars ordered from the highest combined
# damage total to the lowest.
ggplot(
  data = top_5_economic,
  mapping = aes(
    x = reorder(EVTYPE, -total_economic_impact),
    y = total_economic_impact
  )
) +
  geom_col(fill = "pink") +
  ggtitle("Top 5 Events with the Greatest Economic Impact") +
  xlab("Event Type") +
  ylab("Total Economic Impact") +
  # Slant the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))