Introduction

This report analyzes storm data to identify which event types are most harmful to public health and which have the greatest economic impact.

Load Data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Location of the storm data CSV; update this to the correct path.
file_path <- "repdata_data_StormData.csv"
# Read text columns as character vectors rather than factors.
data <- read.csv(file = file_path, stringsAsFactors = FALSE)

Normalize economic damage values

# Convert a damage magnitude and its exponent code into an absolute amount.
#
# Arguments:
#   dmg  Damage magnitude(s); coerced with as.numeric().
#   exp  Exponent code(s), same length as `dmg` (or length 1). Matching is
#        case-insensitive and ignores surrounding whitespace.
#
# Returns:
#   A numeric vector: `dmg` multiplied by 1e2 for "H", 1e3 for "K", 1e6 for
#   "M" and 1e9 for "B". Any other code (including NA, "", digits and
#   symbols) leaves `dmg` unscaled (multiplier 1).
convert_damage <- function(dmg, exp) {
  exp_factors <- c("H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9)

  # as.character() guards against factor input, whose integer codes would
  # otherwise be looked up instead of the labels.
  code <- toupper(trimws(as.character(exp)))

  # match() yields NA for unknown codes (and for NA itself); those fall back
  # to a multiplier of 1. unname() keeps the result free of lookup names.
  multiplier <- unname(exp_factors[match(code, names(exp_factors))])
  multiplier[is.na(multiplier)] <- 1

  as.numeric(dmg) * multiplier
}

# Add absolute property and crop damage columns. convert_damage() operates on
# whole vectors, so it is called once per column rather than once per row
# through mapply(); the resulting values are the same.
data <- data %>%
  mutate(
    PROPDMG_TOTAL = convert_damage(PROPDMG, PROPDMGEXP),
    CROPDMG_TOTAL = convert_damage(CROPDMG, CROPDMGEXP)
  )

Question 1: Events most harmful to public health

# Per event type: total fatalities, total injuries and their sum, ordered so
# that the event types with the largest combined count come first.
damage_health <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(Total_Health_Damage = Total_Fatalities + Total_Injuries) %>%
  arrange(desc(Total_Health_Damage))

# Show the 10 most harmful event types.
print(head(damage_health, n = 10))
## # A tibble: 10 × 4
##    EVTYPE            Total_Fatalities Total_Injuries Total_Health_Damage
##    <chr>                        <dbl>          <dbl>               <dbl>
##  1 TORNADO                       5633          91346               96979
##  2 EXCESSIVE HEAT                1903           6525                8428
##  3 TSTM WIND                      504           6957                7461
##  4 FLOOD                          470           6789                7259
##  5 LIGHTNING                      816           5230                6046
##  6 HEAT                           937           2100                3037
##  7 FLASH FLOOD                    978           1777                2755
##  8 ICE STORM                       89           1975                2064
##  9 THUNDERSTORM WIND              133           1488                1621
## 10 WINTER STORM                   206           1321                1527

Question 2: Events with the greatest economic impact

# Per event type: total property damage, total crop damage and their sum,
# ordered so that the costliest event types come first.
damage_economic <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Prop_Damage = sum(PROPDMG_TOTAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMG_TOTAL, na.rm = TRUE)
  ) %>%
  mutate(Total_Economic_Damage = Total_Prop_Damage + Total_Crop_Damage) %>%
  arrange(desc(Total_Economic_Damage))

Graphics

Top 10 most damaging events for public health

# Horizontal bar chart of the first 10 rows of damage_health, i.e. the event
# types with the highest combined fatalities and injuries.
grafico1 <- damage_health %>%
  head(10) %>%
  ggplot(aes(
    x = reorder(EVTYPE, -Total_Health_Damage),
    y = Total_Health_Damage,
    fill = EVTYPE
  )) +
  geom_col() +
  coord_flip() +  # put the event names on the vertical axis
  labs(
    title = "Top 10 most damaging events for public health",
    x = "Event Type",
    y = "Public Health Damage (Fatalities + Injuries)"
  ) +
  theme_minimal() +
  # Fill only distinguishes the bars, which are already labelled on the axis.
  theme(legend.position = "none")

print(grafico1)

Top 10 events with the greatest economic impact

# Horizontal bar chart of the first 10 rows of damage_economic, i.e. the event
# types with the highest combined property and crop damage.
grafico2 <- damage_economic %>%
  head(10) %>%
  ggplot(aes(
    x = reorder(EVTYPE, -Total_Economic_Damage),
    y = Total_Economic_Damage,
    fill = EVTYPE
  )) +
  geom_col() +
  coord_flip() +  # put the event names on the vertical axis
  labs(
    title = "Top 10 events with the greatest economic impact ",
    x = "Type of Event",
    y = "Total Economic Damages (USD)"
  ) +
  theme_minimal() +
  # Fill only distinguishes the bars, which are already labelled on the axis.
  theme(legend.position = "none")

print(grafico2)