This report analyzes storm data to identify which event types are most harmful to public health and which have the greatest economic impact.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Location of the raw storm data CSV; update this to the correct path.
file_path <- "repdata_data_StormData.csv"
# Keep text columns as character vectors rather than factors.
data <- read.csv(file = file_path, stringsAsFactors = FALSE)
# Scale damage figures by the magnitude their exponent codes stand for.
#
# dmg: damage values; coerced with as.numeric().
# exp: exponent codes. "K", "M" and "B" (any case, surrounding whitespace
#      ignored) multiply by 1e3, 1e6 and 1e9. Every other code, including
#      NA and "", leaves the value unscaled.
# Returns a numeric vector of dmg times the matching multiplier.
convert_damage <- function(dmg, exp) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  is_blank <- is.na(exp) | exp == ""
  code <- toupper(trimws(ifelse(is_blank, "0", exp)))
  # Codes absent from the table index to NA; those get a multiplier of 1.
  scale <- unname(multipliers[code])
  scale[is.na(scale)] <- 1
  as.numeric(dmg) * scale
}
# Add total property and crop damage columns by multiplying each damage
# figure by the magnitude of its exponent code.
# convert_damage() is built from vectorized operations, so it is applied to
# whole columns at once instead of once per row through mapply(); this also
# yields a numeric column (not a list) when the data frame has zero rows.
data <- data %>%
  mutate(
    PROPDMG_TOTAL = convert_damage(PROPDMG, PROPDMGEXP),
    CROPDMG_TOTAL = convert_damage(CROPDMG, CROPDMGEXP)
  )
# Total fatalities and injuries per event type, ranked from the highest to
# the lowest combined count (fatalities + injuries).
damage_health <- arrange(
  summarise(
    group_by(data, EVTYPE),
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE),
    Total_Health_Damage = Total_Fatalities + Total_Injuries
  ),
  desc(Total_Health_Damage)
)
# Show the ten most harmful event types.
print(head(damage_health, 10))
## # A tibble: 10 × 4
## EVTYPE Total_Fatalities Total_Injuries Total_Health_Damage
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Total property and crop damage per event type, ranked from the highest to
# the lowest combined amount (property + crop).
damage_economic <- arrange(
  summarise(
    group_by(data, EVTYPE),
    Total_Prop_Damage = sum(PROPDMG_TOTAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMG_TOTAL, na.rm = TRUE),
    Total_Economic_Damage = Total_Prop_Damage + Total_Crop_Damage
  ),
  desc(Total_Economic_Damage)
)
# Horizontal bar chart of the ten event types with the highest combined
# fatalities and injuries. Bars are ordered by total and colored per event
# type; the legend is hidden because the axis already names each event.
grafico1 <- damage_health %>%
  head(10) %>%
  ggplot(
    aes(
      x = reorder(EVTYPE, -Total_Health_Damage),
      y = Total_Health_Damage,
      fill = EVTYPE
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 most damaging events for public health",
    x = "Event Type",
    y = "Public Health Damage (Fatalities + Injuries)"
  ) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset the legend.
  theme(legend.position = "none")
print(grafico1)
# Horizontal bar chart of the ten event types with the highest combined
# property and crop damage. Bars are ordered by total and colored per event
# type; the legend is hidden because the axis already names each event.
grafico2 <- damage_economic %>%
  head(10) %>%
  ggplot(
    aes(
      x = reorder(EVTYPE, -Total_Economic_Damage),
      y = Total_Economic_Damage,
      fill = EVTYPE
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 events with the greatest economic impact ",
    x = "Type of Event",
    y = "Total Economic Damages (USD)"
  ) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset the legend.
  theme(legend.position = "none")
print(grafico2)