Introduction: This analysis examines the U.S. National Weather Service (NWS) Storm Database to identify: (1) the event types most harmful to population health; and (2) the event types with the greatest economic consequences.

Data Processing

data loading

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Read the raw storm records from the CSV, addressed relative to the
# current working directory.
# dir("repdata_data_StormData")  # uncomment to list the folder contents
dat1 <- read.csv(
  file = "repdata_data_StormData/repdata_data_StormData.csv",
  header = TRUE
)

data analysis

1. To determine which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health

# Total population-health harm per event type: fatalities plus injuries,
# summed within each EVTYPE and sorted from most to least harmful.
#
# The two per-event totals are added as plain named vectors *before* the data
# frame is built. `%>%` binds tighter than `+`, so piping directly after the
# sum would convert only the injuries total and leave the addition to
# data-frame arithmetic.
fatalities_by_event <- tapply(dat1$FATALITIES, dat1$EVTYPE, sum, na.rm = TRUE)
injuries_by_event <- tapply(dat1$INJURIES, dat1$EVTYPE, sum, na.rm = TRUE)
harm_by_event <- fatalities_by_event + injuries_by_event

# EVTYPE is stored as a real column before sorting, so the labels do not
# depend on row names surviving arrange().
harm_summary <- data.frame(
  total_harm = as.vector(harm_by_event),
  EVTYPE = as.character(names(harm_by_event)),
  row.names = names(harm_by_event),
  stringsAsFactors = FALSE
) %>%
  arrange(desc(total_harm))
# View the top 5 most harmful events
Justification: The raw data contains separate columns for FATALITIES and INJURIES, requiring aggregation to accurately assess total population health impact. By summing these metrics for each event type, a single total-harm value is obtained that can be ranked across event types.

result 1:

# Bar chart of the five event types with the highest combined fatalities and
# injuries, ordered from most to least harmful.
# head() is used instead of [1:5, ] so that fewer than five event types do not
# produce all-NA padding rows.
ggplot(
  head(harm_summary, 5),
  aes(x = reorder(EVTYPE, -total_harm), y = total_harm)
) +
  geom_col(fill = "blue") +
  geom_text(aes(label = total_harm), vjust = -0.5) +
  labs(title = "Top 5 Most Harmful Weather Events to Population Health",
       x = "Event Type",
       y = "Total Harm (Fatalities + Injuries)") +
  # theme_classic() is a complete theme: it must come first, otherwise it
  # replaces the axis-label rotation set by theme().
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2. To determine which types of events cause the greatest economic losses

Damage Value Conversion: PROPDMG and CROPDMG were each multiplied by the multiplier encoded in their EXP column (K = 1,000; M = 1,000,000; B = 1,000,000,000) to obtain actual damage values.
# Calculate total economic impact (property + crop damage) by event type.

# Translate a damage exponent code into its numeric multiplier.
# Matching is case-insensitive, so lowercase codes such as "k" or "m" scale
# the same way as "K" and "M". Any other code (or a missing one) leaves the
# amount unscaled.
damage_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  case_when(
    code == "K" ~ 1e3,
    code == "M" ~ 1e6,
    code == "B" ~ 1e9,
    TRUE ~ 1
  )
}

# Convert the recorded damage amounts to actual values using the EXP columns.
damage_values <- dat1 %>%
  mutate(
    prop_dmg_value = PROPDMG * damage_multiplier(PROPDMGEXP),
    crop_dmg_value = CROPDMG * damage_multiplier(CROPDMGEXP)
  )

# Sum each damage kind per event type, then add the two named vectors
# explicitly. `%>%` binds tighter than `+`, so piping straight after the sum
# would convert only the crop total to a data frame.
prop_by_event <- tapply(
  damage_values$prop_dmg_value, damage_values$EVTYPE, sum, na.rm = TRUE
)
crop_by_event <- tapply(
  damage_values$crop_dmg_value, damage_values$EVTYPE, sum, na.rm = TRUE
)
damage_by_event <- prop_by_event + crop_by_event

# EVTYPE is stored as a real column before sorting, so the labels do not
# depend on row names surviving arrange().
economic_impact <- data.frame(
  total_damage = as.vector(damage_by_event),
  EVTYPE = as.character(names(damage_by_event)),
  row.names = names(damage_by_event),
  stringsAsFactors = FALSE
) %>%
  arrange(desc(total_damage))
Justification: Raw data stores damage values separately from their units, making aggregation impossible without conversion to consistent units

result 2:

# Bar chart of the five event types with the highest combined crop and
# property damage, ordered from most to least costly.
# head() is used instead of [1:5, ] so that fewer than five event types do not
# produce all-NA padding rows.
ggplot(
  head(economic_impact, 5),
  aes(x = reorder(EVTYPE, -total_damage), y = total_damage)
) +
  geom_col(fill = "blue") +
  geom_text(aes(label = total_damage), vjust = -0.5) +
  labs(title = "Top 5 Most Harmful Weather Events to Total Damage",
       x = "Event Type",
       y = "Total Damage (Crop Damage + Property Damage)") +
  # theme_classic() is a complete theme: it must come first, otherwise it
  # replaces the axis-label rotation set by theme().
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Synopsis: