This analysis examines the impact of weather events on population health (measured by fatalities) and on the economy (measured by property damage). Figure 1 shows that TORNADO is the most harmful event type for population health, causing by far the highest number of fatalities, followed by EXCESSIVE HEAT and FLASH FLOOD. The state-level analysis shows that TORNADO is also the most fatal event type in most states. Figure 2 shows that FLOOD is the most economically damaging event type in terms of property damage, followed by HURRICANE/TYPHOON and TORNADO, with STORM SURGE ranking fourth. The state-level economic impact varies, with FLOOD and TORNADO being the most damaging event types in most states. Overall, TORNADO is the most harmful event type for population health and FLOOD is the most harmful for the economy, although there is significant regional variation.

Data Processing

Loading Data

# Load necessary libraries
library(tidyverse)
library(skimr)
library(data.table)
library(R.utils)
library(R.oo)
library(maps)
library(ggplot2)
library(usmap)
library(gridExtra)
library(grid)  # Load the grid package for custom text labels

# Read the storm data CSV from its local path into `df` with data.table::fread()
storm_csv_path <- "C:/Users/aaact/Downloads/repdata_data_StormData.csv/repdata_data_StormData.csv"
df <- fread(storm_csv_path)

Checking Data

# Preview the first six rows to see which columns exist and how they are typed
head(df, n = 6L)
##    STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME  STATE
##      <num>             <char>   <char>    <char>  <num>     <char> <char>
## 1:       1  4/18/1950 0:00:00     0130       CST     97     MOBILE     AL
## 2:       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN     AL
## 3:       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE     AL
## 4:       1   6/8/1951 0:00:00     0900       CST     89    MADISON     AL
## 5:       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN     AL
## 6:       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE     AL
##     EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
##     <char>     <num>  <char>     <char>   <char>   <char>      <num>     <lgcl>
## 1: TORNADO         0                                               0         NA
## 2: TORNADO         0                                               0         NA
## 3: TORNADO         0                                               0         NA
## 4: TORNADO         0                                               0         NA
## 5: TORNADO         0                                               0         NA
## 6: TORNADO         0                                               0         NA
##    END_RANGE END_AZI END_LOCATI LENGTH WIDTH     F   MAG FATALITIES INJURIES
##        <num>  <char>     <char>  <num> <num> <int> <num>      <num>    <num>
## 1:         0                      14.0   100     3     0          0       15
## 2:         0                       2.0   150     2     0          0        0
## 3:         0                       0.1   123     2     0          0        2
## 4:         0                       0.0   100     2     0          0        2
## 5:         0                       0.0   150     2     0          0        2
## 6:         0                       1.5   177     2     0          0        6
##    PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP    WFO STATEOFFIC ZONENAMES LATITUDE
##      <num>     <char>   <num>     <char> <char>     <char>    <char>    <num>
## 1:    25.0          K       0                                            3040
## 2:     2.5          K       0                                            3042
## 3:    25.0          K       0                                            3340
## 4:     2.5          K       0                                            3458
## 5:     2.5          K       0                                            3412
## 6:     2.5          K       0                                            3450
##    LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
##        <num>      <num>      <num>  <char>  <num>
## 1:      8812       3051       8806              1
## 2:      8755          0          0              2
## 3:      8742          0          0              3
## 4:      8626          0          0              4
## 5:      8642          0          0              5
## 6:      8748          0          0              6

Results for Fatality Analysis

Event Type and Fatalities

# Total fatalities per event type: one row per distinct EVTYPE value,
# with missing FATALITIES entries ignored in the sum
Fatality_table <- summarise(
  group_by(df, EVTYPE),
  FATALITIES_EACH = sum(FATALITIES, na.rm = TRUE)
)

# Show the per-event totals
print(Fatality_table)
## # A tibble: 985 × 2
##    EVTYPE                  FATALITIES_EACH
##    <chr>                             <dbl>
##  1 "   HIGH SURF ADVISORY"               0
##  2 " COASTAL FLOOD"                      0
##  3 " FLASH FLOOD"                        0
##  4 " LIGHTNING"                          0
##  5 " TSTM WIND"                          0
##  6 " TSTM WIND (G45)"                    0
##  7 " WATERSPOUT"                         0
##  8 " WIND"                               0
##  9 "?"                                   0
## 10 "ABNORMAL WARMTH"                     0
## # ℹ 975 more rows
# Keep only event types with a non-zero fatality total, order them from the
# most to the least fatal, and retain the first ten rows
Fatality_table_ranking <- slice_head(
  arrange(
    filter(Fatality_table, FATALITIES_EACH != 0),
    desc(FATALITIES_EACH)
  ),
  n = 10
)

# Show the ten deadliest event types
print(Fatality_table_ranking)
## # A tibble: 10 × 2
##    EVTYPE         FATALITIES_EACH
##    <chr>                    <dbl>
##  1 TORNADO                   5633
##  2 EXCESSIVE HEAT            1903
##  3 FLASH FLOOD                978
##  4 HEAT                       937
##  5 LIGHTNING                  816
##  6 TSTM WIND                  504
##  7 FLOOD                      470
##  8 RIP CURRENT                368
##  9 HIGH WIND                  248
## 10 AVALANCHE                  224

Bar Chart

library(ggplot2)

# Fix the bar order on the x axis: turn EVTYPE into a factor whose levels run
# from the largest to the smallest fatality total
Fatality_table_ranking$EVTYPE <- with(
  Fatality_table_ranking,
  factor(EVTYPE, levels = EVTYPE[order(FATALITIES_EACH, decreasing = TRUE)])
)

# Bar chart of the top 10 event types by fatalities.
# geom_col() draws bars whose heights are the y values themselves.
hist_plot <- ggplot(
  data = Fatality_table_ranking,
  mapping = aes(x = EVTYPE, y = FATALITIES_EACH)
) +
  geom_col(fill = "blue", color = "black", alpha = 0.7) +
  labs(
    title = "Top 10 Events by Fatalities",
    x = "Event Type",
    y = "Fatalities"
  ) +
  theme_minimal() +
  theme(
    # Slant the event names so the long labels do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    # Blank panel: no background and no grid lines
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Bold, left-aligned title
    plot.title = element_text(size = 16, face = "bold", hjust = 0)
  )

Event Type and Fatalities by State

# Total fatalities for every (state, event type) combination
Fatality_table_state <- ungroup(
  summarise(
    group_by(df, STATE, EVTYPE),
    FATALITIES_EACH = sum(FATALITIES, na.rm = TRUE)
  )
)

# Within each state, keep the event type(s) whose total equals that state's
# maximum; event types tied for the maximum are all retained
Fatality_table_state_max <- ungroup(
  filter(
    group_by(Fatality_table_state, STATE),
    FATALITIES_EACH == max(FATALITIES_EACH)
  )
)

# Show the deadliest event type(s) for each state
print(Fatality_table_state_max)
## # A tibble: 90 × 3
##    STATE EVTYPE                   FATALITIES_EACH
##    <chr> <chr>                              <dbl>
##  1 AK    AVALANCHE                             33
##  2 AL    TORNADO                              617
##  3 AM    MARINE THUNDERSTORM WIND               6
##  4 AN    MARINE TSTM WIND                       6
##  5 AR    TORNADO                              379
##  6 AS    TSUNAMI                               32
##  7 AZ    FLASH FLOOD                           62
##  8 CA    EXCESSIVE HEAT                       110
##  9 CO    AVALANCHE                             48
## 10 CO    LIGHTNING                             48
## # ℹ 80 more rows
# Prepare data for plotting.
# Fatality_table_state_max keeps every event type tied for a state's maximum,
# so a state can appear on several rows (and a state whose maximum is 0 ties
# on all of its event types). The map needs a single fill value per state, so:
#   * states with no recorded fatalities are dropped instead of being
#     labelled with an arbitrary event type, and
#   * for genuine ties only the first row per state is kept (rows are in the
#     order produced by the grouped summary), which makes the choice
#     deterministic.
state_fatalities <- Fatality_table_state_max %>%
  filter(FATALITIES_EACH > 0) %>%
  distinct(STATE, .keep_all = TRUE) %>%
  select(STATE, EVTYPE) %>%
  rename(state = STATE, event = EVTYPE)

# Create a US map plot showing the most fatal event type by state.
# The `state` column identifies the region; `event` supplies the fill.
map_plot <- plot_usmap(data = state_fatalities, values = "event", color = "white") +
  scale_fill_discrete(name = "Most Fatal Event Type") +
  labs(title = "Most Fatal Event Type by State") +
  theme(
    legend.position = "right",  # Move legend to the right
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0)
  )

# Stack the bar chart above the map in a single column; the bar chart gets
# the larger share of the height (2 : 1.5)
combined_plot <- grid.arrange(
  hist_plot, map_plot,
  heights = c(2, 1.5),
  ncol = 1
)

# Draw the figure caption, centred horizontally near the bottom of the page
grid.text(
  label = "Figure 1, Fatality Analysis",
  x = 0.5, y = 0.02,
  just = "center",
  gp = gpar(fontface = "bold", fontsize = 20)
)

Across the whole dataset, TORNADO is the most harmful event type for population health. The most harmful event type varies by state, but TORNADO is the most harmful in most states.

Results for Economic Analysis

Calculation for Prices

# Convert property damage values to dollar amounts using the magnitude code
# in PROPDMGEXP: K = thousands, M = millions, B = billions.
# The code is upper-cased before matching so that lower-case entries
# (e.g. "m") are not silently counted as 0 damage.
# Any other code (blank, digits, symbols) is still treated as 0.
df <- df %>% mutate(PROPDMG_PRICE = case_when(
  toupper(PROPDMGEXP) == "K" ~ PROPDMG * 1000,
  toupper(PROPDMGEXP) == "M" ~ PROPDMG * 1000000,
  toupper(PROPDMGEXP) == "B" ~ PROPDMG * 1000000000,
  TRUE ~ 0
))

# Display the first few rows of the calculated prices
head(df$PROPDMG_PRICE)
## [1] 25000  2500 25000  2500  2500  2500

Event Type and Economy

# Total property damage (in dollars) per event type: one row per distinct
# EVTYPE value, with missing prices ignored in the sum
Economy_table <- summarise(
  group_by(df, EVTYPE),
  Economy_EACH = sum(PROPDMG_PRICE, na.rm = TRUE)
)

# Show the per-event totals
print(Economy_table)
## # A tibble: 985 × 2
##    EVTYPE                  Economy_EACH
##    <chr>                          <dbl>
##  1 "   HIGH SURF ADVISORY"       200000
##  2 " COASTAL FLOOD"                   0
##  3 " FLASH FLOOD"                 50000
##  4 " LIGHTNING"                       0
##  5 " TSTM WIND"                 8100000
##  6 " TSTM WIND (G45)"              8000
##  7 " WATERSPOUT"                      0
##  8 " WIND"                            0
##  9 "?"                             5000
## 10 "ABNORMAL WARMTH"                  0
## # ℹ 975 more rows
# Keep only event types with a non-zero damage total, order them from the
# most to the least costly, and retain the first ten rows
Economy_table_ranking <- slice_head(
  arrange(
    filter(Economy_table, Economy_EACH != 0),
    desc(Economy_EACH)
  ),
  n = 10
)

# Show the ten most costly event types
print(Economy_table_ranking)
## # A tibble: 10 × 2
##    EVTYPE            Economy_EACH
##    <chr>                    <dbl>
##  1 FLOOD             144657709800
##  2 HURRICANE/TYPHOON  69305840000
##  3 TORNADO            56925660480
##  4 STORM SURGE        43323536000
##  5 FLASH FLOOD        16140811510
##  6 HAIL               15727366720
##  7 HURRICANE          11868319010
##  8 TROPICAL STORM      7703890550
##  9 WINTER STORM        6688497250
## 10 HIGH WIND           5270046260

Bar Chart

# Fix the bar order on the x axis: turn EVTYPE into a factor whose levels run
# from the largest to the smallest damage total
Economy_table_ranking$EVTYPE <- with(
  Economy_table_ranking,
  factor(EVTYPE, levels = EVTYPE[order(Economy_EACH, decreasing = TRUE)])
)

# Bar chart of the top 10 event types by economic damage.
# geom_col() draws bars whose heights are the y values themselves.
econ_hist_plot <- ggplot(
  data = Economy_table_ranking,
  mapping = aes(x = EVTYPE, y = Economy_EACH)
) +
  geom_col(fill = "red", color = "black", alpha = 0.7) +
  labs(
    title = "Top 10 Events by Economic Damage",
    x = "Event Type",
    y = "Economic Damage (USD)"
  ) +
  theme_minimal() +
  theme(
    # Slant the event names so the long labels do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    # Blank panel: no background and no grid lines
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Bold, left-aligned title
    plot.title = element_text(size = 16, face = "bold", hjust = 0)
  )

Event Type and Economic Damage by State

# Total property damage for every (state, event type) combination
Economy_table_state <- ungroup(
  summarise(
    group_by(df, STATE, EVTYPE),
    Economy_EACH = sum(PROPDMG_PRICE, na.rm = TRUE)
  )
)

# Within each state, keep the event type(s) whose total equals that state's
# maximum; event types tied for the maximum are all retained
Economy_table_state_max <- ungroup(
  filter(
    group_by(Economy_table_state, STATE),
    Economy_EACH == max(Economy_EACH)
  )
)

# Show the most costly event type(s) for each state
print(Economy_table_state_max)
## # A tibble: 79 × 3
##    STATE EVTYPE                   Economy_EACH
##    <chr> <chr>                           <dbl>
##  1 AK    FLOOD                       157131940
##  2 AL    TORNADO                    6321296560
##  3 AM    WATERSPOUT                    5102000
##  4 AN    MARINE THUNDERSTORM WIND       169000
##  5 AR    TORNADO                    2590007310
##  6 AS    TSUNAMI                      81000000
##  7 AZ    HAIL                       2828908700
##  8 CA    FLOOD                    116751420000
##  9 CO    HAIL                       1423944750
## 10 CT    TORNADO                     596236620
## # ℹ 69 more rows
# Prepare data for plotting.
# Economy_table_state_max keeps every event type tied for a state's maximum,
# so a state can appear on several rows (and a state whose maximum is 0 ties
# on all of its event types). The map needs a single fill value per state, so:
#   * states with no recorded damage are dropped instead of being labelled
#     with an arbitrary event type, and
#   * for genuine ties only the first row per state is kept (rows are in the
#     order produced by the grouped summary), which makes the choice
#     deterministic.
state_economy <- Economy_table_state_max %>%
  filter(Economy_EACH > 0) %>%
  distinct(STATE, .keep_all = TRUE) %>%
  select(STATE, EVTYPE) %>%
  rename(state = STATE, event = EVTYPE)

# Create a US map plot showing the most economically damaging event type by state.
# The `state` column identifies the region; `event` supplies the fill.
econ_map_plot <- plot_usmap(data = state_economy, values = "event", color = "white") +
  scale_fill_discrete(name = "Most Economically Damaging Event Type") +
  labs(title = "Most Economically Damaging Event Type by State") +
  theme(
    legend.position = "right",  # Move legend to the right
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0)
  )

Combined Plot for Economic Damage

# Stack the bar chart above the map in a single column; the bar chart gets
# the larger share of the height (2 : 1.5)
combined_econ_plot <- grid.arrange(
  econ_hist_plot, econ_map_plot,
  heights = c(2, 1.5),
  ncol = 1
)

# Draw the figure caption, centred horizontally near the bottom of the page
grid.text(
  label = "Figure 2, Economic Damage Analysis",
  x = 0.5, y = 0.02,
  just = "center",
  gp = gpar(fontface = "bold", fontsize = 20)
)

Across the whole dataset, FLOOD is the most harmful event type for the economy in terms of property damage. The most harmful event type varies by state, but FLOOD and TORNADO are the most harmful in most states.