This analysis examines the impact of severe weather events on population health and the economy. Figure 1 shows that TORNADO is the most harmful event type for population health, causing the highest number of fatalities, followed by EXCESSIVE HEAT and FLASH FLOOD. The state-level analysis reveals that TORNADO is the most fatal event type in most states. Figure 2 shows that FLOOD is the most economically damaging event type in terms of property damage, followed by HURRICANE/TYPHOON and TORNADO. The state-level economic impact varies, with FLOOD and TORNADO being the most damaging event types in most states. Overall, TORNADO and FLOOD are the most harmful event types for health and the economy, respectively, with significant regional variation.
# Load necessary libraries
library(tidyverse)
library(skimr)
library(data.table)
library(R.utils)
library(R.oo)
library(maps)
library(ggplot2)
library(usmap)
library(gridExtra)
library(grid) # Load the grid package for custom text labels
# Read the raw storm-event CSV into `df` with data.table's fread().
# NOTE(review): the path is absolute and user-specific, so the script only
# runs on a machine where this exact file exists.
df <- data.table::fread(
  file = "C:/Users/aaact/Downloads/repdata_data_StormData.csv/repdata_data_StormData.csv"
)
# Preview the first rows to check the column layout
head(df)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## <num> <char> <char> <char> <num> <char> <char>
## 1: 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2: 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3: 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4: 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5: 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6: 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## <char> <num> <char> <char> <char> <char> <num> <lgcl>
## 1: TORNADO 0 0 NA
## 2: TORNADO 0 0 NA
## 3: TORNADO 0 0 NA
## 4: TORNADO 0 0 NA
## 5: TORNADO 0 0 NA
## 6: TORNADO 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES
## <num> <char> <char> <num> <num> <int> <num> <num> <num>
## 1: 0 14.0 100 3 0 0 15
## 2: 0 2.0 150 2 0 0 0
## 3: 0 0.1 123 2 0 0 2
## 4: 0 0.0 100 2 0 0 2
## 5: 0 0.0 150 2 0 0 2
## 6: 0 1.5 177 2 0 0 6
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE
## <num> <char> <num> <char> <char> <char> <char> <num>
## 1: 25.0 K 0 3040
## 2: 2.5 K 0 3042
## 3: 25.0 K 0 3340
## 4: 2.5 K 0 3458
## 5: 2.5 K 0 3412
## 6: 2.5 K 0 3450
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## <num> <num> <num> <char> <num>
## 1: 8812 3051 8806 1
## 2: 8755 0 0 2
## 3: 8742 0 0 3
## 4: 8626 0 0 4
## 5: 8642 0 0 5
## 6: 8748 0 0 6
# Total the fatalities recorded for each event type. EVTYPE is grouped
# exactly as stored, so labels that differ only by leading whitespace
# remain separate groups.
Fatality_table <- summarise(
  group_by(df, EVTYPE),
  FATALITIES_EACH = sum(FATALITIES, na.rm = TRUE)
)
# Show the per-event totals
print(Fatality_table)
## # A tibble: 985 × 2
## EVTYPE FATALITIES_EACH
## <chr> <dbl>
## 1 " HIGH SURF ADVISORY" 0
## 2 " COASTAL FLOOD" 0
## 3 " FLASH FLOOD" 0
## 4 " LIGHTNING" 0
## 5 " TSTM WIND" 0
## 6 " TSTM WIND (G45)" 0
## 7 " WATERSPOUT" 0
## 8 " WIND" 0
## 9 "?" 0
## 10 "ABNORMAL WARMTH" 0
## # ℹ 975 more rows
# Rank event types by total fatalities: discard types with a zero total,
# order from the highest total to the lowest, and keep the first ten rows.
Fatality_table_ranking <- slice_head(
  arrange(
    filter(Fatality_table, FATALITIES_EACH != 0),
    desc(FATALITIES_EACH)
  ),
  n = 10
)
# Show the ten deadliest event types
print(Fatality_table_ranking)
## # A tibble: 10 × 2
## EVTYPE FATALITIES_EACH
## <chr> <dbl>
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
library(ggplot2)
# Make EVTYPE a factor whose levels run from the largest fatality total to
# the smallest, so the bars are drawn in that order rather than alphabetically.
Fatality_table_ranking <- mutate(
  Fatality_table_ranking,
  EVTYPE = factor(
    EVTYPE,
    levels = EVTYPE[order(FATALITIES_EACH, decreasing = TRUE)]
  )
)
# Bar chart of the top 10 event types by fatalities (one bar per event type,
# bar height = total fatalities).
hist_plot <- ggplot(
  data = Fatality_table_ranking,
  mapping = aes(x = EVTYPE, y = FATALITIES_EACH)
) +
  geom_col(fill = "blue", colour = "black", alpha = 0.7) +
  ggtitle("Top 10 Events by Fatalities") +
  xlab("Event Type") +
  ylab("Fatalities") +
  theme_minimal() +
  theme(
    # Slant the event labels so long names do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.title = element_text(size = 16, face = "bold", hjust = 0),
    # Strip the panel background and all grid lines
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Total the fatalities for every (state, event type) pair.
Fatality_table_state <- ungroup(
  summarise(
    group_by(df, STATE, EVTYPE),
    FATALITIES_EACH = sum(FATALITIES, na.rm = TRUE)
  )
)
# Within each state keep the event type(s) whose total equals that state's
# maximum. Tied event types are all retained, so a state can appear in more
# than one row.
Fatality_table_state_max <- ungroup(
  filter(
    group_by(Fatality_table_state, STATE),
    FATALITIES_EACH == max(FATALITIES_EACH)
  )
)
# Show the deadliest event type(s) per state
print(Fatality_table_state_max)
## # A tibble: 90 × 3
## STATE EVTYPE FATALITIES_EACH
## <chr> <chr> <dbl>
## 1 AK AVALANCHE 33
## 2 AL TORNADO 617
## 3 AM MARINE THUNDERSTORM WIND 6
## 4 AN MARINE TSTM WIND 6
## 5 AR TORNADO 379
## 6 AS TSUNAMI 32
## 7 AZ FLASH FLOOD 62
## 8 CA EXCESSIVE HEAT 110
## 9 CO AVALANCHE 48
## 10 CO LIGHTNING 48
## # ℹ 80 more rows
# Prepare data for plotting: plot_usmap() looks for a `state` column, and the
# fill value is taken from `event`.
# Fatality_table_state_max can hold several rows for one state when event
# types tie for the maximum. A choropleth can show only one fill per state,
# so keep a single row per state (the first one in the table's existing order).
state_fatalities <- Fatality_table_state_max %>%
  select(state = STATE, event = EVTYPE) %>%
  distinct(state, .keep_all = TRUE)
# Create a US map plot showing the most fatal event type by state
map_plot <- plot_usmap(data = state_fatalities, values = "event", color = "white") +
  scale_fill_discrete(name = "Most Fatal Event Type") +
  labs(title = "Most Fatal Event Type by State") +
  theme(
    legend.position = "right", # Move legend to the right
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0)
  )
# Stack the bar chart above the map in one column; the relative heights
# (2 : 1.5) give the bar chart the taller share.
combined_plot <- grid.arrange(hist_plot, map_plot, ncol = 1, heights = c(2, 1.5))
# Add the figure caption, centred horizontally near the bottom of the page
grid.text(
  "Figure 1, Fatality Analysis",
  x = 0.5, y = 0.02, just = "center",
  gp = gpar(fontsize = 20, fontface = "bold")
)
The most harmful type of event for population health is TORNADO. The most harmful event type varies by state, but TORNADO is the most harmful in most states.
# Convert property damage to dollars using the magnitude code in PROPDMGEXP:
# K = thousands, M = millions, B = billions.
# The code is upper-cased before matching so that lower-case entries
# ("k", "m", "b") are valued like their upper-case forms instead of falling
# through to the default.
# Any other code (including a blank one) is valued at 0, as before.
df <- df %>%
  mutate(
    PROPDMG_PRICE = case_when(
      toupper(PROPDMGEXP) == "K" ~ PROPDMG * 1000,
      toupper(PROPDMGEXP) == "M" ~ PROPDMG * 1000000,
      toupper(PROPDMGEXP) == "B" ~ PROPDMG * 1000000000,
      TRUE ~ 0
    )
  )
# Display the first few rows of the calculated prices
head(df$PROPDMG_PRICE)
## [1] 25000 2500 25000 2500 2500 2500
# Total the dollar-valued property damage (PROPDMG_PRICE) for each event
# type. Only property damage is summed here.
Economy_table <- summarise(
  group_by(df, EVTYPE),
  Economy_EACH = sum(PROPDMG_PRICE, na.rm = TRUE)
)
# Show the per-event totals
print(Economy_table)
## # A tibble: 985 × 2
## EVTYPE Economy_EACH
## <chr> <dbl>
## 1 " HIGH SURF ADVISORY" 200000
## 2 " COASTAL FLOOD" 0
## 3 " FLASH FLOOD" 50000
## 4 " LIGHTNING" 0
## 5 " TSTM WIND" 8100000
## 6 " TSTM WIND (G45)" 8000
## 7 " WATERSPOUT" 0
## 8 " WIND" 0
## 9 "?" 5000
## 10 "ABNORMAL WARMTH" 0
## # ℹ 975 more rows
# Rank event types by total property damage: discard types with a zero
# total, order from the highest total to the lowest, and keep the first ten.
Economy_table_ranking <- slice_head(
  arrange(
    filter(Economy_table, Economy_EACH != 0),
    desc(Economy_EACH)
  ),
  n = 10
)
# Show the ten costliest event types
print(Economy_table_ranking)
## # A tibble: 10 × 2
## EVTYPE Economy_EACH
## <chr> <dbl>
## 1 FLOOD 144657709800
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56925660480
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16140811510
## 6 HAIL 15727366720
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
## 9 WINTER STORM 6688497250
## 10 HIGH WIND 5270046260
# Make EVTYPE a factor whose levels run from the largest damage total to the
# smallest, so the bars are drawn in that order rather than alphabetically.
Economy_table_ranking <- mutate(
  Economy_table_ranking,
  EVTYPE = factor(
    EVTYPE,
    levels = EVTYPE[order(Economy_EACH, decreasing = TRUE)]
  )
)
# Bar chart of the top 10 event types by economic damage (one bar per event
# type, bar height = total damage in USD).
econ_hist_plot <- ggplot(
  data = Economy_table_ranking,
  mapping = aes(x = EVTYPE, y = Economy_EACH)
) +
  geom_col(fill = "red", colour = "black", alpha = 0.7) +
  ggtitle("Top 10 Events by Economic Damage") +
  xlab("Event Type") +
  ylab("Economic Damage (USD)") +
  theme_minimal() +
  theme(
    # Slant the event labels so long names do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.title = element_text(size = 16, face = "bold", hjust = 0),
    # Strip the panel background and all grid lines
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Total the dollar-valued property damage for every (state, event type) pair.
Economy_table_state <- ungroup(
  summarise(
    group_by(df, STATE, EVTYPE),
    Economy_EACH = sum(PROPDMG_PRICE, na.rm = TRUE)
  )
)
# Within each state keep the event type(s) whose total equals that state's
# maximum. Tied event types are all retained, so a state can appear in more
# than one row.
Economy_table_state_max <- ungroup(
  filter(
    group_by(Economy_table_state, STATE),
    Economy_EACH == max(Economy_EACH)
  )
)
# Show the costliest event type(s) per state
print(Economy_table_state_max)
## # A tibble: 79 × 3
## STATE EVTYPE Economy_EACH
## <chr> <chr> <dbl>
## 1 AK FLOOD 157131940
## 2 AL TORNADO 6321296560
## 3 AM WATERSPOUT 5102000
## 4 AN MARINE THUNDERSTORM WIND 169000
## 5 AR TORNADO 2590007310
## 6 AS TSUNAMI 81000000
## 7 AZ HAIL 2828908700
## 8 CA FLOOD 116751420000
## 9 CO HAIL 1423944750
## 10 CT TORNADO 596236620
## # ℹ 69 more rows
# Prepare data for plotting: plot_usmap() looks for a `state` column, and the
# fill value is taken from `event`.
# Economy_table_state_max can hold several rows for one state when event
# types tie for the maximum. A choropleth can show only one fill per state,
# so keep a single row per state (the first one in the table's existing order).
state_economy <- Economy_table_state_max %>%
  select(state = STATE, event = EVTYPE) %>%
  distinct(state, .keep_all = TRUE)
# Create a US map plot showing the most economically damaging event type by state
econ_map_plot <- plot_usmap(data = state_economy, values = "event", color = "white") +
  scale_fill_discrete(name = "Most Economically Damaging Event Type") +
  labs(title = "Most Economically Damaging Event Type by State") +
  theme(
    legend.position = "right", # Move legend to the right
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0)
  )
# Stack the bar chart above the map in one column; the relative heights
# (2 : 1.5) give the bar chart the taller share.
combined_econ_plot <- grid.arrange(econ_hist_plot, econ_map_plot, ncol = 1, heights = c(2, 1.5))
# Add the figure caption, centred horizontally near the bottom of the page
grid.text(
  "Figure 2, Economic Damage Analysis",
  x = 0.5, y = 0.02, just = "center",
  gp = gpar(fontsize = 20, fontface = "bold")
)
The most harmful type of event for the economy is FLOOD. The most harmful event type varies by state, but FLOOD and TORNADO are the most harmful in most states.