SYNOPSIS

This is an analysis of the NOAA Storm Database intended to summarize the types of weather events that cause the most personal and financial damage. In the context of this analysis, personal damage comprises two categories (injuries and fatalities), and financial damage likewise comprises two categories (property damage and crop damage). The analysis concludes that tornadoes are the largest driver of losses in both the personal and financial categories by a substantial margin.

DATA PROCESSING

# Read the raw storm data from disk.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs on the original author's computer as written - confirm whether a
# project-relative path should be used instead.
weather_df <- read.csv("C:\\Users\\nhart\\OneDrive\\Desktop\\Johns Hopkins\\Reproducible Research\\Course Project 2\\repdata_data_StormData.csv")

# Keep only the event label and the four damage measures analysed below.
subset_weather_df <- weather_df[c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")]

# Every column except the first (EVTYPE) is a damage measure: force numeric.
subset_weather_df[-1] <- lapply(subset_weather_df[-1], as.numeric)

# The event label itself must stay a plain character vector.
subset_weather_df[["EVTYPE"]] <- as.character(subset_weather_df[["EVTYPE"]])

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Grand totals across all event types, skipping missing values:
#   personal damage  = FATALITIES + INJURIES
#   financial damage = PROPDMG + CROPDMG
# NOTE(review): only the raw PROPDMG / CROPDMG values are added here; if the
# dataset carries separate magnitude columns for these figures they are not
# applied - confirm against the data documentation.
total_instances_of_personal_damage <- with(
  subset_weather_df,
  sum(FATALITIES, na.rm = TRUE) + sum(INJURIES, na.rm = TRUE)
)
total_instances_of_financial_damage <- with(
  subset_weather_df,
  sum(PROPDMG, na.rm = TRUE) + sum(CROPDMG, na.rm = TRUE)
)

# Report both grand totals with thousands separators. cat() joins its
# arguments with single spaces, the same separator paste() uses by default.
cat(
  "Total instances of personal damage: ",
  format(total_instances_of_personal_damage, big.mark = ",", scientific = FALSE),
  "\n"
)
## Total instances of personal damage:  155,673
cat(
  "Total instances of financial damage: ",
  format(total_instances_of_financial_damage, big.mark = ",", scientific = FALSE),
  "\n"
)
## Total instances of financial damage:  12,262,327
# Per-event-type personal damage: total fatalities, total injuries, their sum,
# and that sum as a percentage (one decimal place) of the grand total. Only the
# ten event types with the largest combined count are kept.
# NOTE(review): events are grouped on the raw EVTYPE string, so spelling
# variants (e.g. "TSTM WIND" and "THUNDERSTORM WIND" in the printed tables)
# are counted as separate event types - confirm whether they should be merged.
personal_damage <- subset_weather_df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(
    total_personal_damage_EVTYPE = total_fatalities + total_injuries,
    personal_damage_percentage = round(
      total_personal_damage_EVTYPE / total_instances_of_personal_damage * 100,
      1
    )
  ) %>%
  arrange(desc(total_personal_damage_EVTYPE)) %>%
  head(10)

# Per-event-type financial damage, built the same way from PROPDMG and CROPDMG
# and likewise limited to the ten largest event types.
financial_damage <- subset_weather_df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_prop_damage = sum(PROPDMG, na.rm = TRUE),
    total_crop_damage = sum(CROPDMG, na.rm = TRUE)
  ) %>%
  mutate(
    total_financial_damage_EVTYPE = total_prop_damage + total_crop_damage,
    financial_damage_percentage = round(
      total_financial_damage_EVTYPE / total_instances_of_financial_damage * 100,
      1
    )
  ) %>%
  arrange(desc(total_financial_damage_EVTYPE)) %>%
  head(10)

# Data behind Bar Chart 1: the top 10 event types by personal damage.
# personal_damage already holds personal_damage_percentage for each of its
# rows, so it is reused as-is. The previous version added a column named
# financial_damage_percentage that was actually computed from the personal
# damage totals (a copy-paste slip); that mislabelled column is no longer
# created.
top_10_event_types <- personal_damage

# Data behind Bar Chart 2: the top 10 event types by financial damage.
# financial_damage already holds financial_damage_percentage, so recomputing
# it with the same formula was redundant.
top_10_financial_event_types <- financial_damage

RESULTS

# Table 1: top 10 event types by personal damage (fatalities + injuries).
# The title is supplied through the kable caption. A separate cat() of a
# markdown heading is not used because, in a regular chunk, it is rendered as
# literal console output ("## ## Table 1: ...") rather than as a heading.
knitr::kable(personal_damage, caption = "Table 1: Personal Damage by Event Type")
Table 1: Personal Damage by Event Type
EVTYPE total_fatalities total_injuries total_personal_damage_EVTYPE personal_damage_percentage
TORNADO 5633 91346 96979 62.3
EXCESSIVE HEAT 1903 6525 8428 5.4
TSTM WIND 504 6957 7461 4.8
FLOOD 470 6789 7259 4.7
LIGHTNING 816 5230 6046 3.9
HEAT 937 2100 3037 2.0
FLASH FLOOD 978 1777 2755 1.8
ICE STORM 89 1975 2064 1.3
THUNDERSTORM WIND 133 1488 1621 1.0
WINTER STORM 206 1321 1527 1.0
# Table 2: top 10 event types by financial damage (property + crop).
# The title is supplied through the kable caption. A separate cat() of a
# markdown heading is not used because, in a regular chunk, it is rendered as
# literal console output ("## ## Table 2: ...") rather than as a heading.
knitr::kable(financial_damage, caption = "Table 2: Financial Damage by Event Type")
Table 2: Financial Damage by Event Type
EVTYPE total_prop_damage total_crop_damage total_financial_damage_EVTYPE financial_damage_percentage
TORNADO 3212258.2 100018.52 3312276.7 27.0
FLASH FLOOD 1420124.6 179200.46 1599325.1 13.0
TSTM WIND 1335965.6 109202.60 1445168.2 11.8
HAIL 688693.4 579596.28 1268289.7 10.3
FLOOD 899938.5 168037.88 1067976.4 8.7
THUNDERSTORM WIND 876844.2 66791.45 943635.6 7.7
LIGHTNING 603351.8 3580.61 606932.4 4.9
THUNDERSTORM WINDS 446293.2 18684.93 464978.1 3.8
HIGH WIND 324731.6 17283.21 342014.8 2.8
WINTER STORM 132720.6 1978.99 134699.6 1.1
# Bar Chart 1: share of total personal damage for each top-10 event type.
# reorder() sorts the bars by percentage; coord_flip() makes them horizontal
# so the event-type labels stay readable.
bar_chart_1 <- ggplot(
  top_10_event_types,
  aes(
    x = reorder(EVTYPE, personal_damage_percentage),
    y = personal_damage_percentage
  )
) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total Personal Damage (%)",
    title = "Personal Damage by Event Type"
  ) +
  theme_minimal()

# Bar Chart 2: share of total financial damage for each top-10 event type.
bar_chart_2 <- ggplot(
  top_10_financial_event_types,
  aes(
    x = reorder(EVTYPE, financial_damage_percentage),
    y = financial_damage_percentage
  )
) +
  geom_col(fill = "#66c2a5") +  # Lighter shade of green
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total Financial Damage (%)",
    title = "Financial Damage by Event Type"
  ) +
  theme_minimal()

# Draw both charts side by side in one figure. grid.arrange() renders the
# figure itself, so the returned layout object is kept but not print()ed:
# printing it only emits a text summary of the TableGrob, not the plot.
combined_plot <- gridExtra::grid.arrange(bar_chart_1, bar_chart_2, nrow = 1)

Note that the R code for each chunk is shown alongside its output (echo = TRUE) so that every step of the analysis can be reproduced.