This analysis of the NOAA Storm Database summarizes the types of weather events that cause the most personal and financial damage. In this analysis, personal damage comprises two categories (injuries and fatalities), and financial damage likewise comprises two categories (property damage and crop damage). The analysis concludes that tornadoes are the largest driver of losses in both the personal and financial categories by a substantial margin.
# Load the raw storm-event export. The path is machine-specific, so the file
# must exist at exactly this location for the script to run.
weather_df <- read.csv(
  file = "C:\\Users\\nhart\\OneDrive\\Desktop\\Johns Hopkins\\Reproducible Research\\Course Project 2\\repdata_data_StormData.csv"
)

# Keep only the event label and the four damage measures used below.
# NOTE(review): no magnitude/multiplier columns are selected here, so PROPDMG
# and CROPDMG are summed downstream as raw values -- confirm that is intended.
subset_weather_df <- weather_df[
  c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
]

# Coerce the four damage measures to numeric in a single pass.
subset_weather_df[c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")] <- lapply(
  subset_weather_df[c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")],
  as.numeric
)

# The event label must stay a plain character vector (not a factor).
subset_weather_df$EVTYPE <- as.character(subset_weather_df$EVTYPE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Grand totals across every event type. Personal damage is FATALITIES plus
# INJURIES; financial damage is PROPDMG plus CROPDMG. Missing values are
# skipped within each column sum.
total_instances_of_personal_damage <- with(
  subset_weather_df,
  sum(FATALITIES, na.rm = TRUE) + sum(INJURIES, na.rm = TRUE)
)
total_instances_of_financial_damage <- with(
  subset_weather_df,
  sum(PROPDMG, na.rm = TRUE) + sum(CROPDMG, na.rm = TRUE)
)
# Report both grand totals with thousands separators and no scientific notation
cat("Total instances of personal damage: ", format(total_instances_of_personal_damage, big.mark = ",", scientific = FALSE), "\n")
## Total instances of personal damage: 155,673
cat("Total instances of financial damage: ", format(total_instances_of_financial_damage, big.mark = ",", scientific = FALSE), "\n")
## Total instances of financial damage: 12,262,327
# Casualty summary per event type: total fatalities, total injuries, their
# sum, and that sum as a percentage (one decimal) of the grand total of
# personal damage. Only the ten event types with the largest sums are kept,
# ordered from largest to smallest.
personal_damage <- subset_weather_df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(
    total_personal_damage_EVTYPE = total_fatalities + total_injuries,
    personal_damage_percentage = round(
      (total_personal_damage_EVTYPE / total_instances_of_personal_damage) * 100,
      1
    )
  ) %>%
  arrange(desc(total_personal_damage_EVTYPE)) %>%
  head(10) # Top 10 event types only
# Financial summary per event type: total property damage, total crop damage,
# their sum, and that sum as a percentage (one decimal) of the grand total of
# financial damage. Only the ten event types with the largest sums are kept,
# ordered from largest to smallest.
financial_damage <- subset_weather_df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_prop_damage = sum(PROPDMG, na.rm = TRUE),
    total_crop_damage = sum(CROPDMG, na.rm = TRUE)
  ) %>%
  mutate(
    total_financial_damage_EVTYPE = total_prop_damage + total_crop_damage,
    financial_damage_percentage = round(
      (total_financial_damage_EVTYPE / total_instances_of_financial_damage) * 100,
      1
    )
  ) %>%
  arrange(desc(total_financial_damage_EVTYPE)) %>%
  head(10) # Top 10 event types only
# Data behind Bar Chart 1: the top 10 event types by personal damage.
# `personal_damage` is already limited to those ten rows and already carries
# each event type's share in `personal_damage_percentage`, so it is reused
# unchanged. No extra column is derived here: a casualty share must never be
# stored under a `financial_*` name, because that mislabels the figures for
# anyone inspecting or joining this table.
top_10_event_types <- personal_damage
# Data behind Bar Chart 2: the top 10 event types by financial damage.
# `financial_damage` already carries `financial_damage_percentage`, so
# recomputing it would only repeat the same calculation.
top_10_financial_event_types <- financial_damage
# Table 1: casualty summary for the ten highest-ranked event types.
# First emit a heading padded with blank lines, then render the table itself.
cat("\n\n## Table 1: Personal Damage by Event Type\n\n")
##
##
## ## Table 1: Personal Damage by Event Type
personal_damage %>%
  knitr::kable(caption = "Table 1: Personal Damage by Event Type")
| EVTYPE | total_fatalities | total_injuries | total_personal_damage_EVTYPE | personal_damage_percentage |
|---|---|---|---|---|
| TORNADO | 5633 | 91346 | 96979 | 62.3 |
| EXCESSIVE HEAT | 1903 | 6525 | 8428 | 5.4 |
| TSTM WIND | 504 | 6957 | 7461 | 4.8 |
| FLOOD | 470 | 6789 | 7259 | 4.7 |
| LIGHTNING | 816 | 5230 | 6046 | 3.9 |
| HEAT | 937 | 2100 | 3037 | 2.0 |
| FLASH FLOOD | 978 | 1777 | 2755 | 1.8 |
| ICE STORM | 89 | 1975 | 2064 | 1.3 |
| THUNDERSTORM WIND | 133 | 1488 | 1621 | 1.0 |
| WINTER STORM | 206 | 1321 | 1527 | 1.0 |
# Table 2: financial summary for the ten highest-ranked event types.
# First emit a heading padded with blank lines, then render the table itself.
cat("\n\n## Table 2: Financial Damage by Event Type\n\n")
##
##
## ## Table 2: Financial Damage by Event Type
financial_damage %>%
  knitr::kable(caption = "Table 2: Financial Damage by Event Type")
| EVTYPE | total_prop_damage | total_crop_damage | total_financial_damage_EVTYPE | financial_damage_percentage |
|---|---|---|---|---|
| TORNADO | 3212258.2 | 100018.52 | 3312276.7 | 27.0 |
| FLASH FLOOD | 1420124.6 | 179200.46 | 1599325.1 | 13.0 |
| TSTM WIND | 1335965.6 | 109202.60 | 1445168.2 | 11.8 |
| HAIL | 688693.4 | 579596.28 | 1268289.7 | 10.3 |
| FLOOD | 899938.5 | 168037.88 | 1067976.4 | 8.7 |
| THUNDERSTORM WIND | 876844.2 | 66791.45 | 943635.6 | 7.7 |
| LIGHTNING | 603351.8 | 3580.61 | 606932.4 | 4.9 |
| THUNDERSTORM WINDS | 446293.2 | 18684.93 | 464978.1 | 3.8 |
| HIGH WIND | 324731.6 | 17283.21 | 342014.8 | 2.8 |
| WINTER STORM | 132720.6 | 1978.99 | 134699.6 | 1.1 |
# Bar Chart 1: each top-10 event type's share of total personal damage.
# Event types are reordered by their percentage so the bars appear sorted by
# size, and the axes are flipped so the event labels read horizontally.
bar_chart_1 <- ggplot(
  data = top_10_event_types,
  mapping = aes(
    x = reorder(EVTYPE, personal_damage_percentage),
    y = personal_damage_percentage
  )
) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    title = "Personal Damage by Event Type",
    x = "Event Type",
    y = "Total Personal Damage (%)"
  ) +
  theme_minimal()
# Bar Chart 2: each top-10 event type's share of total financial damage.
# Event types are reordered by their percentage so the bars appear sorted by
# size, and the axes are flipped so the event labels read horizontally.
bar_chart_2 <- ggplot(
  data = top_10_financial_event_types,
  mapping = aes(
    x = reorder(EVTYPE, financial_damage_percentage),
    y = financial_damage_percentage
  )
) +
  geom_col(fill = "#66c2a5") + # Lighter shade of green
  coord_flip() +
  labs(
    title = "Financial Damage by Event Type",
    x = "Event Type",
    y = "Total Financial Damage (%)"
  ) +
  theme_minimal()
# Place the two bar charts side by side in a single row.
# grid.arrange() draws the layout on the current graphics device itself and
# returns the arranged table invisibly. The result is kept in `combined_plot`
# for reuse but is deliberately not print()ed: printing it does not draw the
# figure, it only writes a text summary of the TableGrob into the report.
combined_plot <- gridExtra::grid.arrange(bar_chart_1, bar_chart_2, nrow = 1)
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.