This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## Quantifying the Human and Financial Impact of Severe Weather Events Using the NOAA Storm Database
- **TORNADO:** The leading cause of public health damage by a wide margin. Tornadoes have caused more than 5,600 fatalities and more than 91,000 injuries historically.
- **EXCESSIVE HEAT / HEAT:** While less visually destructive than tornadoes, excessive heat is the second deadliest event type, frequently causing spikes in fatalities during summer heatwaves.
- **FLASH FLOOD / FLOOD:** Flooding causes significant casualties, primarily due to vehicle entrapment and structural failures during sudden surges.
- **LIGHTNING:** Lightning ranks highly because it occurs frequently across the entire country, causing steady numbers of individual injuries and deaths every year.
- **TSTM WIND (Thunderstorm Wind):** High-velocity winds from severe thunderstorms cause widespread localized structural damage, leading to high injury numbers.
# Path of the raw storm data CSV; the file is read twice below.
storm_csv <- "repdata_data_StormData.csv"

# Read a single data row first, only to obtain the column names.
initial_row <- read.csv(storm_csv, nrows = 1)
dim(initial_row)
## [1] 1 37
col_names <- names(initial_row)

# Build the colClasses vector: "NULL" tells read.csv to skip a column, so
# only the three columns assigned a real class below are loaded.
classes <- setNames(rep("NULL", length(col_names)), col_names)
classes["EVTYPE"] <- "character"
classes[c("FATALITIES", "INJURIES")] <- "numeric"
data1 <- read.csv(storm_csv, colClasses = classes)

# Dimensions (rows, columns) of the loaded subset
dim(data1)
## [1] 902297 3
names(data1)
## [1] "EVTYPE" "FATALITIES" "INJURIES"
head(data1)
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 0 15
## 2 TORNADO 0 0
## 3 TORNADO 0 2
## 4 TORNADO 0 2
## 5 TORNADO 0 2
## 6 TORNADO 0 6
library(dplyr)
library(knitr)
# 1. Normalise the event labels so that case differences and stray
#    leading/trailing whitespace do not split one event type across groups.
data1$EVTYPE <- toupper(trimws(data1$EVTYPE))

# 2. Aggregate fatalities and injuries per event type, most harmful first.
health_summary <- data1 %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    # Derive the total from the two sums above instead of
    # sum(FATALITIES + INJURIES, na.rm = TRUE): a record with NA in only one
    # of the columns makes that row-wise sum NA, which would silently drop
    # the other column's count and let Total_Harm disagree with
    # Fatalities + Injuries.
    Total_Harm = Fatalities + Injuries
  ) %>%
  arrange(desc(Total_Harm))

# 3. Print a formatted table of the 10 event types with the highest Total_Harm
kable(head(health_summary, 10),
      caption = "Top 10 Most Harmful Weather Events to U.S. Population Health")
| EVTYPE | Fatalities | Injuries | Total_Harm |
|---|---|---|---|
| TORNADO | 5633 | 91346 | 96979 |
| EXCESSIVE HEAT | 1903 | 6525 | 8428 |
| TSTM WIND | 504 | 6957 | 7461 |
| FLOOD | 470 | 6789 | 7259 |
| LIGHTNING | 816 | 5230 | 6046 |
| HEAT | 937 | 2100 | 3037 |
| FLASH FLOOD | 978 | 1777 | 2755 |
| ICE STORM | 89 | 1975 | 2064 |
| THUNDERSTORM WIND | 133 | 1488 | 1621 |
| WINTER STORM | 206 | 1321 | 1527 |
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Keep only the first ten rows of the (already sorted) summary table.
top_10_health <- head(health_summary, 10)

# 2. Stack the Fatalities and Injuries columns into Harm_Type / Casualties
#    pairs, giving one row per event and harm type for the fill aesthetic.
long_health <- pivot_longer(
  top_10_health,
  cols = c(Fatalities, Injuries),
  names_to = "Harm_Type",
  values_to = "Casualties"
)

# Fill colour assigned to each harm type.
harm_colours <- c("Fatalities" = "#d95f02", "Injuries" = "#7570b3")

# 3. Stacked bar chart, events ordered by descending Total_Harm.
ggplot(
  long_health,
  aes(x = reorder(EVTYPE, -Total_Harm), y = Casualties, fill = Harm_Type)
) +
  geom_col() +
  scale_fill_manual(values = harm_colours) +
  labs(
    title = "Top 10 Most Harmful Severe Weather Events in the U.S.",
    subtitle = "Aggregated data reflecting total fatalities and injuries",
    x = "Event Type (EVTYPE)",
    y = "Total Number of Casualties",
    fill = "Impact Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "right"
  )