Synopsis

This data analysis aims to explore the NOAA Storm Database and provide insights into severe weather events in the United States. The analysis addresses two key questions: 1) Which types of events are most harmful to population health? 2) Which types of events have the greatest economic consequences? The results are presented in a clear and concise manner to assist government or municipal managers in prioritizing resources for different types of severe weather events.

Data Processing

# Load required libraries
library(ggplot2)

# Read the raw storm records from the CSV file (path is relative to the
# current working directory)
storm_data <- read.csv(file = "repdata_data_StormData.csv")

# Preview the leading rows to check the column layout
head(storm_data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Most Harmful Events to Population Health

# Total FATALITIES and INJURIES per EVTYPE, then keep the ten event types with
# the largest combined total (fatalities + injuries), highest first.
# NOTE(review): EVTYPE values are grouped exactly as recorded, so differently
# spelled labels are ranked as separate event types -- confirm this is intended.
harmful_events <- local({
  casualty_totals <- aggregate(
    cbind(FATALITIES, INJURIES) ~ EVTYPE,
    data = storm_data,
    FUN = sum
  )
  combined_harm <- rowSums(casualty_totals[, 2:3])
  ranked <- casualty_totals[order(combined_harm, decreasing = TRUE), ]
  ranked[1:10, ]
})

# Print the resulting top-ten table
harmful_events
##                EVTYPE FATALITIES INJURIES
## 834           TORNADO       5633    91346
## 130    EXCESSIVE HEAT       1903     6525
## 856         TSTM WIND        504     6957
## 170             FLOOD        470     6789
## 464         LIGHTNING        816     5230
## 275              HEAT        937     2100
## 153       FLASH FLOOD        978     1777
## 427         ICE STORM         89     1975
## 760 THUNDERSTORM WIND        133     1488
## 972      WINTER STORM        206     1321
# Load ggplot2
library(ggplot2)

# Freeze the current row order (already sorted by combined harm) as the factor
# level order so the x-axis follows the ranking instead of alphabetical order.
harmful_events$EVTYPE <- factor(
  harmful_events$EVTYPE,
  levels = harmful_events$EVTYPE
)

# Reshape to long form: one row per (event type, measure) pair.
# pivot_longer() replaces the superseded gather(); the resulting Category and
# Count columns carry the same values.
harmful_events_long <- tidyr::pivot_longer(
  harmful_events,
  cols = -EVTYPE,
  names_to = "Category",
  values_to = "Count"
)

# Bar chart with one facet row per measure. Each facet gets its own y scale
# because the fatality and injury totals differ greatly in magnitude.
ggplot(harmful_events_long, aes(x = EVTYPE, y = Count, fill = Category)) +
  geom_col(position = "dodge", color = "black") +
  labs(
    title = "Total Fatalities and Injuries by Event Type",
    x = "Event Type",
    y = "Count"
  ) +
  facet_grid(Category ~ ., scales = "free_y") +
  theme_minimal() +
  # Rotate the x-axis labels by 45 degrees so long event names stay readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

Events with the Greatest Economic Consequences

# Total PROPDMG and CROPDMG per EVTYPE, then keep the ten event types with the
# largest combined total (property + crop), highest first.
# NOTE(review): PROPDMGEXP / CROPDMGEXP are not used here, so the sums add the
# raw PROPDMG / CROPDMG figures as-is. Those columns presumably hold magnitude
# codes (the data preview shows "K") -- confirm whether values should be scaled
# before ranking.
economic_consequences <- local({
  damage_totals <- aggregate(
    cbind(PROPDMG, CROPDMG) ~ EVTYPE,
    data = storm_data,
    FUN = sum
  )
  combined_damage <- rowSums(damage_totals[, 2:3])
  ranked <- damage_totals[order(combined_damage, decreasing = TRUE), ]
  ranked[1:10, ]
})

# Print the resulting top-ten table
economic_consequences
##                 EVTYPE   PROPDMG   CROPDMG
## 834            TORNADO 3212258.2 100018.52
## 153        FLASH FLOOD 1420124.6 179200.46
## 856          TSTM WIND 1335965.6 109202.60
## 244               HAIL  688693.4 579596.28
## 170              FLOOD  899938.5 168037.88
## 760  THUNDERSTORM WIND  876844.2  66791.45
## 464          LIGHTNING  603351.8   3580.61
## 786 THUNDERSTORM WINDS  446293.2  18684.93
## 359          HIGH WIND  324731.6  17283.21
## 972       WINTER STORM  132720.6   1978.99
# Freeze the current row order (already sorted by combined damage) as the
# factor level order so the x-axis follows the ranking.
economic_consequences$EVTYPE <- factor(
  economic_consequences$EVTYPE,
  levels = economic_consequences$EVTYPE
)

# Reshape to long form: one row per (event type, damage measure) pair.
# pivot_longer() replaces the superseded gather(); the resulting Category and
# Count columns carry the same values.
economic_consequences_long <- tidyr::pivot_longer(
  economic_consequences,
  cols = -EVTYPE,
  names_to = "Category",
  values_to = "Count"
)

# Bar chart with one facet row per damage measure, each with its own y scale.
ggplot(
  economic_consequences_long,
  aes(x = EVTYPE, y = Count, fill = Category)
) +
  geom_col(position = "dodge", color = "black") +
  labs(
    title = "Total Property and Crop Damage by Event Type",
    x = "Event Type",
    y = "Count"
  ) +
  facet_grid(Category ~ ., scales = "free_y") +
  theme_minimal() +
  # Rotate the x-axis labels by 45 degrees so long event names stay readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

Results

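From the first plot, we can see that tornadoes are the most harmful event type for both fatalities and injuries. From the second plot, we can see that tornadoes have the greatest economic consequences in terms of property damage, while hail has the greatest in terms of crop damage. Note that the damage ranking is based on sums of the raw PROPDMG and CROPDMG values; the PROPDMGEXP and CROPDMGEXP columns are not applied, so the totals should be read with that limitation in mind.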