Data Processing Section

# Load required packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Read the bzip2-compressed storm CSV; read.csv() opens the compressed
# file directly, so no separate decompression step is needed.
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")

# Preview the leading rows to check the column layout
head(storm_data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Analyze Health Impact

# Sum fatalities and injuries per event type, then rank the event types
# by fatalities first and by injuries second (both descending).
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(
    desc(total_fatalities),
    desc(total_injuries)
  )

# Show the ten highest-ranked event types
health_impact %>% head(n = 10)
## # A tibble: 10 × 3
##    EVTYPE         total_fatalities total_injuries
##    <chr>                     <dbl>          <dbl>
##  1 TORNADO                    5633          91346
##  2 EXCESSIVE HEAT             1903           6525
##  3 FLASH FLOOD                 978           1777
##  4 HEAT                        937           2100
##  5 LIGHTNING                   816           5230
##  6 TSTM WIND                   504           6957
##  7 FLOOD                       470           6789
##  8 RIP CURRENT                 368            232
##  9 HIGH WIND                   248           1137
## 10 AVALANCHE                   224            170

Analyze Economic Impact

# Convert a damage magnitude code (PROPDMGEXP / CROPDMGEXP) into a numeric
# multiplier.
#
# exp_code: character (or factor) vector of magnitude codes.
# Returns:  numeric vector of the same length as exp_code.
#
# Recognised codes, case-insensitive: H = 1e2, K = 1e3, M = 1e6, B = 1e9.
# Anything else (blank, NA, digits, "+", "-", "?") gets a multiplier of 1,
# so the recorded amount is kept as-is rather than dropped.
# NOTE(review): K/M/B are the documented codes; the meaning of the other
# symbols is not established here -- confirm before relying on those rows.
damage_multiplier <- function(exp_code) {
  known_codes <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # as.character() guards against factor columns (older read.csv defaults)
  code <- toupper(trimws(as.character(exp_code)))
  multiplier <- unname(known_codes[code])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Calculate the total property and crop damage for each event type.
# PROPDMG / CROPDMG hold only the mantissa of each amount; the magnitude is
# stored separately in PROPDMGEXP / CROPDMGEXP. Each amount is scaled by its
# magnitude before summing, otherwise a "2.5 K" and a "2.5 B" event would
# contribute equally to the total.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarize(
    total_property_damage = sum(
      PROPDMG * damage_multiplier(PROPDMGEXP),
      na.rm = TRUE
    ),
    total_crop_damage = sum(
      CROPDMG * damage_multiplier(CROPDMGEXP),
      na.rm = TRUE
    )
  ) %>%
  arrange(desc(total_property_damage), desc(total_crop_damage))

# Display the top 10 events with the highest economic impact
head(economic_impact, 10)
## # A tibble: 10 × 3
##    EVTYPE             total_property_damage total_crop_damage
##    <chr>                              <dbl>             <dbl>
##  1 TORNADO                         3212258.           100019.
##  2 FLASH FLOOD                     1420125.           179200.
##  3 TSTM WIND                       1335966.           109203.
##  4 FLOOD                            899938.           168038.
##  5 THUNDERSTORM WIND                876844.            66791.
##  6 HAIL                             688693.           579596.
##  7 LIGHTNING                        603352.             3581.
##  8 THUNDERSTORM WINDS               446293.            18685.
##  9 HIGH WIND                        324732.            17283.
## 10 WINTER STORM                     132721.             1979.

Results

Events Most Harmful to Population Health

The bar chart below shows the 10 event types with the most fatalities (event types are ranked by total fatalities, with ties broken by total injuries).

# Bar chart of fatalities for the ten highest-ranked event types,
# with bars ordered from most to fewest fatalities.
top_health_impact <- head(health_impact, n = 10)
ggplot(
  data = top_health_impact,
  mapping = aes(
    x = reorder(EVTYPE, -total_fatalities),
    y = total_fatalities
  )
) +
  geom_col(fill = "red") +
  labs(
    title = "Top 10 Events with Highest Fatalities",
    x = "Event Type",
    y = "Total Fatalities"
  ) +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Events with the Greatest Economic Consequences

The bar chart below shows the 10 event types with the highest total property damage (event types are ranked by total property damage, with ties broken by total crop damage).

# Bar chart of property damage for the ten highest-ranked event types,
# with bars ordered from largest to smallest total.
top_economic_impact <- head(economic_impact, n = 10)
ggplot(
  data = top_economic_impact,
  mapping = aes(
    x = reorder(EVTYPE, -total_property_damage),
    y = total_property_damage
  )
) +
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Events with Highest Property Damage",
    x = "Event Type",
    y = "Total Property Damage"
  ) +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))