Loading Data

# Read the storm records from the compressed CSV and preview the first rows.
data <- read.csv(file = "repdata_data_StormData.csv.bz2")
head(data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Most Harmful Events (Health Impact)

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total fatalities and injuries per event type, ranked by their combined
# count (largest first).
health_data <- group_by(data, EVTYPE)
health_data <- summarise(
  health_data,
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE)
)
health_data <- arrange(health_data, desc(fatalities + injuries))

head(health_data, n = 10)
## # A tibble: 10 × 3
##    EVTYPE            fatalities injuries
##    <chr>                  <dbl>    <dbl>
##  1 TORNADO                 5633    91346
##  2 EXCESSIVE HEAT          1903     6525
##  3 TSTM WIND                504     6957
##  4 FLOOD                    470     6789
##  5 LIGHTNING                816     5230
##  6 HEAT                     937     2100
##  7 FLASH FLOOD              978     1777
##  8 ICE STORM                 89     1975
##  9 THUNDERSTORM WIND        133     1488
## 10 WINTER STORM             206     1321

Top 10 Harmful Events Plot

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
# Keep only the ten highest-ranked event types for plotting.
top10_health <- head(health_data, n = 10)

# Horizontal bar chart of combined casualties; bars are ordered by the
# combined count of fatalities and injuries.
ggplot(
  top10_health,
  aes(
    x = reorder(EVTYPE, fatalities + injuries),
    y = fatalities + injuries
  )
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 10 Most Harmful Weather Events",
    x = "Event Type",
    y = "Total Fatalities + Injuries"
  )

Economic Damage Analysis

# Convert damage exponent codes to numeric multipliers.
#
# exp: a vector of exponent codes (character, or anything coercible with
#      as.character()). May have any length, including zero.
#
# Returns an unnamed numeric vector of the same length as `exp`:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9 (matched case-insensitively);
#   every other value, including "" and NA, -> 1.
#
# The lookup is vectorised, so it can be called on a whole column at once,
# but it still returns a single number for a single code, which keeps
# element-wise callers such as sapply()/vapply() working.
convert_exp <- function(exp) {
  codes <- c("K", "M", "B")
  multipliers <- c(1e3, 1e6, 1e9)

  # match() yields NA for unknown codes and for NA input, so nothing here
  # can raise "missing value where TRUE/FALSE needed".
  position <- match(toupper(as.character(exp)), codes)

  result <- multipliers[position]
  result[is.na(result)] <- 1 # unknown or missing code: leave amount unscaled
  result
}

# Ensure the exponent columns are plain character vectors before lookup.
data$PROPDMGEXP <- as.character(data$PROPDMGEXP)
data$CROPDMGEXP <- as.character(data$CROPDMGEXP)

# Scale the recorded damage figures by their exponent multipliers.
# vapply() always returns a numeric vector (sapply() would return a list for
# zero rows and break the multiplication), and USE.NAMES = FALSE keeps the
# multiplier vector from carrying the exponent codes as element names.
data$prop_dmg <- data$PROPDMG *
  vapply(data$PROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE)
data$crop_dmg <- data$CROPDMG *
  vapply(data$CROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE)

# Sum property and crop damage for every event type, then rank the event
# types from most to least costly.
economic_data <- group_by(data, EVTYPE)
economic_data <- summarise(
  economic_data,
  total_damage = sum(prop_dmg + crop_dmg, na.rm = TRUE)
)
economic_data <- arrange(economic_data, desc(total_damage))

head(economic_data, n = 10)
## # A tibble: 10 × 2
##    EVTYPE             total_damage
##    <chr>                     <dbl>
##  1 FLOOD             150319678257 
##  2 HURRICANE/TYPHOON  71913712800 
##  3 TORNADO            57340614060.
##  4 STORM SURGE        43323541000 
##  5 HAIL               18752904943.
##  6 FLASH FLOOD        17562129167.
##  7 DROUGHT            15018672000 
##  8 HURRICANE          14610229010 
##  9 RIVER FLOOD        10148404500 
## 10 ICE STORM           8967041360

Top 10 Economic Damage Plot

# Keep only the ten costliest event types for plotting.
top10_econ <- head(economic_data, n = 10)

# Horizontal bar chart of total damage; bars are ordered by total_damage.
ggplot(
  top10_econ,
  aes(
    x = reorder(EVTYPE, total_damage),
    y = total_damage
  )
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Top 10 Events Causing Economic Damage",
    x = "Event Type",
    y = "Total Damage"
  )