Synopsis

This report analyzes the NOAA storm database to identify the most harmful weather events in terms of public health impact and economic consequences. The analysis focuses on the number of fatalities, injuries, and financial damages caused by different weather/natural event types.

Data Processing

The dataset was loaded, and the property and crop damage figures were multiplied by their recorded exponent codes (H = hundreds, K = thousands, M = millions, B = billions) to obtain actual dollar values. Fatalities and injuries were summed for each event type to determine public health impact.

# Inspect the dataset: str() lists each column's name, type, and first values
str(storm_data)
## Classes 'data.table' and 'data.frame':   902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Preview the first rows of the dataset (head() shows six rows by default)
head(storm_data)
##    STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME  STATE
##      <num>             <char>   <char>    <char>  <num>     <char> <char>
## 1:       1  4/18/1950 0:00:00     0130       CST     97     MOBILE     AL
## 2:       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN     AL
## 3:       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE     AL
## 4:       1   6/8/1951 0:00:00     0900       CST     89    MADISON     AL
## 5:       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN     AL
## 6:       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE     AL
##     EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
##     <char>     <num>  <char>     <char>   <char>   <char>      <num>     <lgcl>
## 1: TORNADO         0                                               0         NA
## 2: TORNADO         0                                               0         NA
## 3: TORNADO         0                                               0         NA
## 4: TORNADO         0                                               0         NA
## 5: TORNADO         0                                               0         NA
## 6: TORNADO         0                                               0         NA
##    END_RANGE END_AZI END_LOCATI LENGTH WIDTH     F   MAG FATALITIES INJURIES
##        <num>  <char>     <char>  <num> <num> <int> <num>      <num>    <num>
## 1:         0                      14.0   100     3     0          0       15
## 2:         0                       2.0   150     2     0          0        0
## 3:         0                       0.1   123     2     0          0        2
## 4:         0                       0.0   100     2     0          0        2
## 5:         0                       0.0   150     2     0          0        2
## 6:         0                       1.5   177     2     0          0        6
##    PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP    WFO STATEOFFIC ZONENAMES LATITUDE
##      <num>     <char>   <num>     <char> <char>     <char>    <char>    <num>
## 1:    25.0          K       0                                            3040
## 2:     2.5          K       0                                            3042
## 3:    25.0          K       0                                            3340
## 4:     2.5          K       0                                            3458
## 5:     2.5          K       0                                            3412
## 6:     2.5          K       0                                            3450
##    LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
##        <num>      <num>      <num>  <char>  <num>
## 1:      8812       3051       8806              1
## 2:      8755          0          0              2
## 3:      8742          0          0              3
## 4:      8626          0          0              4
## 5:      8642          0          0              5
## 6:      8748          0          0              6
# Convert damage exponent codes into numeric multipliers.
#
# Args:
#   exp: a vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "H"/"h" -> 100, "K"/"k" -> 1000, "M"/"m" -> 1e6, "B"/"b" -> 1e9.
#   Every other value (empty string, NA, or any unrecognised code) maps to 1,
#   so the associated damage figure is taken at face value.
#
# A lookup table is used instead of nested ifelse() calls so the result is
# always numeric, including for zero-length input (nested ifelse() returned
# logical(0) in that case).
convert_exp <- function(exp) {
  multipliers <- c(
    H = 100, h = 100,
    K = 1000, k = 1000,
    M = 1e6, m = 1e6,
    B = 1e9, b = 1e9
  )

  # match() yields NA for codes that are not in the table (and for NA input).
  result <- unname(multipliers[match(exp, names(multipliers))])

  # Unrecognised codes fall back to a multiplier of 1.
  result[is.na(result)] <- 1
  result
}

# Translate the exponent codes into numeric multipliers once per damage type
prop_multiplier <- convert_exp(storm_data$PROPDMGEXP)
crop_multiplier <- convert_exp(storm_data$CROPDMGEXP)

# Store the multipliers back in the exponent columns (replacing the codes)
storm_data$PROPDMGEXP <- prop_multiplier
storm_data$CROPDMGEXP <- crop_multiplier

# Actual damage = reported figure x multiplier
storm_data$PROPDMGVAL <- storm_data$PROPDMG * prop_multiplier
storm_data$CROPDMGVAL <- storm_data$CROPDMG * crop_multiplier

Results

# Aggregate fatalities and injuries by event type.
# Sums ignore missing values (na.rm = TRUE). The result has one row per EVTYPE,
# sorted from the most to the fewest fatalities.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Fatalities))

# Top 10 most harmful events, ranked by total fatalities.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, so
# more than 10 rows can come back if event types tie at the cutoff.
top_health_events <- health_impact %>%
  slice_max(Total_Fatalities, n = 10)

Most Harmful Events to Population Health

# Print the top event types ranked by total fatalities, with injury totals
print(top_health_events)
## # A tibble: 10 × 3
##    EVTYPE         Total_Fatalities Total_Injuries
##    <chr>                     <dbl>          <dbl>
##  1 TORNADO                    5633          91346
##  2 EXCESSIVE HEAT             1903           6525
##  3 FLASH FLOOD                 978           1777
##  4 HEAT                        937           2100
##  5 LIGHTNING                   816           5230
##  6 TSTM WIND                   504           6957
##  7 FLOOD                       470           6789
##  8 RIP CURRENT                 368            232
##  9 HIGH WIND                   248           1137
## 10 AVALANCHE                   224            170
# Aggregate economic impact by event type.
# Property and crop damage values are summed per EVTYPE (missing values
# ignored), and their sum gives the total economic loss. Rows are sorted from
# the largest to the smallest total loss.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(PROPDMGVAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMGVAL, na.rm = TRUE),
    # summarise() lets later expressions use the totals defined just above
    Total_Economic_Loss = Total_Property_Damage + Total_Crop_Damage
  ) %>%
  arrange(desc(Total_Economic_Loss))

# Top 10 costliest events, ranked by total economic loss.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, so
# more than 10 rows can come back if event types tie at the cutoff.
top_economic_events <- economic_impact %>%
  slice_max(Total_Economic_Loss, n = 10)

Plot: Fatalities by Event Type

# Bar chart of total fatalities for the top event types, with bars ordered
# from the highest to the lowest total
ggplot(
  data = top_health_events,
  mapping = aes(
    x = reorder(EVTYPE, -Total_Fatalities),
    y = Total_Fatalities
  )
) +
  # geom_col() draws bar heights directly from the y values
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Weather Events Causing Fatalities",
    x = "Event Type",
    y = "Total Fatalities"
  ) +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Plot: Economic Damage by Event Type

# Bar chart of total economic loss for the top event types, with bars ordered
# from the highest to the lowest loss; values are divided by 1e9 to match the
# axis label
ggplot(
  data = top_economic_events,
  mapping = aes(
    x = reorder(EVTYPE, -Total_Economic_Loss),
    y = Total_Economic_Loss / 1e9
  )
) +
  # geom_col() draws bar heights directly from the y values
  geom_col(fill = "green") +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Economic Damage (in Billion USD)"
  ) +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))