I will describe which types of events are most harmful with the fatalities and the economical amount, based on the simple histograms to show the ranking.

Data Processing

loading

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.1
## Warning: package 'ggplot2' was built under R version 4.4.1
## Warning: package 'tibble' was built under R version 4.4.1
## Warning: package 'tidyr' was built under R version 4.4.1
## Warning: package 'readr' was built under R version 4.4.1
## Warning: package 'purrr' was built under R version 4.4.1
## Warning: package 'dplyr' was built under R version 4.4.1
## Warning: package 'stringr' was built under R version 4.4.1
## Warning: package 'forcats' was built under R version 4.4.1
## Warning: package 'lubridate' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
## Warning: package 'skimr' was built under R version 4.4.1
library(data.table)
## Warning: package 'data.table' was built under R version 4.4.1
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
df <- fread("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2")

check

head(df)
##    STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME  STATE
##      <num>             <char>   <char>    <char>  <num>     <char> <char>
## 1:       1  4/18/1950 0:00:00     0130       CST     97     MOBILE     AL
## 2:       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN     AL
## 3:       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE     AL
## 4:       1   6/8/1951 0:00:00     0900       CST     89    MADISON     AL
## 5:       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN     AL
## 6:       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE     AL
##     EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
##     <char>     <num>  <char>     <char>   <char>   <char>      <num>     <lgcl>
## 1: TORNADO         0                                               0         NA
## 2: TORNADO         0                                               0         NA
## 3: TORNADO         0                                               0         NA
## 4: TORNADO         0                                               0         NA
## 5: TORNADO         0                                               0         NA
## 6: TORNADO         0                                               0         NA
##    END_RANGE END_AZI END_LOCATI LENGTH WIDTH     F   MAG FATALITIES INJURIES
##        <num>  <char>     <char>  <num> <num> <int> <num>      <num>    <num>
## 1:         0                      14.0   100     3     0          0       15
## 2:         0                       2.0   150     2     0          0        0
## 3:         0                       0.1   123     2     0          0        2
## 4:         0                       0.0   100     2     0          0        2
## 5:         0                       0.0   150     2     0          0        2
## 6:         0                       1.5   177     2     0          0        6
##    PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP    WFO STATEOFFIC ZONENAMES LATITUDE
##      <num>     <char>   <num>     <char> <char>     <char>    <char>    <num>
## 1:    25.0          K       0                                            3040
## 2:     2.5          K       0                                            3042
## 3:    25.0          K       0                                            3340
## 4:     2.5          K       0                                            3458
## 5:     2.5          K       0                                            3412
## 6:     2.5          K       0                                            3450
##    LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
##        <num>      <num>      <num>  <char>  <num>
## 1:      8812       3051       8806              1
## 2:      8755          0          0              2
## 3:      8742          0          0              3
## 4:      8626          0          0              4
## 5:      8642          0          0              5
## 6:      8748          0          0              6

Results for Fatality analysis

Event type and fatalities

#summary by event type
Fatality_table <- df %>% 
  group_by(EVTYPE) %>% 
  summarise(FATALITIES_EACH = sum(FATALITIES, na.rm = TRUE))

print(Fatality_table)
## # A tibble: 985 × 2
##    EVTYPE                  FATALITIES_EACH
##    <chr>                             <dbl>
##  1 "   HIGH SURF ADVISORY"               0
##  2 " COASTAL FLOOD"                      0
##  3 " FLASH FLOOD"                        0
##  4 " LIGHTNING"                          0
##  5 " TSTM WIND"                          0
##  6 " TSTM WIND (G45)"                    0
##  7 " WATERSPOUT"                         0
##  8 " WIND"                               0
##  9 "?"                                   0
## 10 "ABNORMAL WARMTH"                     0
## # ℹ 975 more rows
#remove 0 fatality and change order
#only top 10 included
Fatality_table_ranking <- Fatality_table %>% 
  filter(FATALITIES_EACH!=0) %>% 
  arrange(desc(FATALITIES_EACH)) %>% 
  slice_head(n = 10)

print(Fatality_table_ranking)
## # A tibble: 10 × 2
##    EVTYPE         FATALITIES_EACH
##    <chr>                    <dbl>
##  1 TORNADO                   5633
##  2 EXCESSIVE HEAT            1903
##  3 FLASH FLOOD                978
##  4 HEAT                       937
##  5 LIGHTNING                  816
##  6 TSTM WIND                  504
##  7 FLOOD                      470
##  8 RIP CURRENT                368
##  9 HIGH WIND                  248
## 10 AVALANCHE                  224

Histgram

library(ggplot2)

# Change EVTYPE into factor for ordering
Fatality_table_ranking$EVTYPE <- factor(
  Fatality_table_ranking$EVTYPE,
  levels = Fatality_table_ranking$EVTYPE[order(Fatality_table_ranking$FATALITIES_EACH, decreasing = TRUE)]
)

# Histgram
ggplot(Fatality_table_ranking, aes(x = EVTYPE, y = FATALITIES_EACH)) +
  geom_bar(stat = "identity", fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Event ranking", x = "Event type", y = "Fatalities") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) 

The most harmful type of events for population health is TORNADO.

Results for Economic analysis

Calculation for prices

#create numeric prices
df <- df %>% mutate(PROPDMG_PRICE = case_when(
  PROPDMGEXP == "K" ~ PROPDMG*1000,
  PROPDMGEXP == "M" ~ PROPDMG*1000000,
  PROPDMGEXP == "B" ~ PROPDMG*1000000000
))

head(df$PROPDMG_PRICE)
## [1] 25000  2500 25000  2500  2500  2500

Event type and economy

#summary by event type
Economy_table <- df %>% 
  group_by(EVTYPE) %>% 
  summarise(Economy_EACH = sum(PROPDMG_PRICE, na.rm = TRUE))

print(Economy_table)
## # A tibble: 985 × 2
##    EVTYPE                  Economy_EACH
##    <chr>                          <dbl>
##  1 "   HIGH SURF ADVISORY"       200000
##  2 " COASTAL FLOOD"                   0
##  3 " FLASH FLOOD"                 50000
##  4 " LIGHTNING"                       0
##  5 " TSTM WIND"                 8100000
##  6 " TSTM WIND (G45)"              8000
##  7 " WATERSPOUT"                      0
##  8 " WIND"                            0
##  9 "?"                             5000
## 10 "ABNORMAL WARMTH"                  0
## # ℹ 975 more rows
#remove 0 economy and change order
#only top 10 included
Economy_table_ranking <- Economy_table %>% 
  filter(Economy_EACH!=0) %>% 
  arrange(desc(Economy_EACH)) %>% 
  slice_head(n = 10)

print(Economy_table_ranking)
## # A tibble: 10 × 2
##    EVTYPE            Economy_EACH
##    <chr>                    <dbl>
##  1 FLOOD             144657709800
##  2 HURRICANE/TYPHOON  69305840000
##  3 TORNADO            56925660480
##  4 STORM SURGE        43323536000
##  5 FLASH FLOOD        16140811510
##  6 HAIL               15727366720
##  7 HURRICANE          11868319010
##  8 TROPICAL STORM      7703890550
##  9 WINTER STORM        6688497250
## 10 HIGH WIND           5270046260

Histgram

# Change EVTYPE into factor for ordering
Economy_table_ranking$EVTYPE <- factor(
  Economy_table_ranking$EVTYPE,
  levels = Economy_table_ranking$EVTYPE[order(Economy_table_ranking$Economy_EACH, decreasing = TRUE)]
)

# Histgram
ggplot(Economy_table_ranking, aes(x = EVTYPE, y = Economy_EACH)) +
  geom_bar(stat = "identity", fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Event ranking", x = "Event type", y = "Economy damage") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) 

The most harmful type of events for economy is FLOOD.