library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.3

Synopsis

This report analyzes the NOAA Storm Database to identify the weather events that are most harmful to population health and those with the greatest economic consequences in the United States. Fatalities and injuries were used to measure health impact, while property and crop damage were used to measure economic impact. The analysis shows that tornadoes cause by far the greatest harm to population health, while floods cause the greatest economic losses, followed by hurricanes/typhoons and tornadoes.

Data Processing

# Load the raw storm data CSV from the working directory.
# NOTE(review): the recorded output below shows read.csv() warning about
# "EOF within quoted string", so the file may not have been parsed in
# full (for example because of an unbalanced quote in a text field).
# Confirm the row count against the source data before relying on the
# totals computed further down.
storm <- read.csv(file = "repdata_data_StormData.csv")
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Show the column names and types of the loaded data frame.
str(object = storm)
## 'data.frame':    831084 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Keep only the columns used by the analysis: the event type, the two
# health counts, and the damage amounts with their exponent columns.
storm2 <- select(
    storm,
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
)
# Per event type, total the fatalities and the injuries, add the two
# totals together, and rank event types by that combined count
# (largest first).
health <- storm2 %>%
    group_by(EVTYPE) %>%
    summarise(
        fatalities = sum(FATALITIES),
        injuries = sum(INJURIES)
    ) %>%
    mutate(total_harm = fatalities + injuries) %>%
    arrange(desc(total_harm))
# Keep the ten highest-ranked event types and display them.
health_top <- head(health, n = 10)
health_top
## # A tibble: 10 × 4
##    EVTYPE         fatalities injuries total_harm
##    <chr>               <dbl>    <dbl>      <dbl>
##  1 TORNADO              5035    85049      90084
##  2 EXCESSIVE HEAT       1867     6387       8254
##  3 TSTM WIND             504     6957       7461
##  4 FLOOD                 400     6658       7058
##  5 LIGHTNING             787     5019       5806
##  6 FLASH FLOOD           900     1747       2647
##  7 HEAT                  872     1489       2361
##  8 ICE STORM              89     1975       2064
##  9 WINTER STORM          205     1319       1524
## 10 HIGH WIND             243     1125       1368
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   exp: Vector of exponent codes (character or factor), e.g. the
#        PROPDMGEXP / CROPDMGEXP columns.
#
# Returns:
#   Unnamed numeric vector, same length as `exp`: 1e3 for "K", 1e6 for
#   "M", 1e9 for "B" (matched case-insensitively), and 1 for every other
#   value, including "" and NA.
convert_exp <- function(exp) {
    multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
    # Upper-case first so that codes entered as "k", "m" or "b" are not
    # silently treated as "no multiplier".
    code <- toupper(as.character(exp))
    mult <- unname(multipliers[code])
    # Unrecognised, empty and missing codes leave the amount unscaled;
    # this also keeps an NA code from turning a later sum() into NA.
    mult[is.na(mult)] <- 1
    mult
}
# Derive a multiplier from each exponent column with convert_exp(), then
# scale the property and crop damage amounts by their multipliers.
storm2 <- storm2 %>%
    mutate(
        prop_mult = convert_exp(PROPDMGEXP),
        crop_mult = convert_exp(CROPDMGEXP),
        prop_total = PROPDMG * prop_mult,
        crop_total = CROPDMG * crop_mult
    )
# Per event type, total the scaled property and crop damage, add the two
# totals together, and rank event types by that combined damage
# (largest first).
economic <- storm2 %>%
    group_by(EVTYPE) %>%
    summarise(
        property_damage = sum(prop_total),
        crop_damage = sum(crop_total)
    ) %>%
    mutate(total_damage = property_damage + crop_damage) %>%
    arrange(desc(total_damage))
# Keep the ten highest-ranked event types and display them.
economic_top <- head(economic, n = 10)
economic_top
## # A tibble: 10 × 4
##    EVTYPE            property_damage crop_damage  total_damage
##    <chr>                       <dbl>       <dbl>         <dbl>
##  1 FLOOD               136668419717   5499074450 142167494167 
##  2 HURRICANE/TYPHOON    69305840000   2607872800  71913712800 
##  3 TORNADO              46877429590.   383018270  47260447860.
##  4 STORM SURGE          43323536000         5000  43323541000 
##  5 FLASH FLOOD          14669748367.  1197630100  15867378467.
##  6 HAIL                 12287866003.  2891413890  15179279893.
##  7 DROUGHT               1045992000  13938635000  14984627000 
##  8 HURRICANE            11857819010   2731410000  14589229010 
##  9 RIVER FLOOD           5118945500   5029459000  10148404500 
## 10 ICE STORM             3936950360   5022033500   8958983860
# Horizontal bar chart of the ten event types in health_top. Bars are
# ordered by total_harm, and coord_flip() puts the event names on the
# vertical axis so they stay readable.
health_top %>%
    ggplot(aes(x = reorder(EVTYPE, total_harm), y = total_harm)) +
    geom_col(fill = "steelblue") +
    coord_flip() +
    labs(
        title = "Top 10 Weather Events Harmful to Population Health",
        x = "Weather Event",
        y = "Total Injuries and Fatalities"
    )

# Horizontal bar chart of the ten event types in economic_top. Bars are
# ordered by total_damage, and coord_flip() puts the event names on the
# vertical axis so they stay readable.
economic_top %>%
    ggplot(aes(x = reorder(EVTYPE, total_damage), y = total_damage)) +
    geom_col(fill = "darkred") +
    coord_flip() +
    labs(
        title = "Top 10 Weather Events Causing Economic Damage",
        x = "Weather Event",
        y = "Total Economic Damage"
    )