R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Title

The Impact of Weather Events on Population Health and the Economy in the United States

Study synopsis

This analysis explores the NOAA Storm Database to identify the weather events in the U.S. that are most harmful to population health and that have the greatest economic impact. We examine storm events recorded from 1950 through November 2011 and identify which event types (EVTYPE) contribute most to fatalities, injuries, property damage, and crop damage. Tornadoes emerge as the leading cause of both fatalities and injuries, while floods and hurricanes have the greatest economic consequences.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)

Data Processing

# Load the raw NOAA storm data; text columns stay character rather than factor.
# NOTE(review): the path is an absolute location on the author's machine, so
# the file must exist there for this step to run.
storm_data <- read.csv(
  file = "C:/Users/sumim/OneDrive/ドキュメント/Coursera datascience/Reproduciable Research/repdata_data_StormData.csv",
  stringsAsFactors = FALSE
)

# Inspect the column names and types of the loaded table
str(object = storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type plus the casualty and damage columns used below
storm_subset <- select(
  storm_data,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Translate damage exponent codes into numeric multipliers.
#
# exp: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
# Returns a numeric vector of the same length:
#   "H"/"h" -> 100, "K"/"k" -> 1000, "M"/"m" -> 1e6, "B"/"b" -> 1e9.
# Every other value (empty string, digits, symbols, NA) maps to 1, so the
# damage figure it is multiplied with is left unscaled.
damage_exp <- function(exp) {
  known_codes <- c("H", "h", "K", "k", "M", "m", "B", "b")
  code_values <- c(100, 100, 1000, 1000, 1e6, 1e6, 1e9, 1e9)

  # match() yields NA for any code that is not in the table
  position <- match(exp, known_codes)
  multiplier <- code_values[position]
  multiplier[is.na(position)] <- 1
  multiplier
}

# Replace both exponent code columns with their numeric multipliers, then
# scale the raw damage figures into absolute amounts.
storm_subset <- storm_subset %>%
  mutate(
    across(c(PROPDMGEXP, CROPDMGEXP), damage_exp),
    PROPDMG_TOTAL = PROPDMG * PROPDMGEXP,
    CROPDMG_TOTAL = CROPDMG * CROPDMGEXP
  )

Results

  1. Most Harmful Events for Population Health
# Total fatalities and injuries per event type, deadliest events first
health_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Fatalities))

# Show the six event types with the most fatalities
head(health_impact, n = 6L)
## # A tibble: 6 × 3
##   EVTYPE         Total_Fatalities Total_Injuries
##   <chr>                     <dbl>          <dbl>
## 1 TORNADO                    5633          91346
## 2 EXCESSIVE HEAT             1903           6525
## 3 FLASH FLOOD                 978           1777
## 4 HEAT                        937           2100
## 5 LIGHTNING                   816           5230
## 6 TSTM WIND                   504           6957
# Same per-event totals, re-ranked so the events with most injuries come first
health_impact2 <- arrange(health_impact, desc(Total_Injuries))

# Show the six event types with the most injuries
head(health_impact2, n = 6L)
## # A tibble: 6 × 3
##   EVTYPE         Total_Fatalities Total_Injuries
##   <chr>                     <dbl>          <dbl>
## 1 TORNADO                    5633          91346
## 2 TSTM WIND                   504           6957
## 3 FLOOD                       470           6789
## 4 EXCESSIVE HEAT             1903           6525
## 5 LIGHTNING                   816           5230
## 6 HEAT                        937           2100
# Ten deadliest event types. slice_max() replaces the superseded top_n();
# like top_n() it keeps ties, and it returns the rows sorted by fatalities.
top10_events_health <- health_impact %>%
  slice_max(Total_Fatalities, n = 10)

# Horizontal bar chart of total fatalities per event type.
# reorder() sorts ascending because coord_flip() draws the first factor level
# at the bottom, so the deadliest event ends up at the top of the chart.
ggplot(top10_events_health,
       aes(x = reorder(EVTYPE, Total_Fatalities), y = Total_Fatalities)) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(title = "Top 10 Weather Events causing Fatalities",
       x = "Event Type",
       y = "Total Fatalities")

  2. Events with the Greatest Economic Consequences
# Property, crop and combined damage per event type, costliest events first
economic_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(PROPDMG_TOTAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMG_TOTAL, na.rm = TRUE)
  ) %>%
  mutate(
    Total_Economic_Damage = Total_Property_Damage + Total_Crop_Damage
  ) %>%
  arrange(desc(Total_Economic_Damage))

# Print the ranked table
economic_impact
## # A tibble: 985 × 4
##    EVTYPE          Total_Property_Damage Total_Crop_Damage Total_Economic_Damage
##    <chr>                           <dbl>             <dbl>                 <dbl>
##  1 FLOOD                   144657709807         5661968450         150319678257 
##  2 HURRICANE/TYPH…          69305840000         2607872800          71913712800 
##  3 TORNADO                  56937160779.         414953270          57352114049.
##  4 STORM SURGE              43323536000               5000          43323541000 
##  5 HAIL                     15732267543.        3025954473          18758222016.
##  6 FLASH FLOOD              16140812067.        1421317100          17562129167.
##  7 DROUGHT                   1046106000        13972566000          15018672000 
##  8 HURRICANE                11868319010         2741910000          14610229010 
##  9 RIVER FLOOD               5118945500         5029459000          10148404500 
## 10 ICE STORM                 3944927860         5022113500           8967041360 
## # ℹ 975 more rows
# Ten costliest event types. slice_max() replaces the superseded top_n();
# like top_n() it keeps ties, and it returns the rows sorted by total damage.
top10_events_economic <- economic_impact %>%
  slice_max(Total_Economic_Damage, n = 10)

# Horizontal bar chart of combined property and crop damage, in billions.
# reorder() sorts ascending because coord_flip() draws the first factor level
# at the bottom, so the costliest event ends up at the top of the chart.
ggplot(top10_events_economic,
       aes(x = reorder(EVTYPE, Total_Economic_Damage),
           y = Total_Economic_Damage / 1e9)) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(title = "Top 10 Weather Events causing Economic damage",
       x = "Event Type",
       y = "Total Damage (Billion USD)")

Conclusion

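Tornadoes had by far the highest impact on population health, causing the most fatalities (5,633) and injuries (91,346). Floods caused the greatest economic damage (about 150 billion USD in combined property and crop losses), followed by hurricanes/typhoons and tornadoes. Preparedness and mitigation efforts should prioritize these high-impact event types.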