R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Title

Impact of Weather Events on Population Health and the Economy in the United States

Study synopsis

This analysis explores the NOAA Storm Database to identify the weather events in the U.S. that are most harmful to population health and have the greatest economic impact. We examine storm events recorded from 1950 to November 2011 and identify which event types (EVTYPE) contribute most to fatalities, injuries, property damage, and crop damage. Health impact is measured as the total number of fatalities and injuries, and economic impact as the sum of property and crop damage. Tornadoes were the most harmful event type for population health, while floods and hurricanes/typhoons had the greatest economic consequences.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)

Data Processing

# Load the raw NOAA storm data and check its structure.
# The file is looked up relative to the working directory instead of a
# machine-specific absolute path, and it is downloaded when it is missing,
# so the analysis can be reproduced on any computer.

storm_file <- "repdata_data_StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed file intact on Windows
  download.file(storm_url, destfile = storm_file, mode = "wb")
}

# read.csv() reads bzip2-compressed files directly, no manual unzip needed
storm_data <- read.csv(storm_file, stringsAsFactors = FALSE)

str(storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# keep only the columns needed for the health and economic analysis:
# event type, casualty counts, and damage amounts with their exponent codes

storm_subset <- storm_data[
  c("EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]
# data cleaning: translate damage exponent codes into numeric multipliers
#
# exp: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns)
# returns: numeric vector of the same length; "H" = 100, "K" = 1000,
#          "M" = 1e6, "B" = 1e9 (upper or lower case), and 1 for any other
#          value, including empty strings and NA

damage_exp <- function(exp) {
  multipliers <- c(H = 100, K = 1000, M = 1e6, B = 1e9)
  # look each code up by name, ignoring case; unknown codes come back as NA
  scale <- unname(multipliers[toupper(exp)])
  # anything that is not a recognised code leaves the amount unscaled
  scale[is.na(scale)] <- 1
  scale
}

# step 1: replace the exponent codes with their numeric multipliers
# step 2: scale the raw damage figures by those multipliers to get totals
storm_subset <- storm_subset %>%
  mutate(
    PROPDMGEXP = damage_exp(PROPDMGEXP),
    CROPDMGEXP = damage_exp(CROPDMGEXP)
  ) %>%
  mutate(
    PROPDMG_TOTAL = PROPDMG * PROPDMGEXP,
    CROPDMG_TOTAL = CROPDMG * CROPDMGEXP
  )

Results

  1. Most Harmful Events for Population Health
# total fatalities and injuries per event type; their sum is the health impact.
# Event types are sorted from highest to lowest impact and the first 6 shown.

health_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarize(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(Total_healthimpact = Total_Fatalities + Total_Injuries) %>%
  arrange(desc(Total_healthimpact))

head(health_impact)
## # A tibble: 6 × 4
##   EVTYPE         Total_Fatalities Total_Injuries Total_healthimpact
##   <chr>                     <dbl>          <dbl>              <dbl>
## 1 TORNADO                    5633          91346              96979
## 2 EXCESSIVE HEAT             1903           6525               8428
## 3 TSTM WIND                   504           6957               7461
## 4 FLOOD                       470           6789               7259
## 5 LIGHTNING                   816           5230               6046
## 6 HEAT                        937           2100               3037
# bar chart of the 10 event types with the highest health impact

# slice_max() replaces the superseded top_n(); like top_n() it keeps ties
top10_events_health <- health_impact %>%
  slice_max(Total_healthimpact, n = 10)

# coord_flip() draws the first factor level at the bottom, so the levels are
# ordered by ascending impact to put the most harmful event at the top
ggplot(top10_events_health,
       aes(x = reorder(EVTYPE, Total_healthimpact), y = Total_healthimpact)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip() +
  labs(title = "Figure 1. Top 10 Weather Events Causing Health Impact",
       x = "Event Type",
       y = "Total Health Impact")

  2. Events with the Greatest Economic Consequences
# total property and crop damage per event type; their sum is the economic
# damage. Event types are sorted from highest to lowest economic damage.
economic_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarize(
    Total_Property_Damage = sum(PROPDMG_TOTAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMG_TOTAL, na.rm = TRUE)
  ) %>%
  mutate(Total_Economic_Damage = Total_Property_Damage + Total_Crop_Damage) %>%
  arrange(desc(Total_Economic_Damage))

economic_impact
## # A tibble: 985 × 4
##    EVTYPE          Total_Property_Damage Total_Crop_Damage Total_Economic_Damage
##    <chr>                           <dbl>             <dbl>                 <dbl>
##  1 FLOOD                   144657709807         5661968450         150319678257 
##  2 HURRICANE/TYPH…          69305840000         2607872800          71913712800 
##  3 TORNADO                  56937160779.         414953270          57352114049.
##  4 STORM SURGE              43323536000               5000          43323541000 
##  5 HAIL                     15732267543.        3025954473          18758222016.
##  6 FLASH FLOOD              16140812067.        1421317100          17562129167.
##  7 DROUGHT                   1046106000        13972566000          15018672000 
##  8 HURRICANE                11868319010         2741910000          14610229010 
##  9 RIVER FLOOD               5118945500         5029459000          10148404500 
## 10 ICE STORM                 3944927860         5022113500           8967041360 
## # ℹ 975 more rows
# bar chart of the 10 event types with the highest economic damage

# slice_max() replaces the superseded top_n(); like top_n() it keeps ties
top10_events_economic <- economic_impact %>%
  slice_max(Total_Economic_Damage, n = 10)

# coord_flip() draws the first factor level at the bottom, so the levels are
# ordered by ascending damage to put the costliest event at the top.
# Damage is divided by 1e9 so the axis reads in billions of dollars.
ggplot(top10_events_economic,
       aes(x = reorder(EVTYPE, Total_Economic_Damage),
           y = Total_Economic_Damage / 1e9)) +
  geom_bar(stat = "identity", fill = "darkgreen") +
  coord_flip() +
  labs(title = "Figure 2. Top 10 Weather Events Causing Economic Damage",
       x = "Event Type",
       y = "Total Damage (Billion USD)")

Conclusion

Tornadoes were the most harmful events for population health, and floods had the greatest economic consequences. High-impact events such as these should be prioritized when preparing for and responding to natural disasters.