Title: Analysis of the Impact of Severe Weather Events on Public Health and the Economy in the United States

Synopsis

This report analyses the US National Weather Service Storm Data to identify which types of weather events are most harmful to population health and which have the greatest economic consequences. Using R, we aggregate fatalities, injuries, property damage, and crop damage by event type. According to the analysis, tornadoes are the most harmful to population health, while floods cause the most economic damage. These findings can help the government with disaster management.

Load and Prepare the Data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(tidyr)
# Read the raw Storm Data CSV into `storm`.
# The file is addressed by its full path instead of calling setwd(), so
# loading the data does not change the session's working directory.
# NOTE(review): the directory is machine-specific; adjust it when running
# this report elsewhere.
storm <- read.csv(
  file.path("/Users/dishav/Downloads", "repdata_data_StormData.csv")
)
# Print the column names and types of the loaded data frame.
str(storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Aggregating the casualties

# Total fatalities and injuries for every event type (one row per EVTYPE).
casualities <- plyr::ddply(
  .data = storm,
  .variables = .(EVTYPE),
  .fun = plyr::summarise,
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE)
)

# The 10 event types with the highest fatality totals, largest first.
fatal_events <- casualities[
  head(order(casualities$fatalities, decreasing = TRUE), 10),
]

# The 10 event types with the highest injury totals, largest first.
injury_events <- casualities[
  head(order(casualities$injuries, decreasing = TRUE), 10),
]

Calculating economic damage

# Translate damage-exponent codes into the power of 10 they stand for.
#
# Args:
#   e: exponent code(s) as stored in PROPDMGEXP / CROPDMGEXP. A character
#      vector of any length; other types are coerced with as.character().
#
# Returns:
#   A numeric vector the same length as `e`:
#     "h"/"H" -> 2, "k"/"K" -> 3, "m"/"M" -> 6, "b"/"B" -> 9,
#     a code that parses as a number -> that number (e.g. "5" -> 5),
#     anything else (e.g. "", "+", "-", "?", NA) -> 0.
exp_transform <- function(e) {
  e <- as.character(e)

  # Codes such as "+", "-" and "?" do not parse as numbers. That is expected
  # and they fall through to 0 below, so the coercion warning is silenced
  # for this single call only.
  power <- suppressWarnings(as.numeric(e))
  power[is.na(power)] <- 0

  # Letter codes take precedence over the numeric interpretation.
  power[e %in% c("h", "H")] <- 2
  power[e %in% c("k", "K")] <- 3
  power[e %in% c("m", "M")] <- 6
  power[e %in% c("b", "B")] <- 9

  power
}
# property and crop damage in dollars
# Convert each property-damage exponent code to its power of 10.
# vapply() (rather than sapply()) fixes the result type to one number per
# row, so an unexpected return from exp_transform fails loudly here.
storm$PROPDMGEXP_num <- vapply(
  as.character(storm$PROPDMGEXP), exp_transform, numeric(1)
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Convert each crop-damage exponent code to its power of 10.
# vapply() (rather than sapply()) fixes the result type to one number per
# row, so an unexpected return from exp_transform fails loudly here.
storm$CROPDMGEXP_num <- vapply(
  as.character(storm$CROPDMGEXP), exp_transform, numeric(1)
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Damage totals: the recorded figure multiplied by 10 raised to the numeric
# exponent computed for that row.
storm$PROPDMG_total <- with(storm, PROPDMG * 10^PROPDMGEXP_num)
storm$CROPDMG_total <- with(storm, CROPDMG * 10^CROPDMGEXP_num)

# Sum property and crop damage within each event type (one row per EVTYPE).
economic <- plyr::ddply(
  .data = storm,
  .variables = .(EVTYPE),
  .fun = plyr::summarise,
  property = sum(PROPDMG_total, na.rm = TRUE),
  crop = sum(CROPDMG_total, na.rm = TRUE)
)

# The 10 event types with the largest property-damage totals, largest first.
property_events <- economic[
  head(order(economic$property, decreasing = TRUE), 10),
]

# The 10 event types with the largest crop-damage totals, largest first.
crop_events <- economic[
  head(order(economic$crop, decreasing = TRUE), 10),
]

Results

Population health impact

# Event types that rank in the top 10 for fatalities or for injuries.
# `type` records which list an event type came from: "Fatalities" when it is
# in the fatalities top 10, otherwise "Injuries".
# dplyr:: is spelled out because plyr is attached after dplyr in this
# script and masks `mutate`.
pop_health <- casualities %>%
  dplyr::mutate(
    type = ifelse(EVTYPE %in% fatal_events$EVTYPE, "Fatalities", "Injuries")
  ) %>%
  dplyr::filter(EVTYPE %in% c(fatal_events$EVTYPE, injury_events$EVTYPE))

# Plot both casualty measures side by side. Previously only fatalities were
# drawn (for more than 10 event types) while the legend suggested injuries
# were shown too. Bars are ordered by the mean of the two counts, largest
# first (reorder() averages the values within each event type).
pop_health %>%
  tidyr::pivot_longer(
    cols = c(fatalities, injuries),
    names_to = "measure",
    values_to = "count"
  ) %>%
  ggplot(aes(x = reorder(EVTYPE, -count), y = count, fill = measure)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Event type",
    y = "Count",
    fill = "Casualty type",
    title = "Storm event types in the top 10 by fatalities or injuries"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Event types that rank in the top 10 for property or for crop damage,
# reshaped to long format: one row per event type and damage category, with
# the category name in `type` and the dollar amount in `value`.
# pivot_longer() replaces the superseded gather(); the result is a tibble.
econ_long <- economic %>%
  dplyr::filter(
    EVTYPE %in% c(property_events$EVTYPE, crop_events$EVTYPE)
  ) %>%
  tidyr::pivot_longer(
    cols = c(property, crop),
    names_to = "type",
    values_to = "value"
  )

# Side-by-side bars of property and crop damage per event type, in billions
# of dollars. Bars are ordered by the mean of the two amounts, largest first
# (reorder() averages the values within each event type).
ggplot(
  data = econ_long,
  aes(x = reorder(EVTYPE, -value), y = value / 1e9, fill = type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Damage (Billion USD)",
    title = "Top 10 Storm Event Types by Economic Damage"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))