This report analyses the US National Weather Service Storm Data to identify which types of weather events are most harmful to population health and which have the greatest economic consequences. Using R, we aggregate fatalities, injuries, property damage, and crop damage by event type. According to the analysis, tornadoes are the most harmful to population health, while floods cause the most economic damage. These findings can help the government with disaster management.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(tidyr)
# Load the raw storm data. The file path is built explicitly instead of
# calling setwd(), so the session's working directory is left untouched.
# Adjust `data_dir` to wherever the CSV was downloaded.
data_dir <- "/Users/dishav/Downloads"
storm_file <- file.path(data_dir, "repdata_data_StormData.csv")
if (!file.exists(storm_file)) {
  stop("Storm data file not found: ", storm_file, call. = FALSE)
}
storm <- read.csv(storm_file)
# Inspect column names and types before aggregating
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Total fatalities and injuries per event type (one row per EVTYPE).
# plyr is namespaced explicitly because dplyr exports functions with the
# same names.
casualities <- plyr::ddply(
  storm,
  plyr::.(EVTYPE),
  plyr::summarize,
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE)
)
# Keep the ten event types with the largest total for each measure
by_fatalities <- order(casualities$fatalities, decreasing = TRUE)
by_injuries <- order(casualities$injuries, decreasing = TRUE)
fatal_events <- head(casualities[by_fatalities, ], 10)
injury_events <- head(casualities[by_injuries, ], 10)
# Convert damage-exponent codes (as found in PROPDMGEXP / CROPDMGEXP) into
# the power of 10 they stand for.
#
# Args:
#   e: A character vector (or anything coercible to one) of exponent codes.
#
# Returns:
#   An unnamed numeric vector the same length as `e`:
#     "h"/"H" -> 2, "k"/"K" -> 3, "m"/"M" -> 6, "b"/"B" -> 9,
#     codes that parse as numbers -> that number (e.g. "5" -> 5),
#     anything else ("", "+", "-", "?", NA, ...) -> 0.
exp_transform <- function(e) {
  e <- as.character(e)
  letter_powers <- c(h = 2, k = 3, m = 6, b = 9)
  # Case-insensitive lookup; codes that are not a known letter come back NA
  power <- unname(letter_powers[tolower(e)])
  unresolved <- is.na(power)
  # Non-numeric symbols such as "+" or "?" are expected in this column, so
  # the coercion warning carries no information here; they become NA and are
  # mapped to 0 on the next line.
  power[unresolved] <- suppressWarnings(as.numeric(e[unresolved]))
  power[is.na(power)] <- 0
  power
}
# property and crop damage in dollars
# Translate each distinct PROPDMGEXP code once and map the result back onto
# the rows. vapply() guarantees exactly one numeric power per code, and the
# resulting column carries no per-row names.
prop_exp <- as.character(storm$PROPDMGEXP)
prop_codes <- unique(prop_exp)
prop_powers <- vapply(prop_codes, exp_transform, numeric(1), USE.NAMES = FALSE)
storm$PROPDMGEXP_num <- prop_powers[match(prop_exp, prop_codes)]
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Translate each distinct CROPDMGEXP code once and map the result back onto
# the rows. vapply() guarantees exactly one numeric power per code, and the
# resulting column carries no per-row names.
crop_exp <- as.character(storm$CROPDMGEXP)
crop_codes <- unique(crop_exp)
crop_powers <- vapply(crop_codes, exp_transform, numeric(1), USE.NAMES = FALSE)
storm$CROPDMGEXP_num <- crop_powers[match(crop_exp, crop_codes)]
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Dollar amounts: the reported magnitude scaled by its power-of-ten multiplier
storm$PROPDMG_total <- with(storm, PROPDMG * 10^PROPDMGEXP_num)
storm$CROPDMG_total <- with(storm, CROPDMG * 10^CROPDMGEXP_num)
# Total property and crop damage (in dollars) per event type.
# plyr is namespaced explicitly because dplyr exports functions with the
# same names.
economic <- plyr::ddply(
  storm,
  plyr::.(EVTYPE),
  plyr::summarize,
  property = sum(PROPDMG_total, na.rm = TRUE),
  crop = sum(CROPDMG_total, na.rm = TRUE)
)
# Keep the ten event types with the largest total for each kind of damage
by_property <- order(economic$property, decreasing = TRUE)
by_crop <- order(economic$crop, decreasing = TRUE)
property_events <- head(economic[by_property, ], 10)
crop_events <- head(economic[by_crop, ], 10)
# Event types that rank in the top ten for fatalities or for injuries,
# reshaped to one row per (event type, measure) so that both measures are
# drawn side by side. `type` holds the measure name ("fatalities" or
# "injuries") and `count` its total for that event type.
top_health_events <- unique(c(fatal_events$EVTYPE, injury_events$EVTYPE))
pop_health <- casualities %>%
  dplyr::filter(EVTYPE %in% top_health_events) %>%
  tidyr::pivot_longer(
    cols = c(fatalities, injuries),
    names_to = "type",
    values_to = "count"
  )
# Bars are ordered by the mean of the two counts, largest first
ggplot(
  data = pop_health,
  aes(reorder(EVTYPE, -count), count, fill = type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Event type",
    y = "Count",
    title = "Top Storm Event Types by Fatalities and Injuries"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Event types that rank in the top ten for property or for crop damage,
# reshaped to one row per (event type, damage type) so that both kinds of
# damage are drawn side by side. pivot_longer() replaces the superseded
# gather().
top_econ_events <- unique(c(property_events$EVTYPE, crop_events$EVTYPE))
econ_long <- economic %>%
  dplyr::filter(EVTYPE %in% top_econ_events) %>%
  tidyr::pivot_longer(
    cols = c(property, crop),
    names_to = "type",
    values_to = "value"
  )
# Values are converted from dollars to billions of dollars for the y axis
ggplot(
  data = econ_long,
  aes(reorder(EVTYPE, -value), value / 1e9, fill = type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Damage (Billion USD)",
    title = "Top 10 Storm Event Types by Economic Damage"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))