library(ggplot2)
library(plyr)
library(dplyr)
library(reshape2)
Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
Tornadoes turn out to be the most dangerous events for public health, while flooding causes the biggest economic impact.
The data is provided as a CSV file. We keep only the columns related to public health or economic impact, and we remove the rows that contain no such data.
# Load the raw storm data, keep only the event type plus the health and
# damage columns, and drop events that recorded no fatalities, injuries,
# property damage or crop damage.
storm.data <- read.csv("repdata-data-StormData.csv", header = TRUE, stringsAsFactors = FALSE)
storm.data <- select(storm.data, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
# The row filter needs the element-wise `|`. The scalar `||` never builds a
# per-row mask: it raises an error for length > 1 operands since R 4.3 and
# only inspected the first row in earlier versions.
storm.data <- subset(storm.data, FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)
Economic impact is recorded in a form of scientific notation and stored in two pieces (a value and an exponent code), so we need a function to calculate the full amount.
# Combine damage values with their exponent codes into full amounts.
#
# values: numeric vector of base amounts.
# exps:   vector of exponent codes, matched case-insensitively:
#         "", "+", "?", "-" mean 10^0; "h", "k", "m", "b" mean 10^2, 10^3,
#         10^6 and 10^9; any other code is converted with as.numeric()
#         (digits become that power of ten, anything else becomes NA).
#
# Returns values * 10 ^ exponent, element by element.
full_value <- function(values, exps) {
  letter_powers <- c(h = "2", k = "3", m = "6", b = "9")

  codes <- tolower(exps)

  # Placeholder symbols carry no magnitude information.
  is_placeholder <- codes %in% c("", "+", "?", "-")
  codes[is_placeholder] <- "0"

  # Translate the letter codes through the lookup table.
  is_letter <- codes %in% names(letter_powers)
  codes[is_letter] <- unname(letter_powers[codes[is_letter]])

  values * 10^as.numeric(codes)
}
Now we can calculate the full values for PROPDMG and CROPDMG.
# Replace the property and crop damage columns in place with the amounts
# returned by full_value() for each value and its companion *EXP code.
storm.data[["PROPDMG"]] <- full_value(storm.data[["PROPDMG"]], storm.data[["PROPDMGEXP"]])
storm.data[["CROPDMG"]] <- full_value(storm.data[["CROPDMG"]], storm.data[["CROPDMGEXP"]])
The same event types are recorded under many different strings, so we need to combine them to get a better understanding of the real impact of each kind of event.
# Collapse the free-text event types into a small set of categories and
# store the result back in storm.data$EVTYPE as a factor.
#
# The rules are applied in order to the lower-cased event type. Each rule
# overwrites every entry matching its regular expression (name) with its
# label (value), so a later rule sees the labels written by earlier rules.
# The order is therefore significant and must be preserved.
storm.data$EVTYPE <- local({
  evtype_rules <- c(
    "(winter|snow)(.*)storm" = "blizzard",
    "ice storm|thundersnow"  = "blizzard",
    "bliz"                   = "blizzard",
    "hurricane"              = "hurricane",
    "tornado"                = "tornado",
    "tstm|thunde|thun|tund"  = "thunderstorm",
    "funnel"                 = "funnel",
    # A bare "light" would also relabel any event that merely contains the
    # word "light" (for example "light snow") as lightning. Match only
    # lightning spellings, including the misspellings "lighting" and
    # "ligntning".
    "lightn|lighting|ligntning" = "lightning",
    "flood|fld"              = "flood",
    "hail"                   = "hail",
    "rain"                   = "rain",
    "burst"                  = "burst",
    "dust"                   = "dust devil",
    "fog"                    = "fog",
    "surf|rip"               = "surf",
    "wall cloud"             = "wall cloud",
    "dr(y|ie)"               = "dryness",
    "cool|cold"              = "cold",
    "wind|wnd"               = "wind",
    "ic[ey]"                 = "ice",
    "snow"                   = "snow",
    "wet"                    = "wet",
    "slide"                  = "mud/land/rock slides",
    "winter"                 = "winter",
    "warm"                   = "warm",
    "hot|heat"               = "hot",
    "free|fro"               = "frost",
    "fire"                   = "fire",
    "ava"                    = "avalanche"
  )

  evtype <- tolower(storm.data$EVTYPE)
  for (pattern in names(evtype_rules)) {
    evtype[grepl(pattern, evtype)] <- evtype_rules[[pattern]]
  }

  # local() keeps the working vector and rule table out of the workspace.
  as.factor(evtype)
})
Now we can calculate the total number of casualties for each event type and combine the results.
# Total fatalities and injuries per event type, joined into one table.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = storm.data, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = storm.data, FUN = sum)
health_impact <- merge(fatalities, injuries, by = "EVTYPE")

# Rank by fatalities, then injuries, both descending.
health_impact.sort <- health_impact[
  with(health_impact, order(FATALITIES, INJURIES, decreasing = TRUE)),
]

# Keep the ten highest-ranked event types and reshape to long format
# (one row per event type and measure) for plotting.
health_impact.top <- melt(head(health_impact.sort, n = 10), id.vars = "EVTYPE")
names(health_impact.top) <- c("Events", "Severness", "Casualties")
We can do the same for the data related to economic impact.
# Total property and crop damage per event type, joined into one table.
propdmg <- aggregate(PROPDMG ~ EVTYPE, data = storm.data, FUN = sum)
cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = storm.data, FUN = sum)
economy_impact <- merge(propdmg, cropdmg, by = "EVTYPE")

# Rank by property damage, then crop damage, both descending.
economy_impact.sort <- economy_impact[
  with(economy_impact, order(PROPDMG, CROPDMG, decreasing = TRUE)),
]

# Keep the ten highest-ranked event types and reshape to long format
# (one row per event type and damage category) for plotting.
economy_impact.top <- melt(head(economy_impact.sort, n = 10), id.vars = "EVTYPE")
names(economy_impact.top) <- c("Events", "Category", "Amount")
# Dodged bar chart of casualties per event type, one bar per measure,
# with the x-axis labels rotated 45 degrees.
health_impact.plot <- ggplot(
  health_impact.top,
  aes(x = Events, y = Casualties, fill = Severness)
) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("Impact of severe weather events on public health") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(health_impact.plot)
# Dodged bar chart of damage amounts per event type, one bar per damage
# category, with the x-axis labels rotated 45 degrees.
economy_impact.plot <- ggplot(
  economy_impact.top,
  aes(x = Events, y = Amount, fill = Category)
) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("Economic impact of severe weather events") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(economy_impact.plot)