Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

Loading the data from a csv file:

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.5
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.2.5
# Folder that holds the raw NOAA storm data file. The path is passed to
# read.csv() explicitly instead of calling setwd(), so loading the data does
# not change the working directory of the R session.
data_dir <- "C:/Users/JP/Desktop/Data Science Certification/5.- Reproducible Research/Project 2"
#Load Data
data <- read.csv(file.path(data_dir, "repdata-data-StormData.csv"))

Select the columns that are needed for the analysis of the population and economic consequences of weather events across the United States

# Columns needed for the health and economic impact analysis
SelectCols <- c(
  "BGN_DATE", "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
newdata <- data[SelectCols]
# Extract the four-digit year from the begin date (parsed as month/day/year)
newdata[["year"]] <- as.numeric(
  format(as.Date(newdata[["BGN_DATE"]], format = "%m/%d/%Y"), "%Y")
)

Since the later years account for far more observations than the early ones, results could be skewed by the sparsely recorded first years. To still use the majority of the observations, the cutoff point is arbitrarily set at 80%.

So we keep the events that occurred since 1992.

# Count how many events were recorded in each year
EventYear <- aggregate(newdata["year"], by = list(newdata[["year"]]), FUN = length)
names(EventYear) <- c("Year", "Events")
# Cumulative share of all rows reached by the end of each year
EventYear[["Perc"]] <- cumsum(EventYear[["Events"]]) / nrow(newdata)
# Keep only the years whose cumulative share exceeds 20%
EventYear <- EventYear[which(EventYear[["Perc"]] > 0.2), ]

# Restrict the analysis data to events from 1992 onwards
Finaldata <- newdata[which(newdata[["year"]] >= 1992), ]

Now we calculate the property damage and the crop damage in dollars

# Convert the recorded damage amounts to dollars.
# The *DMGEXP columns carry a letter code that is mapped to a power of ten:
# H -> 10^2, K -> 10^3, M -> 10^6, B -> 10^9. Any other code becomes NA.
# The exponent must be read from the factor LABELS with as.character():
# as.numeric() on a factor returns the internal level codes (1, 2, 3, 4), which
# would scale "M" by 10^4 and "B" by 10^5 instead of 10^6 and 10^9.
# toupper() makes lower-case codes count like their upper-case forms
# (NOTE(review): assumes e.g. "k"/"m" mean the same as "K"/"M" - confirm).

#Calculate Porp_Damage
Finaldata$PropMult <- factor(toupper(as.character(Finaldata$PROPDMGEXP)))
levels(Finaldata$PropMult) <- list("2" = "H", "3" = "K", "6" = "M", "9" = "B")
Finaldata$Porp_Damage <- Finaldata$PROPDMG * 10^as.numeric(as.character(Finaldata$PropMult))

#Calculate Crop_Damage
Finaldata$CropMult <- factor(toupper(as.character(Finaldata$CROPDMGEXP)))
levels(Finaldata$CropMult) <- list("2" = "H", "3" = "K", "6" = "M", "9" = "B")
Finaldata$Crop_Damage <- Finaldata$CROPDMG * 10^as.numeric(as.character(Finaldata$CropMult))

#Replace NA values with zeros
# Only numeric columns are filled. Assigning 0 into the PropMult / CropMult
# factors is not a valid level, so it only raised "invalid factor level"
# warnings and left those columns as NA anyway.
numeric_cols <- vapply(Finaldata, is.numeric, logical(1))
Finaldata[numeric_cols][is.na(Finaldata[numeric_cols])] <- 0

Then we create a table with the aggregate damage per event type.

# Sum fatalities, injuries, property damage and crop damage per event type
damage_cols <- c("FATALITIES", "INJURIES", "Porp_Damage", "Crop_Damage")
Damage_EVTYPE <- aggregate(Finaldata[damage_cols], by = Finaldata["EVTYPE"], FUN = sum)

# Health impact: event types with at least one fatality or injury,
# ranked by combined casualties, with the cumulative share of the total
has_casualties <- with(Damage_EVTYPE, FATALITIES > 0 | INJURIES > 0)
Pop_Damage <- Damage_EVTYPE[has_casualties, c("EVTYPE", "FATALITIES", "INJURIES")]
Pop_Damage$Total_Pop <- with(Pop_Damage, FATALITIES + INJURIES)
Pop_Damage <- Pop_Damage[order(-Pop_Damage$Total_Pop), ]
Pop_Damage$Perc <- with(Pop_Damage, cumsum(Total_Pop) / sum(Total_Pop))

# Economic impact: event types with any property or crop damage,
# ranked by combined damage, with the cumulative share of the total
has_losses <- with(Damage_EVTYPE, Porp_Damage > 0 | Crop_Damage > 0)
Eco_Damage <- Damage_EVTYPE[has_losses, c("EVTYPE", "Porp_Damage", "Crop_Damage")]
Eco_Damage$Total_Eco <- with(Eco_Damage, Porp_Damage + Crop_Damage)
Eco_Damage <- Eco_Damage[order(-Eco_Damage$Total_Eco), ]
Eco_Damage$Perc <- with(Eco_Damage, cumsum(Total_Eco) / sum(Total_Eco))

Since there are 985 different event types, we select the events that accumulate 80% of the damage in both cases.

# Keep the top-ranked event types whose cumulative share is still below 80%
pop_below_80 <- Pop_Damage[["Perc"]] < 0.8
eco_below_80 <- Eco_Damage[["Perc"]] < 0.8
Pop_Damage_80 <- Pop_Damage[pop_below_80, , drop = FALSE]
Eco_Damage_80 <- Eco_Damage[eco_below_80, , drop = FALSE]

Results

1. Aggregated human injuries & fatalities for weather events from 1992 to 2011.

First we put the event labels in the middle of the plot

# Middle panel: a text-only column that prints each event type name.
# Every axis, grid and background element is hidden so only the labels show.
mid_theme_1 <- theme(
  plot.margin = unit(c(1, -1, 1, -1), "mm"),
  panel.background = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.text.x = element_text(color = NA),
  axis.ticks.x = element_line(color = NA)
)
g.mid_1 <- ggplot(Pop_Damage_80, aes(x = 1, y = EVTYPE)) +
  geom_text(aes(label = EVTYPE), size = 4) +
  scale_x_continuous(limits = c(0.94, 1.065), expand = c(0, 0)) +
  ylab(NULL) +
  ggtitle("") +
  mid_theme_1

Then add the left chart with the injuries.

# Left panel: horizontal bars of injuries per event type.
# The value axis is reversed so the bars grow towards the left.
g.injuries <- ggplot(Pop_Damage_80, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = INJURIES), hjust = 2.0, vjust = 0.5, size = 3) +
  scale_y_reverse() +
  coord_flip() +
  ggtitle("Injuries") +
  theme(
    plot.margin = unit(c(1, 0, 1, 0), "mm"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  )

Then add the right chart with the fatalities.

# Right panel: horizontal bars of fatalities per event type
g.fatalities <- ggplot(Pop_Damage_80, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = FATALITIES), hjust = -1.0, vjust = 0.5, size = 3) +
  coord_flip() +
  ggtitle("Fatalities") +
  theme(
    plot.margin = unit(c(1, 0, 1, -1), "mm"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  )

And finally combine all the charts together in a plot:

# Turn the three ggplot objects into grobs and lay them out side by side
gg.injuries <- ggplotGrob(g.injuries)
## Warning: Stacking not well defined when ymin != 0
gg.fatalities <- ggplotGrob(g.fatalities)
gg.mid_1 <- ggplotGrob(g.mid_1)

# Column order: injuries | event labels | fatalities (40% / 20% / 40% wide)
grid.arrange(
  grobs = list(gg.injuries, gg.mid_1, gg.fatalities),
  ncol = 3,
  widths = c(0.4, 0.2, 0.4),
  top = "Aggregated human injuries & fatalities for weather events from 1992 to 2011"
)

Analogously we apply the same to the economic damage:

2. Aggregated economic damage to property and crops for weather events from 1992 to 2011

First we put the event labels in the middle of the plot

# Middle panel: a text-only column that prints each event type name.
# Every axis, grid and background element is hidden so only the labels show.
mid_theme_2 <- theme(
  plot.margin = unit(c(1, -1, 1, -1), "mm"),
  panel.background = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.text.x = element_text(color = NA),
  axis.ticks.x = element_line(color = NA)
)
g.mid_2 <- ggplot(Eco_Damage_80, aes(x = 1, y = EVTYPE)) +
  geom_text(aes(label = EVTYPE), size = 4) +
  scale_x_continuous(limits = c(0.94, 1.065), expand = c(0, 0)) +
  ylab(NULL) +
  ggtitle("") +
  mid_theme_2

Then add the left chart with the property damage

# Left panel: horizontal bars of property damage per event type.
# The value axis is reversed so the bars grow towards the left.
# NOTE: this reuses the name g.injuries and so overwrites the injuries plot.
g.injuries <- ggplot(Eco_Damage_80, aes(x = EVTYPE, y = Porp_Damage)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Porp_Damage), hjust = 2.0, vjust = 0.5, size = 3) +
  scale_y_reverse() +
  coord_flip() +
  ggtitle("Propeties Damage") +
  theme(
    plot.margin = unit(c(1, 0, 1, 0), "mm"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  )

Then add the right chart with the crop damage

# Right panel: horizontal bars of crop damage per event type.
# NOTE: this reuses the name g.fatalities and so overwrites the fatalities plot.
g.fatalities <- ggplot(Eco_Damage_80, aes(x = EVTYPE, y = Crop_Damage)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Crop_Damage), hjust = -1.0, vjust = 0.5, size = 3) +
  coord_flip() +
  ggtitle("Crop Damage") +
  theme(
    plot.margin = unit(c(1, 0, 1, -1), "mm"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  )

And finally combine all the charts together in a plot:

# Turn the three ggplot objects into grobs and lay them out side by side.
# g.injuries / g.fatalities hold the property and crop damage plots here.
gg.injuries <- ggplotGrob(g.injuries)
## Warning: Stacking not well defined when ymin != 0
gg.fatalities <- ggplotGrob(g.fatalities)
gg.mid_2 <- ggplotGrob(g.mid_2)

# Column order: property damage | event labels | crop damage (40% / 20% / 40%)
grid.arrange(
  grobs = list(gg.injuries, gg.mid_2, gg.fatalities),
  ncol = 3,
  widths = c(0.4, 0.2, 0.4),
  top = "Aggregated economic propeties & crops for weather events from 1992 to 2011"
)

Conclusion

From these data, we found that excessive heat and tornado are most harmful with respect to population health, while flood, drought, and hurricane/typhoon have the greatest economic consequences.