Reproducible Research

Jonathan Julyan

Project 2

Severe Weather Events

Data Information

You can download the file from the course web site: Storm Data
There is also some documentation of the database available. Here you will find how some of the variables are constructed/defined.

Aim

Many severe weather events can result in fatalities, injuries, and property damage. This project aims to identify which weather events have the most severe effects.

Imports

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.5.3

Load Data

Note: the data file must be in the same folder as this document (the R working directory).

# Read the bzip2-compressed storm CSV; the path is relative, so the file is
# looked up in the current working directory.
storm_path <- "repdata_data_StormData.csv.bz2"
data <- read.csv(file = bzfile(storm_path))

Harmful Events

Data Processing

# Total fatalities per event type, ranked from highest to lowest. The result
# is a one-column data frame whose row names are the event types.
fatal <- local({
  totals <- tapply(data$FATALITIES, data$EVTYPE, sum)
  ranked <- as.data.frame(totals[order(totals, decreasing = TRUE)])
  names(ranked) <- "fatalities"
  ranked
})

# Same aggregation for injuries.
injury <- local({
  totals <- tapply(data$INJURIES, data$EVTYPE, sum)
  ranked <- as.data.frame(totals[order(totals, decreasing = TRUE)])
  names(ranked) <- "injuries"
  ranked
})

Results

# Horizontal bar charts of the ten event types with the most fatalities
# (top panel) and the most injuries (bottom panel).
#
# The event names are stored as row names, so they are copied into an
# explicit `event` column. reorder() turns that column into a factor whose
# levels follow the plotted count; without it ggplot2 sorts the character
# axis alphabetically and the bars do not appear in rank order.
topFatal <- head(fatal, 10)
topFatal$event <- reorder(row.names(topFatal), topFatal$fatalities)
plot1 <- ggplot(data = topFatal, aes(x = event, y = fatalities)) +
  geom_col(fill = "red") +
  coord_flip() +
  ylab("Number of Fatalities") +
  xlab("Event Type") +
  ggtitle("Health Impacts of weather events")

topInjury <- head(injury, 10)
topInjury$event <- reorder(row.names(topInjury), topInjury$injuries)
plot2 <- ggplot(data = topInjury, aes(x = event, y = injuries)) +
  geom_col(fill = "blue") +
  coord_flip() +
  ylab("Number of Injuries") +
  xlab("Event Type")

# The fill colour is a constant, not a mapped aesthetic, so no legend is
# produced and no legend theme setting is needed.
grid.arrange(plot1, plot2, nrow = 2)

Economic Consequences

Data Processing

# Convert damage exponent codes into numeric multipliers.
#
# e: one or more exponent codes (character or factor).
#    "h"/"H" -> 1e2, "k"/"K" -> 1e3, "m"/"M" -> 1e6, "b"/"B" -> 1e9;
#    a code that parses as a number n -> 10^n; anything else (including
#    "", symbols and NA) -> 1.
# Returns a numeric vector of multipliers with the same length as `e`.
getExp <- function(e) {
  # Always work on the label text: as.numeric() applied to a factor returns
  # the internal level index, not the number shown by the label.
  label <- as.character(e)
  multiplier <- rep(1, length(label))

  letterValues <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  upper <- toupper(label)
  isLetter <- upper %in% names(letterValues)
  multiplier[isLetter] <- unname(letterValues[upper[isLetter]])

  # Non-numeric labels are expected here and become NA, which leaves the
  # default multiplier of 1 in place; the coercion warning is just noise.
  power <- suppressWarnings(as.numeric(label))
  isPower <- !isLetter & !is.na(power)
  multiplier[isPower] <- 10^power[isPower]

  multiplier
}

# Map a column of exponent codes to multipliers using getExp().
#
# codes: the exponent column (character or factor).
# Returns a numeric vector with one multiplier per element of `codes`.
#
# The codes are converted to text first so getExp() always receives the
# label itself rather than a factor element, whose numeric value is its
# level index. getExp() is evaluated once per distinct code and the result
# is spread back over the rows with match(), instead of one call per row.
expMultiplier <- function(codes) {
  codes <- as.character(codes)
  distinct <- unique(codes)
  perCode <- vapply(distinct, getExp, numeric(1), USE.NAMES = FALSE)
  perCode[match(codes, distinct)]
}

# Crop damage: scale each amount by its exponent, total it per event type,
# and rank the totals from highest to lowest.
temp <- expMultiplier(data$CROPDMGEXP)
data$CROPDMGCALC <- data$CROPDMG * temp
cropDamage <- tapply(data$CROPDMGCALC, data$EVTYPE, sum)
cropDamage <- as.data.frame(cropDamage[order(cropDamage, decreasing = TRUE)])
names(cropDamage) <- "damages"

# Property damage: same steps on the property columns.
temp <- expMultiplier(data$PROPDMGEXP)
data$PROPDMGCALC <- data$PROPDMG * temp
propDamage <- tapply(data$PROPDMGCALC, data$EVTYPE, sum)
propDamage <- as.data.frame(propDamage[order(propDamage, decreasing = TRUE)])
names(propDamage) <- "damages"

Results

# Horizontal bar charts of the ten event types with the highest total crop
# damage (top panel) and property damage (bottom panel).
#
# The event names are stored as row names, so they are copied into an
# explicit `event` column. reorder() turns that column into a factor whose
# levels follow the plotted total; without it ggplot2 sorts the character
# axis alphabetically and the bars do not appear in rank order.
topCrop <- head(cropDamage, 10)
topCrop$event <- reorder(row.names(topCrop), topCrop$damages)
plot1 <- ggplot(data = topCrop, aes(x = event, y = damages)) +
  geom_col(fill = "yellow") +
  coord_flip() +
  ylab("Crop Damages") +
  xlab("Event Type") +
  ggtitle("Economic Impacts of weather events")

topProp <- head(propDamage, 10)
topProp$event <- reorder(row.names(topProp), topProp$damages)
plot2 <- ggplot(data = topProp, aes(x = event, y = damages)) +
  geom_col(fill = "green") +
  coord_flip() +
  ylab("Property Damaged") +
  xlab("Event Type")

# The fill colour is a constant, not a mapped aesthetic, so no legend is
# produced and no legend theme setting is needed.
grid.arrange(plot1, plot2, nrow = 2)

Findings

  • Tornadoes cause the most injuries and fatalities.
  • Drought causes the most crop damage.
  • Flash Floods cause the most property damage.