Load the required libraries

  library(R.utils)
  library(data.table)
  library(R.cache)
  library(dplyr)
  library(lubridate)
  library(ggplot2)

Downloading and Loading the Data

Only including the fields that will be used in analysis

    # Fetch the compressed NOAA storm data only when it is not already on disk,
    # so re-running the report does not download the archive again. This mirrors
    # the decompression step below, which already skips work that has been done.
    if (!file.exists("noaa.csv.bz2")) {
      download.file(
        "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
        "noaa.csv.bz2",
        method = "curl"
      )
    }
    # Decompress to noaa.csv; keep the archive and skip if noaa.csv exists.
    bunzip2("noaa.csv.bz2", remove = FALSE, skip = TRUE)
## [1] "noaa.csv"
## attr(,"temporary")
## [1] FALSE
  # Read only the columns used in the analysis; `select` lets fread skip the
  # remaining columns while parsing instead of loading them and discarding them.
  noaa <- fread(
    "noaa.csv",
    sep = ",",
    select = c(
      "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
      "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
    )
  )

Then I tidy up the data: convert the damage figures into actual dollar amounts, parse BGN_DATE into a lubridate date-time, and create a total damage column.

  # Map a damage-exponent code (PROPDMGEXP / CROPDMGEXP) to the factor that
  # the matching damage figure must be multiplied by.
  #   B/b = billions, M/m = millions, K/k = thousands, H/h = hundreds
  #   "+"                 -> figure taken as-is
  #   "-", "?", "", " "   -> no usable magnitude, counted as zero
  #   digits 0-8          -> x10 (convention carried over from the original
  #                          analysis; NOTE(review): confirm this is intended)
  # Codes not listed yield NA, so the resulting damage value is NA as well.
  damage_multiplier <- function(exp_code) {
    case_when(
      exp_code %in% c("B", "b") ~ 1e9, # was 1e8, which understated billions
      exp_code %in% c("M", "m") ~ 1e6,
      exp_code %in% c("K", "k") ~ 1e3,
      exp_code %in% c("H", "h") ~ 1e2,
      exp_code %in% c("+") ~ 1,
      exp_code %in% c("-", "?", "", " ") ~ 0,
      exp_code %in% 0:8 ~ 10
    )
  }

  # Parse the event start timestamp and derive its calendar year.
  noaa <- noaa %>%
    mutate(
      date = mdy_hms(BGN_DATE),
      year = year(date)
    )

  # Convert crop and property damage to dollar amounts, then build the
  # combined damage column and an "any impact" score that also adds the
  # fatality and injury counts (used later to drop records with no impact).
  noaa <- noaa %>%
    mutate(
      cropdmgv2 = CROPDMG * damage_multiplier(CROPDMGEXP),
      propdmgv2 = PROPDMG * damage_multiplier(PROPDMGEXP),
      totaldmg = propdmgv2 + cropdmgv2,
      anydmg = propdmgv2 + cropdmgv2 + FATALITIES + INJURIES
    )

Then I determine which years have valuable data. In this case I find that it wasn’t until 1993 that there was a significant number of event types (EVTYPE values).

    # Count how many distinct event types were recorded in each year strictly
    # between 1990 and 1995; the result is printed, not stored.
    noaa %>%
      subset(year > 1990 & year < 1995) %>%
      group_by(year) %>%
      summarise(count = n_distinct(EVTYPE))
## # A tibble: 4 x 2
##    year count
##   <dbl> <int>
## 1  1991     3
## 2  1992     3
## 3  1993   160
## 4  1994   267

So I removed all years prior to 1993, plus any records where there was no damage or population health impact.

  # Keep records from 1993 onward that have some recorded impact (damage,
  # fatalities or injuries). Plain `<-` is used: this is a top-level
  # assignment, so the super-assignment operator `<<-` is not needed.
  noaav2 <- subset(noaa, anydmg > 0 & year > 1992)

By ordering the data by total fatalities and plotting, I was able to determine which events are the most impactful to the population.

  # Sum fatalities, injuries and their combined total for each event type.
  popimpact <- noaav2 %>%
    group_by(EVTYPE) %>%
    summarise(
      Total_Fatality = sum(FATALITIES),
      Total_Injury = sum(INJURIES),
      Total = sum(FATALITIES + INJURIES)
    ) %>%
    as.data.table()

  # Rank by fatalities (highest first) and keep the first ten event types.
  popimpact <- popimpact[order(-Total_Fatality)][1:10, ]

  # Reshape to long form: one row per (event type, measure) pair for plotting.
  popimpact <- melt(popimpact, id.vars = "EVTYPE", variable.name = "category")

  # Dodged bars per event type, one bar per measure, events ordered by value.
  g <- ggplot(popimpact, aes(x = reorder(EVTYPE, -value), y = value)) +
    geom_col(aes(fill = category), position = "dodge") +
    labs(x = "Event", y = "Count", title = "Top Population Impact") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(g)

Then, by ordering the data by total damage, I was able to determine which events have done the most damage.

  # Sum property damage, crop damage and their combined total per event type.
  dmg <- noaav2 %>%
    group_by(EVTYPE) %>%
    summarise(
      Prop_Dmg = sum(propdmgv2),
      Crop_Dmg = sum(cropdmgv2),
      Total = sum(propdmgv2 + cropdmgv2)
    )
  dmg <- as.data.table(dmg)

  # Rank by combined damage (highest first) and keep the first ten event types.
  dmg <- dmg[order(-Total)]
  dmg <- dmg[1:10, ]

  # Reshape to long form: one row per (event type, measure) pair for plotting.
  dmg <- melt(dmg, id.vars = "EVTYPE", variable.name = "category")

  # Dodged bars per event type, one bar per damage measure. The title
  # previously read "Properity"; corrected to "Property".
  g <- ggplot(dmg, aes(x = reorder(EVTYPE, -value), y = value)) +
    geom_bar(position = "dodge", stat = "identity", aes(fill = category))
  g <- g +
    ylab("Damage") +
    xlab("Event") +
    ggtitle("Top Crop and Property Damage") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(g)