Load & join the data

Downloaded the data from NOAA.GOV for the years 2014 and 2015.

library(dplyr)

df.eventdetails <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_details.csv", header = T, sep = ',')

df.eventdetails_2014 <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_details_2014.csv", header = T, sep = ',')

df.eventdetails <- rbind(df.eventdetails, df.eventdetails_2014)

#View(df.eventdetails)

#Filter for the state of AR
df.eventdetails.ar <- df.eventdetails %>% filter(STATE=='ARKANSAS')
#head(df.eventdetails.ar)
#View(df.eventdetails.ar)

#load the storm location data
df.location <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_locations.csv", header = T, sep = ',')
df.location_2014 <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_locations_2014.csv", header = T, sep = ',')
df.location <- rbind(df.location, df.location_2014)
#View(df.location)

#Join the AR events with location data.
df.eventdetails.ar.loc <- left_join(df.eventdetails.ar, df.location, by = "EVENT_ID")

#Join the all events with location data.
df.eventdetails.loc <- left_join(df.eventdetails, df.location, by = "EVENT_ID")

#load fatalities
df.fatalities <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_fatalities.csv", header = T, sep =',')
#View(df.fatalities)

df.fatalities_2014 <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_fatalities_2014.csv", header = T, sep =',')

#Combine both
df.fatalities <- rbind(df.fatalities, df.fatalities_2014)

#join to get ar loc fat
df.eventdetails.ar.loc.fat <- left_join(df.eventdetails.ar.loc, df.fatalities, by = "EVENT_ID")
#View(df.eventdetails.ar.loc.fat)

#All loc fat
df.eventdetails.loc.fat <- left_join(df.eventdetails.loc, df.fatalities, by = "EVENT_ID")
#View(df.eventdetails.loc.fat)

Tiday the data and prepare required csv files

#View(df.eventdetails.ar.loc.fat)

#Get all AR impacts
df.fatal.ar <- df.eventdetails.ar.loc.fat %>% filter(DEATHS_DIRECT > 0 | DEATHS_INDIRECT >0 | INJURIES_DIRECT > 0| INJURIES_INDIRECT> 0 | (DAMAGE_CROPS != '' & DAMAGE_PROPERTY != '0.00K' ) )
#View(df.fatal.ar)


#Get only deaths/injuries by event for all states
df.fatal <- df.eventdetails.loc.fat %>% filter(DEATHS_DIRECT > 0 | DEATHS_INDIRECT >0 | INJURIES_DIRECT > 0| INJURIES_INDIRECT> 0)
#View(df.fatal)


#Remove Duplicates
df.fatal.ar <- subset(df.fatal.ar, !duplicated(df.fatal.ar[,"EVENT_ID"])) 
#View(df.fatal.ar)

#-prepare the required data for AR----------------------------------------------------------------------

#Utility function to change the abberiviated currency values [for eg: 2.25K] into numbers [2250]
library('stringr')
getNumber <- function(str)
{
  result <- c(0)
  str <- str_trim(str)
  val <- str_sub(str, start=1, end= str_length(str)-1)
  ext <- str_sub(str, start=str_length(str), end = str_length(str) + 1)
    
  ifelse(ext == "K", 
         result <-  as.numeric(val) * 1000,
         (ifelse(ext == "M", result <-  as.numeric(val) * 1000000,result <- as.numeric(str)))
  )

  return(ifelse(is.na(result), 0, result))

}

#options(scipen = 999)


#Change the abberiviated currency values [for eg: 2.25K] into numbers [2250]
df.fatal.ar$DAMAGE_CROPS <- lapply(df.fatal.ar$DAMAGE_CROPS, getNumber)
df.fatal.ar$DAMAGE_PROPERTY <- lapply(df.fatal.ar$DAMAGE_PROPERTY, getNumber)

#View(df.fatal.ar)

#Fetch the YEAR, EVENT_TYPE, COUNTY, DEATHCOUNT, INJURY COUNT, PROPERTY DAMAGES....
df.fatal.ar.county.by.year.event <- df.fatal.ar %>% group_by(YEAR,EVENT_TYPE, CZ_NAME) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT), INJURY_COUNT = sum(INJURIES_DIRECT, INJURIES_INDIRECT), CROPS_DAMAGE = sum(as.numeric(DAMAGE_CROPS)), PROPERTY_DAMAGE = sum(as.numeric(DAMAGE_PROPERTY)))

head(df.fatal.ar.county.by.year.event)
## Source: local data frame [6 x 7]
## Groups: YEAR, EVENT_TYPE [1]
## 
##    YEAR  EVENT_TYPE    CZ_NAME DEATH_COUNT INJURY_COUNT CROPS_DAMAGE
##   (int)      (fctr)     (fctr)       (int)        (int)        (dbl)
## 1  2014 Flash Flood      BOONE           0            0            0
## 2  2014 Flash Flood    CALHOUN           0            0            0
## 3  2014 Flash Flood     CHICOT           0            0            0
## 4  2014 Flash Flood       CLAY           0            0            0
## 5  2014 Flash Flood   CRAWFORD           0            2            0
## 6  2014 Flash Flood CRITTENDEN           0            0            0
## Variables not shown: PROPERTY_DAMAGE (dbl)
#View(df.fatal.ar.county.by.year.event)

df.fatal.ar.county.by.year.event$FatalWeight <- df.fatal.ar.county.by.year.event$DEATH_COUNT * 0.75 + df.fatal.ar.county.by.year.event$INJURY_COUNT * 0.25

#/Write the required data into csv
#write.csv(df.fatal.ar.county.by.year.event, file = "sei_ARSummary.csv", row.names = F)

#------------------------------------------------------------------------
#Remove dups for all state.(if any)
df.fatal <- subset(df.fatal, !duplicated(df.fatal[,"EVENT_ID"])) 
#View(df.fatal)


#fatal counts by state
df.fatal.by.state <- df.fatal %>% group_by(YEAR,STATE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT), INJURY_COUNT = sum(INJURIES_DIRECT, INJURIES_INDIRECT))

#View(df.fatal.by.state)

#events that cause fatals by state by year
df.fatal.by.state.eventtype <- df.fatal %>% group_by(YEAR,STATE, EVENT_TYPE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT))

df.fatal.by.state.fatal.eventtypes <- df.fatal.by.state.eventtype %>% filter(DEATH_COUNT > 0)
#View(df.fatal.by.state.fatal.eventtypes)


#require(tidyr)
#testdf = spread(df.fatal.by.state.eventtype, EVENT_TYPE, DEATH_COUNT)


write.csv(df.fatal.by.state, file = "sei_StateImpactSummary.csv", row.names = F)
write.csv(df.fatal.by.state.fatal.eventtypes, file = "sei_StateEvents.csv", row.names = F)
#fatal count by weather event
#df.fatal.by.state.event <- df.fatal %>% group_by(STATE, EVENT_TYPE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT))
df.fatal.by.event <- df.fatal %>% group_by(YEAR, EVENT_TYPE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT), INJURY_COUNT= sum(INJURIES_DIRECT, INJURIES_INDIRECT))
#View(df.fatal.by.event)

#GET TOP5 EVENTS CAUSING FATALITIES
df.fatal.by.event.top5 <- df.fatal.by.event %>% arrange(desc(DEATH_COUNT)) %>% slice(1:5)
#View(df.fatal.by.event.top5)

#GET TOP5 EVENTS CAUSING INJURIES
df.injuries.by.event.top5 <- df.fatal.by.event %>% arrange(desc(INJURY_COUNT)) %>% slice(1:5)
#View(df.injuries.by.event.top5 )

#COMBINE BOTH
df.fatals.injuiries.by.event <- rbind(df.fatal.by.event.top5, df.injuries.by.event.top5)

#Remove duplicates
df.fatals.injuiries.by.event <- df.fatals.injuiries.by.event[!duplicated(df.fatals.injuiries.by.event),]

#View(df.fatals.injuiries.by.event)

#write the csv
#write.csv(df.fatals.injuiries.by.event, file = "sei_eventImpactSummary.csv", row.names = F)