Load & join the data
Downloaded the data from NOAA.GOV for the years 2014 and 2015.
library(dplyr)
df.eventdetails <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_details.csv", header = T, sep = ',')
df.eventdetails_2014 <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_details_2014.csv", header = T, sep = ',')
df.eventdetails <- rbind(df.eventdetails, df.eventdetails_2014)
#View(df.eventdetails)
#Filter for the state of AR
df.eventdetails.ar <- df.eventdetails %>% filter(STATE=='ARKANSAS')
#head(df.eventdetails.ar)
#View(df.eventdetails.ar)
#load the storm location data
df.location <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_locations.csv", header = T, sep = ',')
df.location_2014 <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_locations_2014.csv", header = T, sep = ',')
df.location <- rbind(df.location, df.location_2014)
#View(df.location)
#Join the AR events with location data.
df.eventdetails.ar.loc <- left_join(df.eventdetails.ar, df.location, by = "EVENT_ID")
#Join the all events with location data.
df.eventdetails.loc <- left_join(df.eventdetails, df.location, by = "EVENT_ID")
#load fatalities
df.fatalities <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_fatalities.csv", header = T, sep =',')
#View(df.fatalities)
df.fatalities_2014 <- read.csv(file = "C:/Users/p_sum/OneDrive/Documents/StormEvents_fatalities_2014.csv", header = T, sep =',')
#Combine both
df.fatalities <- rbind(df.fatalities, df.fatalities_2014)
#join to get ar loc fat
df.eventdetails.ar.loc.fat <- left_join(df.eventdetails.ar.loc, df.fatalities, by = "EVENT_ID")
#View(df.eventdetails.ar.loc.fat)
#All loc fat
df.eventdetails.loc.fat <- left_join(df.eventdetails.loc, df.fatalities, by = "EVENT_ID")
#View(df.eventdetails.loc.fat)
Tiday the data and prepare required csv files
#View(df.eventdetails.ar.loc.fat)
#Get all AR impacts
df.fatal.ar <- df.eventdetails.ar.loc.fat %>% filter(DEATHS_DIRECT > 0 | DEATHS_INDIRECT >0 | INJURIES_DIRECT > 0| INJURIES_INDIRECT> 0 | (DAMAGE_CROPS != '' & DAMAGE_PROPERTY != '0.00K' ) )
#View(df.fatal.ar)
#Get only deaths/injuries by event for all states
df.fatal <- df.eventdetails.loc.fat %>% filter(DEATHS_DIRECT > 0 | DEATHS_INDIRECT >0 | INJURIES_DIRECT > 0| INJURIES_INDIRECT> 0)
#View(df.fatal)
#Remove Duplicates
df.fatal.ar <- subset(df.fatal.ar, !duplicated(df.fatal.ar[,"EVENT_ID"]))
#View(df.fatal.ar)
#-prepare the required data for AR----------------------------------------------------------------------
#Utility function to change the abberiviated currency values [for eg: 2.25K] into numbers [2250]
library('stringr')
getNumber <- function(str)
{
result <- c(0)
str <- str_trim(str)
val <- str_sub(str, start=1, end= str_length(str)-1)
ext <- str_sub(str, start=str_length(str), end = str_length(str) + 1)
ifelse(ext == "K",
result <- as.numeric(val) * 1000,
(ifelse(ext == "M", result <- as.numeric(val) * 1000000,result <- as.numeric(str)))
)
return(ifelse(is.na(result), 0, result))
}
#options(scipen = 999)
#Change the abberiviated currency values [for eg: 2.25K] into numbers [2250]
df.fatal.ar$DAMAGE_CROPS <- lapply(df.fatal.ar$DAMAGE_CROPS, getNumber)
df.fatal.ar$DAMAGE_PROPERTY <- lapply(df.fatal.ar$DAMAGE_PROPERTY, getNumber)
#View(df.fatal.ar)
#Fetch the YEAR, EVENT_TYPE, COUNTY, DEATHCOUNT, INJURY COUNT, PROPERTY DAMAGES....
df.fatal.ar.county.by.year.event <- df.fatal.ar %>% group_by(YEAR,EVENT_TYPE, CZ_NAME) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT), INJURY_COUNT = sum(INJURIES_DIRECT, INJURIES_INDIRECT), CROPS_DAMAGE = sum(as.numeric(DAMAGE_CROPS)), PROPERTY_DAMAGE = sum(as.numeric(DAMAGE_PROPERTY)))
head(df.fatal.ar.county.by.year.event)
## Source: local data frame [6 x 7]
## Groups: YEAR, EVENT_TYPE [1]
##
## YEAR EVENT_TYPE CZ_NAME DEATH_COUNT INJURY_COUNT CROPS_DAMAGE
## (int) (fctr) (fctr) (int) (int) (dbl)
## 1 2014 Flash Flood BOONE 0 0 0
## 2 2014 Flash Flood CALHOUN 0 0 0
## 3 2014 Flash Flood CHICOT 0 0 0
## 4 2014 Flash Flood CLAY 0 0 0
## 5 2014 Flash Flood CRAWFORD 0 2 0
## 6 2014 Flash Flood CRITTENDEN 0 0 0
## Variables not shown: PROPERTY_DAMAGE (dbl)
#View(df.fatal.ar.county.by.year.event)
df.fatal.ar.county.by.year.event$FatalWeight <- df.fatal.ar.county.by.year.event$DEATH_COUNT * 0.75 + df.fatal.ar.county.by.year.event$INJURY_COUNT * 0.25
#/Write the required data into csv
#write.csv(df.fatal.ar.county.by.year.event, file = "sei_ARSummary.csv", row.names = F)
#------------------------------------------------------------------------
#Remove dups for all state.(if any)
df.fatal <- subset(df.fatal, !duplicated(df.fatal[,"EVENT_ID"]))
#View(df.fatal)
#fatal counts by state
df.fatal.by.state <- df.fatal %>% group_by(YEAR,STATE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT), INJURY_COUNT = sum(INJURIES_DIRECT, INJURIES_INDIRECT))
#View(df.fatal.by.state)
#events that cause fatals by state by year
df.fatal.by.state.eventtype <- df.fatal %>% group_by(YEAR,STATE, EVENT_TYPE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT))
df.fatal.by.state.fatal.eventtypes <- df.fatal.by.state.eventtype %>% filter(DEATH_COUNT > 0)
#View(df.fatal.by.state.fatal.eventtypes)
#require(tidyr)
#testdf = spread(df.fatal.by.state.eventtype, EVENT_TYPE, DEATH_COUNT)
write.csv(df.fatal.by.state, file = "sei_StateImpactSummary.csv", row.names = F)
write.csv(df.fatal.by.state.fatal.eventtypes, file = "sei_StateEvents.csv", row.names = F)
#fatal count by weather event
#df.fatal.by.state.event <- df.fatal %>% group_by(STATE, EVENT_TYPE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT))
df.fatal.by.event <- df.fatal %>% group_by(YEAR, EVENT_TYPE) %>% summarize(DEATH_COUNT = sum(DEATHS_DIRECT, DEATHS_INDIRECT), INJURY_COUNT= sum(INJURIES_DIRECT, INJURIES_INDIRECT))
#View(df.fatal.by.event)
#GET TOP5 EVENTS CAUSING FATALITIES
df.fatal.by.event.top5 <- df.fatal.by.event %>% arrange(desc(DEATH_COUNT)) %>% slice(1:5)
#View(df.fatal.by.event.top5)
#GET TOP5 EVENTS CAUSING INJURIES
df.injuries.by.event.top5 <- df.fatal.by.event %>% arrange(desc(INJURY_COUNT)) %>% slice(1:5)
#View(df.injuries.by.event.top5 )
#COMBINE BOTH
df.fatals.injuiries.by.event <- rbind(df.fatal.by.event.top5, df.injuries.by.event.top5)
#Remove duplicates
df.fatals.injuiries.by.event <- df.fatals.injuiries.by.event[!duplicated(df.fatals.injuiries.by.event),]
#View(df.fatals.injuiries.by.event)
#write the csv
#write.csv(df.fatals.injuiries.by.event, file = "sei_eventImpactSummary.csv", row.names = F)