Data Processing
As a first step, I automated downloading the data and saving it in a ./data directory (which the R Markdown document also creates if it does not exist).
# Download the storm data archive (once) and load it.
# The directory and the archive path are each defined a single time, so the
# existence check, the download and the read always refer to the same file.
datapath <- "./data"
filename <- file.path(datapath, "2FStormData.csv.bz2")

# Create the data directory if it is missing
if (!dir.exists(datapath)) {
  dir.create(datapath)
}

# Download the archive only when it is not already on disk
if (!file.exists(filename)) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # The file is a compressed archive: request a binary transfer explicitly
  # instead of relying on the platform-dependent default mode.
  download.file(fileUrl, destfile = filename, mode = "wb")
}

# Read the archive, fixing the type of every column used later in the
# analysis (columns not listed here are left to read_csv's type guessing)
data <- read_csv(
  filename,
  col_types = cols(
    EVTYPE = col_character(),
    STATE = col_character(),
    FATALITIES = col_number(),
    INJURIES = col_number(),
    PROPDMG = col_number(),
    PROPDMGEXP = col_character(),
    CROPDMG = col_number(),
    CROPDMGEXP = col_character()
  )
)
We are interested in variables related to health and economic damage, so I picked the following group of variables:
Health variables:
- FATALITIES: approx. number of deaths
- INJURIES: approx. number of injuries
Economic variables:
- PROPDMG: approx. property damages
- PROPDMGEXP: (K: thousands, M: millions, B: billions)
- CROPDMG: approx. crop damages
- CROPDMGEXP: (K: thousands, M: millions, B: billions)
Event variable (target variable):
- EVTYPE: weather event (Tornadoes, Wind, Snow, Flood, etc.)
Extras
With these variables I made 4 datasets:
tidydata: (902297 rows, 8 variables). Dataset with all the variables.
harmful_events: (977 rows, 3 variables). Event, Deads, Injuries.
damageData_prop: (436035 rows, 4 variables). EVTYPE (event), PROPDMG, PROPDMGEXP. Added an extra variable millions to unify Economic losses scale.
damageData_crop: (283835 rows, 4 variables). EVTYPE (event), CROPDMG, CROPDMGEXP. Added an extra variable millions to unify Economic losses scale.
# Keep only the columns used by the health and economic analyses
tidydata <- data %>%
  select(
    EVTYPE, STATE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  )

# Health impact dataset: total fatalities and injuries per event type
harmful_events <- tidydata %>%
  group_by(EVTYPE) %>%
  summarise(Deads = sum(FATALITIES), Injuries = sum(INJURIES)) %>%
  rename(Event = EVTYPE)
## `summarise()` ungrouping output (override with `.groups` argument)

# First 15 rows of each ranking, sorted from the largest total downwards
more_deads <- arrange(harmful_events, desc(Deads))[1:15, ]
more_injuries <- arrange(harmful_events, desc(Injuries))[1:15, ]
# Datasets for the economic analysis: keep only the rows whose exponent code
# is exactly "K", "M" or "B", then keep the event, amount and exponent columns
damageData_prop <- tidydata %>%
  filter(PROPDMGEXP %in% c("K", "M", "B")) %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP)

damageData_crop <- tidydata %>%
  filter(CROPDMGEXP %in% c("K", "M", "B")) %>%
  select(EVTYPE, CROPDMG, CROPDMGEXP)
Making plot Top 15. Most Frequent Events Across the Country
# Number of records per event type (columns: event, Freq)
events <- rename(data.frame(table(tidydata$EVTYPE)), event = Var1)

# Event types with at least 100 records, most frequent first
most_freq_events <- events %>%
  filter(Freq >= 100) %>%
  arrange(desc(Freq))

# Horizontal bar chart of the first 15 rows; bars are ordered by frequency
# and each bar is labelled with its count
g_freq <- most_freq_events[1:15, ] %>%
  mutate(event = reorder(event, Freq)) %>%
  ggplot(aes(x = Freq, y = event)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = Freq),
    position = position_dodge(0.9),
    hjust = -0.1
  ) +
  labs(
    x = "",
    y = "",
    title = "Top 15. Most Frequent Events Across the Country"
  ) +
  # Upper x limit is set above the tallest bar so its label is not clipped
  coord_cartesian(xlim = c(0, 310000))
Making plot Top 15. Number Of fatalities (A) / Injuries (B)
# Panel A: horizontal bar chart of fatalities per event, bars ordered by
# total and labelled with their value
g_deads <- more_deads %>%
  mutate(Event = reorder(Event, Deads)) %>%
  ggplot(aes(x = Deads, y = Event)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = Deads),
    size = 2.5,
    position = position_dodge(0.9),
    hjust = -0.1
  ) +
  labs(x = "", y = "", title = "A") +
  coord_cartesian(xlim = c(0, 6500))

# Panel B: same chart for injuries per event
g_injuries <- more_injuries %>%
  mutate(Event = reorder(Event, Injuries)) %>%
  ggplot(aes(x = Injuries, y = Event)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = Injuries),
    size = 2.5,
    position = position_dodge(0.9),
    hjust = -0.1
  ) +
  labs(x = "", y = "", title = "B") +
  coord_cartesian(xlim = c(0, 110000))
Making plot Top 15. Economic Losses: Properties (A) / Crops (B)
# Express every damage amount in millions according to its exponent code:
# "K" amounts are divided by 10^3, "B" amounts are multiplied by 10^3 and any
# other non-missing code leaves the amount unchanged
damageData_prop <- damageData_prop %>%
  mutate(
    millions = case_when(
      PROPDMGEXP == "K" ~ PROPDMG / (10^3),
      PROPDMGEXP == "B" ~ PROPDMG * (10^3),
      !is.na(PROPDMGEXP) ~ PROPDMG
    )
  )

damageData_crop <- damageData_crop %>%
  mutate(
    millions = case_when(
      CROPDMGEXP == "K" ~ CROPDMG / (10^3),
      CROPDMGEXP == "B" ~ CROPDMG * (10^3),
      !is.na(CROPDMGEXP) ~ CROPDMG
    )
  )
# Panel A: property losses (in millions) summed per event type; the first 15
# rows of the descending ranking are drawn as labelled horizontal bars
g_prop <- damageData_prop %>%
  group_by(EVTYPE) %>%
  summarise(millions = sum(millions, na.rm = TRUE)) %>%
  arrange(desc(millions)) %>%
  .[1:15, ] %>%
  mutate(Event = reorder(EVTYPE, millions)) %>%
  ggplot(aes(x = millions, y = Event)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = round(millions)),
    size = 2.5,
    position = position_dodge(0.9),
    hjust = -0.1
  ) +
  labs(x = "Millions", y = "", title = "A") +
  coord_cartesian(xlim = c(0, 180000))
## `summarise()` ungrouping output (override with `.groups` argument)

# Panel B: same chart for crop losses
g_crop <- damageData_crop %>%
  group_by(EVTYPE) %>%
  summarise(millions = sum(millions, na.rm = TRUE)) %>%
  arrange(desc(millions)) %>%
  .[1:15, ] %>%
  mutate(Event = reorder(EVTYPE, millions)) %>%
  ggplot(aes(x = millions, y = Event)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = round(millions)),
    size = 2.5,
    position = position_dodge(0.9),
    hjust = -0.1
  ) +
  labs(x = "Millions", y = "", title = "B") +
  coord_cartesian(xlim = c(0, 16500))
## `summarise()` ungrouping output (override with `.groups` argument)