Data Processing

Loading packages

knitr::opts_chunk$set(cache = TRUE)
library(ggplot2)
library(reshape2)
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Loading the data

# Load the raw storm data set from the bzip2-compressed CSV in the working
# directory. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
data_NOAA <- read.csv("repdata-data-StormData.csv.bz2", sep = ",", header = TRUE)

Subsetting data

# Keep only the event type, the casualty counts and the damage
# amount/exponent columns.
data_NOAA <- data_NOAA[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]

Cleaning data

# Translate damage exponent codes into numeric multipliers.
#
#   exp_code: vector (character, factor or numeric) of exponent codes.
#   Returns:  numeric vector of the same length as `exp_code`:
#               "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9
#               (case-insensitive), a single digit d -> 10^d,
#               anything else (blank, other symbols, NA) -> 0.
#
# NOTE(review): digit codes are treated as powers of ten (10^d) rather than
# as the literal multiplier d; confirm this matches the convention you want
# for the undocumented numeric codes.
damage_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  letter_powers <- c(H = 2, K = 3, M = 6, B = 9)
  # Unknown codes (including "" and NA) index to NA here.
  power <- unname(letter_powers[code])
  is_digit <- grepl("^[0-9]$", code)
  power[is_digit] <- as.numeric(code[is_digit])
  ifelse(is.na(power), 0, 10^power)
}

# Scale the crop and property damage amounts by their exponent codes, then
# drop the exponent columns, which are no longer needed.
data_cleaned <- data_NOAA %>%
  mutate(
    CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP),
    PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
    CROPDMGEXP = NULL,
    PROPDMGEXP = NULL
  )

# Reshape to long format: one row per event record and measure, with the
# measure's column name in `category` and its number in `value`. Rows whose
# value is NA are dropped. `id.vars` is spelled out in full rather than
# relying on partial matching of `id`.
data_final <- melt(
  data_cleaned,
  id.vars = "EVTYPE",
  variable.name = "category",
  value.name = "value",
  na.rm = TRUE
)

Results

1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Total fatalities per event type, keeping the ten types with the most.
fatalities <- data_final[data_final$category == "FATALITIES", ]
fat_nr <- aggregate(value ~ EVTYPE, data = fatalities, FUN = sum)
# head() returns at most ten rows, so a data set with fewer than ten event
# types does not get padded with NA rows the way `[1:10, ]` would.
fat_nr <- head(fat_nr[order(-fat_nr$value), ], 10)
# Fix the level order to the sorted order so the x axis follows the ranking.
fat_nr$EVTYPE <- factor(fat_nr$EVTYPE, levels = fat_nr$EVTYPE)

# Bar chart: total fatalities for the ten event types in fat_nr
ggplot(data = fat_nr, mapping = aes(x = EVTYPE, y = value)) +
    geom_bar(stat = "identity", fill = "red") +
    labs(
        x = "Event Type",
        y = "Fatalities",
        title = "Number of fatalities for the top 10 weather events"
    ) +
    # Rotate the event names so they stay readable.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Total injuries per event type, keeping the ten types with the most.
injuries <- data_final[data_final$category == "INJURIES", ]
inj_nr <- aggregate(value ~ EVTYPE, data = injuries, FUN = sum)
# head() returns at most ten rows, so a data set with fewer than ten event
# types does not get padded with NA rows the way `[1:10, ]` would.
inj_nr <- head(inj_nr[order(-inj_nr$value), ], 10)
# Fix the level order to the sorted order so the x axis follows the ranking.
inj_nr$EVTYPE <- factor(inj_nr$EVTYPE, levels = inj_nr$EVTYPE)

# Bar chart: total injuries for the ten event types in inj_nr
ggplot(data = inj_nr, mapping = aes(x = EVTYPE, y = value)) +
    geom_bar(stat = "identity", fill = "red") +
    labs(
        x = "Event Type",
        y = "Injuries",
        title = "Number of injuries for the top 10 weather events"
    ) +
    # Rotate the event names so they stay readable.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

2. Across the United States, which types of events have the greatest economic consequences?

# Combined property and crop damage per event type, keeping the ten costliest.
# Only the PROPDMG and CROPDMG categories are kept, so fatality and injury
# counts are not added into the damage totals.
damages <- data_final[data_final$category %in% c("PROPDMG", "CROPDMG"), ]
dam_nr <- aggregate(value ~ EVTYPE, data = damages, FUN = sum)
names(dam_nr) <- c("EVTYPE", "DAMAGE")
# head() returns at most ten rows, so a data set with fewer than ten event
# types does not get padded with NA rows the way `[1:10, ]` would.
dam_nr <- head(dam_nr[order(-dam_nr$DAMAGE), ], 10)
# Fix the level order to the sorted order so the x axis follows the ranking.
dam_nr$EVTYPE <- factor(dam_nr$EVTYPE, levels = dam_nr$EVTYPE)

# Bar chart: total damage for the ten event types in dam_nr
ggplot(data = dam_nr, mapping = aes(x = EVTYPE, y = DAMAGE)) +
    geom_bar(stat = "identity", fill = "red") +
    labs(
        x = "Event Type",
        y = "Damages",
        title = "Property & Crop damages for the top 10 weather events"
    ) +
    # Rotate the event names so they stay readable.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

As seen in the picture, floods cause the most damage.