Synopsis

Natural events cause both health and economic problems. In this project, we use the U.S. NOAA storm database to explore which types of natural events have the greatest effect on population health (fatalities and injuries) and which have the greatest economic consequences.

Data Processing

The data contain the type of each natural event, the number of fatalities, the number of injuries, and the property and crop damage done by each event. First, we need to apply the exponent multipliers (PROPDMGEXP and CROPDMGEXP) to PROPDMG and CROPDMG before we start our analysis.

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
## Warning: package 'forcats' was built under R version 3.6.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
# Read the storm events CSV into `data`; the chunks below all work on this
# data frame.
# NOTE(review): the path is absolute and machine-specific, so the report only
# runs where the file exists at exactly this location.
data <- read.csv(
  file = "E:/Nada/Others/Courses/Data Science Specialization/Ex/Course 5/Assignment 2/repdata_data_StormData.csv"
)

# Translate the damage exponent codes into numeric multipliers and compute the
# dollar value of property and crop damage for every record.
#
# Codes are matched case-insensitively: "K"/"k" = thousand, "M"/"m" = million,
# "B"/"b" = billion. The earlier exact comparison against upper-case letters
# sent any lower-case code to NA, silently discarding that record's damage.
# Every other code (including a blank one) still yields NA, which propagates
# into PROP_VAL / CROP_VAL, so downstream sums must handle NA explicitly.
damage_multiplier <- function(exponent) {
  lookup <- c(K = 1e3, M = 1e6, B = 1e9)
  # Indexing a named vector by name returns NA for absent, empty or NA names.
  unname(lookup[toupper(as.character(exponent))])
}

data <- mutate(
  data,
  PROP_FIG = damage_multiplier(PROPDMGEXP),
  CROP_FIG = damage_multiplier(CROPDMGEXP),
  PROP_VAL = PROPDMG * PROP_FIG,
  CROP_VAL = CROPDMG * CROP_FIG
)

Results

Population Health Impact

Fatalities

# Put every event label in upper case so that spellings differing only in
# letter case are counted as one event type.
data$EVTYPE <- toupper(data$EVTYPE)

# Sum fatalities per event type; aggregate() stores the total in column `x`.
sumF <- aggregate(
  x = as.numeric(data$FATALITIES),
  by = list(EVTYPE = data$EVTYPE),
  FUN = sum
)

# Keep the 15 event types with the most fatalities, largest first.
sumFTop <- sumF[order(-sumF$x)[seq_len(15)], ]
head(sumFTop)
##             EVTYPE    x
## 758        TORNADO 5633
## 116 EXCESSIVE HEAT 1903
## 138    FLASH FLOOD  978
## 243           HEAT  937
## 418      LIGHTNING  816
## 779      TSTM WIND  504
#ggplot(data = sumFTop, mapping = aes(x = EVTYPE, y = x, fill = EVTYPE)) +
#        geom_bar(stat = 'identity', show.legend = FALSE) + coord_flip() +
#        labs(title = "The Top 15 severe weather events that were most harmful to #\n population health - Fatalities", x = "Event type", y = "fatalities")

Injuries

# Upper-case the event labels (a no-op if this was already done) so that
# spellings differing only in letter case are counted as one event type.
data$EVTYPE <- toupper(data$EVTYPE)

# Sum injuries per event type; aggregate() stores the total in column `x`.
sumI <- aggregate(
  x = as.numeric(data$INJURIES),
  by = list(EVTYPE = data$EVTYPE),
  FUN = sum
)

# Keep the 15 event types with the most injuries, largest first.
sumITop <- sumI[order(-sumI$x)[seq_len(15)], ]
head(sumITop)
##             EVTYPE     x
## 758        TORNADO 91346
## 779      TSTM WIND  6957
## 154          FLOOD  6789
## 116 EXCESSIVE HEAT  6525
## 418      LIGHTNING  5230
## 243           HEAT  2100
#ggplot(data = sumITop, mapping = aes(x = EVTYPE, y = x, fill = EVTYPE)) +
#        geom_bar(stat = 'identity', show.legend = FALSE) + coord_flip() +
#        labs(title = "The Top 15 severe weather events that were most harmful to #\n population health - Injuries", x = "Event type", y = "Injuries")

Total Harm to Population Health

# Combine the per-event injury and fatality totals into a single casualty
# count. The injury total is renamed to "sum" first so it does not clash with
# the fatality total, which is also called `x`, when the two are merged.
names(sumI)[names(sumI) == "x"] <- "sum"
total <- merge(sumI, sumF, by = "EVTYPE")

# Take both the labels and the values from the merged frame so each event type
# is guaranteed to sit beside its own total. Previously the labels came from
# `sumI` and the values from `total`, which only agreed by row position.
totalHarm <- total["EVTYPE"]
totalHarm$sum <- total$sum + total$x

# head() keeps at most 15 rows without padding NA rows when fewer exist.
totalTop <- head(totalHarm[order(-totalHarm$sum), ], 15)

# Fix the factor levels in ranked order; otherwise ggplot arranges the bars
# alphabetically by label rather than by harm.
totalTop$EVTYPE <- factor(totalTop$EVTYPE, levels = as.character(totalTop$EVTYPE))

ggplot(data = totalTop, mapping = aes(x = EVTYPE, y = sum, fill = EVTYPE)) +
        geom_bar(stat = 'identity', show.legend = FALSE) + coord_flip() +
        labs(title = "The Top 15 severe weather events that were most harmful to \n population health", x = "Event type", y = "Fatalities + Injuries")

Across the United States, which types of events have the greatest economic consequences?

# Total economic damage per event type = property damage + crop damage, using
# PROP_VAL and CROP_VAL (the raw figures already scaled by their exponent
# multipliers). The earlier expression, PROPDMG + PROPDMG, counted the unscaled
# property figure twice and left crop damage out altogether.
#
# A missing component is treated as 0 so that a record with, say, property
# damage but no usable crop figure still contributes its property damage.
na_to_zero <- function(x) replace(x, is.na(x), 0)

damages <- aggregate(
  x = list(TOTALDAMAGE = na_to_zero(data$PROP_VAL) + na_to_zero(data$CROP_VAL)),
  by = list(EVTYPE = data$EVTYPE),
  FUN = sum
)

# Keep the 15 costliest event types, largest first (no NA padding rows).
damages <- head(damages[order(-damages$TOTALDAMAGE), ], 15)

# Fix the factor levels in ranked order so the bars are drawn by damage rather
# than alphabetically.
damages$EVTYPE <- factor(damages$EVTYPE, levels = as.character(damages$EVTYPE))


ggplot(data = damages, mapping = aes(x = EVTYPE, y = TOTALDAMAGE, fill = EVTYPE)) +
        geom_bar(stat = 'identity', show.legend = FALSE) + coord_flip() +
        labs(title = "The Top 15 severe weather events that were most harmful to \n economics", x = "Event type", y = "consequences")