Natural events cause health and economic problems. In this project, we explore which types of natural events have the greatest effect on human health (fatalities and injuries) and the greatest economic consequences, using the U.S. NOAA storm database.
The data contain the natural event type, the number of fatalities, the number of injuries, and the property and crop damage done by each event. First, we need to apply multipliers to PROPDMG and CROPDMG to start our analysis.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
## Warning: package 'forcats' was built under R version 3.6.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
# Load the NOAA storm data and convert the recorded damage figures to dollars.
data <- read.csv("E:/Nada/Others/Courses/Data Science Specialization/Ex/Course 5/Assignment 2/repdata_data_StormData.csv")

# Translate a damage-exponent code into its numeric multiplier.
#
# exp_code: vector of exponent codes (character or factor), e.g. PROPDMGEXP.
# Returns a numeric vector of the same length: 1e3 for "K", 1e6 for "M",
# 1e9 for "B", and NA for any other code (including the empty string).
# Matching is case-insensitive so that "k"/"m"/"b" are not silently turned
# into NA.
exp_multiplier <- function(exp_code) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  # as.character() so a factor is looked up by its label, not its integer code.
  unname(multipliers[toupper(as.character(exp_code))])
}

# PROP_FIG / CROP_FIG hold the multipliers; PROP_VAL / CROP_VAL hold the
# damage figures scaled by them (NA where the exponent code is unrecognised).
data <- mutate(
  data,
  PROP_FIG = exp_multiplier(PROPDMGEXP),
  CROP_FIG = exp_multiplier(CROPDMGEXP),
  PROP_VAL = PROPDMG * PROP_FIG,
  CROP_VAL = CROPDMG * CROP_FIG
)
# Fatalities per event type: normalise the event labels to upper case, total
# the fatalities for each label, and keep the 15 largest totals.
data$EVTYPE <- toupper(data$EVTYPE)
sumF <- aggregate(
  as.numeric(data$FATALITIES),
  by = list(EVTYPE = data$EVTYPE),
  FUN = sum
)
# Negating the totals makes order() rank them from largest to smallest.
fatalRank <- order(-sumF$x)
sumFTop <- sumF[fatalRank, ][1:15, ]
head(sumFTop)
## EVTYPE x
## 758 TORNADO 5633
## 116 EXCESSIVE HEAT 1903
## 138 FLASH FLOOD 978
## 243 HEAT 937
## 418 LIGHTNING 816
## 779 TSTM WIND 504
#ggplot(data = sumFTop, mapping = aes(x = EVTYPE, y = x, fill = EVTYPE)) +
# geom_bar(stat = 'identity', show.legend = FALSE) + coord_flip() +
# labs(title = "The Top 15 severe weather events that were most harmful to #\n population health - Fatalities", x = "Event type", y = "fatalities")
# Injuries per event type: normalise the event labels to upper case, total
# the injuries for each label, and keep the 15 largest totals.
data$EVTYPE <- toupper(data$EVTYPE)
sumI <- aggregate(
  as.numeric(data$INJURIES),
  by = list(EVTYPE = data$EVTYPE),
  FUN = sum
)
# Negating the totals makes order() rank them from largest to smallest.
injuryRank <- order(-sumI$x)
sumITop <- sumI[injuryRank, ][1:15, ]
head(sumITop)
## EVTYPE x
## 758 TORNADO 91346
## 779 TSTM WIND 6957
## 154 FLOOD 6789
## 116 EXCESSIVE HEAT 6525
## 418 LIGHTNING 5230
## 243 HEAT 2100
#ggplot(data = sumITop, mapping = aes(x = EVTYPE, y = x, fill = EVTYPE)) +
# geom_bar(stat = 'identity', show.legend = FALSE) + coord_flip() +
# labs(title = "The Top 15 severe weather events that were most harmful to #\n population health - Injuries", x = "Event type", y = "Injuries")
# Combined population harm (injuries + fatalities) per event type.
#
# sumI's total column is renamed to "sum" so it does not collide with sumF's
# "x" column when the two tables are merged on EVTYPE.
names(sumI)[names(sumI) == "x"] <- "sum"
total <- merge(sumI, sumF, by = "EVTYPE")

# Build the combined table from the merged frame itself, so each total is
# paired with the EVTYPE of its own row rather than relying on `total` and
# `sumI` happening to share the same row order and row count.
totalHarm <- data.frame(
  EVTYPE = total$EVTYPE,
  sum = total$sum + total$x,
  stringsAsFactors = FALSE
)

# head() keeps at most 15 rows and never pads with NA rows.
totalTop <- head(totalHarm[order(-totalHarm$sum), ], 15)

# Fix the factor levels to the ranked order so the bars are drawn by
# magnitude instead of alphabetically.
totalTop$EVTYPE <- factor(totalTop$EVTYPE, levels = totalTop$EVTYPE)

ggplot(data = totalTop, mapping = aes(x = EVTYPE, y = sum, fill = EVTYPE)) +
  geom_bar(stat = 'identity', show.legend = FALSE) +
  coord_flip() +
  labs(title = "The Top 15 severe weather events that were most harmful to \n population health", x = "Event type", y = "Fatalities + Injuries")
# Total economic damage (property + crop, in dollars) per event type.
#
# Uses the multiplier-scaled PROP_VAL / CROP_VAL columns rather than the raw
# PROPDMG figure, so that crop damage is included and thousands, millions and
# billions are weighted correctly. Rows whose exponent code was not recognised
# have an NA value; they are counted as 0 so that one such row does not turn
# the whole event-type total into NA.
propDamage <- ifelse(is.na(data$PROP_VAL), 0, data$PROP_VAL)
cropDamage <- ifelse(is.na(data$CROP_VAL), 0, data$CROP_VAL)

damages <- aggregate(
  propDamage + cropDamage,
  by = list(EVTYPE = data$EVTYPE),
  FUN = sum
)
names(damages) <- c("EVTYPE", "TOTALDAMAGE")

# Keep the 15 costliest event types; head() never pads with NA rows.
damages <- head(damages[order(-damages$TOTALDAMAGE), ], 15)

# Fix the factor levels to the ranked order so the bars are drawn by
# magnitude instead of alphabetically.
damages$EVTYPE <- factor(damages$EVTYPE, levels = damages$EVTYPE)

ggplot(data = damages, mapping = aes(x = EVTYPE, y = TOTALDAMAGE, fill = EVTYPE)) +
  geom_bar(stat = 'identity', show.legend = FALSE) +
  coord_flip() +
  labs(title = "The Top 15 severe weather events that were most harmful to \n economics", x = "Event type", y = "consequences")