Course Project: Finding Event Types’ Impact on Health and Economy. This project uses the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The data record the fatalities, injuries, property damage, and crop damage caused by different weather event types.
DATA PROCESSING
# Download the compressed NOAA storm data once (skipped when a local copy
# already exists) and load it, using the file's header row as column names.
storm_url <- paste0(
  "https://d396qusza40orc.cloudfront.net/",
  "repdata%2Fdata%2FStormData.csv.bz2"
)
storm_dir <- "Course"
storm_file <- file.path(storm_dir, "StormData.csv.bz2")
if (!dir.exists(storm_dir)) {
  dir.create(storm_dir, recursive = TRUE)
}
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the bzip2 archive byte-exact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
# read.csv() reads the .bz2 file directly. Unlike read.table(sep = ","), it
# treats only '"' as a quote character and does not interpret '#' as a comment,
# so free-text fields containing apostrophes or '#' are parsed correctly.
# check.names = FALSE keeps the header names exactly as written in the file.
data <- read.csv(
  storm_file,
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)
We have to find the types of events that are most harmful with respect to population health across the United States.
First, we find the 10 event types with the highest total number of fatalities.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total fatalities per event type (missing counts ignored), sorted from the
# deadliest event type downwards.
fatalities_evtype <- data %>%
  group_by(EVTYPE) %>%
  summarise(FATALITIES = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(FATALITIES))
# head() returns at most 10 rows and never pads with NA rows when there are
# fewer than 10 event types.
top10fatalities <- head(fatalities_evtype, 10)
Second, we find the 10 event types with the highest total number of injuries.
# Total injuries per event type, sorted from the most injurious downwards.
# na.rm = TRUE keeps a single missing count from turning a whole event type's
# total into NA.
injuries_evtype <- data %>%
  group_by(EVTYPE) %>%
  summarise(Injuries = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(Injuries))
# head() returns at most 10 rows and never pads with NA rows when there are
# fewer than 10 event types.
top10injuries <- head(injuries_evtype, 10)
We also have to find the types of events that have the greatest economic consequences. The data provide two measures of economic impact: property damage (PROPDMG) and crop damage (CROPDMG). The damage in USD is obtained by multiplying each value by the magnitude encoded in PROPDMGEXP and CROPDMGEXP, respectively: H, h -> hundreds = x100; K, k -> thousands = x1,000; M, m -> millions = x1,000,000; B, b -> billions = x1,000,000,000; digits 0-8 -> x10; (+) -> x1; (-) -> x0; (?) -> x0; blank -> x0.
The total damage caused by each event type:
# Explicit exponent-code -> multiplier table. Each symbol is paired with its
# multiplier by construction, so the mapping does not depend on how the
# current locale sorts the codes or on which codes occur in the data.
# Letters are stored in upper case only; lookups upper-case the code first.
Symbol <- c("", "-", "?", "+", as.character(0:8), "H", "K", "M", "B")
Multiplier <- c(0, 0, 0, 1, rep(10, 9), 10^2, 10^3, 10^6, 10^9)
convert.Multiplier <- data.frame(Symbol, Multiplier)

# Translate a vector of exponent codes into numeric multipliers.
# Matching is case-insensitive ("k" and "K" both give 1,000). Codes that are
# not in the table (including NA) contribute nothing (x0) rather than
# producing NA and poisoning the per-event totals.
lookup_multiplier <- function(exp_code) {
  position <- match(toupper(as.character(exp_code)), convert.Multiplier$Symbol)
  multiplier <- convert.Multiplier$Multiplier[position]
  multiplier[is.na(multiplier)] <- 0
  multiplier
}

# Convert property and crop damage to USD and add them up per record.
totaldamage <- data %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
  mutate(
    Prop.Multiplier = lookup_multiplier(PROPDMGEXP),
    Crop.Multiplier = lookup_multiplier(CROPDMGEXP),
    PROPDMG = PROPDMG * Prop.Multiplier,
    CROPDMG = CROPDMG * Crop.Multiplier,
    TOTAL.DMG = PROPDMG + CROPDMG
  )

# Total damage per event type, most costly first.
damage.total <- totaldamage %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL.DMG.EVTYPE = sum(TOTAL.DMG, na.rm = TRUE)) %>%
  arrange(desc(TOTAL.DMG.EVTYPE))
# head() returns at most 10 rows and never pads with NA rows when there are
# fewer than 10 event types.
top10damage <- head(damage.total, 10)
RESULTS
library(ggplot2)
# Bar chart of total fatalities for the ten event types in top10fatalities,
# with bars ordered from the highest total to the lowest.
g <- ggplot(
  top10fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Total number of fatalities",
    title = "Top 10 Weather event types with Highest Total fatalities "
  )
print(g)
# Bar chart of total injuries for the ten event types in top10injuries,
# with bars ordered from the highest total to the lowest.
g1 <- ggplot(
  top10injuries,
  aes(x = reorder(EVTYPE, -Injuries), y = Injuries)
) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Total number of Injuries",
    title = "Top 10 Weather event types with Highest Total injuries"
  )
print(g1)
# Bar chart of total economic damage for the ten event types in top10damage,
# with bars ordered from the highest total to the lowest.
g3 <- ggplot(
  top10damage,
  aes(x = reorder(EVTYPE, -TOTAL.DMG.EVTYPE), y = TOTAL.DMG.EVTYPE)
) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Economic Consequences ",
    title = "Top 10 Weather event types' impact on economy"
  )
print(g3)