R Markdown

Course Project: Finding Event Types’ Impact on Health and Economy. This project uses the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The data record the fatalities, injuries, property damage, and crop damage caused by different types of weather events.

DATA PROCESSING

# Download the NOAA storm data archive (only if it is not already on disk)
# and load it into `data`.
#
# The archive is stored at Course/Course relative to the working directory.
# The path is built explicitly so the script does not need setwd(), which
# would silently change the working directory for everything that follows.
storm_dir <- "Course"
storm_file <- file.path(storm_dir, "Course")
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!file.exists(storm_file)) {
  # Make sure the target directory exists before curl tries to write into it.
  dir.create(storm_dir, showWarnings = FALSE)
  download.file(storm_url, destfile = storm_file, method = "curl")
}

# Read the file once, taking the column names from the header row.
# read.csv() is used instead of read.table() because its defaults
# (quote = "\"", comment.char = "") treat apostrophes and '#' inside
# free-text fields as ordinary characters rather than as quote/comment
# markers. check.names = FALSE keeps the header names exactly as written
# in the file. The bzip2 compression is handled by R's file connection.
data <- read.csv(storm_file, header = TRUE, check.names = FALSE)

We have to find the types of events that are most harmful with respect to population health across the United States.

First, we find the 10 event types with the highest total number of fatalities.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total fatalities per event type, ranked from most to least deadly.
# `.groups = "drop"` returns an ungrouped tibble and silences the
# summarise() regrouping message.
fatalities_evtype <- data %>%
  group_by(EVTYPE) %>%
  summarise(FATALITIES = sum(FATALITIES, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(FATALITIES))

# head() returns at most 10 rows; indexing with [1:10, ] would pad the
# result with all-NA rows if fewer than 10 event types were present.
top10fatalities <- head(fatalities_evtype, 10)

Second, we find the 10 event types with the highest total number of injuries.

# Total injuries per event type, ranked from most to least harmful.
# na.rm = TRUE mirrors the fatalities summary: a single missing value must
# not turn an event type's whole total into NA. `.groups = "drop"` returns
# an ungrouped tibble and silences the summarise() regrouping message.
injuries_evtype <- data %>%
  group_by(EVTYPE) %>%
  summarise(Injuries = sum(INJURIES, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Injuries))

# head() returns at most 10 rows; indexing with [1:10, ] would pad the
# result with all-NA rows if fewer than 10 event types were present.
top10injuries <- head(injuries_evtype, 10)

We have to find the types of events that have the greatest economic consequences. The data provide two measures of economic impact: property damage (PROPDMG) and crop damage (CROPDMG). The actual damage in USD is obtained by multiplying each value by the multiplier encoded in the corresponding PROPDMGEXP or CROPDMGEXP column: H, h -> hundreds = x100; K, k -> thousands = x1,000; M, m -> millions = x1,000,000; B, b -> billions = x1,000,000,000; digits 0-8 -> x10; (+) -> x1; (-) -> x0; (?) -> x0; blank -> x0.

The total damage caused by each event type:

# Estimate the total economic damage (property + crop, in USD) per event type.

# Keep only the columns needed for the damage computation.
totaldamage <- data %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Explicit exponent-symbol -> multiplier table.
# The symbols are spelled out instead of being derived from
# sort(unique(PROPDMGEXP)): pairing multipliers positionally with a sorted
# vector depends on the locale's collation order and on exactly which
# symbols happen to occur in the property column. Letters are stored in
# upper case only; the lookup below upper-cases the data, so "h", "k", "m"
# and "b" are covered as well.
Symbol <- c("", "-", "?", "+", as.character(0:8), "H", "K", "M", "B")
Multiplier <- c(0, 0, 0, 1, rep(10, 9), 10^2, 10^3, 10^6, 10^9)
convert.Multiplier <- data.frame(Symbol, Multiplier, stringsAsFactors = FALSE)

# Case-insensitive lookup; a symbol that is not in the table yields NA.
totaldamage$Prop.Multiplier <- convert.Multiplier$Multiplier[
  match(toupper(as.character(totaldamage$PROPDMGEXP)), convert.Multiplier$Symbol)
]
totaldamage$Crop.Multiplier <- convert.Multiplier$Multiplier[
  match(toupper(as.character(totaldamage$CROPDMGEXP)), convert.Multiplier$Symbol)
]

# Scale the raw figures to USD and add property and crop damage per record.
# mutate() evaluates its arguments in order, so TOTAL.DMG sees the scaled values.
totaldamage <- totaldamage %>%
  mutate(
    PROPDMG = PROPDMG * Prop.Multiplier,
    CROPDMG = CROPDMG * Crop.Multiplier,
    TOTAL.DMG = PROPDMG + CROPDMG
  )

# Sum per event type. na.rm = TRUE keeps a single record with an
# unrecognised symbol from turning the whole event type's total into NA.
damage.total <- totaldamage %>%
  group_by(EVTYPE) %>%
  summarize(TOTAL.DMG.EVTYPE = sum(TOTAL.DMG, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(TOTAL.DMG.EVTYPE))

# head() returns at most 10 rows; indexing with [1:10, ] would pad the
# result with all-NA rows if fewer than 10 event types were present.
top10damage <- head(damage.total, 10)

RESULTS

library(ggplot2)
# Bar chart of the ten event types with the most fatalities. reorder() on
# the negated totals places the tallest bar first.
g <- ggplot(
  data = top10fatalities,
  mapping = aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
)
g <- g + geom_bar(stat = "identity")
g <- g + xlab("Event Type") + ylab("Total number of fatalities")
g <- g + ggtitle("Top 10 Weather event types with Highest Total fatalities ")
print(g)

# Bar chart of the ten event types with the most injuries. reorder() on the
# negated totals places the tallest bar first.
g1 <- ggplot(
  data = top10injuries,
  mapping = aes(x = reorder(EVTYPE, -Injuries), y = Injuries)
)
g1 <- g1 + geom_bar(stat = "identity")
g1 <- g1 + xlab("Event Type") + ylab("Total number of Injuries")
g1 <- g1 + ggtitle("Top 10 Weather event types with Highest Total injuries")
print(g1)

# Bar chart of the ten event types with the largest combined property and
# crop damage. reorder() on the negated totals places the tallest bar first.
g3 <- ggplot(
  data = top10damage,
  mapping = aes(x = reorder(EVTYPE, -TOTAL.DMG.EVTYPE), y = TOTAL.DMG.EVTYPE)
)
g3 <- g3 + geom_bar(stat = "identity")
g3 <- g3 + xlab("Event Type") + ylab("Economic Consequences ")
g3 <- g3 + ggtitle("Top 10 Weather event types' impact on economy")
print(g3)