Synopsis

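This document summarizes an analysis of the storm database: which types of weather events are most harmful to population health, and which have the greatest economic consequences, across the United States.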

This is a file for Peer Assessment 2 in Coursera’s Reproducible Research course.

Data Processing

Look for the data file in the current folder. If the file doesn’t exist, download it.

# Load the storm data into `mydata`. On first use the bzip2-compressed CSV is
# downloaded and an uncompressed copy is cached in the working directory;
# later runs read the cached copy directly.
if (!file.exists("repdata_data_StormData.csv")) {
    f <- tempfile()
    myURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
    # The archive is binary: mode = "wb" keeps it intact on Windows, where a
    # text-mode transfer would corrupt it.
    download.file(myURL, f, mode = "wb")
    mydata <- read.csv(bzfile(f), header = TRUE)
    # The download is no longer needed once it has been parsed.
    unlink(f)
    # row.names = FALSE: otherwise the row numbers are written as an unnamed
    # first column, which comes back as a spurious "X" column when the cache
    # is read, so the two branches would return differently shaped data.
    write.csv(mydata, file = "repdata_data_StormData.csv", row.names = FALSE)
} else {
    mydata <- read.csv("repdata_data_StormData.csv", header = TRUE)
}
#head(mydata)
# Columns needed for the population-health question (Question 1).
popHealthDF <- data.frame(subset(mydata, select = c(EVTYPE, FATALITIES, INJURIES)))

Results

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.1
head(popHealthDF)
##    EVTYPE FATALITIES INJURIES
## 1 TORNADO          0       15
## 2 TORNADO          0        0
## 3 TORNADO          0        2
## 4 TORNADO          0        2
## 5 TORNADO          0        2
## 6 TORNADO          0        6
# Total fatalities per event type.
aggFAT <- aggregate(FATALITIES ~ EVTYPE, popHealthDF, sum)
# Keep the 20 event types with the most fatalities. head() is used instead of
# [1:20, ] so that no all-NA rows are produced when fewer than 20 event types
# are present.
top20HarmfulEvents <- head(aggFAT[order(aggFAT$FATALITIES, decreasing = TRUE), ], 20)
# Point plot of fatalities by event type. Built with ggplot() rather than the
# deprecated qplot(), and the columns are mapped by name inside aes() instead
# of being passed as df$col vectors next to `data =`.
q <- ggplot(top20HarmfulEvents, aes(x = EVTYPE, y = FATALITIES)) +
    geom_point() +
    labs(title = "EventType Vs Fatalities",
         x = "Event Type",
         y = "Number of fatalities") +
    # Rotate the event-type labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Explicit print() so the plot is also drawn when the script is source()d.
print(q)

plot of chunk unnamed-chunk-2

# Drop the intermediate objects of the fatalities analysis from the workspace.
rm(list = c("aggFAT", "top20HarmfulEvents", "q"))

The plot above shows the 20 event types that resulted in the most fatalities, with tornadoes and excessive heat accounting for the largest numbers of fatalities across the United States.

# Total injuries per event type.
aggINJ <- aggregate(INJURIES ~ EVTYPE, popHealthDF, sum)
# Keep the 20 event types with the most injuries. head() is used instead of
# [1:20, ] so that no all-NA rows are produced when fewer than 20 event types
# are present.
top20HarmfulInjuries <- head(aggINJ[order(aggINJ$INJURIES, decreasing = TRUE), ], 20)
# Point plot of injuries by event type. Built with ggplot() rather than the
# deprecated qplot(); this also removes the stray empty argument the original
# call contained.
q <- ggplot(top20HarmfulInjuries, aes(x = EVTYPE, y = INJURIES)) +
    geom_point() +
    labs(title = "EventType Vs Injuries",
         x = "Event Type",
         y = "Number of Injuries") +
    # Rotate the event-type labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Explicit print() so the plot is also drawn when the script is source()d.
print(q)

plot of chunk unnamed-chunk-3

# Drop the intermediate objects of the injuries analysis from the workspace.
rm(list = c("aggINJ", "top20HarmfulInjuries", "q"))

Tornadoes cause the largest number of injuries. The other event types are shown in the plot above.

Question 2: Across the United States, which types of events have the greatest economic consequences?

This question is similar, but here we need to look at other kinds of damage: property damage and crop damage.

Add a new column to the data that holds the total damage, i.e. property damage plus crop damage.

# List the columns available in the raw storm data.
names(mydata)
##  [1] "X"          "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE" 
##  [6] "COUNTY"     "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE" 
## [11] "BGN_AZI"    "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END"
## [16] "COUNTYENDN" "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"    
## [21] "WIDTH"      "F"          "MAG"        "FATALITIES" "INJURIES"  
## [26] "PROPDMG"    "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"       
## [31] "STATEOFFIC" "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E"
## [36] "LONGITUDE_" "REMARKS"    "REFNUM"
# Keep the event type together with the property- and crop-damage amounts.
economyDamage <- data.frame(mydata[c("EVTYPE", "PROPDMG", "CROPDMG")])
head(economyDamage)
##    EVTYPE PROPDMG CROPDMG
## 1 TORNADO    25.0       0
## 2 TORNADO     2.5       0
## 3 TORNADO    25.0       0
## 4 TORNADO     2.5       0
## 5 TORNADO     2.5       0
## 6 TORNADO     2.5       0
# Multiplier implied by a PROPDMGEXP / CROPDMGEXP magnitude code.
#
# exp_code: vector (character or factor) of magnitude codes.
# Returns a numeric vector of the same length: 1e3 for "K", 1e6 for "M",
# 1e9 for "B" (case-insensitive) and 1 for everything else.
# NOTE(review): only K/M/B are scaled here; blank, symbol and digit codes are
# left unscaled, so those amounts are used as recorded. Confirm this is the
# intended treatment against the data set's documentation.
damage_multiplier <- function(exp_code) {
    code <- toupper(as.character(exp_code))
    multiplier <- rep(1, length(code))
    # %in% (rather than ==) so that NA codes simply keep the multiplier 1.
    multiplier[code %in% "K"] <- 1e3
    multiplier[code %in% "M"] <- 1e6
    multiplier[code %in% "B"] <- 1e9
    multiplier
}
# Total damage per record. PROPDMG and CROPDMG are only mantissas: without
# applying the magnitude codes, 2.5 (thousand) and 2.5 (billion) would be
# counted as the same amount. economyDamage is a column subset of mydata, so
# its rows line up one-to-one with mydata's magnitude-code columns.
# NOTE(review): the narrative following this chunk was written from unscaled
# totals; re-run the analysis and revisit it.
economyDamage$totalDMG <-
    economyDamage$PROPDMG * damage_multiplier(mydata$PROPDMGEXP) +
    economyDamage$CROPDMG * damage_multiplier(mydata$CROPDMGEXP)
#Aggregate the damages by event types
aggDMG <- aggregate(totalDMG ~ EVTYPE, economyDamage, sum)
# Keep the 20 costliest event types. head() is used instead of [1:20, ] so
# that no all-NA rows are produced when fewer than 20 event types are present.
top20DMG <- head(aggDMG[order(aggDMG$totalDMG, decreasing = TRUE), ], 20)
# Point plot of total damage by event type. Built with ggplot() rather than
# the deprecated qplot(); this also removes the stray empty argument the
# original call contained.
q <- ggplot(top20DMG, aes(x = EVTYPE, y = totalDMG)) +
    geom_point() +
    labs(title = "EventType Vs Total damages (Property and crops)",
         x = "Event Type",
         y = "Damages") +
    # Rotate the event-type labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(q)

plot of chunk unnamed-chunk-4

The greatest economic consequences are due to tornadoes, followed by flash floods and thunderstorm wind, as is evident from the plot. Other, less severe event types are also shown in the plot.