Assignment: Course Project 2

Introduction

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database contains information about major storms and other weather events in the United States, including the date and the place they occurred, as well as estimates of any fatalities, injuries, and property damage.

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

Objectives

To identify events that are harmful to population health and to identify events that have the worst economic effects.

In this report, the effect of weather events on personal as well as property damages was studied. Barplots were plotted separately for the top 10 weather events that cause the highest fatalities and the highest injuries. Results indicate that most fatalities and injuries were caused by tornadoes. Also, barplots were plotted for the top 10 weather events that cause the highest property damage and crop damage.

Import Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Open the data

## Source file
## https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

# Fetch the compressed archive only when no local copy exists yet, so
# repeated runs of the report do not download it again.
if (!file.exists("StormData.csv.bz2")) {
  Url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url = Url, destfile = "./StormData.csv.bz2")
}

# The .bz2 path is handed straight to read.csv(); the header row and the
# comma separator are stated explicitly.
StormData <- read.csv("StormData.csv.bz2", header = TRUE, sep = ",")

# List the available columns before choosing the ones to analyse.
names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Working copy restricted to the columns this analysis uses: the event type,
# its start date, the casualty counts, and the damage amounts together with
# their exponent codes.
db <- StormData[c(
  "EVTYPE", "BGN_DATE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]

# Convert the raw begin-date text (month/day/year hour:minute:second) into
# POSIXct date-times.
db[["BGN_DATE"]] <- as.POSIXct(
  db[["BGN_DATE"]],
  format = "%m/%d/%Y %H:%M:%S"
)

unique(db$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Assigning values for the property exponent.
#
# Every exponent code is translated to its multiplier through ONE lookup.
# (A chain of `mutate(P = ifelse(code == x, value, 0))` calls would reassign P
# on every step, leaving a non-zero multiplier only for the last code tested.)
#
# Codes are compared case-insensitively, so "m"/"M" and "h"/"H" share an entry:
#   ""  -> 1       (no exponent given: amount taken as-is)
#   0-8 -> 10^digit
#   H   -> 1e2, K -> 1e3, M -> 1e6, B -> 1e9 (billion)
prop_exp_codes <- c(
  "", "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "H", "K", "M", "B"
)
prop_exp_values <- c(
  1, 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
  1e2, 1e3, 1e6, 1e9
)
db %<>% mutate(
  P = prop_exp_values[match(toupper(PROPDMGEXP), prop_exp_codes)]
)

# Codes with no defined multiplier ("+", "-", "?") do not match the table and
# come back as NA; they are given a multiplier of 0 so they add no damage.
db %<>% mutate(P = ifelse(is.na(P), 0, P))

# Calculating the damage value
db %<>% mutate(PV = PROPDMG * P)


unique(db$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"
# Assigning values for the crop exponent data.
#
# Every exponent code is translated to its multiplier through ONE lookup.
# (A chain of `mutate(C = ifelse(code == x, value, 0))` calls would reassign C
# on every step, leaving a non-zero multiplier only for the last code tested.)
#
# Codes are compared case-insensitively, so "k"/"K" and "m"/"M" share an entry:
#   ""  -> 1      (no exponent given: amount taken as-is)
#   "0" -> 1, "2" -> 1e2
#   K   -> 1e3, M -> 1e6, B -> 1e9
crop_exp_codes <- c("", "0", "2", "K", "M", "B")
crop_exp_values <- c(1, 1, 1e2, 1e3, 1e6, 1e9)
db %<>% mutate(
  C = crop_exp_values[match(toupper(CROPDMGEXP), crop_exp_codes)]
)

# Codes with no defined multiplier ("?") do not match the table and come back
# as NA; they are given a multiplier of 0 so they add no damage.
db %<>% mutate(C = ifelse(is.na(C), 0, C))

# calculating the crop damage value
db %<>% mutate(CV = CROPDMG * C)

# Per-event totals: each table has one row per EVTYPE holding the sum of the
# named measure over all records of that event type.
fatal <- aggregate(
  FATALITIES ~ EVTYPE,
  data = db,
  FUN = sum
)
injury <- aggregate(
  INJURIES ~ EVTYPE,
  data = db,
  FUN = sum
)
propdmg <- aggregate(
  PV ~ EVTYPE,
  data = db,
  FUN = sum
)
cropdmg <- aggregate(
  CV ~ EVTYPE,
  data = db,
  FUN = sum
)

Question 1: Across the United States, which types of events are most harmful with respect to population health?

# Ten event types with the largest fatality totals, largest first.
fataltop10 <- fatal[order(-fatal[["FATALITIES"]]), ]
fataltop10 <- fataltop10[1:10, ]

# Ten event types with the largest injury totals, largest first.
injurytop10 <- injury[order(-injury[["INJURIES"]]), ]
injurytop10 <- injurytop10[1:10, ]

Population Health Results: Top 10 major causes of deaths and injuries

# Bar chart of the top-10 fatality totals; bars are ordered from the largest
# total to the smallest and the x labels are rotated to stay readable.
f <- ggplot(
  data = fataltop10,
  mapping = aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_col() +
  labs(
    title = "Events with Highest Fatalities",
    x = "Types of events",
    y = "Number of fatalities"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Same chart for the top-10 injury totals.
i <- ggplot(
  data = injurytop10,
  mapping = aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_col() +
  labs(
    title = "Events with Highest Injuries",
    x = "Types of events",
    y = "Number of injuries"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Show both charts side by side (one row, two columns).
grid.arrange(f, i, ncol = 2)

Question 2: Across the United States, which types of events have the greatest economic consequences?

# Ten event types with the largest property-damage totals, largest first.
proptop10 <- propdmg[order(-propdmg[["PV"]]), ]
proptop10 <- proptop10[1:10, ]

# Ten event types with the largest crop-damage totals, largest first.
croptop10 <- cropdmg[order(-cropdmg[["CV"]]), ]
croptop10 <- croptop10[1:10, ]

Economic Results: Top 10 major causes of economic damage

# Bar chart of the top-10 property-damage totals (PV); bars are ordered from
# the largest total to the smallest and the x labels are rotated to stay
# readable.
p <- ggplot(data = proptop10, aes(x = reorder(EVTYPE, -PV), y = PV)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  ggtitle("Events with Highest Property Damages") +
  xlab("Types of events") + ylab("Property Damages")

# Same chart for the top-10 crop-damage totals (CV).
# The plot object is deliberately not called `c`, which would mask base::c()
# in the workspace, and the title names crop damage explicitly so the two
# side-by-side panels can be told apart.
crop_plot <- ggplot(data = croptop10, aes(x = reorder(EVTYPE, -CV), y = CV)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  ggtitle("Events with Highest Crop Damages") +
  xlab("Types of events") + ylab("Crop Damages")

# Put two plots generated above  in two columns
grid.arrange(p, crop_plot, ncol = 2)

Summary: From the plots shown above, the main weather event that caused the majority of deaths and injuries was tornadoes. The economic impact came from tornadoes, too, and from drought.