This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
You can download the file from the course web site:
Storm Data
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
This analysis identifies which storm events are most harmful to human health, and it also estimates how much economic damage (i.e., damage to crops and property) the various storm events cause.
Loading the data.
# Fetch the NOAA storm data once and cache it locally as "data.csv".
# The cached file is still bzip2-compressed despite its ".csv" name;
# read.csv() reads compressed files directly.
data_file <- "data.csv"
if (!file.exists(data_file)) {
  # Download under a temporary name first, so that an interrupted or failed
  # transfer is never mistaken for a complete cached copy on the next run.
  partial_file <- paste0(data_file, ".part")
  status <- download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    partial_file,
    method = "curl"
  )
  # download.file() returns 0 on success; anything else means the file on
  # disk cannot be trusted.
  if (status != 0 || !file.rename(partial_file, data_file)) {
    unlink(partial_file)
    stop("Downloading the storm data failed (status ", status, ").",
         call. = FALSE)
  }
}
data <- read.csv(data_file)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
Loading required libraries.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(ggplot2)
Cleaning the data.
# Parse the begin and end date columns from month/day/year
# hour:minute:second text into proper date-time values.
date_columns <- c("BGN_DATE", "END_DATE")
data[date_columns] <- lapply(data[date_columns], mdy_hms)
Calculating total property and crop damage.
# Total property damage per record: PROPDMG scaled by the multiplier that its
# PROPDMGEXP code stands for. The multipliers are kept as text in
# newPropdmgexp and converted to numbers only for the multiplication.
# Codes that are not listed in the table yield NA.
prop_exp_table <- data.frame(
  code = c("-", "", "?", "+",
           "0", "1", "2", "3", "4", "5", "6", "7", "8",
           "B", "M", "m", "K", "H", "h"),
  multiplier = c("0", "0", "0", "1",
                 "10", "10", "10", "10", "10", "10", "10", "10", "10",
                 "1000000000", "1000000", "1000000", "1000", "100", "100"),
  stringsAsFactors = FALSE
)
data <- data %>%
  mutate(
    # match() rather than name-based indexing, because the empty string is
    # one of the codes and can never be matched as a vector name.
    newPropdmgexp = prop_exp_table$multiplier[
      match(PROPDMGEXP, prop_exp_table$code)
    ],
    tPropdmg = PROPDMG * as.numeric(newPropdmgexp)
  )
# Total crop damage per record: CROPDMG scaled by the multiplier that its
# CROPDMGEXP code stands for. The multipliers are kept as text in
# newCropdmgexp and converted to numbers only for the multiplication.
# Codes that are not listed in the table yield NA.
crop_exp_table <- data.frame(
  code = c("", "?", "0", "2", "B", "M", "m", "k", "K"),
  multiplier = c("0", "0", "10", "10",
                 "1000000000", "1000000", "1000000", "1000", "1000"),
  stringsAsFactors = FALSE
)
data <- data %>%
  mutate(
    # match() rather than name-based indexing, because the empty string is
    # one of the codes and can never be matched as a vector name.
    newCropdmgexp = crop_exp_table$multiplier[
      match(CROPDMGEXP, crop_exp_table$code)
    ],
    tCropdmg = CROPDMG * as.numeric(newCropdmgexp)
  )
Limiting the analysis to more recent years, whose records are more complete, so that the results are more accurate.
# Keep only the events whose begin date falls in 1990 or later.
data <- filter(data, year(BGN_DATE) >= 1990)
# Total fatalities and injuries for each storm event type, ordered from the
# most to the fewest fatalities (ties broken by injuries, also descending).
health <- summarise(
  group_by(data, EVTYPE),
  tFatalities = sum(FATALITIES, na.rm = TRUE),
  tInjuries = sum(INJURIES, na.rm = TRUE)
)
health <- arrange(health, desc(tFatalities), desc(tInjuries))
# The full table is too large to plot, so only the first ten rows of `health`
# are kept and reshaped to long format (one row per event type and measure).
# head() returns at most ten rows and never indexes past the end of the
# table, which `health[1:10, ]` does when fewer than ten event types exist.
healthImpact <- melt(
  head(health, 10),
  id.vars = "EVTYPE",
  measure.vars = c("tFatalities", "tInjuries")
)
# Horizontal dodged bars: one bar per measure (fatalities, injuries) for each
# event type.
g <- ggplot(healthImpact, aes(x = value, y = EVTYPE, fill = variable))
g +
  geom_col(position = "dodge") +
  labs(
    x = "Number of People",
    y = "Event Type",
    title = "Most harmful events with respect to Population health"
  )
# Total property and crop damage for each storm event type, ordered from the
# largest to the smallest property damage (ties broken by crop damage).
# na.rm is spelled TRUE rather than T, because T is an ordinary variable that
# can be reassigned.
damage <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    tPropdmg = sum(tPropdmg, na.rm = TRUE),
    tCropdmg = sum(tCropdmg, na.rm = TRUE)
  ) %>%
  arrange(desc(tPropdmg), desc(tCropdmg))
# The full table is too large to plot, so only the first ten rows of `damage`
# are kept and reshaped to long format (one row per event type and measure).
# head() returns at most ten rows and never indexes past the end of the
# table, which `damage[1:10, ]` does when fewer than ten event types exist.
damageImpact <- melt(
  head(damage, 10),
  id.vars = "EVTYPE",
  measure.vars = c("tPropdmg", "tCropdmg")
)
# Horizontal bars of property and crop damage per event type, drawn in one
# panel per measure with independent axis scales.
d <- ggplot(damageImpact, aes(x = value, y = EVTYPE, fill = variable))
d +
  geom_col(position = "dodge") +
  labs(
    x = "Damage in USD",
    y = "Event Type",
    title = "Events having greatest economic consequences"
  ) +
  facet_grid(. ~ variable, scales = "free")