This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The events in the database start in the year 1950 and end in November 2011. The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data analysis addresses two key aspects: first, population health, measured in terms of fatalities and injuries; and second, economic consequences, measured in terms of property damage and crop damage.
Our analysis is done using the R statistical package and shows that, across the United States, TORNADO and EXCESSIVE HEAT had the major impact on fatalities, and TORNADO was the major contributor to injuries. Events such as FLOOD, HURRICANE and TORNADO had the maximum economic impact in terms of property damage, and DROUGHT had the major impact on crop damage. Our recommendation is to implement early warning systems for monitoring the key events that are most consequential for health and economic impact.
A.Data Source: link:https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
B.Data Dictionary-National Weather Service Storm Data:
link:https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf
C.National Climatic Data Center Storm Events FAQ link:https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf
-Raw data from the above source is transformed for our analysis by summarizing the data by Event Type -The top 10 events causing the maximum damage are considered for the analysis
-Further refinement can be done by deep-diving into each event type, its trend and its impact over the years
#' Load the NOAA storm data set, downloading and caching it as needed.
#'
#' Resolution order:
#'   1. If the cached "raw_data.rds" exists in the working directory, read
#'      and return it.
#'   2. Otherwise make sure the compressed CSV "StormData.csv.bz2" is present
#'      (downloading it if missing), parse it, write the cache, and return
#'      the parsed data.
#'
#' @return A data.frame holding the contents of the storm data CSV.
loadData <- function() {
  cache_file <- "raw_data.rds"
  archive_file <- "StormData.csv.bz2"
  source_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

  # Fast path: the cache is written under this exact name below, so the
  # existence check must use the same name and the value must be returned.
  if (file.exists(cache_file)) {
    return(readRDS(cache_file))
  }

  # Only download when the archive itself is missing.
  if (!file.exists(archive_file)) {
    download.file(source_url, archive_file, method = "curl")
  }

  # read.csv() reads the bzip2-compressed file directly.
  df <- read.csv(archive_file)
  saveRDS(df, cache_file)
  df
}
# ---- Population health: fatalities and injuries by event type ----
# Each measure goes through the same steps: sum per event type, sort in
# descending order, keep event types with a non-zero total, take the top 10.
raw_data <- loadData()
library(dplyr)
# NOTE(review): this section no longer calls sqldf (the "> 0" filters are done
# with plain subsetting); the library() call is kept in case other parts of
# the document rely on it.
library(sqldf)

# Total fatalities per event type
Data_fatalities_aggregate <- aggregate(
  list(FATALITIES = raw_data$FATALITIES),
  by = list(EVTYPE = raw_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# Sort by total fatalities, largest first
Data_fatalities_descending <- arrange(Data_fatalities_aggregate, desc(FATALITIES))
# Keep only event types that caused at least one fatality.
# No attach() here: it would leave EVTYPE/FATALITIES on the search path for
# the rest of the session, where they could shadow other objects.
Data_fatalities_aggregate_nonzero <-
  Data_fatalities_descending[Data_fatalities_descending$FATALITIES > 0, ]
# Top 10 event types by fatalities
Data_fatalities_aggregate_nonzero_top10 <- head(Data_fatalities_aggregate_nonzero, n = 10)

# Total injuries per event type
Data_injuries_aggregate <- aggregate(
  list(INJURIES = raw_data$INJURIES),
  by = list(EVTYPE = raw_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# Sort by total injuries, largest first
Data_injuries_descending <- arrange(Data_injuries_aggregate, desc(INJURIES))
# Keep only event types that caused at least one injury
Data_injuries_aggregate_nonzero <-
  Data_injuries_descending[Data_injuries_descending$INJURIES > 0, ]
# Top 10 event types by injuries
Data_injuries_aggregate_nonzero_top10 <- head(Data_injuries_aggregate_nonzero, n = 10)
library(plyr)
library(dplyr)
library(ggplot2)
library(gridExtra)
# ---- Economic impact: property and crop damage by event type ----
# Keep only the columns needed for the economic analysis.
Data_economic_impact <- select(raw_data,COUNTYNAME,STATE,EVTYPE,PROPDMG,PROPDMGEXP,CROPDMG,CROPDMGEXP)

# Translate damage exponent codes into numeric multipliers.
#   h/H = hundreds, k/K = thousands, m/M = millions, b/B = billions.
# Any other code (blank, digits, symbols, NA) yields NA, so the row gets an NA
# damage value and is left out of the totals by `na.rm = TRUE` below. A
# vectorised lookup is used instead of a row-by-row loop: assigning into a
# not-yet-existing column one element at a time recycles the first value to
# every row, which silently gives unrecognised rows a bogus damage figure.
damage_multiplier <- function(exp_code) {
  lookup <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  unname(lookup[toupper(as.character(exp_code))])
}

# Rows with positive property damage (which() also drops NA comparisons)
propdmg <- Data_economic_impact[which(raw_data$PROPDMG > 0), ]
propdmg$actualpropdmg <- propdmg$PROPDMG * damage_multiplier(propdmg$PROPDMGEXP)

# Total property damage (in dollars) per event type
propdmgSum <- aggregate(
  list(actualpropdmg = propdmg$actualpropdmg),
  by = list(EVTYPE = propdmg$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# Arrange data in descending order
Propdmgsum_descending <- arrange(propdmgSum, desc(actualpropdmg))
# Select top10 events with highest property damage
top_propdmg <- head(Propdmgsum_descending, 10)

# Rows with positive crop damage
cropdmg <- Data_economic_impact[which(raw_data$CROPDMG > 0), ]
cropdmg$actualcropdmg <- cropdmg$CROPDMG * damage_multiplier(cropdmg$CROPDMGEXP)

library(plyr)
# Total crop damage (in dollars) per event type
cropdmgSum <- aggregate(
  list(actualcropdmg = cropdmg$actualcropdmg),
  by = list(EVTYPE = cropdmg$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# Arrange data in descending order
cropdmgsum_descending <- arrange(cropdmgSum, desc(actualcropdmg))
# Select top10 events with highest crop damage
top_cropdmg <- head(cropdmgsum_descending, 10)
# Plotting the Data

# ---- Figure 1: population health (top 10 event types) ----
# Bars are ordered from the largest total to the smallest.
Fatalities_plot <- ggplot(
  Data_fatalities_aggregate_nonzero_top10,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Fatalities by Event Type", x = "Event Type")

Injuries_plot <- ggplot(
  Data_injuries_aggregate_nonzero_top10,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Injuries by Event Type", x = "Event Type")

grid.arrange(Fatalities_plot, Injuries_plot, ncol = 2)

# ---- Figure 2: economic impact (top 10 event types) ----
# The damage totals are stored in dollars; divide by 1e9 so the plotted values
# actually match the "(in Billions)" axis labels.
Property_damage_plot <- ggplot(
  top_propdmg,
  aes(x = reorder(EVTYPE, -actualpropdmg), y = actualpropdmg / 1e9)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    title = "Property Damage by Event Type",
    x = "Event Type",
    y = "Property Damage (in Billions)"
  )

Crop_damage_plot <- ggplot(
  top_cropdmg,
  aes(x = reorder(EVTYPE, -actualcropdmg), y = actualcropdmg / 1e9)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    title = "Crop Damage by Event Type",
    x = "Event Type",
    y = "Crop Damage (in Billions)"
  )

grid.arrange(Property_damage_plot, Crop_damage_plot, ncol = 2)
The raw weather events or non-events contain patterns which can be used to reduce the number of events to the official forty-eight categories enumerated in the National Weather Service document provided with the course project instructions. We have found that FLOOD, HURRICANE and TORNADO had the maximum economic impact in terms of property damage, and DROUGHT had the major impact on crop damage. Further analysis can be done by individually studying the trend and its impact for each event type.