This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The events in the database start in the year 1950 and end in November 2011. The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data analysis must address the following questions:
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
Source: “https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2”. We use a loader function to load the data into the session.
#' Load the NOAA storm data, caching the parsed result on disk.
#'
#' Resolution order:
#'   1. If `rds_path` exists, return the data frame stored in it.
#'   2. Otherwise, download the compressed CSV to `csv_path` unless it is
#'      already there, parse it, write the cache to `rds_path`, and return it.
#'
#' @param rds_path Path of the RDS cache holding the parsed data frame.
#' @param csv_path Path of the (optionally bzip2-compressed) CSV file.
#' @param url Location the CSV is downloaded from when `csv_path` is missing.
#' @return A data frame with the contents of the storm data CSV.
loadData <- function(rds_path = "raw_data.rds",
                     csv_path = "StormData.csv.bz2",
                     url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2") {
  # Fast path: the cache is checked under the same name it is saved under
  # below, and its contents are returned instead of being discarded.
  if (file.exists(rds_path)) {
    return(readRDS(rds_path))
  }

  # Only download when the file we are about to read is actually missing.
  if (!file.exists(csv_path)) {
    # Use R's default download method rather than shelling out to an
    # external `curl` binary, which may not be installed. mode = "wb" keeps
    # the compressed archive byte-exact on every platform.
    status <- download.file(url, csv_path, mode = "wb")
    if (status != 0 || !file.exists(csv_path)) {
      # Do not leave a partial file behind that a later run would try to parse.
      unlink(csv_path)
      stop("Failed to download storm data from ", url, call. = FALSE)
    }
  }

  df <- read.csv(csv_path)
  saveRDS(df, rds_path)
  df
}
# Load the storm data once and keep it in the session as `raw_data`.
raw_data <- loadData()
## Warning: running command 'curl "https://d396qusza40orc.cloudfront.net/
## repdata%2Fdata%2FStormData.csv.bz2" -o "StormData.csv.bz2"' had status 127
## Warning in download.file("https://d396qusza40orc.cloudfront.net/repdata
## %2Fdata%2FStormData.csv.bz2", : download had nonzero exit status
Source: https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf. The documentation lists the following event types: Astronomical Low Tide, Avalanche, Blizzard, Coastal Flood, Cold/Wind Chill, Debris Flow, Dense Fog, Dense Smoke, Drought, Dust Devil, Dust Storm, Excessive Heat, Extreme Cold/Wind Chill, Flash Flood, Flood, Frost/Freeze, Funnel Cloud, Freezing Fog, Hail, Heat, Heavy Rain, Heavy Snow, High Surf, High Wind, Hurricane (Typhoon), Ice Storm, Lake-Effect Snow, Lakeshore Flood, Lightning, Marine Hail, Marine High Wind, Marine Strong Wind, Marine Thunderstorm Wind, Rip Current, Seiche, Sleet, Storm Surge/Tide, Strong Wind, Thunderstorm Wind, Tornado, Tropical Depression, Tropical Storm, Tsunami, Volcanic Ash, Waterspout, Wildfire, Winter Storm, Winter Weather.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.1
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total fatalities per event type; missing counts are ignored in the sum.
Data_fatalities_aggregate <- aggregate(
  list(FATALITIES = raw_data$FATALITIES),
  by = list(EVTYPE = raw_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# Order the event types from most to fewest fatalities.
Data_fatalities_descending <- Data_fatalities_aggregate %>%
  arrange(desc(FATALITIES))
library(sqldf)
## Warning: package 'sqldf' was built under R version 3.2.1
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.2.1
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.2.1
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.2.1
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.2.1
# No attach() here: sqldf finds Data_fatalities_descending by name, and
# attaching it would only put FATALITIES/EVTYPE on the search path, where
# they can silently mask or be masked by other objects.
# Keep only the event types with at least one recorded fatality.
Data_fatalities_aggregate_nonzero <- sqldf("select * from Data_fatalities_descending where FATALITIES>0")
## Loading required package: tcltk
# Take the first 30 rows; the input to the query was sorted by descending
# fatalities, so these are expected to be the 30 deadliest event types.
Data_fatalities_aggregate_nonzero_top30 <- head(Data_fatalities_aggregate_nonzero, n = 30)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.1
# Horizontal bar chart of the top-30 event types by fatalities, with each
# bar labelled in red with its fatality count.
ggplot(
  data = Data_fatalities_aggregate_nonzero_top30,
  mapping = aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = FATALITIES), colour = "red") +
  coord_flip() +
  labs(title = "Fatalities by Event Type", x = "Event Type") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Total injuries per event type; missing counts are ignored in the sum.
Data_injuries_aggregate <- aggregate(
  list(INJURIES = raw_data$INJURIES),
  by = list(EVTYPE = raw_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# Order the event types from most to fewest injuries.
Data_injuries_descending <- Data_injuries_aggregate %>%
  arrange(desc(INJURIES))
# Keep only the event types with at least one recorded injury.
Data_injuries_aggregate_nonzero <- sqldf(
  "select * from Data_injuries_descending where INJURIES>0"
)
# Take the first 30 rows of the filtered result.
Data_injuries_aggregate_nonzero_top30 <- head(Data_injuries_aggregate_nonzero, n = 30)
# Horizontal bar chart of the top-30 event types by injuries, with each
# bar labelled in red with its injury count.
ggplot(
  data = Data_injuries_aggregate_nonzero_top30,
  mapping = aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = INJURIES), colour = "red") +
  coord_flip() +
  labs(title = "Injuries by Event Type", x = "Event Type") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))