Author: Anupama
Date: “Saturday, January 24, 2015”
In this report we analyze weather events that happened in the United States between 1950 and 2011. Our overall goal is to show how different weather events relate to fatalities, injuries, property damage and crop damage. For this investigation we collected data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. Some documentation of the database is also available; it describes how some of the variables are constructed and defined.
- National Weather Service Storm Data Documentation @ https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf
- National Climatic Data Center Storm Events FAQ @ https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf
The analysis database can be downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. The zipped file needs to be unzipped for data processing.
library("RCurl")
## Warning: package 'RCurl' was built under R version 3.1.2
## Loading required package: bitops
library("bitops")
# Fetch the NOAA storm data archive (once) and load it into `df`.
#
# An earlier run of this report used `method = "curl"`, which failed with exit
# status 127 because no `curl` executable was available; the script then went
# on to read whatever file happened to be on disk. Here the platform's default
# download method is used, the transfer is binary-safe (`mode = "wb"`), and a
# failed download stops the script instead of being ignored.
#
# Paths are built explicitly with file.path(), so the working directory of the
# R session is left untouched.
data_dir <- "C:\\Data"
if (!file.exists(data_dir)) {
  dir.create(data_dir, recursive = TRUE)
}

storm_url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- file.path(data_dir, "StormData1.csv.bz2")

# Skip the download when the archive is already present from a previous run.
if (!file.exists(storm_file)) {
  status <- download.file(storm_url, destfile = storm_file, mode = "wb")
  if (status != 0 || !file.exists(storm_file)) {
    # Do not leave a truncated archive behind for the next run to pick up.
    unlink(storm_file)
    stop("Download failed: ", storm_url, call. = FALSE)
  }
}

# read.csv() decompresses the bzip2 archive on the fly through bzfile().
df <- read.csv(bzfile(storm_file))
# Casualties (fatalities + injuries) summed per event type, largest first.
# aggregate() names its output columns "Group.1" (event type) and "x" (total).
fi <- aggregate(
  with(df, FATALITIES + INJURIES),
  by = list(df$EVTYPE),
  FUN = sum
)
fi <- fi[order(-fi$x), ]

# The single most harmful event type is kept by name only ...
tm <- as.character(fi[1, 1])

# ... and the chart data holds the ten event types ranked directly below it.
plot_fi <- fi[seq(2, 11), ]
names(plot_fi) <- c("Event", "HumanDamage")
# Economic damage (property + crop) per event type, in billions of dollars.
#
# PROPDMGEXP / CROPDMGEXP carry a magnitude code for the matching damage
# column. Previously only exact "M" and "B" codes were counted, so rows coded
# "K" (thousands) or with a lower-case code were silently dropped and the
# totals were under-counted. The recognised codes are now K, M and B in either
# case; rows with any other code are still excluded.
divisor_to_billions <- c(K = 1e6, M = 1e3, B = 1)

# Convert one damage column to billions, keeping only rows with a known code.
# The value column is named PROPDMG for both sources so the two results can be
# stacked with rbind().
damage_in_billions <- function(event, amount, exponent) {
  divisor <- unname(divisor_to_billions[toupper(as.character(exponent))])
  known <- !is.na(divisor)
  data.frame(
    EVTYPE = event[known],
    PROPDMG = amount[known] / divisor[known]
  )
}

PropDmg <- rbind(
  damage_in_billions(df$EVTYPE, df$PROPDMG, df$PROPDMGEXP),
  damage_in_billions(df$EVTYPE, df$CROPDMG, df$CROPDMGEXP)
)

AggPD <- aggregate(PropDmg$PROPDMG, by = list(PropDmg$EVTYPE), FUN = sum)

# NOTE: the column names are misleading but are relied on by the plotting
# code: "PropertyDamage" holds the event type and "Occurrences" holds the
# summed damage in billions.
colnames(AggPD) <- c("PropertyDamage", "Occurrences")
AggPD <- AggPD[order(-AggPD$Occurrences), ]
plot_pd <- head(AggPD, 10)
After doing the analysis, we found that TORNADO has the biggest impact on population health. Let’s look at the next top 10 weather events that have caused the most harm to the population.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.2
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.1.2
## Loading required package: grid
# Pie chart of the casualty totals in `plot_fi` (columns Event, HumanDamage).
#
# Label positions are computed by position_stack(vjust = 0.5) using the same
# grouping as the bars, so each label sits in the middle of its own slice
# regardless of the order in which ggplot2 stacks the groups. (Positions
# derived from cumsum() over the data rows only line up when the stacking
# order happens to equal the row order.)
p1 <- ggplot(
  plot_fi,
  aes(x = factor(1), y = HumanDamage, fill = Event, group = Event)
) +
  geom_bar(width = 1, colour = "black", stat = "identity") +
  # Event names, placed just outside the pie.
  geom_text(
    aes(x = 1.70, label = Event),
    position = position_stack(vjust = 0.5),
    size = 2.5
  ) +
  # Casualty counts, placed inside the slices.
  geom_text(
    aes(x = 1.2, label = HumanDamage),
    position = position_stack(vjust = 0.5),
    size = 3,
    colour = "white"
  ) +
  coord_polar(theta = "y") +
  theme_bw() +
  theme(
    # Slices are labelled directly, so the fill legend is redundant.
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    plot.margin = unit(c(0, 0, -0.5, 0), "cm"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )
p1 + labs(title = "Top Weather Events causing Population Damage")
Now let’s look at the top 10 natural calamities that have caused the most property damage.
library(ggplot2)
library(gridExtra)
# Pie chart of the damage totals in `plot_pd`.
#
# NOTE: the column names of `plot_pd` are misleading: `PropertyDamage` holds
# the event type and `Occurrences` holds the summed damage in billions.
#
# Label positions are computed by position_stack(vjust = 0.5) using the same
# grouping as the bars, so each label sits in the middle of its own slice
# regardless of the order in which ggplot2 stacks the groups.
p1 <- ggplot(
  plot_pd,
  aes(
    x = factor(1), y = Occurrences,
    fill = PropertyDamage, group = PropertyDamage
  )
) +
  geom_bar(width = 1, colour = "black", stat = "identity") +
  # Event names, placed just outside the pie.
  geom_text(
    aes(x = 1.70, label = PropertyDamage),
    position = position_stack(vjust = 0.5),
    size = 2.5
  ) +
  # Damage totals inside the slices, rounded so the labels stay readable.
  geom_text(
    aes(x = 1.2, label = round(Occurrences, 2)),
    position = position_stack(vjust = 0.5),
    size = 3,
    colour = "white"
  ) +
  coord_polar(theta = "y") +
  theme_bw() +
  theme(
    # Slices are labelled directly, so the fill legend is redundant.
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    plot.margin = unit(c(0, 0, -0.5, 0), "cm"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )
p1 + labs(title = "Top Weather Events causing Property Damage in Billions")