Set global options to echo code by default, set the working directory, and load the required packages
# Show the source code of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# Use ~/R_Programs as the root directory when knitr evaluates the chunks.
knitr::opts_knit$set(root.dir = "~/R_Programs")
library(dplyr)
library(ggplot2)
Create the project's folder in the working directory
# Create the project folder on first run; later runs find it and skip this.
project_dir <- "./RR_Course_Project_2"
if (!file.exists(project_dir)) {
  dir.create(project_dir)
}
Download the data file if it does not already exist, then load the data
# Fetch the compressed storm data set once, then load it into `Stormdata`.
# The download is skipped when the archive is already present locally, so
# re-knitting the report does not hit the network again.
storm_file <- "./RR_Course_Project_2/StormData.csv.bz2"
if (!file.exists(storm_file)) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb": the archive is binary and must not go through a text-mode
  # transfer, which can corrupt it on Windows.
  download.file(fileUrl, destfile = storm_file, mode = "wb")
}

# read.csv() decompresses the .bz2 file transparently; the read is the same
# whether or not the file was just downloaded, so it is done once here.
Stormdata <- read.csv(storm_file)
head(Stormdata)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Create a bar plot of the number of event records per year and keep only the years with sufficient data. We keep only the years with more than 20.000 records
# Parse the event start date and derive the calendar year of every record.
Stormdata$BGN_DATE <- as.Date(Stormdata$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
Stormdata$Year <- as.integer(format(Stormdata$BGN_DATE, "%Y"))

# Number of records reported in each year.
datarecords <- Stormdata %>%
  group_by(Year) %>%
  summarize(Records = n())

# Figure 1: records per year, with a vertical line just before the first year
# that is kept. The label is added with annotate() rather than geom_text():
# geom_text() would inherit `datarecords` and draw the same label once per row,
# stacking many copies of the text on top of each other.
p <- ggplot(data = datarecords, aes(x = Year, y = Records)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  ggtitle("Complete Records per Year - (Figure. 1)") +
  ylab("No.of records") +
  geom_vline(xintercept = 1993.5) +
  annotate(
    "text",
    x = 1994,
    y = 0,
    label = "Years with >20.000 records (1994-2011)",
    colour = "black",
    angle = 90,
    vjust = -1,
    hjust = 0
  )
p

# Keep only the records from 1994 onwards.
Stormdata <- filter(Stormdata, Year > 1993)
Replace the CROPDMGEXP and PROPDMGEXP exponent codes with numeric multipliers, then summarize each examined impact per weather event
# Collect every exponent code that occurs in either damage column.
# as.character() + unique() works whether read.csv() produced factors or
# character vectors; levels() returns NULL for character columns (the default
# since R 4.0.0), which would leave the lookup table empty and turn every
# damage total into NA.
explevels <- sort(unique(c(
  as.character(Stormdata$CROPDMGEXP),
  as.character(Stormdata$PROPDMGEXP)
)))

# Translate each code into a numeric multiplier. The vector is preallocated to
# the full length with 0 so unrecognised codes ("", "?", "+", "-", ...) map to
# 0 and the result always has one entry per code.
explevelsfixed <- numeric(length(explevels))
explevelsfixed[grepl("[kK]", explevels)] <- 1000
explevelsfixed[grepl("[Mm]", explevels)] <- 1000000
explevelsfixed[grepl("[Hh]", explevels)] <- 100
explevelsfixed[grepl("[bB]", explevels)] <- 1000000000
# A digit code d is read as a power of ten: 10^d.
is_digit <- grepl("[0-9]", explevels)
explevelsfixed[is_digit] <- 10^as.integer(explevels[is_digit])
# Anything that could not be converted above counts as 0.
explevelsfixed[is.na(explevelsfixed)] <- 0

# One lookup table per damage column; they differ only in column names so that
# merge() joins each on the matching *DMGEXP column.
mapper <- data.frame(explevels, explevelsfixed, stringsAsFactors = FALSE)
colnames(mapper) <- c("CROPDMGEXP", "CROPDMGEXPFIXED")
mapper2 <- mapper
colnames(mapper2) <- c("PROPDMGEXP", "PROPDMGEXPFIXED")

# Keep only the columns needed for the health and economic analysis, then
# attach the numeric multipliers.
Stormdata <- select(
  Stormdata,
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
Stormdata <- merge(Stormdata, mapper, all.x = TRUE)
Stormdata <- merge(Stormdata, mapper2, all.x = TRUE)

# Damage per record, divided by 1e6 (plotted later as millions of U.S. dollars).
Stormdata <- mutate(
  Stormdata,
  CROPDMGTOTALS = as.numeric(CROPDMGEXPFIXED) * as.numeric(CROPDMG) / 1000000,
  PROPDMGTOTALS = as.numeric(PROPDMGEXPFIXED) * as.numeric(PROPDMG) / 1000000
)

# Totals per event type for each of the four examined impacts.
Stormdatafinal <- as.data.frame(
  Stormdata %>%
    group_by(EVTYPE) %>%
    summarize(
      FATALITIES = sum(FATALITIES),
      INJURIES = sum(INJURIES),
      CROPDMG = sum(CROPDMGTOTALS),
      PROPDMG = sum(PROPDMGTOTALS)
    )
)
head(Stormdatafinal[order(-Stormdatafinal$FATALITIES), ][1:15, ])
## EVTYPE FATALITIES INJURIES CROPDMG PROPDMG
## 122 EXCESSIVE HEAT 1903 6525 492.40200 7.7537
## 785 TORNADO 1593 22571 361.82447 25630.5883
## 145 FLASH FLOOD 951 1754 1402.66150 16398.2557
## 260 HEAT 930 2095 401.46150 1.7970
## 437 LIGHTNING 794 5116 11.98709 873.4378
## 160 FLOOD 450 6778 5506.94245 144179.6088
# Figure 2: two side-by-side bar charts of the 15 event types with the highest
# fatalities and injuries. The wide bottom margin leaves room for the vertical
# event-type labels (las = 3); the outer top margin holds the shared title.
par(
  mfrow = c(1, 2), mar = c(10, 4, 4, 2), mgp = c(3, 1, 0), cex = 0.8,
  oma = c(0, 0, 2, 0)
)

# Left panel: top 15 by fatalities.
top_fatalities <- Stormdatafinal[order(-Stormdatafinal$FATALITIES), ][1:15, ]
barplot(
  top_fatalities$FATALITIES,
  las = 3,
  names.arg = top_fatalities$EVTYPE,
  main = "Top 15 Weather Events \n with Highest Fatalities (1994-2011)",
  ylab = "No. of fatalities",
  col = "red"
)

# Right panel: top 15 by injuries.
top_injuries <- Stormdatafinal[order(-Stormdatafinal$INJURIES), ][1:15, ]
barplot(
  top_injuries$INJURIES,
  las = 3,
  names.arg = top_injuries$EVTYPE,
  main = "Top 15 Weather Events \n with Highest Injuries (1994-2011)",
  ylab = "No. of Injuries",
  col = "purple"
)

title(
  "Weather Events with Major Health Impact (1994-2011) - (Figure. 2)",
  outer = TRUE
)
# Figure 3: two side-by-side bar charts of the 15 event types with the highest
# crop and property damage. The wide bottom margin leaves room for the vertical
# event-type labels (las = 3); the outer top margin holds the shared title.
par(
  mfrow = c(1, 2), mar = c(10, 4, 4, 2), mgp = c(3, 1, 0), cex = 0.8,
  oma = c(0, 0, 2, 0)
)

# Left panel: top 15 by crop damage.
top_crop <- Stormdatafinal[order(-Stormdatafinal$CROPDMG), ][1:15, ]
barplot(
  top_crop$CROPDMG,
  las = 3,
  names.arg = top_crop$EVTYPE,
  main = "Top 15 Weather Events \n with Highest Crop Damage (1994-2011)",
  ylab = "Millions U.S. Dollars",
  col = "green"
)

# Right panel: top 15 by property damage.
top_property <- Stormdatafinal[order(-Stormdatafinal$PROPDMG), ][1:15, ]
barplot(
  top_property$PROPDMG,
  las = 3,
  names.arg = top_property$EVTYPE,
  main = "Top 15 Weather Events \n with Highest Property Damage (1994-2011)",
  ylab = "Millions U.S. Dollars",
  col = "blue"
)

title(
  "Weather Events with Major Economic Impact (1994-2011) - (Figure. 3)",
  outer = TRUE
)