echo = TRUE
setwd("E:/R/coursera/05 Reproducible Research/week3")
library(R.utils)
## Warning: package 'R.utils' was built under R version 3.1.3
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 3.1.3
## Loading required package: R.methodsS3
## Warning: package 'R.methodsS3' was built under R version 3.1.3
## R.methodsS3 v1.7.0 (2015-02-19) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.19.0 (2015-02-27) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
##
## R.utils v2.0.2 (2015-04-27) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(arm) # load the arm package (contains the coefplot function)
## Warning: package 'arm' was built under R version 3.1.3
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 3.1.3
## Loading required package: Rcpp
##
## arm (Version 1.8-5, built: 2015-05-13)
##
## Working directory is E:/R/coursera/05 Reproducible Research/week3
sourceURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
bZipFileName <- "stormData.csv.bz2"
csvFileName <- "stormData.csv"
# Check if it is needed to download and uncompress the data file?
if ( ! bZipFileName %in% dir() ) {
if ( ! bZipFileName %in% dir() ) {
download.file(sourceURL, bZipFileName)
}
bunzip2(bZipFileName, destfile=csvFileName, remove=FALSE)
}
stormData <- read.csv(csvFileName, header=TRUE, stringsAsFactors=FALSE)
head(stormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
## Fix names and values. Makes it easy to read the data.
names(stormData)[1] <- "STATEID"
stormData$BGN_DATE <- as.Date(stormData$BGN_DATE, "%m/%d/%Y %H:%M:%S")
stormData$END_DATE <- as.Date(stormData$END_DATE, "%m/%d/%Y %H:%M:%S")
stormData$STATEID <- factor(stormData$STATEID)
stormData$COUNTY <- factor(stormData$COUNTY)
stormData$year<-as.numeric(strftime(stormData$BGN_DATE,format="%Y"))
head(stormData)
## STATEID BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 1950-04-18 0130 CST 97 MOBILE AL TORNADO
## 2 1 1950-04-18 0145 CST 3 BALDWIN AL TORNADO
## 3 1 1951-02-20 1600 CST 57 FAYETTE AL TORNADO
## 4 1 1951-06-08 0900 CST 89 MADISON AL TORNADO
## 5 1 1951-11-15 1500 CST 43 CULLMAN AL TORNADO
## 6 1 1951-11-15 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 <NA> 0 NA
## 2 0 <NA> 0 NA
## 3 0 <NA> 0 NA
## 4 0 <NA> 0 NA
## 5 0 <NA> 0 NA
## 6 0 <NA> 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES
## 1 0 14.0 100 3 0 0 15
## 2 0 2.0 150 2 0 0 0
## 3 0 0.1 123 2 0 0 2
## 4 0 0.0 100 2 0 0 2
## 5 0 0.0 150 2 0 0 2
## 6 0 1.5 177 2 0 0 6
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE
## 1 25.0 K 0 3040
## 2 2.5 K 0 3042
## 3 25.0 K 0 3340
## 4 2.5 K 0 3458
## 5 2.5 K 0 3412
## 6 2.5 K 0 3450
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM year
## 1 8812 3051 8806 1 1950
## 2 8755 0 0 2 1950
## 3 8742 0 0 3 1951
## 4 8626 0 0 4 1951
## 5 8642 0 0 5 1951
## 6 8748 0 0 6 1951
analysisData<-stormData[,c("BGN_DATE","END_DATE","year", "EVTYPE","FATALITIES","INJURIES","PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP")]
#unique(analysisData$EVTYPE)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:MASS':
##
## select
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
analysisData %>% group_by(EVTYPE) %>% summarize(proportion=n()) %>% arrange(desc(proportion)) %>% head(10)
## Source: local data frame [10 x 2]
##
## EVTYPE proportion
## 1 HAIL 288661
## 2 TSTM WIND 219940
## 3 THUNDERSTORM WIND 82563
## 4 TORNADO 60652
## 5 FLASH FLOOD 54277
## 6 FLOOD 25326
## 7 THUNDERSTORM WINDS 20843
## 8 HIGH WIND 20212
## 9 LIGHTNING 15754
## 10 HEAVY SNOW 15708
analysisData$HAIL <- ifelse(analysisData$EVTYPE=="HAIL", 1,0)
analysisData$TSTM_WINDL<-ifelse(analysisData$EVTYPE=="TSTM WIND", 1,0)
analysisData$THUNDERSTORM_WIND<-ifelse(analysisData$EVTYPE=="THUNDERSTORM WIND", 1,0)
analysisData$TORNADO<-ifelse(analysisData$EVTYPE=="TORNADO", 1,0)
analysisData$FLASH_FLOOD<-ifelse(analysisData$EVTYPE=="FLASH FLOOD", 1,0)
### LM for FATALITIES
fitFATALITIES <- lm(analysisData$FATALITIES ~
analysisData$HAIL + analysisData$TSTM_WINDL + analysisData$THUNDERSTORM_WIND + analysisData$TORNADO + analysisData$FLASH_FLOOD
, data=analysisData)
Cof_Fatal<- coefficients(fitFATALITIES)
### LM for INJURIES
fitINJURIES <- lm(analysisData$INJURIES ~
analysisData$HAIL + analysisData$TSTM_WINDL + analysisData$THUNDERSTORM_WIND + analysisData$TORNADO + analysisData$FLASH_FLOOD
, data=analysisData)
Cof_Injure<- coefficients(fitINJURIES)
### Calculated total money damage
analysisData$totalDamage<-(analysisData$PROPDMG + analysisData$CROPDMG)
### LM for Total Damage
fitTotalDamage <- lm(analysisData$totalDamage ~
analysisData$HAIL + analysisData$TSTM_WINDL + analysisData$THUNDERSTORM_WIND + analysisData$TORNADO + analysisData$FLASH_FLOOD
, data=analysisData)
cof_Damage<- coefficients(fitTotalDamage)
Cof_Fatal
## (Intercept) analysisData$HAIL
## 0.04017247 -0.04012051
## analysisData$TSTM_WINDL analysisData$THUNDERSTORM_WIND
## -0.03788094 -0.03856158
## analysisData$TORNADO analysisData$FLASH_FLOOD
## 0.05270163 -0.02215379
coefplot(fitFATALITIES)
# add axes and labels
axis(side = 1) # add bottom axis
mtext(side = 1, "Linear Regression Coefficient", line = 3) # label bottom axis
abline(v = 0, lty = 3, col = "red") # add verticle line
mtext(side = 3, "Linear Regression Model of\n the Fatalities", line = 1) # add title
box()
Cof_Injure
## (Intercept) analysisData$HAIL
## 0.1916322 -0.1869173
## analysisData$TSTM_WINDL analysisData$THUNDERSTORM_WIND
## -0.1600008 -0.1736096
## analysisData$TORNADO analysisData$FLASH_FLOOD
## 1.3144352 -0.1588927
coefplot(fitINJURIES)
axis(side = 1) # add bottom axis
mtext(side = 1, "Linear Regression Coefficient", line = 3) # label bottom axis
abline(v = 0, lty = 3, col = "red") # add verticle line
mtext(side = 3, "Linear Regression Model of\n the Injuries", line = 1) # add title
box()
cof_Damage
## (Intercept) analysisData$HAIL
## 18.825468 -14.431769
## analysisData$TSTM_WINDL analysisData$THUNDERSTORM_WIND
## -12.254730 -7.396188
## analysisData$TORNADO analysisData$FLASH_FLOOD
## 35.785702 10.640513
coefplot(fitTotalDamage)
axis(side = 1) # add bottom axis
mtext(side = 1, "Linear Regression Coefficient", line = 3) # label bottom axis
abline(v = 0, lty = 3, col = "red") # add verticle line
mtext(side = 3, "Linear Regression Model of\n the Total Damage", line = 1) # add title
box()