This report will answer the following questions:
Population health will be measured by fatalities and injuries.
Economic consequences will be measured by the costs of property damage and crop damage.
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.6.1 (2014-01-04) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.18.0 (2014-02-22) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
##
## R.utils v1.34.0 (2014-10-07) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(knitr)
## Online location of the storm data (bzip2-compressed CSV)
Url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Download and decompress only if the CSV file is not already on the hard drive
if (!file.exists("StormData.csv")) {
  # setInternet2() only ever existed on Windows builds of R and is not
  # available everywhere, so call it only when present and never let a
  # failure there abort the download (deliberate best effort).
  if (exists("setInternet2", mode = "function")) {
    tryCatch(setInternet2(use = TRUE), error = function(e) NULL)
  }
  # mode = "wb": the archive is binary and must be written byte-for-byte.
  download.file(Url, destfile = "StormData.csv.bz2", mode = "wb")
  # bunzip2() comes from R.utils; the decompressed file is read just below.
  bunzip2("StormData.csv.bz2")
}
data.all <- read.csv("StormData.csv") ## Load data
## The chunk setting above echoes the code but hides its result; otherwise
## every column name would be printed by the next step.
names(data.all) # Inspect the column names to pick the relevant ones
# Columns of interest:
#   EVTYPE
#   FATALITIES, INJURIES
#   PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
## Keep only the columns whose names match one of these
data <- data.all[
  ,
  grepl(
    "EVTYPE|FATALITIES|INJURIES|PROPDMG|PROPDMGEXP|CROPDMG|CROPDMGEXP",
    names(data.all)
  )
]
## Clean up the event types.
## The same event type is spelled in mixed lower/upper case, is sometimes
## misspelled, and several labels can be summarised under one category.

# Map raw event labels onto a reduced set of categories.
#
# evtype: vector of raw event labels (coerced to upper case first).
# Returns a character vector of the same length.
#
# Rules are applied strictly in the order listed; each rule replaces every
# label matching its pattern. Order matters: broad patterns such as ".*WIND.*"
# come late so they only see labels that the more specific rules left alone.
normalise_evtype <- function(evtype) {
  rules <- c(
    ".*AVA.*" = "AVALANCHE",
    ".*COLD.*|.*FREEZ.*|.*FROST.*|.*HYPOTH.*|LOW TEMPERATURE" = "COLD",
    ".*FLOOD.*" = "FLOOD",
    ".*HURRICANE.*" = "HURRICANE",
    ".*LANDS.*" = "LANDSLIDE",
    ".*HEAT.*|.*HYPER.*" = "HEAT",
    ".*ICE.*" = "ICE",
    ".*FIRE.*" = "FIRE",
    ".*TORN.*" = "TORNADO",
    ".*THUNDER.*" = "THUNDERSTORM",
    ".*SNOW.*" = "SNOW",
    ".*TROPICAL.*" = "TROPICAL STORM",
    # TSTM abbreviates thunderstorm, not tropical storm.
    ".*TSTM.*" = "THUNDERSTORM",
    ".*WINTER WEATHER.*" = "WINTER WEATHER",
    ".*WINTER STORM.*" = "WINTER STORM",
    ".*RAIN.*" = "RAIN",
    ".*STRONG WI.*" = "STRONG WIND",
    # \\b keeps "UNSEASONABLY ..." labels out of the sea/surf category.
    ".*SURF.*|.*\\bSEAS\\b.*" = "HIGH/HEAVY SEAS/SURF",
    ".*RIP CURRENT.*" = "RIP CURRENT",
    ".*STORM SURGE.*" = "STORM SURGE",
    ".*HIGH WIND.*" = "HIGH WIND",
    ".*LIGHTN.*" = "LIGHTNING",
    ".*MUD.*" = "MUDSLIDE",
    ".*WIND.*" = "WIND",
    ".*DUST.*" = "DUST",
    ".*COAST.*" = "COASTAL STORM",
    ".*ICY.*" = "ICE",
    ".*WATERS.*" = "WATERSPOUT",
    ".*URBAN.*" = "URBAN/SMALL STREAM FLOOD",
    # Catches remaining misspellings of lightning (anything containing "LIG").
    ".*LIG.*" = "LIGHTNING",
    ".*HAIL.*" = "HAIL",
    ".*WET.*" = "WET"
  )
  cleaned <- toupper(evtype)
  for (i in seq_along(rules)) {
    cleaned <- gsub(names(rules)[i], rules[[i]], cleaned)
  }
  cleaned
}

data$EVTYPE <- normalise_evtype(data$EVTYPE)
# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes as found in PROPDMGEXP / CROPDMGEXP.
# Returns a numeric vector of the same length:
#   digit d          -> 10^d   (so "0" is 1, "1" is 10, ...)
#   h/H, k/K, m/M, b/B -> 1e2, 1e3, 1e6, 1e9
#   anything else (e.g. "-", "?", "+", blank, NA) -> 1
#
# An exact lookup is used instead of successive substitutions so that one
# replacement can never be rewritten by a later one, and so that blank codes
# get a multiplier instead of turning into NA and dropping the damage value.
exponent_multiplier <- function(exp_code) {
  multipliers <- c(
    "0" = 1, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
    "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
  )
  multiplier <- unname(multipliers[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1 # unrecognised or blank code
  multiplier
}

unique(data$PROPDMGEXP)
## PROPDMGEXP contains a multiplier
# with these levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
data$PROPDMGEXP <- exponent_multiplier(data$PROPDMGEXP)

unique(data$CROPDMGEXP)
# CROPDMGEXP has these levels: ? 0 2 B k K m M
data$CROPDMGEXP <- exponent_multiplier(data$CROPDMGEXP)
## HEALTH
## Total fatalities per weather event (result columns: Group.1, x)
FATALITIESByEV <- with(
  data,
  aggregate(x = FATALITIES, by = list(EVTYPE), FUN = sum)
)
# Number of event types whose fatality total exceeds one
Fata <- length(FATALITIESByEV$x[FATALITIESByEV$x > 1])
# Same table, largest total first
fatality_rank <- order(FATALITIESByEV$x, decreasing = TRUE)
FATALITIESByEV.1 <- FATALITIESByEV[fatality_rank, ]

## Total injuries per weather event (result columns: Group.1, x)
INJURIESByEV <- with(
  data,
  aggregate(x = INJURIES, by = list(EVTYPE), FUN = sum)
)
# Number of event types whose injury total exceeds one
Inj <- length(INJURIESByEV$x[INJURIESByEV$x > 1])
# Same table, largest total first
injury_rank <- order(INJURIESByEV$x, decreasing = TRUE)
INJURIESByEV.1 <- INJURIESByEV[injury_rank, ]
## Economic costs
# Property damage per weather event: amount times its multiplier, summed
PROPByEV <- with(
  data,
  aggregate(
    x = as.numeric(PROPDMGEXP) * PROPDMG,
    by = list(EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)
# Crop damage per weather event, computed the same way
CROPByEV <- with(
  data,
  aggregate(
    x = as.numeric(CROPDMGEXP) * CROPDMG,
    by = list(EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)
## Combine both cost types and shape the result for plotting.
# Both tables are grouped by the same EVTYPE values over the same rows,
# so their rows line up and can be added position by position.
Money <- PROPByEV$x + CROPByEV$x
Money.0 <- setNames(cbind(PROPByEV["Group.1"], Money), c("Event", "Costs"))
# Drop events without any recorded cost, then sort with the costliest first
has_cost <- Money.0$Costs != 0
Money.1 <- Money.0[has_cost, ]
cost_rank <- order(Money.1$Costs, decreasing = TRUE)
Money.2 <- Money.1[cost_rank, ]
# Margins are c(bottom, left, top, right); the large left margin leaves room
# for the horizontally printed event names (las = 1).
par(mar = c(4, 9, 4, 3))

# Plotting the top ten weather events that caused fatalities.
# head() returns fewer rows when fewer than ten event types exist, so no
# NA bars are drawn.
top_fatalities <- head(FATALITIESByEV.1, 10)
barplot(top_fatalities$x,
  names.arg = top_fatalities$Group.1,
  las = 1,
  horiz = TRUE,
  main = "Top 10 weather events affecting population health \n fatalities",
  xlab = "Fatalities between 1950 and 2011"
)

# Plotting the top ten weather events that caused injuries
top_injuries <- head(INJURIESByEV.1, 10)
barplot(top_injuries$x,
  names.arg = top_injuries$Group.1,
  las = 1,
  horiz = TRUE,
  main = "Top 10 weather events affecting population health \n injuries",
  xlab = "Injuries between 1950 and 2011"
)

# Plotting the weather events with total costs over 1 billion dollars since
# 1950. The events are selected by the threshold named in the title rather
# than by a fixed row count, so the plot stays correct if the data or the
# event cleaning changes.
billion_plus <- Money.2[Money.2$Costs > 1e9, ]
if (nrow(billion_plus) > 0) {
  barplot(billion_plus$Costs / 1000000,
    names.arg = billion_plus$Event,
    las = 1,
    horiz = TRUE,
    main = "Top weather events affecting the economy \n by damaging property and crop \n Costs larger than 1 billion US dollars since 1950",
    xlab = "Costs in million dollars between 1950 and 2011"
  )
}
In terms of population health, tornadoes are responsible for the largest numbers of fatalities and injuries.
In terms of financial burden, floods incurred the largest costs, followed by hurricanes and tornadoes.