This code loads the libraries used in this work.
#' Attach a package, installing it first when it is not available.
#'
#' @param p Package name as a single character string.
#' @return `TRUE`, invisibly, once the package is attached. An error is
#'   raised if the package still cannot be loaded after the install attempt.
usePackage <- function(p) {
  # requireNamespace() answers "is it available?" for this one package,
  # instead of listing every installed package via installed.packages().
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p, dependencies = TRUE)
  }
  # library() stops with an error when the package cannot be attached;
  # require() would only return FALSE and let the script run on without it.
  library(p, character.only = TRUE)
  invisible(TRUE)
}
# Attach the packages used below, in this order (reshape is attached after
# plyr, which is why its rename/round_any mask plyr's in the output below).
invisible(lapply(c("ggplot2", "plyr", "reshape"), usePackage))
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: reshape
##
## Attaching package: 'reshape'
##
## The following objects are masked from 'package:plyr':
##
## rename, round_any
The main purpose of this work is to analyze the impact of severe weather events in the United States, from both the population-health and the economic perspective. We find that the second most important cause of fatalities is Excessive Heat. Proportionally, it accounts for a larger share of deaths than of injuries.
From the economic perspective, Hail events have a huge impact on crop damage.
# Fetch the storm data archive once and load it into `stormdata`.
# The download is skipped when the archive already exists under ./data.
if (!dir.exists("data")) {
  dir.create("data")
}
if (!file.exists("./data/repdata_data_StormData.csv.bz2")) {
  # mode = "wb": the archive is binary, and a text-mode download can
  # corrupt it on Windows.
  download.file("http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "./data/repdata_data_StormData.csv.bz2", method = "auto",
    mode = "wb")
}
# The connection is handed over unopened so that the reader opens it and
# closes it again on exit; a connection opened beforehand would be left open.
# read.csv() applies CSV quoting rules (only double quotes delimit fields,
# '#' is not a comment character), unlike read.table()'s defaults.
stormdata <- read.csv(bzfile("./data/repdata_data_StormData.csv.bz2"))
The data dictionary for the individual variables is located at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf.
# Flag the records with at least one injury or at least one fatality and
# keep only those.
hasinjuries <- stormdata[["INJURIES"]] > 0
hasfatalities <- stormdata[["FATALITIES"]] > 0
harmful_events <- stormdata[hasinjuries | hasfatalities, ]

# Total fatalities and injuries per event type.
damages <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = harmful_events,
  FUN = sum
)

# Share (in percent) of each event type in all fatalities / all injuries.
damages[["FATA_POC"]] <- with(damages, (FATALITIES / sum(FATALITIES)) * 100)
damages[["INJU_POC"]] <- with(damages, (INJURIES / sum(INJURIES)) * 100)

# The same table ranked two ways: most injuries first, most fatalities first.
damages_orderby_Injuries <- damages[
  with(damages, order(INJURIES, decreasing = TRUE)),
]
damages_orderby_Fatalities <- damages[
  with(damages, order(FATALITIES, decreasing = TRUE)),
]

head(damages_orderby_Injuries)
## EVTYPE FATALITIES INJURIES FATA_POC INJU_POC
## 184 TORNADO 5633 91346 37.194 65.002
## 191 TSTM WIND 504 6957 3.328 4.951
## 47 FLOOD 470 6789 3.103 4.831
## 32 EXCESSIVE HEAT 1903 6525 12.565 4.643
## 123 LIGHTNING 816 5230 5.388 3.722
## 69 HEAT 937 2100 6.187 1.494
head(damages_orderby_Fatalities)
## EVTYPE FATALITIES INJURIES FATA_POC INJU_POC
## 184 TORNADO 5633 91346 37.194 65.002
## 32 EXCESSIVE HEAT 1903 6525 12.565 4.643
## 42 FLASH FLOOD 978 1777 6.458 1.265
## 69 HEAT 937 2100 6.187 1.494
## 123 LIGHTNING 816 5230 5.388 3.722
## 191 TSTM WIND 504 6957 3.328 4.951
# Flag the records with a positive property or crop damage amount and keep
# only those.
haspropritiesdamage <- stormdata[["PROPDMG"]] > 0
hascropdamage <- stormdata[["CROPDMG"]] > 0
economic_events <- stormdata[haspropritiesdamage | hascropdamage, ]

# Total property and crop damage amounts per event type.
# NOTE(review): PROPDMG and CROPDMG are summed exactly as stored. The storm
# data presumably also carries magnitude columns (PROPDMGEXP / CROPDMGEXP,
# e.g. K/M/B) that should scale these amounts before summing -- confirm
# against the data dictionary.
economic <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = economic_events,
  FUN = sum
)

# Share (in percent) of each event type in the property / crop totals.
economic[["PROPDMG_POC"]] <- with(economic, (PROPDMG / sum(PROPDMG)) * 100)
economic[["CROPDMG_POC"]] <- with(economic, (CROPDMG / sum(CROPDMG)) * 100)

# The same table ranked two ways: largest property total first, largest
# crop total first.
economic_orderby_property <- economic[
  with(economic, order(PROPDMG, decreasing = TRUE)),
]
economic_orderby_crop <- economic[
  with(economic, order(CROPDMG, decreasing = TRUE)),
]

head(economic_orderby_property)
## EVTYPE PROPDMG CROPDMG PROPDMG_POC CROPDMG_POC
## 354 TORNADO 3212258 100019 29.512 7.259
## 59 FLASH FLOOD 1420125 179200 13.047 13.006
## 369 TSTM WIND 1335966 109203 12.274 7.926
## 72 FLOOD 899938 168038 8.268 12.196
## 313 THUNDERSTORM WIND 876844 66791 8.056 4.848
## 116 HAIL 688693 579596 6.327 42.066
head(economic_orderby_crop)
## EVTYPE PROPDMG CROPDMG PROPDMG_POC CROPDMG_POC
## 116 HAIL 688693 579596 6.327 42.066
## 59 FLASH FLOOD 1420125 179200 13.047 13.006
## 72 FLOOD 899938 168038 8.268 12.196
## 369 TSTM WIND 1335966 109203 12.274 7.926
## 354 TORNADO 3212258 100019 29.512 7.259
## 313 THUNDERSTORM WIND 876844 66791 8.056 4.848
First we use the 80-20 rule (http://en.wikipedia.org/wiki/Pareto_principle) to select the events with the most important impact.
# Running total of the percentage shares, following each table's ranking.
damages_orderby_Injuries[["cum"]] <-
  cumsum(damages_orderby_Injuries[["INJU_POC"]])
damages_orderby_Fatalities[["cum"]] <-
  cumsum(damages_orderby_Fatalities[["FATA_POC"]])
economic_orderby_property[["cum"]] <-
  cumsum(economic_orderby_property[["PROPDMG_POC"]])
economic_orderby_crop[["cum"]] <-
  cumsum(economic_orderby_crop[["CROPDMG_POC"]])

# Keep the leading event types whose running share is still below 80.01%.
Pareto_Inju <- damages_orderby_Injuries[
  damages_orderby_Injuries[["cum"]] < 80.01,
]
Pareto_Fata <- damages_orderby_Fatalities[
  damages_orderby_Fatalities[["cum"]] < 80.01,
]
Pareto_Prop <- economic_orderby_property[
  economic_orderby_property[["cum"]] < 80.01,
]
Pareto_Crop <- economic_orderby_crop[
  economic_orderby_crop[["cum"]] < 80.01,
]
# Event types present in both the fatalities and the injuries selection.
Human_Impact <- join(
  Pareto_Fata, Pareto_Inju,
  by = "EVTYPE", type = "inner", match = "all"
)
# Both inputs share their non-key column names; keep the first occurrence of
# each name.
Human_Impact <- Human_Impact[, !duplicated(colnames(Human_Impact))]
# Long format: one row per (event type, measure).
Human_Impact <- melt(Human_Impact)
## Using EVTYPE as id variables
# Only the two percentage measures are plotted.
Human_Impact <- Human_Impact[
  Human_Impact$variable %in% c("FATA_POC", "INJU_POC"),
]
ggplot(Human_Impact, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  coord_flip() +
  ggtitle("People Events") +
  labs(x = "", y = "% of total by events") +
  scale_fill_manual(
    values = c("blue", "dark green"),
    labels = c("Fatalities", "Injuries")
  )
Answer: Tornado
# Event types present in both the property and the crop selection.
Eco_Impact <- join(
  Pareto_Prop, Pareto_Crop,
  by = "EVTYPE", type = "inner", match = "all"
)
# Both inputs share their non-key column names; keep the first occurrence of
# each name.
Eco_Impact <- Eco_Impact[, !duplicated(colnames(Eco_Impact))]
# Long format: one row per (event type, measure).
Eco_Impact <- melt(Eco_Impact)
## Using EVTYPE as id variables
# Only the two percentage measures are plotted.
Eco_Impact <- Eco_Impact[
  Eco_Impact$variable %in% c("PROPDMG_POC", "CROPDMG_POC"),
]
ggplot(Eco_Impact, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  coord_flip() +
  ggtitle("Economic Events") +
  labs(x = "", y = "% of total by events") +
  scale_fill_manual(
    values = c("blue", "dark green"),
    labels = c("Properties", "Crop")
  )
Answer: Hail