This code loads the libraries used in this work.

# Attach package `p`, installing it (with its dependencies) first when it
# is not installed yet.
#
# p: name of the package, as a single character string.
#
# Returns TRUE invisibly once the package is attached. Stops with an error
# when the package still cannot be loaded after the install attempt.
usePackage <- function(p) {
    # system.file() returns "" for a package that is not installed; this
    # avoids building the whole installed.packages() table for one lookup.
    if (!nzchar(system.file(package = p))) {
        install.packages(p, dependencies = TRUE)
    }
    # require() only warns and returns FALSE on failure; fail here instead
    # of letting a later call die with "could not find function".
    if (!require(p, character.only = TRUE)) {
        stop("package '", p, "' could not be installed or loaded",
             call. = FALSE)
    }
    invisible(TRUE)
}
# Attach the packages used below, in this order: plyr goes before reshape,
# which is why reshape's rename/round_any mask plyr's in the output.
invisible(lapply(c("ggplot2", "plyr", "reshape"), usePackage))
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: reshape
## 
## Attaching package: 'reshape'
## 
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any

Synopsis

The main purpose of this work is to analyze the impact of severe weather events in the United States, from both a population-health and an economic perspective. We find that the second most important cause of fatalities is Excessive Heat. Proportionally, it accounts for a larger share of fatalities than of injuries.

From an economic perspective, Hail events have a huge impact on crops.

1. Data Processing

1.1 Load

# Download the compressed storm data once (cached under ./data) and read
# it into `stormdata`.
storm_file <- "./data/repdata_data_StormData.csv.bz2"
if (!file.exists("data")) {
    dir.create("data")
}
if (!file.exists(storm_file)) {
    # mode = "wb": the archive is binary, so it must not go through a
    # text-mode transfer.
    download.file("http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                  destfile = storm_file, method = "auto", mode = "wb")
}
# Pass an unopened connection: read.table() then opens it and closes it
# when done. A connection opened up front with bzfile(path, "rt") would be
# left open after the read.
stormdata <- read.table(bzfile(storm_file), sep = ",", header = TRUE)

1.2 Process

The data dictionary for the individual variables is located at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf.

1.2.1 - People impact.

# Flag the records with at least one injury or at least one fatality and
# keep only those.
hasinjuries <- stormdata[["INJURIES"]] > 0
hasfatalities <- stormdata[["FATALITIES"]] > 0
harmful_events <- stormdata[hasinjuries | hasfatalities, ]

# Total fatalities and injuries per event type.
damages <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE,
                     data = harmful_events, FUN = sum)

# Share of each event type in the overall totals, in percent.
damages[["FATA_POC"]] <- damages[["FATALITIES"]] / sum(damages[["FATALITIES"]]) * 100
damages[["INJU_POC"]] <- damages[["INJURIES"]] / sum(damages[["INJURIES"]]) * 100

# Two rankings of the same table: by injuries and by fatalities, largest
# first.
damages_orderby_Injuries <- damages[order(damages[["INJURIES"]], decreasing = TRUE), ]
damages_orderby_Fatalities <- damages[order(damages[["FATALITIES"]], decreasing = TRUE), ]
head(damages_orderby_Injuries)
##             EVTYPE FATALITIES INJURIES FATA_POC INJU_POC
## 184        TORNADO       5633    91346   37.194   65.002
## 191      TSTM WIND        504     6957    3.328    4.951
## 47           FLOOD        470     6789    3.103    4.831
## 32  EXCESSIVE HEAT       1903     6525   12.565    4.643
## 123      LIGHTNING        816     5230    5.388    3.722
## 69            HEAT        937     2100    6.187    1.494
head(damages_orderby_Fatalities)
##             EVTYPE FATALITIES INJURIES FATA_POC INJU_POC
## 184        TORNADO       5633    91346   37.194   65.002
## 32  EXCESSIVE HEAT       1903     6525   12.565    4.643
## 42     FLASH FLOOD        978     1777    6.458    1.265
## 69            HEAT        937     2100    6.187    1.494
## 123      LIGHTNING        816     5230    5.388    3.722
## 191      TSTM WIND        504     6957    3.328    4.951

1.2.2 - Economic impact.

# Flag the records with some property damage or some crop damage and keep
# only those.
haspropritiesdamage <- stormdata[["PROPDMG"]] > 0
hascropdamage <- stormdata[["CROPDMG"]] > 0
economic_events <- stormdata[haspropritiesdamage | hascropdamage, ]

# Total property and crop damage per event type.
# NOTE(review): PROPDMG and CROPDMG are summed exactly as stored. If the
# dataset keeps their magnitude in separate columns, these totals are not
# comparable amounts -- confirm against the data dictionary.
economic <- aggregate(cbind(PROPDMG, CROPDMG) ~ EVTYPE,
                      data = economic_events, FUN = sum)

# Share of each event type in the overall totals, in percent.
economic[["PROPDMG_POC"]] <- economic[["PROPDMG"]] / sum(economic[["PROPDMG"]]) * 100
economic[["CROPDMG_POC"]] <- economic[["CROPDMG"]] / sum(economic[["CROPDMG"]]) * 100

# Two rankings of the same table: by property damage and by crop damage,
# largest first.
economic_orderby_property <- economic[order(economic[["PROPDMG"]], decreasing = TRUE), ]
economic_orderby_crop <- economic[order(economic[["CROPDMG"]], decreasing = TRUE), ]
head(economic_orderby_property)
##                EVTYPE PROPDMG CROPDMG PROPDMG_POC CROPDMG_POC
## 354           TORNADO 3212258  100019      29.512       7.259
## 59        FLASH FLOOD 1420125  179200      13.047      13.006
## 369         TSTM WIND 1335966  109203      12.274       7.926
## 72              FLOOD  899938  168038       8.268      12.196
## 313 THUNDERSTORM WIND  876844   66791       8.056       4.848
## 116              HAIL  688693  579596       6.327      42.066
head(economic_orderby_crop)
##                EVTYPE PROPDMG CROPDMG PROPDMG_POC CROPDMG_POC
## 116              HAIL  688693  579596       6.327      42.066
## 59        FLASH FLOOD 1420125  179200      13.047      13.006
## 72              FLOOD  899938  168038       8.268      12.196
## 369         TSTM WIND 1335966  109203      12.274       7.926
## 354           TORNADO 3212258  100019      29.512       7.259
## 313 THUNDERSTORM WIND  876844   66791       8.056       4.848

2. Result

First we use the 80-20 rule (http://en.wikipedia.org/wiki/Pareto_principle) to select the events with the most important impact.

# Running percentage share, accumulated down each ranking (largest event
# type first).
damages_orderby_Injuries[["cum"]] <- cumsum(damages_orderby_Injuries[["INJU_POC"]])
damages_orderby_Fatalities[["cum"]] <- cumsum(damages_orderby_Fatalities[["FATA_POC"]])
economic_orderby_property[["cum"]] <- cumsum(economic_orderby_property[["PROPDMG_POC"]])
economic_orderby_crop[["cum"]] <- cumsum(economic_orderby_crop[["CROPDMG_POC"]])

# Keep the rows whose cumulative share is still below 80.01 percent.
Pareto_Inju <- damages_orderby_Injuries[damages_orderby_Injuries[["cum"]] < 80.01, ]
Pareto_Fata <- damages_orderby_Fatalities[damages_orderby_Fatalities[["cum"]] < 80.01, ]
Pareto_Prop <- economic_orderby_property[economic_orderby_property[["cum"]] < 80.01, ]
Pareto_Crop <- economic_orderby_crop[economic_orderby_crop[["cum"]] < 80.01, ]

2.1 Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Event types present in both the fatalities and the injuries selection.
Human_Impact <- join(Pareto_Fata, Pareto_Inju,
                     by = "EVTYPE", type = "inner", match = "all")
# Keep a single column for each distinct column name.
Human_Impact <- Human_Impact[, unique(colnames(Human_Impact))]
# Long format: one row per event type and measured variable.
Human_Impact <- melt(Human_Impact)
## Using EVTYPE as id variables
# Only the two percentage columns are plotted.
Human_Impact <- Human_Impact[Human_Impact$variable %in% c("FATA_POC", "INJU_POC"), ]

# Horizontal dodged bars: one pair of bars per event type.
ggplot(Human_Impact, aes(x = EVTYPE, y = value, fill = variable)) +
    geom_bar(position = "dodge", stat = "identity") +
    coord_flip() +
    ggtitle("People Events") +
    labs(x = "", y = "% of total by events") +
    scale_fill_manual(values = c("blue", "dark green"),
                      labels = c("Fatalities", "Injuries"))

plot of chunk unnamed-chunk-6

Answer: Tornado

2.2 Across the United States, which types of events have the greatest economic consequences?

# Event types present in both the property and the crop selection.
Eco_Impact <- join(Pareto_Prop, Pareto_Crop,
                   by = "EVTYPE", type = "inner", match = "all")
# Keep a single column for each distinct column name.
Eco_Impact <- Eco_Impact[, unique(colnames(Eco_Impact))]
# Long format: one row per event type and measured variable.
Eco_Impact <- melt(Eco_Impact)
## Using EVTYPE as id variables
# Only the two percentage columns are plotted.
Eco_Impact <- Eco_Impact[Eco_Impact$variable %in% c("PROPDMG_POC", "CROPDMG_POC"), ]

# Horizontal dodged bars: one pair of bars per event type.
ggplot(Eco_Impact, aes(x = EVTYPE, y = value, fill = variable)) +
    geom_bar(position = "dodge", stat = "identity") +
    coord_flip() +
    ggtitle("Economic Events") +
    labs(x = "", y = "% of total by events") +
    scale_fill_manual(values = c("blue", "dark green"),
                      labels = c("Properties", "Crop"))

plot of chunk unnamed-chunk-7

Answer: Hail