The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. These basic questions are:
1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?
Downloading the Data
# Fetch the compressed storm data set once; later runs reuse the local copy.
if (!file.exists("StormData.csv.bz2")) {
  urlData <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # The archive is binary: mode = "wb" stops Windows from translating line
  # endings during the transfer, which would corrupt the bzip2 stream.
  download.file(urlData, destfile = "StormData.csv.bz2", mode = "wb")
}
Reading the data
# Load the storm records straight from the bzip2 archive; text columns stay
# character vectors rather than factors.
dataStorm <- read.csv(
  file = "StormData.csv.bz2",
  header = TRUE,
  stringsAsFactors = FALSE
)
Formatting the data: convert the event type to upper case
# Upper-case the event labels so spellings that differ only in letter case
# fall into the same group when aggregating.
dataStorm[["EVTYPE"]] <- toupper(dataStorm[["EVTYPE"]])
Sum fatalities grouped by event type, sorted in decreasing order
# Total fatalities per event type, largest total first.
dataFatal <- aggregate(
  FATALITIES ~ EVTYPE,
  data = dataStorm,
  FUN = sum
)
dataFatal <- dataFatal[order(dataFatal[["FATALITIES"]], decreasing = TRUE), ]
# Show the six deadliest event types.
head(dataFatal)
## EVTYPE FATALITIES
## 758 TORNADO 5633
## 116 EXCESSIVE HEAT 1903
## 138 FLASH FLOOD 978
## 243 HEAT 937
## 418 LIGHTNING 816
## 779 TSTM WIND 504
Sum injuries grouped by event type, sorted in decreasing order
# Total injuries per event type, largest total first.
dataInjury <- aggregate(
  INJURIES ~ EVTYPE,
  data = dataStorm,
  FUN = sum
)
dataInjury <- dataInjury[order(dataInjury[["INJURIES"]], decreasing = TRUE), ]
# Show the six event types with the most injuries.
head(dataInjury)
## EVTYPE INJURIES
## 758 TORNADO 91346
## 779 TSTM WIND 6957
## 154 FLOOD 6789
## 116 EXCESSIVE HEAT 6525
## 418 LIGHTNING 5230
## 243 HEAT 2100
Draw fatalities and injuries
# Draw the ten event types with the most fatalities and the ten with the most
# injuries, stacked in a two-row layout.
# par() returns the previous settings; keep them so the layout can be restored
# instead of leaking into every plot drawn afterwards.
oldPar <- par(mfrow = c(2, 1))
barplot(
  dataFatal[1:10, 2],
  col = rainbow(10),
  legend.text = dataFatal[1:10, 1],
  ylab = "Fatality",
  main = "10 natural events cause most fatality"
)
barplot(
  dataInjury[1:10, 2],
  col = rainbow(10),
  legend.text = dataInjury[1:10, 1],
  ylab = "Injuried people",
  main = "10 natural events cause most people injuries"
)
# Put the graphics device back to the layout it had before this figure.
par(oldPar)
Sum total harm (fatalities plus injuries) grouped by event type, sorted in decreasing order
# Combined casualties (fatalities plus injuries) per event type, largest first.
# The formula's left-hand side becomes the literal column name
# "FATALITIES + INJURIES" in the result.
dataHarm <- aggregate(
  FATALITIES + INJURIES ~ EVTYPE,
  data = dataStorm,
  FUN = sum
)
dataHarm <- dataHarm[
  order(dataHarm[["FATALITIES + INJURIES"]], decreasing = TRUE),
]
# Show the six most harmful event types.
head(dataHarm)
## EVTYPE FATALITIES + INJURIES
## 758 TORNADO 96979
## 116 EXCESSIVE HEAT 8428
## 779 TSTM WIND 7461
## 154 FLOOD 7259
## 418 LIGHTNING 6046
## 243 HEAT 3037
Draw the most harmful events
# Bar chart of the ten event types with the highest combined casualty count;
# the legend maps each bar colour to its event type.
barplot(
  height = dataHarm[1:10, 2],
  col = rainbow(10),
  legend.text = dataHarm[1:10, 1],
  ylab = "Harmful people",
  main = "10 natural events cause most people harmful"
)
Sum property damage grouped by event type, sorted in decreasing order
# Total property damage per event type, largest first.
#
# PROPDMG holds only the mantissa of each estimate; the magnitude is coded in
# PROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions, in
# either letter case). Summing PROPDMG alone treats 25K and 25B as the same
# amount, so every record is scaled before aggregating. Codes outside H/K/M/B
# (blank, digits, symbols) are left unscaled.
# NOTE(review): totals printed from the unscaled mantissa are no longer valid
# and need to be regenerated by re-running the analysis.
dataPropertyDamage <- local({
  magnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(magnitude[toupper(dataStorm$PROPDMGEXP)])
  multiplier[is.na(multiplier)] <- 1
  aggregate(
    PROPDMG ~ EVTYPE,
    data = data.frame(
      EVTYPE = dataStorm$EVTYPE,
      PROPDMG = dataStorm$PROPDMG * multiplier,
      stringsAsFactors = FALSE
    ),
    FUN = sum
  )
})
dataPropertyDamage <- dataPropertyDamage[
  order(dataPropertyDamage$PROPDMG, decreasing = TRUE),
]
# Show the six event types with the largest property damage.
head(dataPropertyDamage)
## EVTYPE PROPDMG
## 758 TORNADO 3212258.2
## 138 FLASH FLOOD 1420124.6
## 779 TSTM WIND 1335995.6
## 154 FLOOD 899938.5
## 685 THUNDERSTORM WIND 876844.2
## 212 HAIL 688693.4
Sum crop damage grouped by event type, sorted in decreasing order
# Total crop damage per event type, largest first.
#
# CROPDMG holds only the mantissa of each estimate; the magnitude is coded in
# CROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions, in
# either letter case). Each record is scaled by its magnitude before summing so
# that amounts of different sizes are not added as if they were equal. Codes
# outside H/K/M/B (blank, digits, symbols) are left unscaled.
# NOTE(review): totals printed from the unscaled mantissa are no longer valid
# and need to be regenerated by re-running the analysis.
dataCropDamage <- local({
  magnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(magnitude[toupper(dataStorm$CROPDMGEXP)])
  multiplier[is.na(multiplier)] <- 1
  aggregate(
    CROPDMG ~ EVTYPE,
    data = data.frame(
      EVTYPE = dataStorm$EVTYPE,
      CROPDMG = dataStorm$CROPDMG * multiplier,
      stringsAsFactors = FALSE
    ),
    FUN = sum
  )
})
dataCropDamage <- dataCropDamage[
  order(dataCropDamage$CROPDMG, decreasing = TRUE),
]
# Show the six event types with the largest crop damage.
head(dataCropDamage)
## EVTYPE CROPDMG
## 212 HAIL 579596.28
## 138 FLASH FLOOD 179200.46
## 154 FLOOD 168037.88
## 779 TSTM WIND 109202.60
## 758 TORNADO 100018.52
## 685 THUNDERSTORM WIND 66791.45
Draw property and crop damage
# Draw the ten event types with the most property damage and the ten with the
# most crop damage, stacked in a two-row layout.
# par() returns the previous settings; keep them so the layout can be restored
# instead of leaking into every plot drawn afterwards.
oldPar <- par(mfrow = c(2, 1))
barplot(
  dataPropertyDamage[1:10, 2],
  col = rainbow(10),
  legend.text = dataPropertyDamage[1:10, 1],
  ylab = "Property damage",
  main = "10 natural events caused most property damage"
)
barplot(
  dataCropDamage[1:10, 2],
  col = rainbow(10),
  legend.text = dataCropDamage[1:10, 1],
  ylab = "Crop damage",
  main = "10 natural events caused most crop damage"
)
# Put the graphics device back to the layout it had before this figure.
par(oldPar)
Sum total damage (property plus crop) grouped by event type, sorted in decreasing order
# Combined property and crop damage per event type, largest first.
#
# PROPDMG and CROPDMG hold only mantissas; their magnitudes are coded in
# PROPDMGEXP and CROPDMGEXP (H = hundreds, K = thousands, M = millions,
# B = billions, in either letter case). Both columns are scaled before they are
# added, otherwise a property loss in millions and a crop loss in thousands
# would be summed as if they shared a unit. Codes outside H/K/M/B (blank,
# digits, symbols) are left unscaled.
# The formula's left-hand side becomes the literal column name
# "PROPDMG + CROPDMG" in the result.
# NOTE(review): totals printed from the unscaled mantissas are no longer valid
# and need to be regenerated by re-running the analysis.
dataDamage <- local({
  magnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  toMultiplier <- function(code) {
    multiplier <- unname(magnitude[toupper(code)])
    multiplier[is.na(multiplier)] <- 1
    multiplier
  }
  aggregate(
    PROPDMG + CROPDMG ~ EVTYPE,
    data = data.frame(
      EVTYPE = dataStorm$EVTYPE,
      PROPDMG = dataStorm$PROPDMG * toMultiplier(dataStorm$PROPDMGEXP),
      CROPDMG = dataStorm$CROPDMG * toMultiplier(dataStorm$CROPDMGEXP),
      stringsAsFactors = FALSE
    ),
    FUN = sum
  )
})
dataDamage <- dataDamage[
  order(dataDamage$`PROPDMG + CROPDMG`, decreasing = TRUE),
]
# Show the six event types with the largest combined damage.
head(dataDamage)
## EVTYPE PROPDMG + CROPDMG
## 758 TORNADO 3312276.7
## 138 FLASH FLOOD 1599325.1
## 779 TSTM WIND 1445198.2
## 212 HAIL 1268289.7
## 154 FLOOD 1067976.4
## 685 THUNDERSTORM WIND 943635.6
Draw damage
# Bar chart of the ten event types with the highest combined property and crop
# damage; the legend maps each bar colour to its event type.
barplot(
  height = dataDamage[1:10, 2],
  col = rainbow(10),
  legend.text = dataDamage[1:10, 1],
  ylab = "Damage",
  main = "10 natural events cause most damage"
)