Which types of events (as indicated in the EVTYPE variable) across the United States are most harmful to population health, and which have the greatest economic consequences?
This is my Peer Assessment 2 for the Coursera course Reproducible Research. This document is my own work, based on several workflow examples that I found on RPubs and Stack Overflow. One of the main references is Ryan Cheung's Git repository.
The following are the results:
Property Damage: Flood is the cause of most property damage (48%) among all weather events. Flood and tornado together are responsible for about 65% of all property damage.
Crop Damage: Drought is the cause of most crop damage (31%) among all weather events. Drought, flood and ice storm together are responsible for over 69% of all crop damage.
library(ggplot2)
library(scales)
# Read the storm data CSV (first row holds the column names).
data <- read.csv(file = "repdata-data-StormData.csv", header = TRUE)
# Normalise the column names in one step: lower-case them, then strip
# every underscore.
names(data) <- gsub("_", "", tolower(names(data)))
EVTYPE column: According to the dataset documentation, the event name should be one of the fifty listed. Here I use the tolower() function to make the event names case-insensitive.
# Work on lower-cased character event names.
data$evtype <- tolower(as.character(data$evtype))
# Blank out any value matching a URL-like shape: optional "scheme://",
# a run of characters without ":" or "/", optional ":port", then "/...".
# NOTE(review): an ordinary event name that contains "/" also matches this
# pattern in full, so it is blanked here and dropped by the length filter
# below -- confirm that this is intended.
data$evtype <- gsub(
  pattern = "^(([^:]+)://)?([^:/]+)(:([0-9]+))?(/.*)",
  replacement = "",
  x = data$evtype
)
# Keep only rows whose event name has at least two characters.
data <- subset(data, nchar(evtype) >= 2)
# Tag every "summary" record with the marker "tbm", then discard the
# marked rows.
data$evtype[grepl("summary", data$evtype)] <- "tbm"
data <- subset(data, evtype != "tbm")
# Apply the grep() function to the dataset: collapse the free-text event
# names into a small set of canonical categories.
#
# Each name of `evtype_labels` is a lower-case pattern searched for as a
# substring of data$evtype; its value is the category label written back.
# Patterns are applied in the order listed. grep() is case-sensitive and
# every label starts with a capital letter, so a row that has already been
# labelled is never re-labelled by a later pattern: the first matching
# pattern decides the category.
#
# The "storm surge" label must be spelled exactly "Storm surge" (it was
# previously "Strom surge"): the list of known categories that is checked
# afterwards uses "Storm surge", so the misspelled label caused every
# storm-surge record to be lumped into "other".
evtype_labels <- c(
  "hail"         = "Hail",
  "wind"         = "Wind",
  "tornado"      = "Tornado",
  "flood"        = "Flood",
  "lightning"    = "Lightning",
  "snow"         = "Snow",
  "rain"         = "Rain",
  "winter"       = "Winter",
  "heat"         = "Heat",
  "fog"          = "Fog",
  "surf"         = "Surf",
  "ice storm"    = "Ice storm",
  "fire"         = "Wild fire",
  "storm surge"  = "Storm surge",
  "hurricane"    = "Hurricane",
  "drought"      = "Drought",
  "thunderstorm" = "Thunderstorm"
)
for (pattern in names(evtype_labels)) {
  data$evtype[grep(pattern, data$evtype)] <- evtype_labels[[pattern]]
}
# The canonical event categories that are kept as separate groups.
known_types <- c(
  "Flood", "Wind", "Snow", "Tornado", "Hail", "Rain",
  "Lightning", "Winter", "Fog", "Heat", "Surf", "Ice storm",
  "Wild fire", "Storm surge", "Hurricane", "Drought", "Thunderstorm"
)
# Share of all records that fall into one of the canonical categories.
sum(data$evtype %in% known_types)/nrow(data)
## [1] 0.9778
# Flag every record outside the canonical categories and relabel it "other".
tbc <- !(data$evtype %in% known_types)
data$evtype[tbc] <- "other"
# Record count per category, smallest first.
sort(table(data$evtype))
##
## Thunderstorm Hurricane Surf Fog Ice storm
## 92 199 833 1883 2006
## Drought Heat Wild fire Rain Lightning
## 2488 2630 2781 12136 15762
## Snow Winter other Tornado Flood
## 17569 18492 19813 60688 81967
## Hail Wind
## 289338 362164
# Convert the damage magnitude codes into numeric multipliers and compute
# the property and crop damage amounts.

# Map a vector of magnitude codes to numeric multipliers.
#
# codes:   character vector or factor of magnitude codes.
# returns: numeric vector of the same length as `codes`:
#          "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, matched case-insensitively;
#          every other value (empty, NA, digits, symbols, ...) -> 1.
#
# The codes are matched case-insensitively so that lower-case "k" and "b"
# are treated like lower-case "m" already was; previously such rows were
# silently given a multiplier of 1.
# NOTE(review): codes are matched exactly, i.e. they are assumed to be
# single characters -- confirm against the raw file.
exp_to_multiplier <- function(codes) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  found <- multipliers[match(toupper(as.character(codes)), names(multipliers))]
  found[is.na(found)] <- 1
  unname(found)
}

# Replace the code columns by their numeric multipliers.
data$propdmgexp <- exp_to_multiplier(data$propdmgexp)
data$cropdmgexp <- exp_to_multiplier(data$cropdmgexp)

# Damage amount = reported figure x multiplier.
data$propdamage <- data$propdmg * data$propdmgexp
data$cropdamage <- data$cropdmg * data$cropdmgexp
# Total injuries per event category.
totalInjuries <- tapply(X = data$injuries, INDEX = data$evtype, FUN = sum)
# Categories ranked from most to fewest injuries (computed once, reused below).
injuries_ranked <- sort(totalInjuries, decreasing = TRUE)
# Leading category.
injuries_ranked[1]
## Tornado
## 91365
# Share of all injuries caused by the leading category.
sum(injuries_ranked[1]) / sum(totalInjuries)
## [1] 0.661
# Top three categories.
injuries_ranked[1:3]
## Tornado Wind Heat
## 91365 11319 9224
# Share of all injuries caused by the top three categories.
sum(injuries_ranked[1:3]) / sum(totalInjuries)
## [1] 0.8096
# Total fatalities per event category.
totalFatal <- tapply(X = data$fatalities, INDEX = data$evtype, FUN = sum)
# Categories ranked from most to fewest fatalities (computed once, reused below).
fatal_ranked <- sort(totalFatal, decreasing = TRUE)
# Leading category.
fatal_ranked[1]
## Tornado
## 5633
# Share of all fatalities caused by the leading category.
sum(fatal_ranked[1]) / sum(totalFatal)
## [1] 0.3846
# Top three categories.
fatal_ranked[1:3]
## Tornado Heat Flood
## 5633 3119 1488
# Share of all fatalities caused by the top three categories.
sum(fatal_ranked[1:3]) / sum(totalFatal)
## [1] 0.6992
Property Damage: Flood is the cause of most property damage (48%) among all weather events. Flood and tornado together are responsible for about 65% of all property damage.
Crop Damage: Drought is the cause of most crop damage (31%) among all weather events. Drought, flood and ice storm together are responsible for over 69% of all crop damage.
# Total property damage per event category.
totalPropDamage <- tapply(X = data$propdamage, INDEX = data$evtype, FUN = sum)
# Categories ranked from largest to smallest total (computed once, reused below).
prop_ranked <- sort(totalPropDamage, decreasing = TRUE)
# Leading category.
prop_ranked[1]
## Flood
## 1.67e+11
# Share of all property damage caused by the leading category.
prop_ranked[1] / sum(totalPropDamage)
## Flood
## 0.4814
# Top two categories.
prop_ranked[1:2]
## Flood Tornado
## 1.670e+11 5.694e+10
# Share of all property damage caused by the top two categories.
sum(prop_ranked[1:2]) / sum(totalPropDamage)
## [1] 0.6456
# Total crop damage per event category.
totalCropDamage <- tapply(X = data$cropdamage, INDEX = data$evtype, FUN = sum)
# Categories ranked from largest to smallest total (computed once, reused below).
crop_ranked <- sort(totalCropDamage, decreasing = TRUE)
# Leading category.
crop_ranked[1]
## Drought
## 1.397e+10
# Share of all crop damage caused by the leading category.
crop_ranked[1] / sum(totalCropDamage)
## Drought
## 0.3106
# Top three categories.
crop_ranked[1:3]
## Drought Flood Ice storm
## 1.397e+10 1.217e+10 5.022e+09
# Share of all crop damage caused by the top three categories.
sum(crop_ranked[1:3]) / sum(totalCropDamage)
## [1] 0.6927
Figure 1 Barplot of damages to population health
# Figure 1: injuries and fatalities per event category, side by side.
plot.new()
# Two panels in a single row.
par(mfrow = c(1, 2))
# Left panel: injuries, categories in descending order.
injuries_desc <- sort(totalInjuries, decreasing = TRUE)
barplot(height = injuries_desc, main = "Number of injuries")
# Right panel: fatalities, categories in descending order.
fatal_desc <- sort(totalFatal, decreasing = TRUE)
barplot(height = fatal_desc, main = "Number of fatalities")
plot of chunk unnamed-chunk-26
Figure 2 Barplot of weather events to economic consequences
# Figure 2: property and crop damage per event category, side by side.
# Two panels in a single row.
par(mfrow = c(1, 2))
# Left panel: property damage, categories in descending order.
prop_desc <- sort(totalPropDamage, decreasing = TRUE)
barplot(height = prop_desc, main = "Number of property damage")
# Right panel: crop damage, categories in descending order.
crop_desc <- sort(totalCropDamage, decreasing = TRUE)
barplot(height = crop_desc, main = "Number of crop damages")
plot of chunk unnamed-chunk-27
References