—————————————————————————————-
The storm data is loaded into a data frame for analysis.
# Read the bzip2-compressed storm CSV into the `stormdata` data frame.
# NOTE(review): the working directory below is a hard-coded absolute path on
# one user's machine; setwd() fails where that directory does not exist.
setwd("C:/Users/aysegul/Desktop/coursera/reproducibleresearch/w4")
stormdata <- read.csv(file = bzfile("StormData.csv.bz2"), header = TRUE)
Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Per event type, total the fatalities, the injuries, and their combined
# count, then show the first ten rows of the (not yet sorted) summary.
library(plyr)
evtype_total_casualties <- ddply(
  .data = stormdata_cleaned,
  .variables = .(EVTYPE),
  .fun = summarize,
  totalFatalities = sum(FATALITIES),
  totalInjuries = sum(INJURIES),
  totalCasualties = sum(FATALITIES + INJURIES)
)
print(evtype_total_casualties[seq_len(10), ])
## EVTYPE totalFatalities totalInjuries totalCasualties
## 1 ? 0 0 0
## 2 ABNORMAL WARMTH 0 0 0
## 3 ABNORMALLY DRY 0 0 0
## 4 ABNORMALLY WET 0 0 0
## 5 ACCUMULATED SNOWFALL 0 0 0
## 6 AGRICULTURAL FREEZE 0 0 0
## 7 APACHE COUNTY 0 0 0
## 8 ASTRONOMICAL HIGH TIDE 0 0 0
## 9 ASTRONOMICAL LOW TIDE 0 0 0
## 10 AVALANCE 1 0 1
# Rank event types from most to fewest total casualties and show the top ten.
casualties_sorted <- evtype_total_casualties[
  order(evtype_total_casualties[["totalCasualties"]], decreasing = TRUE),
]
print(casualties_sorted[seq_len(10), ])
## EVTYPE totalFatalities totalInjuries totalCasualties
## 750 TORNADO 5633 91346 96979
## 108 EXCESSIVE HEAT 1903 6525 8428
## 771 TSTM WIND 504 6957 7461
## 146 FLOOD 470 6789 7259
## 410 LIGHTNING 816 5230 6046
## 235 HEAT 937 2100 3037
## 130 FLASH FLOOD 978 1777 2755
## 379 ICE STORM 89 1975 2064
## 677 THUNDERSTORM WIND 133 1488 1621
## 880 WINTER STORM 206 1321 1527
# Bar chart of the ten event types with the highest total casualties.
# The bars plot totalCasualties (fatalities + injuries), so the title and the
# y-axis label say "casualties" rather than "fatalities".
library(ggplot2)
g <- ggplot(
  casualties_sorted[1:10, ],
  aes(y = totalCasualties, x = reorder(EVTYPE, -totalCasualties))
)
g <- g + geom_bar(fill = "red4", stat = "identity")
g <- g + ggtitle("Top 10 Events with Highest Total Casualties") +
  labs(x = "EVENT TYPE", y = "Total casualties (fatalities + injuries)")
# Rotate the x-axis (event type) labels by 90 degrees.
g <- g + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(g)

Question 2: Across the United States, which types of events have the greatest economic consequences?
Total casualties by event type and year
# Casualty totals (fatalities, injuries, combined) per event type and year.
evtype_yearly_total_casualties <- ddply(
  .data = stormdata_cleaned,
  .variables = .(EVTYPE, YEAR),
  .fun = summarize,
  totalFatalities = sum(FATALITIES),
  totalInjuries = sum(INJURIES),
  totalCasualties = sum(FATALITIES + INJURIES)
)
# Keep only the yearly rows whose event type is exactly "TORNADO".
tornado_casualties <- evtype_yearly_total_casualties[
  evtype_yearly_total_casualties[["EVTYPE"]] == "TORNADO",
]
# Auto-print the first ten rows of the yearly summary.
evtype_yearly_total_casualties[seq_len(10), ]
## EVTYPE YEAR totalFatalities totalInjuries
## 1 ? 1994 0 0
## 2 ABNORMAL WARMTH 1998 0 0
## 3 ABNORMALLY DRY 2001 0 0
## 4 ABNORMALLY DRY 2003 0 0
## 5 ABNORMALLY WET 2002 0 0
## 6 ACCUMULATED SNOWFALL 2001 0 0
## 7 AGRICULTURAL FREEZE 1995 0 0
## 8 AGRICULTURAL FREEZE 1997 0 0
## 9 APACHE COUNTY 1994 0 0
## 10 ASTRONOMICAL HIGH TIDE 2002 0 0
## totalCasualties
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
Damage to Property and Crops: property and crop damage is recorded in the data along with a multiplier code (K = thousands, M = millions, B = billions).
# Translate a damage exponent code into its numeric multiplier:
# "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, anything else -> 1.
# NOTE(review): the comparison is exact, so any other code (for example a
# lowercase "k" or "m", if the data contains them) is scaled by 1 -- confirm
# this is intended.
damage_multiplier <- function(exp_code) {
  ifelse(
    exp_code == "K", 1e3,
    ifelse(
      exp_code == "M", 1e6,
      ifelse(exp_code == "B", 1e9, 1)
    )
  )
}

# Add PropDmg / CropDmg columns: the raw damage figures scaled by the
# multiplier for their exponent codes.
stormdata_damages <- mutate(
  stormdata_cleaned,
  PropDmg = PROPDMG * damage_multiplier(PROPDMGEXP),
  CropDmg = CROPDMG * damage_multiplier(CROPDMGEXP)
)
To determine the most damaging types of events, we sum up property and crop damage.
# Per event type, total the property damage, the crop damage, and their sum.
evtype_total_damages <- ddply(
  .data = stormdata_damages,
  .variables = .(EVTYPE),
  .fun = summarize,
  totalPropDmg = sum(PropDmg),
  totalCropDmg = sum(CropDmg),
  totalDmg = sum(PropDmg, CropDmg)
)
# Rank event types from highest to lowest total damage and show the top ten.
damages_sorted <- evtype_total_damages[
  order(evtype_total_damages[["totalDmg"]], decreasing = TRUE),
]
print(damages_sorted[seq_len(10), ])
## EVTYPE totalPropDmg totalCropDmg totalDmg
## 146 FLOOD 144657709807 5661968450 150319678257
## 364 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 750 TORNADO 56925660790 414953270 57340614060
## 591 STORM SURGE 43323536000 5000 43323541000
## 204 HAIL 15727367053 3025537890 18752904943
## 130 FLASH FLOOD 16140862067 1421317100 17562179167
## 76 DROUGHT 1046106000 13972566000 15018672000
## 355 HURRICANE 11868319010 2741910000 14610229010
## 521 RIVER FLOOD 5118945500 5029459000 10148404500
## 379 ICE STORM 3944927860 5022113500 8967041360
# Bar chart of the ten event types with the highest total damage
# (property + crop), bars ordered from largest to smallest.
library(ggplot2)
g2 <- ggplot(
  damages_sorted[seq_len(10), ],
  aes(x = reorder(EVTYPE, -totalDmg), y = totalDmg)
) +
  geom_bar(stat = "identity", fill = "red4") +
  ggtitle("Top 10 Events with Highest Damages") +
  labs(x = "Event Type", y = "Total Damages") +
  # Rotate the x-axis (event type) labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(g2)
