—————————————————————————————-

The storm data is loaded into a data frame for analysis.

# Load the raw storm data from the compressed CSV.
# The file is looked up relative to the current working directory instead of
# calling setwd() on a machine-specific absolute path, so the analysis can be
# run from any checkout that contains the data file.
storm_file <- "StormData.csv.bz2"
if (!file.exists(storm_file)) {
  stop(
    "Cannot find '", storm_file, "' in working directory '", getwd(), "'.",
    call. = FALSE
  )
}
stormdata <- read.csv(bzfile(storm_file), header = TRUE)

Cleaning EVTYPE: a quick cleanup is possible with str_trim() and toupper(). A YEAR column is also derived by formatting BGN_DATE.

library(plyr)
library(stringr)

# Normalise the event labels (strip surrounding whitespace, force upper case)
# and derive a YEAR string by parsing BGN_DATE and formatting it with "%Y".
stormdata_cleaned <- mutate(
  stormdata,
  EVTYPE = toupper(str_trim(EVTYPE)),
  YEAR = format(
    strptime(BGN_DATE, format = "%m/%d/%Y %T"),
    format = "%Y"
  )
)

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

library(plyr)

# One row per event type: total fatalities, total injuries, and the two
# combined as total casualties.
evtype_total_casualties <- ddply(
  stormdata_cleaned,
  .(EVTYPE),
  summarize,
  totalFatalities = sum(FATALITIES),
  totalInjuries = sum(INJURIES),
  totalCasualties = sum(FATALITIES + INJURIES)
)

# Preview the first ten rows of the summary.
print(evtype_total_casualties[1:10, ])
##                    EVTYPE totalFatalities totalInjuries totalCasualties
## 1                       ?               0             0               0
## 2         ABNORMAL WARMTH               0             0               0
## 3          ABNORMALLY DRY               0             0               0
## 4          ABNORMALLY WET               0             0               0
## 5    ACCUMULATED SNOWFALL               0             0               0
## 6     AGRICULTURAL FREEZE               0             0               0
## 7           APACHE COUNTY               0             0               0
## 8  ASTRONOMICAL HIGH TIDE               0             0               0
## 9   ASTRONOMICAL LOW TIDE               0             0               0
## 10               AVALANCE               1             0               1
# Rank event types from most to fewest total casualties and show the top ten.
casualties_sorted <- evtype_total_casualties[
  order(evtype_total_casualties$totalCasualties, decreasing = TRUE),
]
print(casualties_sorted[1:10, ])
##                EVTYPE totalFatalities totalInjuries totalCasualties
## 750           TORNADO            5633         91346           96979
## 108    EXCESSIVE HEAT            1903          6525            8428
## 771         TSTM WIND             504          6957            7461
## 146             FLOOD             470          6789            7259
## 410         LIGHTNING             816          5230            6046
## 235              HEAT             937          2100            3037
## 130       FLASH FLOOD             978          1777            2755
## 379         ICE STORM              89          1975            2064
## 677 THUNDERSTORM WIND             133          1488            1621
## 880      WINTER STORM             206          1321            1527
library(ggplot2)

# Bar chart of the ten event types with the most casualties, bars ordered from
# largest to smallest. The plotted value is totalCasualties (fatalities +
# injuries), so the title and y-axis label say "casualties" rather than
# "fatalities".
g <- ggplot(
  casualties_sorted[1:10, ],
  aes(x = reorder(EVTYPE, -totalCasualties), y = totalCasualties)
) +
  geom_bar(fill = "red4", stat = "identity") +
  ggtitle("Top 10 Events with Highest Total Casualties") +
  labs(x = "EVENT TYPE", y = "Total casualties (fatalities + injuries)") +
  # Rotate the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(g)

Question2: Across the United States, which types of events have the greatest economic consequences?

Total casualties by EVTYPE and YEAR

# Casualty totals broken down by event type and year.
evtype_yearly_total_casualties <- ddply(
  stormdata_cleaned,
  .(EVTYPE, YEAR),
  summarize,
  totalFatalities = sum(FATALITIES),
  totalInjuries = sum(INJURIES),
  totalCasualties = sum(FATALITIES + INJURIES)
)

# Keep the yearly rows for tornadoes in their own data frame.
tornado_casualties <- evtype_yearly_total_casualties[
  evtype_yearly_total_casualties[["EVTYPE"]] == "TORNADO",
]

# Preview the first ten rows of the yearly summary.
evtype_yearly_total_casualties[1:10, ]
##                    EVTYPE YEAR totalFatalities totalInjuries
## 1                       ? 1994               0             0
## 2         ABNORMAL WARMTH 1998               0             0
## 3          ABNORMALLY DRY 2001               0             0
## 4          ABNORMALLY DRY 2003               0             0
## 5          ABNORMALLY WET 2002               0             0
## 6    ACCUMULATED SNOWFALL 2001               0             0
## 7     AGRICULTURAL FREEZE 1995               0             0
## 8     AGRICULTURAL FREEZE 1997               0             0
## 9           APACHE COUNTY 1994               0             0
## 10 ASTRONOMICAL HIGH TIDE 2002               0             0
##    totalCasualties
## 1                0
## 2                0
## 3                0
## 4                0
## 5                0
## 6                0
## 7                0
## 8                0
## 9                0
## 10               0

Damage to Property and Crops: property and crop damage is recorded in the data along with a multiplier code (K for thousands, M for millions, or B for billions).

# Translate a damage exponent code into its numeric multiplier:
# "K" -> 1000, "M" -> 1000000, "B" -> 1000000000, anything else -> 1.
# NOTE(review): the comparison is case-sensitive, so a code such as "k" or "m"
# would get multiplier 1 -- confirm whether the data contains such codes.
damage_multiplier <- function(code) {
  ifelse(
    code == "K", 1000,
    ifelse(
      code == "M", 1000000,
      ifelse(code == "B", 1000000000, 1)
    )
  )
}

# Scale the recorded property and crop damage by their exponent codes.
stormdata_damages <- mutate(
  stormdata_cleaned,
  PropDmg = PROPDMG * damage_multiplier(PROPDMGEXP),
  CropDmg = CROPDMG * damage_multiplier(CROPDMGEXP)
)

To determine the most damaging types of events, we sum up property and crop damage.

# Total property damage, crop damage and combined damage per event type.
evtype_total_damages <- ddply(
  stormdata_damages,
  .(EVTYPE),
  summarize,
  totalPropDmg = sum(PropDmg),
  totalCropDmg = sum(CropDmg),
  totalDmg = sum(PropDmg, CropDmg)
)

# Rank event types by combined damage, largest first, and show the top ten.
damages_sorted <- evtype_total_damages[
  order(evtype_total_damages$totalDmg, decreasing = TRUE),
]
print(damages_sorted[1:10, ])
##                EVTYPE totalPropDmg totalCropDmg     totalDmg
## 146             FLOOD 144657709807   5661968450 150319678257
## 364 HURRICANE/TYPHOON  69305840000   2607872800  71913712800
## 750           TORNADO  56925660790    414953270  57340614060
## 591       STORM SURGE  43323536000         5000  43323541000
## 204              HAIL  15727367053   3025537890  18752904943
## 130       FLASH FLOOD  16140862067   1421317100  17562179167
## 76            DROUGHT   1046106000  13972566000  15018672000
## 355         HURRICANE  11868319010   2741910000  14610229010
## 521       RIVER FLOOD   5118945500   5029459000  10148404500
## 379         ICE STORM   3944927860   5022113500   8967041360
library(ggplot2)

# Bar chart of the ten event types with the largest combined damage, bars
# ordered from largest to smallest.
g2 <- ggplot(
  damages_sorted[1:10, ],
  aes(x = reorder(EVTYPE, -totalDmg), y = totalDmg)
) +
  geom_bar(fill = "red4", stat = "identity") +
  ggtitle("Top 10 Events with Highest Damages") +
  labs(x = "Event Type", y = "Total Damages") +
  # Rotate the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

print(g2)