library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(xtable)
#download.file(url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", destfile = "/Users/eldadshulman/Cursera/reproducibleReaserch/StormData.csv")

Summary

Here I present the processing and analysis of the storm data from the National Oceanic and Atmospheric Administration (NOAA) for assignment 2 on Coursera. I answer two questions: (1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? (2) Across the United States, which types of events have the greatest economic consequences?

PROCESSING

Load the data

# Load the raw storm data set from its local CSV copy.
StormData <- read.csv(file = "D:/cursera/reproducibleReaserch/assinment2/StormData.csv")

The problem is that property damage and crop damage are each given in two columns: one column holds the value and the other holds its exponent. The following code turns each exponent key into its corresponding number (e.g. H becomes 100) and then creates the columns used to compute the absolute amounts of crop damage and property damage.

# Exponent codes that accompany the property-damage values.
e <- StormData[["PROPDMGEXP"]]







#' Translate damage-exponent codes into numeric multipliers.
#'
#' Vectorized: accepts a single code or a whole column of codes.
#'
#' @param e Exponent code(s): a character vector or factor of any length.
#' @return A numeric vector the same length as `e`:
#'   "h"/"H" -> 100, "k"/"K" -> 1000, "m"/"M" -> 1e6, "b"/"B" -> 1e9,
#'   "?" or "-" -> 0, the digits "0" to "8" -> 10, and everything else
#'   (including "+", blank and NA) -> 1.
exponent_create <- function(e) {
  code <- as.character(e)

  # Start from the fallback multiplier; recognised codes overwrite it below.
  multiplier <- rep(1, length(code))

  # `%in%` never returns NA, so missing codes simply keep the fallback
  # instead of raising "missing value where TRUE/FALSE needed".
  multiplier[code %in% c("h", "H")] <- 100
  multiplier[code %in% c("k", "K")] <- 1000
  multiplier[code %in% c("m", "M")] <- 1000000
  multiplier[code %in% c("b", "B")] <- 1000000000
  multiplier[code %in% c("?", "-")] <- 0
  multiplier[code %in% as.character(0:8)] <- 10

  multiplier
}


# Property-damage multiplier for every record, derived one exponent code at a
# time; vapply() guarantees exactly one numeric value per code.
d <- vapply(
  as.character(e),
  exponent_create,
  numeric(1),
  USE.NAMES = FALSE
)





# Exponent codes that accompany the crop-damage values.
p <- StormData$CROPDMGEXP

# Crop-damage multiplier for every record. The result is sized from `p`
# itself, so this step does not depend on the length of any other vector.
q <- vapply(
  as.character(p),
  exponent_create,
  numeric(1),
  USE.NAMES = FALSE
)










# Append the property (`d`) and crop (`q`) multipliers as new columns.
newData <- cbind(StormData, d = d, q = q)

To assess total economic damage I computed the absolute crop and property damage for each event. The final line groups the data by event type for the following analysis.

# Scale the reported damage values by their multipliers to get absolute
# amounts, then group by event type for the per-type summaries that follow.
newData <- newData %>%
  dplyr::mutate(totalProp = d * PROPDMG, totalCrop = q * CROPDMG) %>%
  dplyr::group_by(EVTYPE)

Now I create a data frame with the average and sum of crop damage, property damage, total damage, fatalities and injuries per event type.

# One row per event type with the mean and total of property damage, crop
# damage, injuries and fatalities. Column order matters: later code selects
# columns of `byEvent` by position.
byEvent <- dplyr::summarise(
  newData,
  # The mean must be taken before `totalProp` is redefined on the next line.
  meanProp = mean(totalProp),
  totalProp = sum(totalProp),
  totalcrop = sum(totalCrop),
  meancrop = mean(totalCrop),
  # Duplicate of `totalProp` (same value); kept so column positions stay put.
  totalprop = sum(totalProp),
  MeanINJURIES = mean(INJURIES),
  TotalINJURIES = sum(INJURIES),
  meanFATALITIES = mean(FATALITIES),
  totalFATALITIES = sum(FATALITIES)
)

Results

Types of events most harmful with respect to population health

I answer this by looking at total fatalities. The code also allows looking at mean fatalities and mean/total injuries, but I believe that total fatalities is the most important parameter.

# Rank event types by total fatalities and keep the first ten rows.
totalFatal <- dplyr::arrange(byEvent, dplyr::desc(totalFATALITIES))[seq_len(10), ]

# Columns 1 and 7-10 are the event type plus the injury and fatality summaries.
xt <- xtable(totalFatal[, c(1, 7:10)])
print(xt, type = "html")
EVTYPE MeanINJURIES TotalINJURIES meanFATALITIES totalFATALITIES
1 TORNADO 1.51 91346.00 0.09 5633.00
2 EXCESSIVE HEAT 3.89 6525.00 1.13 1903.00
3 FLASH FLOOD 0.03 1777.00 0.02 978.00
4 HEAT 2.74 2100.00 1.22 937.00
5 LIGHTNING 0.33 5230.00 0.05 816.00
6 TSTM WIND 0.03 6957.00 0.00 504.00
7 FLOOD 0.27 6789.00 0.02 470.00
8 RIP CURRENT 0.49 232.00 0.78 368.00
9 HIGH WIND 0.06 1137.00 0.01 248.00
10 AVALANCHE 0.44 170.00 0.58 224.00
# Bar chart of total fatalities for the ten event types kept in `totalFatal`.
ggplot(data = totalFatal, mapping = aes(x = EVTYPE, y = totalFATALITIES)) +
  geom_bar(stat = "identity", colour = "green") +
  labs(y = "Total Fatalities", title = "Total Fatalies by event type")

So tornadoes are the most harmful event type.

Types of events with the greatest economic consequences

# Rank event types by total property damage, largest first.
# NOTE(review): crop damage (`totalcrop`) is not added in here, so the
# ranking reflects property damage only - confirm that this is intended.
totaleconomic <- dplyr::arrange(byEvent, desc(totalProp))

# Plot the ten costliest event types. The bars are drawn from the same
# `totaleconomic` rows as the table below, so the figure and the table
# describe the same events.
ggplot(data = totaleconomic[1:10, ], aes(EVTYPE, totalProp)) +
  geom_bar(stat = "identity", col = "green") +
  ylab("Total economic damege") +
  ggtitle("Total economic damege by event type")

# Columns 1-5: event type plus the property- and crop-damage summaries.
xt <- xtable(totaleconomic[1:10, 1:5])

print(xt, type = "html")
EVTYPE meanProp totalProp totalcrop meancrop
1 FLOOD 5711826.18 144657709807.00 5661968450.00 223563.47
2 HURRICANE/TYPHOON 787566363.64 69305840000.00 2607872800.00 29634918.18
3 TORNADO 938751.61 56937162900.00 414954710.00 6841.57
4 STORM SURGE 165990559.39 43323536000.00 5000.00 19.16
5 FLASH FLOOD 297378.54 16140815218.00 1421317100.00 26186.36
6 HAIL 54500.85 15732269934.00 3025954653.00 10482.73
7 HURRICANE 68208729.94 11868319010.00 2741910000.00 15758103.45
8 TROPICAL STORM 11165058.77 7703890550.00 678346000.00 983110.14
9 WINTER STORM 585016.82 6688497260.00 26944000.00 2356.69
10 HIGH WIND 260738.49 5270046280.00 638571300.00 31593.67

So floods cause the greatest economic damage.