The following analysis is has two goals. 1. Find across the United States, which types of events are most harmful with respsect to health. 2. Find across the United States, which types of events have the greatest economic consequences.
Required Packages:
library(knitr)
Now firstly we need to load the data:
stormData <- read.csv("repdata-data-StormData.csv")
Determine the top ten causes of fatalaties and injuries across America.
usFatalitiesMean <- with(stormData, tapply(FATALITIES,EVTYPE,mean))
usFatalitiesTopTenPerEvent <- sort(usFatalitiesMean, decreasing = TRUE)[1:10]
usInjuriesMean <- with(stormData, tapply(INJURIES,EVTYPE,mean))
usInjuriesTopTenPerEvent <- sort(usInjuriesMean, decreasing = TRUE)[1:10]
usFatalitiesTotal <- with(stormData, tapply(FATALITIES,EVTYPE,sum))
usFatalitiesTopTenTotal <- sort(usFatalitiesTotal, decreasing = TRUE)[1:10]
usInjuriesTotal <- with(stormData, tapply(INJURIES,EVTYPE,sum))
usInjuriesTopTenTotal <- sort(usInjuriesTotal, decreasing = TRUE)[1:10]
Now to find the greatest single event that caused the most fatalities/injuries.
greatestFatality <- stormData[which.max(stormData$FATALITIES),]
greatestFatalityDate <- as.character(greatestFatality$BGN_DATE)
greatestInjuries <- stormData[which.max(stormData$INJURIES),]
greatestInjuriesDate <- as.character(greatestInjuries$BGN_DATE)
Analysis from above claims that Tornados do the most damage to health. Let’s see what states they occur in.
usTornadoesByState <- with(stormData[stormData$EVTYPE == "TORNADO",],
tapply(FATALITIES,STATE,sum))
usTornadoesByStateTopTen <- sort(usTornadoesByState, decreasing = TRUE)[1:10]
For Economic Evaluation need to convert costs of each event to like terms.
moneyConverter <- function(xDMG,xEXP){
if (xEXP == "k" || xEXP == "K"){
xDMG = xDMG*1e3
} else if (xEXP == "m" || xEXP == "M"){
xDMG = xDMG*1e6
} else if (xEXP == "B") {
xDMG = xDMG*1e9
} else if (xEXP == "h" || xEXP == "H") {
xDMG = xDMG*1e2
}
if(is.numeric(xEXP)) {
xDMG = xDMG^xEXP
}
return (xDMG)
}
expensesByEvent <- with(stormData, tapply(
moneyConverter(PROPDMG,PROPDMGEXP)
+ moneyConverter(CROPDMG,CROPDMGEXP),EVTYPE,sum))
## Warning in if (xEXP == "B") {: the condition has length > 1 and only the
## first element will be used
expensesByEventTopTen <- sort(expensesByEvent, decreasing = TRUE)[1:10]
Million <- 1e6
expensesByEventTopTenInBillions <- expensesByEventTopTen/Million
kable(as.list(usFatalitiesTopTenPerEvent), col.names = names(usFatalitiesTopTenPerEvent),
format = "html", digits = 1, caption = "Events with the Highest Average
Number of Fatalities per event", align = 'c')
| TORNADOES, TSTM WIND, HAIL | COLD AND SNOW | TROPICAL STORM GORDON | RECORD/EXCESSIVE HEAT | EXTREME HEAT | HEAT WAVE DROUGHT | HIGH WIND/SEAS | MARINE MISHAP | WINTER STORMS | Heavy surf and wind |
|---|---|---|---|---|---|---|---|---|---|
| 25 | 14 | 8 | 5.7 | 4.4 | 4 | 4 | 3.5 | 3.3 | 3 |
kable(as.list(usInjuriesTopTenPerEvent),
col.names = names(usInjuriesTopTenPerEvent),
format = "html", digits = 1, caption = "Ten Highest Averages of US Injuries
per Event", align = 'c')
| Heat Wave | TROPICAL STORM GORDON | WILD FIRES | THUNDERSTORMW | HIGH WIND AND SEAS | SNOW/HIGH WINDS | GLAZE/ICE STORM | HEAT WAVE DROUGHT | WINTER STORM HIGH WINDS | HURRICANE/TYPHOON |
|---|---|---|---|---|---|---|---|---|---|
| 70 | 43 | 37.5 | 27 | 20 | 18 | 15 | 15 | 15 | 14.5 |
kable(as.list(usFatalitiesTopTenTotal),
col.names = names(usInjuriesTopTenTotal),
format = "html", digits = 1, caption = "Events with the Highest Total Number
of Fatalities", align = 'c')
| TORNADO | TSTM WIND | FLOOD | EXCESSIVE HEAT | LIGHTNING | HEAT | ICE STORM | FLASH FLOOD | THUNDERSTORM WIND | HAIL |
|---|---|---|---|---|---|---|---|---|---|
| 5633 | 1903 | 978 | 937 | 816 | 504 | 470 | 368 | 248 | 224 |
kable(as.list(usInjuriesTopTenTotal), col.names = names(usInjuriesTopTenTotal),
format = "html", digits = 1, caption = "Events with the Highest Total Number
of Injuries", align = 'c')
| TORNADO | TSTM WIND | FLOOD | EXCESSIVE HEAT | LIGHTNING | HEAT | ICE STORM | FLASH FLOOD | THUNDERSTORM WIND | HAIL |
|---|---|---|---|---|---|---|---|---|---|
| 91346 | 6957 | 6789 | 6525 | 5230 | 2100 | 1975 | 1777 | 1488 | 1361 |
sprintf("The Greatest Fatality occured in %s, which was a/an %s on %s
", greatestFatality$STATE, greatestFatality$EVTYPE
,strsplit(greatestFatalityDate," ")[[1]][1])
## [1] "The Greatest Fatality occured in IL, which was a/an HEAT on 7/12/1995\n "
sprintf("The Greatest Injuries occured in %s, which was a/an %s on %s
", greatestInjuries$STATE, greatestInjuries$EVTYPE
,strsplit(greatestInjuriesDate," ")[[1]][1])
## [1] "The Greatest Injuries occured in TX, which was a/an TORNADO on 4/10/1979\n "
barplot(usTornadoesByStateTopTen, main = "Total number of deaths by Tornadoes per State",
ylab = "Number of deaths", xlab = "State")
Economic Results:
kable(as.list(expensesByEventTopTenInBillions),
col.names = names(expensesByEventTopTenInBillions),
format = "html", digits = 2, caption = "Events with Highest Expenses (in millions)",
align = 'c')
| TORNADO | FLASH FLOOD | TSTM WIND | FLOOD | THUNDERSTORM WIND | HAIL | LIGHTNING | THUNDERSTORM WINDS | HIGH WIND | WINTER STORM |
|---|---|---|---|---|---|---|---|---|---|
| 3212.36 | 1420.3 | 1336.07 | 900.11 | 876.91 | 689.27 | 603.36 | 446.31 | 324.75 | 132.72 |