The data for this assignment is retrieved from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
Here’s the [Storm Data](https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2) [47Mb].
Here’s some [Storm Data Documentation](https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf) from the National Weather Service.
Download the bz2 file from the above URL and load it,
# Fetch the compressed storm database once and cache it in the working
# directory; later runs reuse the local copy.
stormDataFile <- "repdata-data-StormData.csv.bz2"
if (!file.exists(stormDataFile)) {
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = stormDataFile,
    mode = "wb"  # binary transfer: text mode corrupts the .bz2 on Windows
  )
}

# read.csv() decompresses .bz2 files transparently. Empty fields become NA
# and text columns stay character rather than being converted to factors.
stormData <- read.csv(stormDataFile,
                      header = TRUE,
                      sep = ",",
                      na.strings = "",
                      stringsAsFactors = FALSE)
This is how the data appears,
dim(stormData)
## [1] 902297 37
str(stormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr NA NA NA NA ...
## $ BGN_LOCATI: chr NA NA NA NA ...
## $ END_DATE : chr NA NA NA NA ...
## $ END_TIME : chr NA NA NA NA ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr NA NA NA NA ...
## $ END_LOCATI: chr NA NA NA NA ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr NA NA NA NA ...
## $ WFO : chr NA NA NA NA ...
## $ STATEOFFIC: chr NA NA NA NA ...
## $ ZONENAMES : chr NA NA NA NA ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr NA NA NA NA ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
We shall solve this question in 3 ways,
Include dplyr package. Summarize {fatalities} and {injuries} by {EVENT TYPES},
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total fatalities and injuries for every raw event type; missing counts are
# ignored when summing.
stormCasualties <- summarize(
  group_by(stormData, EVTYPE),
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE)
)
Consider only those event types that have both {fatalities} and {injuries}. Event types with {fatalities} but zero {injuries}, or with {injuries} but zero {fatalities}, are considered to make up a small proportion of the casualties and are left out of this analysis.
# Keep only event types that caused at least one fatality and one injury.
tidyStormCasualties <- filter(stormCasualties, fatalities > 0, injuries > 0)
View Casualties Data,
dim(tidyStormCasualties)
## [1] 106 3
Notice that there are a lot of anomalies in the {EVENT TYPE} variable. The NOAA database officially defines only 48 event types. Let’s fix the {EVENT TYPE} variable by writing a function, fixEvtype. We will be calling this function multiple times.
# Normalise the EVTYPE column of `x` by rewriting known variant spellings.
#
# x: a data frame (or list) with an EVTYPE column.
# Returns `x` with EVTYPE rewritten as a character vector; every other
# column is left untouched.
#
# Each rule is an anchored regular expression (whole-value match, case
# sensitive) paired with its replacement. Rules are applied in the order
# listed.
fixEvtype <- function(x) {
  rules <- c(
    "^COLD$" = "COLD/WIND CHILL",
    "^EXTREME COLD$" = "EXTREME COLD/WIND CHILL",
    "^EXTREME HEAT$" = "EXCESSIVE HEAT",
    "^EXTREME WINDCHILL$" = "EXTREME COLD/WIND CHILL",
    "^FLASH FLOODING$" = "FLASH FLOOD",
    "^FLOOD/FLASH FLOOD$" = "FLOOD",
    "^FLOODING$" = "FLOOD",
    "^FREEZING DRIZZLE$" = "FROST/FREEZE",
    "^FREEZING RAIN$" = "FROST/FREEZE",
    "^FROST$" = "FROST/FREEZE",
    "^GUSTY WIND$" = "HIGH WIND",
    "^GUSTY WINDS$" = "HIGH WIND",
    "^HEAT WAVE$" = "HEAT",
    "^HEAVY SURF$" = "HIGH SURF",
    "^HEAVY SURF/HIGH SURF$" = "HIGH SURF",
    "^HIGH WIND AND SEAS$" = "MARINE HIGH WIND",
    "^HIGH WINDS$" = "HIGH WIND",
    "^HURRICANE$" = "HURRICANE/TYPHOON",
    "^HURRICANE ERIN$" = "HURRICANE/TYPHOON",
    "^HURRICANE OPAL$" = "HURRICANE/TYPHOON",
    "^High Surf$" = "HIGH SURF",
    "^ICE$" = "ICE STORM",
    "^LANDSLIDES$" = "LANDSLIDE",
    "^MARINE TSTM WIND$" = "MARINE THUNDERSTORM WIND",
    "^Marine Accident$" = "MARINE ACCIDENT",
    "^Mudslide$" = "MUDSLIDE",
    "^RECORD HEAT$" = "EXCESSIVE HEAT",
    "^RIP CURRENTS$" = "RIP CURRENT",
    "^ROUGH SURF$" = "HIGH SURF",
    "^STORM SURGE$" = "STORM SURGE/TIDE",
    "^STRONG WINDS$" = "STRONG WIND",
    "^THUNDERSTORM$" = "THUNDERSTORM WIND",
    "^THUNDERSTORM WINDS$" = "THUNDERSTORM WIND",
    "^TROPICAL STORM GORDON$" = "TROPICAL STORM",
    "^TSTM WIND$" = "THUNDERSTORM WIND",
    "^TSTM WIND/HAIL$" = "THUNDERSTORM WIND",
    "^UNSEASONABLY WARM$" = "EXCESSIVE HEAT",
    "^WATERSPOUT/TORNADO$" = "WATERSPOUT",
    "^WILD FIRES$" = "WILDFIRE",
    "^WILD/FOREST FIRE$" = "WILDFIRE",
    "^WIND$" = "STRONG WIND",
    "^WINDS$" = "STRONG WIND",
    "^WINTER STORM HIGH WINDS$" = "WINTER STORM",
    "^WINTER STORMS$" = "WINTER STORM",
    "^WINTER WEATHER/MIX$" = "WINTER WEATHER",
    "^WINTRY MIX$" = "WINTER WEATHER",
    "^blowing snow$" = "BLOWING SNOW",
    "^COASTAL FLOODING$" = "COASTAL FLOOD",
    "^DUST STORM/HIGH WINDS$" = "DUST STORM",
    "^FLASH FLOOD/FLOOD$" = "FLASH FLOOD",
    "^FLASH FLOODING/FLOOD$" = "FLASH FLOOD",
    "^Frost/Freeze$" = "FROST/FREEZE",
    "^HAIL 100$" = "HAIL",
    "^HAIL/WIND$" = "HAIL",
    "^HAIL/WINDS$" = "HAIL",
    "^HEAVY RAINS$" = "HEAVY RAIN",
    "^HEAVY RAINS/FLOODING$" = "HEAVY RAIN",
    "^HEAVY SNOW/HIGH WINDS & FLOOD$" = "HEAVY SNOW",
    "^HIGH WINDS HEAVY RAINS$" = "HIGH WIND",
    "^HIGH WINDS/COLD$" = "HIGH WIND",
    "^HURRICANE FELIX$" = "HURRICANE/TYPHOON",
    "^HURRICANE OPAL/HIGH WINDS$" = "HURRICANE/TYPHOON",
    "^Heavy Rain/High Surf$" = "HEAVY RAIN",
    "^RIVER FLOODING$" = "RIVER FLOOD",
    "^River Flooding$" = "RIVER FLOOD",
    "^SEVERE THUNDERSTORM WINDS$" = "THUNDERSTORM WIND",
    "^SEVERE THUNDERSTORMS$" = "THUNDERSTORM WIND",
    "^SMALL HAIL$" = "HAIL",
    "^THUNDERSTORM WINDS HAIL$" = "THUNDERSTORM WIND",
    "^THUNDERSTORM WINDS LIGHTNING$" = "THUNDERSTORM WIND",
    "^THUNDERSTORM WINDS/ FLOOD$" = "THUNDERSTORM WIND",
    "^THUNDERSTORM WINDS/HAIL$" = "THUNDERSTORM WIND",
    "^THUNDERSTORM WINDSS$" = "THUNDERSTORM WIND",
    "^THUNDERSTORMS$" = "THUNDERSTORM WIND",
    "^THUNDERSTORMS WIND$" = "THUNDERSTORM WIND",
    "^THUNDERSTORMS WINDS$" = "THUNDERSTORM WIND",
    "^TORNADO F0$" = "TORNADO",
    "^TORNADOES, TSTM WIND, HAIL$" = "TORNADO",
    "^TROPICAL STORM DEAN$" = "TROPICAL STORM",
    "^TROPICAL STORM JERRY$" = "TROPICAL STORM",
    "^URBAN FLOOD$" = "FLOOD",
    "^URBAN FLOODING$" = "FLOOD",
    # NOTE(review): this rule only matches the bare value "URBAN/SML" and
    # rewrites it to "STREAM FLD"; possibly meant to map
    # "URBAN/SML STREAM FLD" to a flood type. Confirm against the data.
    "^URBAN/SML$" = "STREAM FLD",
    "^WILD/FOREST FIRES$" = "WILDFIRE",
    "^WILDFIRES$" = "WILDFIRE"
  )

  eventNames <- x$EVTYPE
  for (k in seq_along(rules)) {
    eventNames <- sub(names(rules)[k], rules[[k]], eventNames)
  }
  x$EVTYPE <- eventNames
  x
}
Now let’s apply this function,
# Normalise the event names, then re-aggregate so that rows which now share
# an event type are merged into one.
tidyStormCasualties2 <- fixEvtype(tidyStormCasualties)
tidyStormCasualties3 <- summarize(
  group_by(tidyStormCasualties2, EVTYPE),
  fatalities = sum(fatalities, na.rm = TRUE),
  injuries = sum(injuries, na.rm = TRUE)
)
View Casualties Data again,
dim(tidyStormCasualties3)
## [1] 62 3
summary(tidyStormCasualties3$fatalities)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.25 16.50 241.00 129.20 5633.00
Now, let’s find out those {EVENTS} where {fatalities} are Greater Than Mean,
# Event types whose fatality total exceeds the mean across all event types.
# EVTYPE becomes a factor whose levels are only the events that remain.
topTidyStormCasualties <- tidyStormCasualties3 %>%
  filter(fatalities > mean(fatalities)) %>%
  mutate(EVTYPE = factor(EVTYPE))
Let’s shift our focus to {injuries} now. The fix for {EVENT TYPE} has already been applied.
View Injuries Data,
dim(tidyStormCasualties3)
## [1] 62 3
summary(tidyStormCasualties3$injuries)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.25 56.50 2261.00 687.20 91350.00
Get those {EVENTS} where {injuries} are Greater Than Mean,
# Event types whose injury total exceeds the mean across all event types.
# EVTYPE becomes a factor whose levels are only the events that remain.
topTidyStormInjuries <- tidyStormCasualties3 %>%
  filter(injuries > mean(injuries)) %>%
  mutate(EVTYPE = factor(EVTYPE))
Let’s find out the Top 5 Worst Hit States first.
# Fatalities, injuries and their sum (casualties) for every state.
stateCasualties <- summarize(
  group_by(stormData, STATE),
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE),
  casualties = fatalities + injuries
)
# Lower-case the state column name; the other three names are unchanged.
colnames(stateCasualties) <- c("state", "fatalities", "injuries", "casualties")

# The five states with the highest casualty totals, worst first.
top5stateCasualties <- stateCasualties %>%
  arrange(desc(casualties)) %>%
  head(5)
View Top 5 Worst Hit States,
top5stateCasualties[,1]
## Source: local data frame [5 x 1]
##
## state
## 1 TX
## 2 MO
## 3 AL
## 4 OH
## 5 MS
Get the Storm Data only for these 5 States now. Summarize it by {STATE} and {EVTYPE},
# Restrict the raw observations to the five worst-hit states.
inTop5State <- stormData$STATE %in% top5stateCasualties$state
top5stateStormData <- stormData[inTop5State, ]

# Casualty totals for every (state, event type) combination in those states.
top5stateStormCasualties <- summarize(
  group_by(top5stateStormData, STATE, EVTYPE),
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE),
  casualties = fatalities + injuries
)
Consider only those Casualties where there are both {fatalities} and {injuries},
# Keep only state/event combinations with at least one fatality and one injury.
top5stateTidyStormCasualties <- filter(top5stateStormCasualties,
                                       fatalities > 0, injuries > 0)
View State Casualties Data,
dim(top5stateTidyStormCasualties)
## [1] 80 5
Apply the {EVENT TYPE} fix now,
# Normalise the event names, then re-aggregate per state so that rows which
# now share an event type are merged.
top5stateTidyStormCasualties2 <- fixEvtype(top5stateTidyStormCasualties)
top5stateTidyStormCasualties3 <- summarize(
  group_by(top5stateTidyStormCasualties2, STATE, EVTYPE),
  fatalities = sum(fatalities, na.rm = TRUE),
  injuries = sum(injuries, na.rm = TRUE),
  casualties = fatalities + injuries
)
View State Casualties Data again,
dim(top5stateTidyStormCasualties3)
## [1] 65 5
Let’s select only Top 5 EVENTS per State,
# Zero-row frame with the right columns, used as the first piece to bind.
# "ZZ" is presumably not a real state code in this data; verify if reused.
noRows <- filter(top5stateTidyStormCasualties3, STATE == "ZZ")

# For each of the five states, in ranking order, take its five event types
# with the most casualties.
topEventsByState <- lapply(top5stateCasualties$state, function(stateCode) {
  oneState <- filter(top5stateTidyStormCasualties3, STATE == stateCode)
  head(arrange(oneState, desc(casualties)), 5)
})

top5state5stormCasualtyData <- do.call(rbind, c(list(noRows), topEventsByState))
top5state5stormCasualtyData$EVTYPE <- factor(top5state5stormCasualtyData$EVTYPE)
We shall use BGN_DATE for our Date-Calculations. Let’s extract the Year from the BGN_DATE and build summary,
# Observations that record both at least one fatality and one injury.
bothCasualtyKinds <- with(stormData, which(FATALITIES > 0 & INJURIES > 0))
CasualtyDamageData <- stormData[bothCasualtyKinds, ]

# BGN_DATE looks like "4/18/1950 0:00:00"; parse the date part and keep the
# four-digit year as text.
CasualtyDamageData <- mutate(
  CasualtyDamageData,
  beginYear = format(as.Date(BGN_DATE, format = "%m/%d/%Y"), "%Y")
)

# Casualty totals per year.
yearlyCasualtyDmgSummary <- summarize(
  group_by(CasualtyDamageData, beginYear),
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE),
  casualties = fatalities + injuries
)
Now we turn our attention to second question,
We shall solve this question in 2 ways,
The economic data is stored in 4 variables, ie. {PROPDMG}, {PROPDMGEXP}, {CROPDMG}, {CROPDMGEXP}.
The numerical values are stored in {PROPDMG} and {CROPDMG}. The units are stored in {PROPDMGEXP} and {CROPDMGEXP}.
Let’s see how the units are distributed,
table(stormData$PROPDMGEXP)
##
## - ? + 0 1 2 3 4 5 6
## 1 8 5 216 25 13 4 4 28 4
## 7 8 B h H K m M
## 5 1 40 1 6 424665 7 11330
table(stormData$CROPDMGEXP)
##
## ? 0 2 B k K m M
## 7 19 1 9 21 281832 1 1994
For the sake of this analysis, we shall take only those observations where both {PROPDMG} and {CROPDMG} are greater than zero and both of their units, {PROPDMGEXP} and {CROPDMGEXP}, are present.
# Observations with a positive property AND crop damage figure, each with a
# recorded (non-missing) unit code.
hasBothDamages <- with(
  stormData,
  which(PROPDMG > 0 & !is.na(PROPDMGEXP) & CROPDMG > 0 & !is.na(CROPDMGEXP))
)
stormDamage <- stormData[hasBothDamages, ]
Here are the units one more time,
table(stormDamage$PROPDMGEXP)
##
## 0 3 5 B K m M
## 4 1 2 15 14848 1 1363
table(stormDamage$CROPDMGEXP)
##
## 0 B k K m M
## 11 3 21 14906 1 1292
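Let’s define 2 new variables, i.e. {propDmgMultiplier} and {cropDmgMultiplier}, to hold the numerical values of the units.
The only valid units we are considering are H (hundred), K (thousand), M (million) and B (billion); every other unit is given a multiplier of 0: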
# Translate damage unit codes into numeric multipliers.
#
# expCode: vector of unit codes (PROPDMGEXP / CROPDMGEXP values).
# Returns a numeric vector of the same length: 100 for H, 1e3 for K, 1e6 for
# M, 1e9 for B (all matched case-insensitively) and 0 for anything else,
# including NA and the numeric or symbol codes.
damageMultiplier <- function(expCode) {
  unitValue <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # toupper() also coerces factors to character, so the lookup below is
  # always by name, never by factor code.
  multiplier <- unname(unitValue[toupper(expCode)])
  multiplier[is.na(multiplier)] <- 0
  multiplier
}

# Vectorised over whole columns: assigning row by row copies the data frame
# on every iteration and is very slow for thousands of rows.
stormDamage$propDmgMultiplier <- damageMultiplier(stormDamage$PROPDMGEXP)
stormDamage$cropDmgMultiplier <- damageMultiplier(stormDamage$CROPDMGEXP)
Now, it’s time to summarize Property and Crop damage by EVTYPE,
# Dollar totals per raw event type: each damage figure is scaled by its unit
# multiplier before summing.
propDmgSummary <- summarize(
  group_by(stormDamage, EVTYPE),
  propertyDamage = sum(PROPDMG * propDmgMultiplier),
  cropDamage = sum(CROPDMG * cropDmgMultiplier)
)
dim(propDmgSummary)
## [1] 96 3
Let’s fix EVENT TYPES now,
# Normalise the event names, then re-aggregate so that rows which now share
# an event type are merged.
propDmgSummary2 <- fixEvtype(propDmgSummary)
propDmgSummary3 <- summarize(
  group_by(propDmgSummary2, EVTYPE),
  propertyDamage = sum(propertyDamage),
  cropDamage = sum(cropDamage)
)
dim(propDmgSummary3)
## [1] 42 3
Get the Top 10 “PropertyDamage” Observations,
# The ten event types with the largest property damage, largest first.
# EVTYPE becomes a factor whose levels are only those ten events.
topPropDmgSummary <- propDmgSummary3 %>%
  arrange(desc(propertyDamage)) %>%
  head(10) %>%
  mutate(EVTYPE = factor(EVTYPE))
Get all “CropDamage” Observations, which are Greater Than MEAN,
# Event types whose crop damage exceeds the mean across all event types.
# EVTYPE becomes a factor whose levels are only the events that remain.
topCropDmgSummary <- propDmgSummary3 %>%
  filter(cropDamage > mean(cropDamage)) %>%
  mutate(EVTYPE = factor(EVTYPE))
We shall use BGN_DATE for our Date-Calculations. Let’s extract the Year from the BGN_DATE and build summary,
# BGN_DATE looks like "4/18/1950 0:00:00"; parse the date part and keep the
# four-digit year as text.
stormDamage2 <- mutate(
  stormDamage,
  beginYear = format(as.Date(BGN_DATE, format = "%m/%d/%Y"), "%Y")
)

# Property and crop damage totals per year, scaled by their unit multipliers.
yearlyPropDmgSummary <- summarize(
  group_by(stormDamage2, beginYear),
  propertyDamage = sum(PROPDMG * propDmgMultiplier),
  cropDamage = sum(CROPDMG * cropDmgMultiplier)
)
We shall answer this question in 3 ways,
Let’s review the summarized {fatalities} data,
select(arrange(topTidyStormCasualties, desc(fatalities)), EVTYPE, fatalities)
## Source: local data frame [10 x 2]
##
## EVTYPE fatalities
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 2012
## 3 HEAT 1109
## 4 FLASH FLOOD 997
## 5 LIGHTNING 816
## 6 THUNDERSTORM WIND 707
## 7 RIP CURRENT 572
## 8 FLOOD 493
## 9 EXTREME COLD/WIND CHILL 302
## 10 HIGH WIND 288
Let’s review the summarized {injuries} data,
select(arrange(topTidyStormInjuries, desc(injuries)), EVTYPE, injuries)
## Source: local data frame [6 x 2]
##
## EVTYPE injuries
## 1 TORNADO 91346
## 2 THUNDERSTORM WIND 9460
## 3 FLOOD 6806
## 4 EXCESSIVE HEAT 6747
## 5 LIGHTNING 5230
## 6 HEAT 2409
Plot the chart of Top Fatalities By Event Versus Top Injuries By Event,
# Two panels side by side, with room in the outer top margin for the figure
# caption and a tall bottom margin for the rotated event-type labels.
par(mfrow = c(1, 2), oma = c(0, 0, 2, 0))
par(mar = c(13.1, 4.1, 4.1, 2.1), mgp = c(3, 0.5, 0), cex = 0.75)

# Left panel: fatalities per top event type.
fatalEvents <- topTidyStormCasualties$EVTYPE
fatalCounts <- topTidyStormCasualties$fatalities
plot(fatalEvents, fatalCounts,
     xlab = "",
     ylab = "Number of Fatalities",
     type = "n",
     main = "Top Fatalities By Event",
     font.main = 2, cex.main = 1.5, las = 2)
points(fatalEvents, fatalCounts, pch = 19, col = "black")
lines(fatalEvents, fatalCounts, lwd = 2, col = "green")
mtext("Figure 1", outer = TRUE)  # caption in the outer margin, above both panels

# Right panel: injuries per top event type.
injuryEvents <- topTidyStormInjuries$EVTYPE
injuryCounts <- topTidyStormInjuries$injuries
plot(injuryEvents, injuryCounts,
     xlab = "",
     ylab = "Number of Injuries",
     type = "n",
     main = "Top Injuries By Event",
     font.main = 2, cex.main = 1.5, las = 2)
points(injuryEvents, injuryCounts, pch = 19, col = "black")
lines(injuryEvents, injuryCounts, lwd = 2, col = "blue")
Let’s review the Worst Hit Top 5 STATES, along with their Top 5 EVENTS.
{casualties} are calculated as {fatalities} + {injuries},
options(dplyr.print_max = 25)
top5state5stormCasualtyData
## Source: local data frame [25 x 5]
## Groups: STATE
##
## STATE EVTYPE fatalities injuries casualties
## 1 TX TORNADO 538 8207 8745
## 2 TX FLOOD 49 6338 6387
## 3 TX HEAT 29 774 803
## 4 TX FLASH FLOOD 177 587 764
## 5 TX THUNDERSTORM WIND 48 706 754
## 6 MO TORNADO 388 4330 4718
## 7 MO EXCESSIVE HEAT 190 3525 3715
## 8 MO HEAT 39 660 699
## 9 MO THUNDERSTORM WIND 14 320 334
## 10 MO FLASH FLOOD 72 36 108
## 11 AL TORNADO 617 7929 8546
## 12 AL THUNDERSTORM WIND 40 431 471
## 13 AL LIGHTNING 28 154 182
## 14 AL DENSE FOG 1 71 72
## 15 AL HEAT 9 50 59
## 16 OH TORNADO 191 4438 4629
## 17 OH ICE STORM 2 1652 1654
## 18 OH THUNDERSTORM WIND 46 310 356
## 19 OH LIGHTNING 32 163 195
## 20 OH HEAT 18 75 93
## 21 MS TORNADO 450 6244 6694
## 22 MS THUNDERSTORM WIND 21 246 267
## 23 MS HURRICANE/TYPHOON 16 104 120
## 24 MS LIGHTNING 16 39 55
## 25 MS EXCESSIVE HEAT 24 5 29
And here’s their chart,
library(ggplot2)
# Figure 2: one bar panel per state (stacked vertically), showing casualties
# for each of that state's top event types. Bars are coloured by state; the
# fill legend is dropped because the facet strips already name the state.
g <- ggplot(top5state5stormCasualtyData,
            aes(EVTYPE, casualties, fill = STATE)) +
  # "none" replaces the deprecated guides(fill = FALSE) form.
  guides(fill = "none") +
  geom_bar(stat = "identity") +
  facet_grid(STATE ~ .) +
  labs(x = "", y = "Number of Casualties") +
  labs(title = "Figure 2 : Top 5 Casualty Events By State") +
  theme(axis.text.x = element_text(angle = 90),
        plot.title = element_text(face = "bold", size = 20),
        strip.text.y = element_text(face = "bold", size = 12),
        strip.background = element_rect(fill = "orange"))
print(g)
Extract the Top 5 Years for Worst Casualties,
head(select(arrange(yearlyCasualtyDmgSummary, desc(casualties)), beginYear, casualties),5)
## Source: local data frame [5 x 2]
##
## beginYear casualties
## 1 2011 6573
## 2 1974 5622
## 3 1953 5196
## 4 1998 4871
## 5 1965 4413
We shall answer this question in 2 ways,
Let’s review the summarized {Property Damage} data,
select(arrange(topPropDmgSummary, desc(propertyDamage)), EVTYPE, propertyDamage)
## Source: local data frame [10 x 2]
##
## EVTYPE propertyDamage
## 1 FLOOD 122101960050
## 2 HURRICANE/TYPHOON 37074073000
## 3 RIVER FLOOD 5185675000
## 4 TORNADO 3587241200
## 5 FLASH FLOOD 3260763010
## 6 HAIL 1960933940
## 7 THUNDERSTORM WIND 1771077900
## 8 HIGH WIND 1512136200
## 9 WILDFIRE 1474543000
## 10 TROPICAL STORM 868880400
Let’s review the summarized {Crop Damage} data,
select(arrange(topCropDmgSummary, desc(cropDamage)), EVTYPE, cropDamage)
## Source: local data frame [8 x 2]
##
## EVTYPE cropDamage
## 1 HURRICANE/TYPHOON 5332242800
## 2 RIVER FLOOD 5056759000
## 3 ICE STORM 5022113500
## 4 FLOOD 4178622950
## 5 HAIL 1879124150
## 6 FLASH FLOOD 1393066400
## 7 DROUGHT 1231755000
## 8 THUNDERSTORM WIND 1087681350
Plot the chart of Top Property Damage By Event Versus Top Crop Damage By Event,
# Two panels side by side, with room in the outer top margin for the figure
# caption and a tall bottom margin for the rotated event-type labels.
par(mfrow = c(1, 2), oma = c(0, 0, 2, 0))
par(mar = c(13.1, 4.1, 4.1, 2.1), mgp = c(3, 0.5, 0), cex = 0.75)

# Left panel: property damage per top event type, in billions of dollars.
with(topPropDmgSummary, {
  plot(EVTYPE, propertyDamage / 1000000000, xlab = "",
       ylab = "Property Damage in Billion Dollars",
       type = "n",
       main = "Top Property Damage By Event",
       font.main = 2, cex.main = 1.5, las = 2)
  points(EVTYPE, propertyDamage / 1000000000, pch = 19, col = "red")
  mtext("Figure 3", outer = TRUE)  # caption in the outer margin, above both panels
})

# Right panel: crop damage per top event type, in billions of dollars.
with(topCropDmgSummary, {
  plot(EVTYPE, cropDamage / 1000000000, xlab = "",
       ylab = "Crop Damage in Billion Dollars",
       type = "n",
       main = "Top Crop Damage By Event",
       font.main = 2, cex.main = 1.5, las = 2)
  points(EVTYPE, cropDamage / 1000000000, pch = 19, col = "orange")
})
Extract the Top 5 Years for Property & Crop Damage,
head(select(arrange(yearlyPropDmgSummary, desc(propertyDamage)), beginYear, propertyDamage),5)
## Source: local data frame [5 x 2]
##
## beginYear propertyDamage
## 1 2006 115993852200
## 2 2004 19591293950
## 3 2005 8250911600
## 4 1993 7604110650
## 5 1999 4605981300
head(select(arrange(yearlyPropDmgSummary, desc(cropDamage)), beginYear, cropDamage),5)
## Source: local data frame [5 x 2]
##
## beginYear cropDamage
## 1 1994 5590603400
## 2 1993 5421179800
## 3 2005 2596764900
## 4 2008 2010060000
## 5 1999 1930171400
Based on this analysis, we draw the following conclusions,
This completes our analysis.
Thank you for your participation.