This analysis depends on the following CRAN packages: plyr, dplyr, reshape2, ggplot2 and knitr.
# attaching libraries
# One explicit library() call per package: a missing package fails loudly at
# the exact line, and nothing is printed as a side effect of the attach.
# The original order is preserved: plyr is attached before dplyr, so dplyr's
# versions of the verbs both packages export take precedence on the search path.
library(plyr)
library(dplyr)
library(reshape2)
library(ggplot2)
library(knitr)
Data file download
# making a subdirectory under the current working one.
if (!file.exists("RepResProj2")) {
  dir.create("RepResProj2")
}
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# The archive is bzip2-compressed; the ".zip" file name is kept from the
# original script so that an already downloaded copy is reused.
storm_data_file <- "./RepResProj2/StormData.zip"
# Download only when the file is not already present, so re-running the
# analysis does not fetch the archive again. mode = "wb" keeps the compressed
# bytes intact on every platform; the default download method is used instead
# of requiring an external curl binary.
if (!file.exists(storm_data_file)) {
  download.file(url, destfile = storm_data_file, mode = "wb")
}
# filling the NOAAWeatherDf dataframe with the content of the downloaded data file.
# stringsAsFactors = TRUE is required since R 4.0.0 (where the default became
# FALSE): the rest of the analysis calls levels() on EVTYPE and PROPDMGEXP and
# therefore needs these columns to be factors.
NOAAWeatherDf <- read.csv(storm_data_file, header = TRUE,
                          na.strings = "?", stringsAsFactors = TRUE)
# number of distinct weather event types recorded.
nlevels(NOAAWeatherDf$EVTYPE)
## [1] 984
# first and last entries of the event types once leading whitespace is
# stripped and the labels are sorted.
trimmed_event_types <- gsub("^\\s*", "", levels(NOAAWeatherDf$EVTYPE))
head(sort(trimmed_event_types), n = 1)
## [1] "ABNORMAL WARMTH"
head(sort(trimmed_event_types, decreasing = TRUE), n = 2)
## [1] "WND" "WINTRY MIX"
# time frame of the data collection
event_start_dates <- as.Date(NOAAWeatherDf$BGN_DATE, format = "%m/%d/%Y")
range(event_start_dates)
## [1] "1950-01-03" "2011-11-30"
# cumulative fatalities and injuries for each weather event type.
EventOnPopulationDf <- ddply(
  .data = NOAAWeatherDf,
  .variables = .(EVTYPE),
  .fun = summarize,
  Deaths = sum(FATALITIES),
  Injuries = sum(INJURIES)
)
# five-number summary of the two outcome columns (columns 2 and 3 of the
# summary table), rendered as a table.
population_summary <- summary(EventOnPopulationDf)
kable(population_summary[, 2:3])
Deaths | Injuries | |
---|---|---|
Min. : 0.00 | Min. : 0.0 | |
1st Qu.: 0.00 | 1st Qu.: 0.0 | |
Median : 0.00 | Median : 0.0 | |
Mean : 15.38 | Mean : 142.7 | |
3rd Qu.: 0.00 | 3rd Qu.: 0.0 | |
Max. :5633.00 | Max. :91346.0 | |
NA | NA |
# share of all recorded deaths that the 20 deadliest event types account for.
ranked_by_deaths <- arrange(EventOnPopulationDf, desc(Deaths))
top20_by_deaths <- head(ranked_by_deaths, n = 20)
sum(top20_by_deaths[, 2]) / sum(EventOnPopulationDf$Deaths)
## [1] 0.8922417
# event types (first column) of the 20 deadliest entries.
SevereEvents <- head(ranked_by_deaths[, 1], n = 20)
# restricting NOAAWeatherDf to the records of those top 20 event types
NOAAWeatherDf_subset1 <- filter(NOAAWeatherDf, EVTYPE %in% SevereEvents)
# keeping only the 3 attributes needed for the subsequent plot
EvtypePopHealthDf <- select(
  NOAAWeatherDf_subset1,
  EVTYPE, FATALITIES, INJURIES
)
# reshaping EvtypePopHealthDf into a long (tidy) data set: one row per
# record and outcome, with the outcome name in "Outcome" and its count in
# "Occurrences".
EvtypePopHealthDfMelted <- melt(
  data = EvtypePopHealthDf,
  variable.name = "Outcome",
  value.name = "Occurrences"
)
## Using EVTYPE as id variables
# overview of the damage-related columns: value ranges of the two amounts
# and the set of exponent codes used for property damage.
range(NOAAWeatherDf[["PROPDMG"]])
## [1] 0 5000
range(NOAAWeatherDf[["CROPDMG"]])
## [1] 0 990
levels(NOAAWeatherDf[["PROPDMGEXP"]])
## [1] "" "-" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K" "m"
## [18] "M"
# keeping only the records of NOAAWeatherDf that report some damage, i.e.
# where PROPDMG or CROPDMG is strictly positive.
NOAAWeatherDf_subset2 <- filter(NOAAWeatherDf, PROPDMG > 0 | CROPDMG > 0)
# creation of a vector that maps the character code of the PROPDMGEXP and
# CROPDMGEXP attributes to actual numeric multiplier
multiplier <- c("H" = 100, "K" = 1000, "M" = 1000000, "B" = 1000000000)
# Translates a vector of exponent codes into numeric multipliers.
#   codes: factor or character vector of exponent codes (case-insensitive).
#   returns: unnamed numeric vector of the same length.
# Codes that are not in `multiplier` (blank, digits, "+", "-", NA) used to
# index to NA, which made the damage of the record NA and, after summation,
# the total of its whole event type NA. They now fall back to a multiplier of
# 1, i.e. the recorded amount is taken at face value.
# NOTE(review): the meaning of the digit/"+"/"-" codes is not established by
# this script; 1 is a conservative assumption to be confirmed.
exponent_multiplier <- function(codes) {
  looked_up <- unname(multiplier[toupper(as.character(codes))])
  looked_up[is.na(looked_up)] <- 1
  looked_up
}
# Creation of 2 new variables featuring the actual numerical $ values of the PROP
# and CROP damages
# NOTE: result tables recorded further down in this document were rendered
# before unknown exponent codes were handled and may differ from a fresh run.
NOAAWeatherDf_subset2 <-
  mutate(NOAAWeatherDf_subset2,
         PropDamage = PROPDMG * exponent_multiplier(PROPDMGEXP),
         CropDamage = CROPDMG * exponent_multiplier(CROPDMGEXP))
# property damages per event.
# na.rm = TRUE: a single record with a missing PropDamage must not turn the
# total of its whole event type into NA (NA totals are sorted last by
# arrange() and would silently drop out of the top 20 below).
# NOTE: the table recorded further down in this document was rendered before
# missing values were ignored and may differ from a fresh run.
EventOnPropertiesDf <- ddply(NOAAWeatherDf_subset2, .(EVTYPE),
                             summarize, Cost = sum(PropDamage, na.rm = TRUE))
# selecting the top 20 for subsequent barchart
EventOnPropertiesDf_top20 <- head(arrange(EventOnPropertiesDf,
                                          desc(Cost)), n = 20)
# crop damages per event.
# na.rm = TRUE: a single record with a missing CropDamage must not turn the
# total of its whole event type into NA (NA totals are sorted last by
# arrange() and would silently drop out of the top 20 below).
# NOTE: the table recorded further down in this document was rendered before
# missing values were ignored and may differ from a fresh run.
EventOnCropsDf <- ddply(NOAAWeatherDf_subset2, .(EVTYPE),
                        summarize, Cost = sum(CropDamage, na.rm = TRUE))
# selecting the top 20 for subsequent barchart
EventOnCropsDf_top20 <- head(arrange(EventOnCropsDf,
                                     desc(Cost)), n = 20)
Bar chart displaying the cumulative number of deaths and injuries for the top 20 most harmful weather events
# EvtypePopHealthDfMelted holds one row per individual record and outcome.
# With stat = "identity" and dodging, rows that share an event type and an
# outcome are drawn at the same position instead of being added up, so the
# counts are summed per event type and outcome before plotting.
EvtypePopHealthTotals <- ddply(EvtypePopHealthDfMelted, .(EVTYPE, Outcome),
                               summarize, Occurrences = sum(Occurrences))
# bars are ordered by the combined number of fatalities and injuries;
# factor() drops the event types that are not part of the top 20.
ggplot(EvtypePopHealthTotals,
       aes(reorder(factor(EVTYPE), Occurrences, FUN = sum), Occurrences,
           fill = Outcome)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  ggtitle("Fatalities and injuries caused by weather events") +
  labs(x = "Weather Event Type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
# the 20 deadliest event types with their cumulative deaths and injuries.
kable(head(arrange(EventOnPopulationDf, desc(Deaths)), n = 20))
EVTYPE | Deaths | Injuries |
---|---|---|
TORNADO | 5633 | 91346 |
EXCESSIVE HEAT | 1903 | 6525 |
FLASH FLOOD | 978 | 1777 |
HEAT | 937 | 2100 |
LIGHTNING | 816 | 5230 |
TSTM WIND | 504 | 6957 |
FLOOD | 470 | 6789 |
RIP CURRENT | 368 | 232 |
HIGH WIND | 248 | 1137 |
AVALANCHE | 224 | 170 |
WINTER STORM | 206 | 1321 |
RIP CURRENTS | 204 | 297 |
HEAT WAVE | 172 | 309 |
EXTREME COLD | 160 | 231 |
THUNDERSTORM WIND | 133 | 1488 |
HEAVY SNOW | 127 | 1021 |
EXTREME COLD/WIND CHILL | 125 | 24 |
STRONG WIND | 103 | 280 |
BLIZZARD | 101 | 805 |
HIGH SURF | 101 | 152 |
Bar chart displaying the amount of property damages for the top 20 most damaging weather events
# bars are ordered from the most to the least costly event type (same order
# as the table below) instead of alphabetically; factor() drops the event
# types that are not part of the top 20.
ggplot(EventOnPropertiesDf_top20,
       aes(reorder(factor(EVTYPE), -Cost), Cost)) +
  geom_bar(stat = "identity") +
  ggtitle("Property damages caused by weather events") +
  labs(x = "Weather Event Type", y = "Cost ($)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
# EventOnPropertiesDf_top20 already holds the 20 costliest event types in
# decreasing order of cost, so it is rendered directly.
kable(EventOnPropertiesDf_top20)
EVTYPE | Cost |
---|---|
HURRICANE/TYPHOON | 69305840000 |
STORM SURGE | 43323536000 |
STORM SURGE/TIDE | 4641188000 |
HURRICANE OPAL | 3172846000 |
HEAVY RAIN/SEVERE WEATHER | 2500000000 |
TORNADOES, TSTM WIND, HAIL | 1600000000 |
BLIZZARD | 659213950 |
WILD FIRES | 624100000 |
TYPHOON | 600230000 |
LANDSLIDE | 324596000 |
HAILSTORM | 241000000 |
COASTAL FLOOD | 237665560 |
TSUNAMI | 144062000 |
COASTAL FLOODING | 126640500 |
HIGH WINDS/COLD | 110500000 |
River Flooding | 106155000 |
MAJOR FLOOD | 105000000 |
WILDFIRES | 100500000 |
HURRICANE OPAL/HIGH WINDS | 100000000 |
HIGH SURF | 89575000 |
Barchart displaying the amount of crop damages for the top 20 most damaging weather events
# bars are ordered from the most to the least costly event type (same order
# as the table below) instead of alphabetically; factor() drops the event
# types that are not part of the top 20.
ggplot(EventOnCropsDf_top20,
       aes(reorder(factor(EVTYPE), -Cost), Cost)) +
  geom_bar(stat = "identity") +
  ggtitle("Crop damages caused by weather events") +
  labs(x = "Weather Event Type", y = "Cost ($)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
# EventOnCropsDf_top20 already holds the 20 costliest event types in
# decreasing order of cost, so it is rendered directly.
kable(EventOnCropsDf_top20)
EVTYPE | Cost |
---|---|
FROST/FREEZE | 1094086000 |
EXCESSIVE WETNESS | 142000000 |
FLOOD/RAIN/WINDS | 112800000 |
COLD AND WET CONDITIONS | 66000000 |
Early Frost | 42000000 |
Damaging Freeze | 34130000 |
AGRICULTURAL FREEZE | 28820000 |
UNSEASONABLY COLD | 25042500 |
Extreme Cold | 20000000 |
TROPICAL STORM JERRY | 16000000 |
HARD FREEZE | 13100000 |
Freeze | 10500000 |
HURRICANE OPAL/HIGH WINDS | 10000000 |
UNSEASONAL RAIN | 10000000 |
HIGH WINDS/COLD | 7000000 |
Unseasonable Cold | 5100000 |
COOL AND WET | 5000000 |
WINTER STORM HIGH WINDS | 5000000 |
TORNADOES, TSTM WIND, HAIL | 2500000 |
Heavy Rain/High Surf | 1500000 |
1 the National Oceanic and Atmospheric Administration.
2 the National Weather Service.