Synopsis
This analysis addresses two questions: (1) Which types of events are most harmful with respect to population health? (2) Which types of events are most harmful with respect to the economy?
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Read the storm data CSV into a data frame, then report its dimensions and
# column structure.
rawdata <- read.csv(file = "repdata_data_StormData.csv")
dim(rawdata)
## [1] 902297 37
str(object = rawdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Convert to a tibble. as_tibble() replaces tbl_df(), which is deprecated in
# dplyr.
rawdata <- as_tibble(rawdata)

# Total fatalities per event type, sorted from most to fewest; keep the top
# five. head() returns at most five rows and never pads with NA rows the way
# `[1:5, ]` does when fewer than five event types exist.
deaths <- aggregate(FATALITIES ~ EVTYPE, data = rawdata, FUN = sum)
deaths <- arrange(deaths, desc(FATALITIES))
deaths5 <- head(deaths, 5)

# Same summary for injuries (called "nondeaths" throughout this report).
nondeaths <- aggregate(INJURIES ~ EVTYPE, data = rawdata, FUN = sum)
nondeaths <- arrange(nondeaths, desc(INJURIES))
nondeaths5 <- head(nondeaths, 5)
Top 5 events and accountability for deaths and non-deaths
# Display the five event types with the highest fatality totals.
print(deaths5)
## EVTYPE FATALITIES
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
# Display the five event types with the highest injury totals.
print(nondeaths5)
## EVTYPE INJURIES
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
Plots: Top 5 events and accountability for deaths and non-deaths
# Bar charts of the top five event types by fatalities and by injuries, drawn
# side by side. EVTYPE is a character column, so mapping it directly to x
# would sort the bars alphabetically; reorder() sorts them by the plotted
# value instead (largest first), which is what a "top 5" chart should show.
plotdeaths5 <- ggplot(
  deaths5,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_col(fill = "blue") +
  labs(
    x = "Event Type",
    y = "Deaths",
    title = "Deaths by top 5 Event Types"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
    plot.title = element_text(size = 8)
  )

plotnondeaths5 <- ggplot(
  nondeaths5,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Nondeaths",
    title = "Nondeaths by top 5 Event Types"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
    plot.title = element_text(size = 8)
  )

grid.arrange(
  plotdeaths5,
  plotnondeaths5,
  ncol = 2,
  top = "Events most harmful with respect to population"
)
Property damage calculation and crop damage calculation
# Distinct exponent codes present in the property-damage exponent column.
unique(rawdata$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
Pdamage <- select(rawdata, EVTYPE, PROPDMG, PROPDMGEXP)

# Multiplier applied to PROPDMG for each exponent code. The two vectors are
# parallel: prop_exp_mult[i] is the multiplier for prop_exp_codes[i].
# match() is used rather than a named vector because the empty string cannot
# be looked up as a name. Codes not listed here yield NA.
prop_exp_codes <- c(
  "", "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "h", "H", "K", "m", "M", "B", "+", "-", "?"
)
prop_exp_mult <- c(
  1, 1, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8,
  100, 100, 1000, 1e6, 1e6, 1e9, 0, 0, 0
)

# Assign the whole column in one step. Filling a not-yet-existing tibble
# column element by element raises "Unknown or uninitialised column".
Pdamage$ChangeExp <- prop_exp_mult[match(Pdamage$PROPDMGEXP, prop_exp_codes)]
Pdamage$dValue <- Pdamage$PROPDMG * Pdamage$ChangeExp

# Total property damage per event type, largest first; keep the top five.
DamageValue <- aggregate(dValue ~ EVTYPE, data = Pdamage, FUN = sum)
DamageValue <- arrange(DamageValue, desc(dValue))
DamageValue5 <- head(DamageValue, 5)
# Distinct exponent codes present in the crop-damage exponent column.
unique(rawdata$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
Cdamage <- select(rawdata, EVTYPE, CROPDMG, CROPDMGEXP)

# Multiplier applied to CROPDMG for each exponent code. The two vectors are
# parallel: crop_exp_mult[i] is the multiplier for crop_exp_codes[i].
# The blank code "" is mapped to 1, the same treatment the property-damage
# calculation gives it. Leaving it unmapped produced an NA multiplier, and
# aggregate()'s formula interface then silently dropped those rows.
# match() is used rather than a named vector because the empty string cannot
# be looked up as a name. Codes not listed here yield NA.
crop_exp_codes <- c("", "0", "2", "K", "k", "m", "M", "B", "?")
crop_exp_mult <- c(1, 1, 100, 1000, 1000, 1e6, 1e6, 1e9, 0)

# Assign the whole column in one step. Filling a not-yet-existing tibble
# column element by element raises "Unknown or uninitialised column".
Cdamage$ChangeExp <- crop_exp_mult[match(Cdamage$CROPDMGEXP, crop_exp_codes)]
Cdamage$dValue <- Cdamage$CROPDMG * Cdamage$ChangeExp

# Total crop damage per event type, largest first; keep the top five.
CdamageValue <- aggregate(dValue ~ EVTYPE, data = Cdamage, FUN = sum)
CdamageValue <- arrange(CdamageValue, desc(dValue))
CdamageValue5 <- head(CdamageValue, 5)
Top 5 events and accountability for property and crop damage
# Display the five event types with the highest property-damage totals.
print(DamageValue5)
## EVTYPE dValue
## 1 FLOOD 144657709807
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56947380617
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16822673979
# Display the five event types with the highest crop-damage totals.
print(CdamageValue5)
## EVTYPE dValue
## 1 DROUGHT 13972566000
## 2 FLOOD 5661968450
## 3 RIVER FLOOD 5029459000
## 4 ICE STORM 5022113500
## 5 HAIL 3025954470
Plots: Top 5 events and accountability for property and crop damage
# Bar charts of the top five event types by property damage and by crop
# damage, drawn side by side. dValue is divided by 1e9, so the y axes are in
# billions of dollars and the labels say so. EVTYPE is a character column;
# reorder() sorts the bars by the plotted value (largest first) instead of
# alphabetically.
plotDamageValue5 <- ggplot(
  DamageValue5,
  aes(x = reorder(EVTYPE, -dValue), y = dValue / 1e9)
) +
  geom_col(fill = "blue") +
  labs(
    x = "Event Type",
    y = "Property Damage (billions of dollars)",
    title = "Property Damage by top 5 Weather Event Types"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
    plot.title = element_text(size = 8)
  )

plotCdamageValue5 <- ggplot(
  CdamageValue5,
  aes(x = reorder(EVTYPE, -dValue), y = dValue / 1e9)
) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Crop Damage (billions of dollars)",
    title = "Crop Damage by top 5 Weather Event Types"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
    plot.title = element_text(size = 8)
  )

grid.arrange(
  plotDamageValue5,
  plotCdamageValue5,
  ncol = 2,
  top = "Events most harmful with respect to property and crop"
)
Tornadoes caused the greatest number of deaths and nondeaths (injuries) over the recorded period: 5,633 and 91,346 respectively.
Floods caused the greatest damage to property: 144,657,709,807 dollars in the records.
Droughts caused the greatest losses to crops: 13,972,566,000 dollars.