OVERALL IMPACT OF WEATHER EVENTS ON POPULATION AND ECONOMY

Synopsis

This analysis addresses two questions:

- Which types of events are most harmful with respect to population?
- Which types of events are most harmful with respect to the economy?

  1. Load the packages, read the data, and explore it:
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Read the storm-event CSV from the working directory into a data frame.
rawdata <- read.csv(file = "repdata_data_StormData.csv")
# Number of rows and columns in the raw table.
dim(rawdata)
## [1] 902297     37
# Column-by-column overview: type and first few values of each variable.
str(rawdata)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Convert the data frame to a tibble. tbl_df() is deprecated in
# dplyr (>= 1.0.0); as_tibble(), re-exported by dplyr, is its replacement.
rawdata <- as_tibble(rawdata)
  1. Which types of events are most harmful with respect to population?
# Total FATALITIES per event type, sorted from most to fewest; the first
# five rows are kept as the top-5 table.
deaths <- aggregate(FATALITIES ~ EVTYPE, data = rawdata, FUN = sum) %>%
  arrange(desc(FATALITIES))
deaths5 <- deaths[seq_len(5), ]

# Same summary for INJURIES (referred to as "nondeaths" in this report).
nondeaths <- aggregate(INJURIES ~ EVTYPE, data = rawdata, FUN = sum) %>%
  arrange(desc(INJURIES))
nondeaths5 <- nondeaths[seq_len(5), ]

Top 5 event types by number of deaths and by number of injuries (non-deaths)

deaths5
##           EVTYPE FATALITIES
## 1        TORNADO       5633
## 2 EXCESSIVE HEAT       1903
## 3    FLASH FLOOD        978
## 4           HEAT        937
## 5      LIGHTNING        816
nondeaths5
##           EVTYPE INJURIES
## 1        TORNADO    91346
## 2      TSTM WIND     6957
## 3          FLOOD     6789
## 4 EXCESSIVE HEAT     6525
## 5      LIGHTNING     5230

Plots: Top 5 event types by number of deaths and by number of injuries (non-deaths)

# Bar chart of total fatalities for the five deadliest event types.
# geom_col() draws bars whose heights are the y values themselves.
plotdeaths5 <- ggplot(data = deaths5, mapping = aes(x = EVTYPE, y = FATALITIES)) +
  geom_col(fill = "blue") +
  labs(
    x = "Event Type",
    y = "Deaths",
    title = "Deaths by top 5 Event Types"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
    plot.title = element_text(size = 8)
  )

# Bar chart of total injuries for the five most injurious event types.
plotnondeaths5 <- ggplot(data = nondeaths5, mapping = aes(x = EVTYPE, y = INJURIES)) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Nondeaths",
    title = "Nondeaths by top 5 Event Types"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
    plot.title = element_text(size = 8)
  )

# Draw both charts side by side under one common heading.
grid.arrange(
  plotdeaths5,
  plotnondeaths5,
  ncol = 2,
  top = "Events most harmful with respect to population"
)

  1. Which types of events are most harmful with respect to the economy?

Property damage calculation and crop damage calculation

unique(rawdata$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
Pdamage <- select(rawdata, EVTYPE, PROPDMG, PROPDMGEXP)

# Translate each PROPDMGEXP code into the multiplier applied to PROPDMG:
#   digits "0".."8" -> 10^digit, h/H -> 100, K -> 1e3, m/M -> 1e6, B -> 1e9,
#   ""              -> 1 (PROPDMG is taken as-is),
#   "+", "-", "?"   -> 0 (the record contributes nothing).
# Codes missing from the table yield NA.
# match() is used rather than indexing a named vector because a character
# subscript of "" never matches a name in R.
# The column is assigned in one step: filling a not-yet-existing tibble column
# piecewise raises "Unknown or uninitialised column: `ChangeExp`".
prop_exp_codes <- c(as.character(0:8), "h", "H", "K", "m", "M", "B", "", "+", "-", "?")
prop_exp_mult <- c(10^(0:8), 100, 100, 1e3, 1e6, 1e6, 1e9, 1, 0, 0, 0)
Pdamage$ChangeExp <- prop_exp_mult[match(Pdamage$PROPDMGEXP, prop_exp_codes)]

# Dollar value of each record, summed per event type and ranked descending;
# the first five rows form the top-5 table.
Pdamage$dValue <- Pdamage$PROPDMG * Pdamage$ChangeExp
DamageValue <- aggregate(dValue ~ EVTYPE, Pdamage, sum)
DamageValue <- arrange(DamageValue, desc(dValue))
DamageValue5 <- DamageValue[1:5, ]

unique(rawdata$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"
Cdamage <- select(rawdata, EVTYPE, CROPDMG, CROPDMGEXP)

# Translate each CROPDMGEXP code into the multiplier applied to CROPDMG:
#   "0" -> 1, "2" -> 100, k/K -> 1e3, m/M -> 1e6, B -> 1e9, "?" -> 0,
#   ""  -> 1 (CROPDMG is taken as-is).
# The empty code was previously left unmapped, so those rows became NA and
# were silently dropped by aggregate()'s default na.action (na.omit); it is
# now treated as a multiplier of 1, the same rule the property-damage
# calculation applies to an empty PROPDMGEXP.
# Codes missing from the table yield NA.
# match() is used rather than indexing a named vector because a character
# subscript of "" never matches a name in R.
# The column is assigned in one step: filling a not-yet-existing tibble column
# piecewise raises "Unknown or uninitialised column: `ChangeExp`".
crop_exp_codes <- c("0", "2", "k", "K", "m", "M", "B", "", "?")
crop_exp_mult <- c(1, 100, 1e3, 1e3, 1e6, 1e6, 1e9, 1, 0)
Cdamage$ChangeExp <- crop_exp_mult[match(Cdamage$CROPDMGEXP, crop_exp_codes)]

# Dollar value of each record, summed per event type and ranked descending;
# the first five rows form the top-5 table.
Cdamage$dValue <- Cdamage$CROPDMG * Cdamage$ChangeExp
CdamageValue <- aggregate(dValue ~ EVTYPE, Cdamage, sum)
CdamageValue <- arrange(CdamageValue, desc(dValue))
CdamageValue5 <- CdamageValue[1:5, ]

Top 5 event types by property damage and by crop damage (in dollars)

DamageValue5
##              EVTYPE       dValue
## 1             FLOOD 144657709807
## 2 HURRICANE/TYPHOON  69305840000
## 3           TORNADO  56947380617
## 4       STORM SURGE  43323536000
## 5       FLASH FLOOD  16822673979
CdamageValue5
##        EVTYPE      dValue
## 1     DROUGHT 13972566000
## 2       FLOOD  5661968450
## 3 RIVER FLOOD  5029459000
## 4   ICE STORM  5022113500
## 5        HAIL  3025954470

Plots: Top 5 event types by property damage and by crop damage

# Bar chart of total property damage for the five costliest event types.
# dValue is in dollars; dividing by 10^9 plots it in billions, so the axis
# label states that unit explicitly.
plotDamageValue5 <- ggplot(DamageValue5, aes(x = EVTYPE, y = dValue / (10^9))) +
  geom_bar(stat = "identity", fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
  xlab("Event Type") +
  ylab("Property Damage (billions of dollars)") +
  ggtitle("Property Damage by top 5 Weather Event Types") +
  theme(plot.title = element_text(size = 8))

# Bar chart of total crop damage for the five costliest event types,
# also in billions of dollars. The axis label previously read "Corp Damage".
plotCdamageValue5 <- ggplot(CdamageValue5, aes(x = EVTYPE, y = dValue / (10^9))) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
  xlab("Event Type") +
  ylab("Crop Damage (billions of dollars)") +
  ggtitle("Crop Damage by top 5 Weather Event Types") +
  theme(plot.title = element_text(size = 8))

# Draw both charts side by side under one common heading.
grid.arrange(plotDamageValue5, plotCdamageValue5, ncol = 2, top = "Events most harmful with respect to property and crop")

Conclusions

Tornadoes caused the greatest number of deaths and injuries (non-deaths) over the recorded period: 5633 and 91346, respectively.

Floods caused the greatest property damage in the records: 144657709807 dollars.

Droughts caused the greatest crop losses: 13972566000 dollars.