Socioeconomic impacts of severe weather conditions in the United States

Synopsis:

This report aims to assess the impact of severe weather conditions in the United States with respect to population health as well as economic consequences. We used the NOAA Storm database, available at: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

Analysis:

First, we load the necessary libraries:

library(data.table)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

We read the data into a data table and extract the columns that contain information about the number of injuries and fatalities, as well as the cost of damage to property and crops:

# Load the raw storm records; character columns are read as factors.
dt1 <- fread("data", stringsAsFactors = TRUE)

# Keep only the columns needed for the health and cost analyses,
# renamed to the descriptive names used in the rest of the report.
dt2 <- dt1[, .(
  events          = EVTYPE,
  injuries        = INJURIES,
  fatalities      = FATALITIES,
  property_damage = PROPDMG,
  pd_multiplier   = PROPDMGEXP,
  crop_damage     = CROPDMG,
  cd_multiplier   = CROPDMGEXP
)]

To assess the impact of severe weather conditions on population health, we total the injuries and fatalities per event type, arrange the totals in descending order, and list the first few rows of the resulting table:

# Question 1: which event types are most harmful to population health?
# Total the injuries and fatalities per event type.
population_data <- aggregate(
  cbind(injuries, fatalities) ~ events,
  data = dt2,
  FUN = sum,
  na.rm = TRUE
)

# Order event types by combined casualties, largest first, and preview.
arranged_population_data <- population_data %>%
  arrange(desc(injuries + fatalities))
head(arranged_population_data)
##           events injuries fatalities
## 1        TORNADO    91346       5633
## 2 EXCESSIVE HEAT     6525       1903
## 3      TSTM WIND     6957        504
## 4          FLOOD     6789        470
## 5      LIGHTNING     5230        816
## 6           HEAT     2100        937

It appears that tornadoes cause a considerably larger number of injuries and fatalities than other events. To visualize the data, we keep only the six event types with the largest totals and group the rest of the events as “OTHERS”:

# Split the ranked table into the leading event types plus one "OTHERS" row.
# n_other is the total number of pie slices (including "OTHERS"), so the
# number of individually reported event types is n_other - 1.
n_other <- 7
n_total <- nrow(arranged_population_data)
n_top <- n_other - 1

# The previous `1:n_other-1` parsed as `(1:n_other) - 1`, i.e. 0:6, and only
# produced six rows because a zero index is silently dropped. head() states
# the intent directly and also copes with tables shorter than n_top.
critical_population_data1 <- head(arranged_population_data, n_top)

# Everything after the leading rows; a logical mask avoids the backwards
# sequence that `n_other:n_total` yields when n_total < n_other.
remaining_population_data <-
  arranged_population_data[seq_len(n_total) > n_top, ]
other_injuries <- sum(remaining_population_data$injuries)
other_fatalities <- sum(remaining_population_data$fatalities)

critical_population_data2 <- data.frame(
  events = "OTHERS",
  injuries = other_injuries,
  fatalities = other_fatalities
)
critical_population_data3 <- rbind(
  critical_population_data1,
  critical_population_data2
)

We convert the data to percentages:

# Build a "<share> %" label (one decimal place) for each slice, separately
# for injuries and fatalities, and store it in a "<measure>_percent" column.
for (measure in c("injuries", "fatalities")) {
  counts <- critical_population_data3[[measure]]
  share <- round(counts / sum(counts) * 1000) / 10
  critical_population_data3[[paste0(measure, "_percent")]] <-
    paste(as.character(share), "%")
}

We visualize the data as percentages. Note that the total numbers of injuries and fatalities are 140528 and 15145, respectively:

# Two pie charts side by side with narrow margins.
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))

# One colour per slice, shared by both charts and the legend.
slice_colours <- hcl.colors(n_other)

pie(
  critical_population_data3$injuries,
  labels = critical_population_data3$injuries_percent,
  col = slice_colours,
  main = "Injuries"
)
# The legend is drawn on the first panel and serves both charts.
legend(
  x = 0.31, y = 1.05,
  legend = critical_population_data3$events,
  fill = slice_colours,
  cex = 0.6
)
pie(
  critical_population_data3$fatalities,
  labels = critical_population_data3$fatalities_percent,
  col = slice_colours,
  main = "Fatalities"
)

Next, we assess the economic burden of severe weather conditions by examining the total cost of property damage and crop damage. The documentation provided at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf indicates that the costs are reported in different units. We first examine the reported units to ensure that we sum the costs correctly:

# Frequency of each property-damage exponent code.
table(dt2[["pd_multiplier"]])
## 
##             +      -      0      1      2      3      4      5      6 
## 465934      5      1    216     25     13      4      4     28      4 
##      7      8      ?      B      H      K      M      h      m 
##      5      1      8     40      6 424665  11330      1      7
# Frequency of each crop-damage exponent code.
table(dt2[["cd_multiplier"]])
## 
##             0      2      ?      B      K      M      k      m 
## 618413     19      1      7      9 281832   1994     21      1

We then convert the units to multipliers:

# Translate the exponent codes into numeric multipliers (case-insensitive):
# H = hundreds, K = thousands, M = millions, B = billions.
unit_codes <- c("H", "K", "M", "B")
unit_values <- c(100, 1000, 1000000, 1000000000)

# Blank or unrecognised codes (digits, "+", "-", "?") get a multiplier of 1,
# so the reported amount is taken at face value. Leaving them as NA would
# turn the damage value into NA, and aggregate()'s formula interface drops
# any row with an NA in either damage column, which silently discards valid
# damage from the other column. Building the column in one step also
# guarantees it has exactly one entry per row of dt2.
pd_code_index <- match(toupper(as.character(dt2$pd_multiplier)), unit_codes)
dt2$pd_multiplier_num <- ifelse(
  is.na(pd_code_index), 1, unit_values[pd_code_index]
)

cd_code_index <- match(toupper(as.character(dt2$cd_multiplier)), unit_codes)
dt2$cd_multiplier_num <- ifelse(
  is.na(cd_code_index), 1, unit_values[cd_code_index]
)

Subsequently, we use the multipliers to compute the property damage and crop damage costs in dollars:

# Scale the reported amounts by their unit multipliers,
# overwriting the damage columns in place.
dt2$property_damage <- with(dt2, property_damage * pd_multiplier_num)
dt2$crop_damage <- with(dt2, crop_damage * cd_multiplier_num)

Now we can sort the costs in descending order, similar to what we did earlier:

# Total the property and crop damage per event type.
cost_data <- aggregate(
  cbind(property_damage, crop_damage) ~ events,
  data = dt2,
  FUN = sum,
  na.rm = TRUE
)

# Order event types by combined damage, largest first, and preview.
arranged_cost_data <- cost_data %>%
  arrange(desc(property_damage + crop_damage))
head(arranged_cost_data)
##              events property_damage crop_damage
## 1             FLOOD    132836489050  5170955450
## 2 HURRICANE/TYPHOON     26740295000  2607872800
## 3           TORNADO     16166771690   353376460
## 4         HURRICANE      9716358000  2688910000
## 5       RIVER FLOOD      5079635000  5028734000
## 6              HAIL      7991783690  2028807900

A quick examination of the numbers suggests that the largest cost is associated with floods. Now we can rearrange the data to view only the damage costs associated with the 6 most costly events, and then group the rest of the events as OTHERS.

# Split the ranked cost table into the leading event types plus "OTHERS".
# n_other counts every pie slice including "OTHERS", so n_other - 1 event
# types are reported individually. (`1:n_other-1` parsed as 0:6 and only
# worked because a zero index is silently dropped.)
n_top_cost <- n_other - 1
critical_cost_data1 <- head(arranged_cost_data, n_top_cost)

# Remaining rows, bounded by this table's own row count rather than the
# row count of the population table.
remaining_cost_data <-
  arranged_cost_data[seq_len(nrow(arranged_cost_data)) > n_top_cost, ]

# Sum each damage type from its own column; the property total previously
# summed the crop-damage column by mistake.
other_property <- sum(remaining_cost_data$property_damage, na.rm = TRUE)
other_crop <- sum(remaining_cost_data$crop_damage, na.rm = TRUE)

critical_cost_data2 <- data.frame(
  events = "OTHERS",
  property_damage = other_property,
  crop_damage = other_crop
)
critical_cost_data3 <- rbind(critical_cost_data1, critical_cost_data2)

We convert all costs to billions of dollars:

# Build a "$<amount>B" label (billions of dollars, one decimal place) for
# each slice and store it in a "<measure>_amount" column.
for (measure in c("property_damage", "crop_damage")) {
  dollars <- critical_cost_data3[[measure]]
  billions <- round(dollars / 100000000) / 10
  critical_cost_data3[[paste0(measure, "_amount")]] <-
    paste0("$", as.character(billions), "B")
}

We visualize the total cost in billions of dollars. Note that the total damage costs for property and crops are $211.3B and $30.7B, respectively:

# Two pie charts side by side with narrow margins.
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))

# One colour per slice, shared by both charts and the legend.
cost_colours <- hcl.colors(n_other)

pie(
  critical_cost_data3$property_damage,
  labels = critical_cost_data3$property_damage_amount,
  col = cost_colours,
  main = "Property Damage"
)
# The legend is drawn on the first panel and serves both charts.
legend(
  x = -0.1, y = 1.05,
  legend = critical_cost_data3$events,
  fill = cost_colours,
  cex = 0.6
)
pie(
  critical_cost_data3$crop_damage,
  labels = critical_cost_data3$crop_damage_amount,
  col = cost_colours,
  main = "Crop Damage"
)