The Adverse Effects of Natural Disasters on Health and Economy of the United States

Synposis

  • The Effects of Storm Events in the United States is analyzed.
  • They are analyzed regarding impact upon health and economy.
  • The data is from National Oceanic and Atmospheric Administration (NOAA) for the time period 1950-2011.
  • The results are about the events most harmful to health, and also those which had the largest economic costs.
  • For the top events, we give cumulative numbers and also breakdown with year.

1. Using dplyr and lubridate

library(dplyr)
library(lubridate)

Data Processing

  • Data is read from the file “repdata_data_StormData.csv.bz2”
  • It is read into the data frame named dat
  • The data frame dat contains 902297 obserations of 37 variables.
  • As data is processed to dat1, dat2, dat3, dat4, and dat5, we keep track of how health and economic costs are changed in these transformations
  • We have to clean as the duplicates of events arise from being capital or not
  • There are duplicates as well from spelling typos
  • dat (as read from file, exply only year is retained for the date)
  • dat -> dat1 (only info for 50 states and DC retained, only needed columns kept )
  • dat1 -> dat2 (property damage calculated based on mutliplier)
  • dat2 -> dat3 (crop damage calculated based on multiplier)
  • dat3 -> dat4 (events with more than 50 occurences retained, outlier in CA corrected)
  • dat4 -> dat5 (events aggregrated over year)

2. Loading Data, keeping only year information for storm event

file.name <- "repdata_data_StormData.csv.bz2" 
dat <- read.table(file.name, header = TRUE, sep = ",",
                  stringsAsFactors = FALSE, na.strings = "")
dat$BGN_DATE <- year(as.POSIXct(dat$BGN_DATE, format = "%m/%d/%Y %H:%M:%S" ))
names(dat)[2] <- "YEAR"

3. Summary of Health Costs

tot.fatalaties <- sum(dat$FATALITIES)
cat("The total number of fatalaties is",tot.fatalaties,"\n")
## The total number of fatalaties is 15145
tot.injuries <- sum(dat$INJURIES)
cat("The total number of injuries is",tot.injuries,"\n")
## The total number of injuries is 140528

4. Select the subset of 50 states and DC, into dat1

states.all <- unique(dat$STATE)
states <- states.all[c(1:50,52)]
cat("states:\n")
## states:
print(states)
##  [1] "AL" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL" "GA" "HI" "ID" "IL" "IN"
## [15] "IA" "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV"
## [29] "NH" "NJ" "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN"
## [43] "TX" "UT" "VT" "VA" "WA" "WV" "WI" "WY" "AK"
col.keep <- c("YEAR","EVTYPE","STATE","FATALITIES","INJURIES", "PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP")
dat1 <- subset(x = dat, subset = STATE %in% states, select = col.keep)
dat1.fatalaties <- sum(dat1$FATALITIES)
cat("The total number of fatalaties in dat1 is",dat1.fatalaties,"\n")
## The total number of fatalaties in dat1 is 14867
cat("The ratio of fatalaties retained in dat1 is",dat1.fatalaties/tot.fatalaties,"\n")
## The ratio of fatalaties retained in dat1 is 0.9816441
dat1.injuries <- sum(dat1$INJURIES)
cat("The total number of injuries in dat1 is",dat1.injuries,"\n")
## The total number of injuries in dat1 is 139835
cat("The ratio of injuries retained in dat1 is",dat1.injuries/tot.injuries,"\n")
## The ratio of injuries retained in dat1 is 0.9950686
cat("The number of rows in dat1 is",nrow(dat1),"\n")
## The number of rows in dat1 is 883623
cat("The ratio of rows retained in dat1 is",nrow(dat1)/nrow(dat),"\n")
## The ratio of rows retained in dat1 is 0.9793039

5. Processing to find property damage costs in dat2

dat1$PROPDMGEXP[is.na(dat1$PROPDMGEXP)] <- 1
dat2 <- filter(dat1, PROPDMGEXP %in% c("K","M","B","1"))
dat2$PROPDMGEXP <- ifelse(dat2$PROPDMGEXP=="K",1000,
                          ifelse(dat2$PROPDMGEXP=="M",1000000,
                                 ifelse(dat2$PROPDMGEXP=="B",1000000000,1)))
dat2$PROPDMG <- dat2$PROPDMG*dat2$PROPDMGEXP
dat2.fatalaties <- sum(dat2$FATALITIES)
cat("The total number of fatalaties in dat2 is",dat2.fatalaties,"\n")
## The total number of fatalaties in dat2 is 14860
cat("The ratio of fatalaties retained in dat2 is",dat2.fatalaties/tot.fatalaties,"\n")
## The ratio of fatalaties retained in dat2 is 0.9811819
dat2.injuries <- sum(dat2$INJURIES)
cat("The total number of injuries in dat2 is",dat2.injuries,"\n")
## The total number of injuries in dat2 is 139763
cat("The ratio of injuries retained in dat2 is",dat2.injuries/tot.injuries,"\n")
## The ratio of injuries retained in dat2 is 0.9945562
cat("The number of rows in dat2 is",nrow(dat2),"\n")
## The number of rows in dat2 is 883320
cat("The ratio of rows retained in dat2 is",nrow(dat2)/nrow(dat),"\n")
## The ratio of rows retained in dat2 is 0.9789681
property.dat2 <- sum(dat2$PROPDMG)
cat("The total Property Damage in dat2 is",property.dat2,"\n")
## The total Property Damage in dat2 is 423825059797

6. Processing to find crop damage costs in dat3

dat2 <- dat2[,-7]
dat2$CROPDMGEXP[is.na(dat2$CROPDMGEXP)] <- 1
dat3 <- filter(dat2, CROPDMGEXP %in% c("K","M","B","1"))
dat3$CROPDMGEXP <- ifelse(dat3$CROPDMGEXP=="K",1000,
                          ifelse(dat3$CROPDMGEXP=="M",1000000,
                                 ifelse(dat3$CROPDMGEXP=="B",1000000000,1)))
dat3$CROPDMG <- dat3$CROPDMG*dat3$CROPDMGEXP
dat3.fatalaties <- sum(dat3$FATALITIES)
cat("The total number of fatalaties in dat3 is",dat3.fatalaties,"\n")
## The total number of fatalaties in dat3 is 14857
cat("The ratio of fatalaties retained in dat3 is",dat3.fatalaties/tot.fatalaties,"\n")
## The ratio of fatalaties retained in dat3 is 0.9809838
dat3.injuries <- sum(dat3$INJURIES)
cat("The total number of injuries in dat3 is",dat3.injuries,"\n")
## The total number of injuries in dat3 is 139743
cat("The ratio of injuries retained in dat3 is",dat3.injuries/tot.injuries,"\n")
## The ratio of injuries retained in dat3 is 0.9944139
cat("The number of rows in dat3 is",nrow(dat3),"\n")
## The number of rows in dat3 is 883272
cat("The ratio of rows retained in dat3 is",nrow(dat3)/nrow(dat),"\n")
## The ratio of rows retained in dat3 is 0.9789149
property.dat3 <- sum(dat3$PROPDMG)
cat("The total Property Damage in dat3 is",property.dat3,"\n")
## The total Property Damage in dat3 is 423823458297
cat("The ratio of Property Damage retained in dat3 is",property.dat3/property.dat2,"\n")
## The ratio of Property Damage retained in dat3 is 0.9999962
crop.dat3 <- sum(dat3$CROPDMG)
cat("The total Crop Damage in dat3 is",crop.dat3,"\n")
## The total Crop Damage in dat3 is 48291524921
dat3 <- dat3[,-8]

7. Cleaning up EVTYPE in dat4

cat("The number of unique events in EVTYPE is", length(unique(dat3$EVTYPE)), "\n")
## The number of unique events in EVTYPE is 949
evts <- table(dat$EVTYPE)
evts.name <- rownames(evts)
evts.vector <- as.vector(evts)
names(evts.vector) <- evts.name
evts.sort <- sort(evts.vector, decreasing = TRUE)
evts.sort.50 <- evts.sort[evts.sort>50]
cat("There are",length(evts.sort.50),"events with more than 50 occurences.\n")
## There are 87 events with more than 50 occurences.
dat4 <- filter(dat3, EVTYPE %in% names(evts.sort.50))
dat4.fatalaties <- sum(dat4$FATALITIES)
cat("The total number of fatalaties in dat4 is",dat4.fatalaties,"\n")
## The total number of fatalaties in dat4 is 14456
cat("The ratio of fatalaties retained in dat4 is",dat4.fatalaties/tot.fatalaties,"\n")
## The ratio of fatalaties retained in dat4 is 0.9545064
dat4.injuries <- sum(dat4$INJURIES)
cat("The total number of injuries in dat4 is",dat4.injuries,"\n")
## The total number of injuries in dat4 is 138538
cat("The ratio of injuries retained in dat4 is",dat4.injuries/tot.injuries,"\n")
## The ratio of injuries retained in dat4 is 0.9858391
cat("The number of rows in dat4 is",nrow(dat4),"\n")
## The number of rows in dat4 is 880049
cat("The ratio of rows retained in dat4 is",nrow(dat4)/nrow(dat),"\n")
## The ratio of rows retained in dat4 is 0.9753429
property.dat4 <- sum(dat4$PROPDMG)
cat("The total Property Damage in dat4 is",property.dat4,"\n")
## The total Property Damage in dat4 is 412916487081
cat("The ratio of Property Damage retained in dat4 is",property.dat4/property.dat2,"\n")
## The ratio of Property Damage retained in dat4 is 0.9742616
crop.dat4 <- sum(dat4$CROPDMG)
cat("The total Crop Damage in dat4 is",crop.dat4,"\n")
## The total Crop Damage in dat4 is 47154361491
cat("The ratio of Crop Damage retained in dat4 is",crop.dat4/crop.dat3,"\n")
## The ratio of Crop Damage retained in dat4 is 0.9764521

8. Correcting for outlier in CA (mentioned in class forum, multiplier wrong)

temp <- dat4[dat4$STATE == "CA" & dat4$PROPDMG>100000000000,]
cat("There is an outlier here:\n")
## There is an outlier here:
print(temp)
##        YEAR EVTYPE STATE FATALITIES INJURIES  PROPDMG  CROPDMG
## 593640 2006  FLOOD    CA          0        0 1.15e+11 32500000
num <- as.integer(rownames(temp))
dat4[num,"PROPDMG"] = dat4[num,"PROPDMG"]/1000

9. Aggregating over years in dat5

dat5 <- aggregate(cbind(FATALITIES, INJURIES, PROPDMG, CROPDMG )
                  ~ YEAR + STATE + EVTYPE, sum, data = dat4)
dat5.fatalaties <- sum(dat5$FATALITIES)
cat("The total number of fatalaties in dat5 is",dat5.fatalaties,"\n")
## The total number of fatalaties in dat5 is 14456
cat("The ratio of fatalaties retained in dat5 is",dat5.fatalaties/tot.fatalaties,"\n")
## The ratio of fatalaties retained in dat5 is 0.9545064
dat5.injuries <- sum(dat5$INJURIES)
cat("The total number of injuries in dat5 is",dat5.injuries,"\n")
## The total number of injuries in dat5 is 138538
cat("The ratio of injuries retained in dat5 is",dat5.injuries/tot.injuries,"\n")
## The ratio of injuries retained in dat5 is 0.9858391
cat("The number of rows in dat5 is",nrow(dat5),"\n")
## The number of rows in dat5 is 21693
cat("The ratio of rows retained in dat5 is",nrow(dat5)/nrow(dat),"\n")
## The ratio of rows retained in dat5 is 0.02404197
property.dat5 <- sum(dat5$PROPDMG)
cat("Note the reduction from last value of Property Damage, for dat4, is only due to outlier correction\n")
## Note the reduction from last value of Property Damage, for dat4, is only due to outlier correction
cat("The total Property Damage in dat5 is",property.dat5,"\n")
## The total Property Damage in dat5 is 298031487081
cat("The ratio of Property Damage retained in dat5 is",property.dat5/property.dat2,"\n")
## The ratio of Property Damage retained in dat5 is 0.7031946
crop.dat5 <- sum(dat5$CROPDMG)
cat("The total Crop Damage in dat5 is",crop.dat5,"\n")
## The total Crop Damage in dat5 is 47154361491
cat("The ratio of Crop Damage retained in dat5 is",crop.dat5/crop.dat3,"\n")
## The ratio of Crop Damage retained in dat5 is 0.9764521

Results

  • The top 6 events that effect health are listed and then graphed.
  • The top 6 events that effect economy are listed and then graphed.
  • We can see that for most events, we have records only for later years
  • Only for tornado, do we have results throughout the time period from 1950 to 2011
  • dat5 -> dat6 (top 6 events for health effects)
  • dat5 -> dat7 (top 6 event for economic effects)

10. Events Effect on Health

events.health.df <- aggregate(cbind(FATALITIES, INJURIES)
                                       ~ EVTYPE, sum, data = dat5)
events.health.vec <- events.health.df$FATALITIES+events.health.df$INJURIES
names(events.health.vec) <- events.health.df$EVTYPE
top.6.health <- sort(events.health.vec, decreasing = TRUE)[1:6]
cat("The top 6 events causing injuring and fatalaties are:\n")
## The top 6 events causing injuring and fatalaties are:
print(top.6.health)
##        TORNADO EXCESSIVE HEAT      TSTM WIND          FLOOD      LIGHTNING 
##          96915           8428           7460           7250           6030 
##           HEAT 
##           3037
dat6 <- filter(dat5, EVTYPE %in% names(top.6.health))
dat6[["HEALTH"]] <- dat6$FATALITIES + dat6$INJURIES
dat6 <- dat6 [-(4:7)]
dat6 <- dat6[order(dat6$YEAR, decreasing = FALSE),]
par(mfrow=c(2,3))
for (evt in names(top.6.health)) {
  temp <- filter(dat6, EVTYPE == evt)
  if (nrow(temp)) {
    plot(temp$YEAR,temp$HEALTH, main = evt, xlab = "YEAR",
         ylab = "HEALTH", col = "blue", pch = 19 )
  }
}

11. Events Effect on Health

events.cost.df <- aggregate(cbind(PROPDMG, CROPDMG)
                              ~ EVTYPE, sum, data = dat5)
events.cost.vec <- events.cost.df$PROPDMG+events.cost.df$CROPDMG
names(events.cost.vec) <- events.cost.df$EVTYPE
top.6.cost <- sort(events.cost.vec, decreasing = TRUE)[1:6]
cat("The top 6 events causing property and crop damage are:\n")
## The top 6 events causing property and crop damage are:
print(top.6.cost)
## HURRICANE/TYPHOON           TORNADO       STORM SURGE             FLOOD 
##       71636600800       57289964593       43323541000       35270287257 
##              HAIL       FLASH FLOOD 
##       18727696730       17290580167
dat7 <- filter(dat5, EVTYPE %in% names(top.6.cost))
dat7[["COST"]] <- dat7$PROPDMG + dat7$CROPDMG
dat7 <- dat7 [-(4:7)]
dat7 <- dat7[order(dat7$YEAR, decreasing = FALSE),]
par(mfrow=c(2,3))
for (evt in names(top.6.cost)) {
  temp <- filter(dat7, EVTYPE == evt)
  if (nrow(temp)) {
    plot(temp$YEAR,temp$COST, main = evt, xlab = "YEAR",
         ylab = "COST", col = "blue", pch = 19 )
  }
}

12. Top 5 states with most health and economic effects for the top 3 events

dat8 <- aggregate(cbind(HEALTH)~STATE+EVTYPE, sum, data = dat6 )
dat9 <- aggregate(cbind(COST)~STATE+EVTYPE, sum, data = dat7 )
par(mfrow=c(2,3))
for (evt in names(top.6.health[1:3])) {
  temp <- dat8[dat8$EVTYPE==evt,]
  temp <- temp[order(temp$HEALTH,decreasing = TRUE),]
  top.5 <- temp$STATE[1:5]
  barplot(temp$HEALTH[1:5], names.arg = top.5, main = evt,
          col = "blue", ylab = "HEALTH")
}
for (evt in names(top.6.cost[1:3])) {
  temp <- dat9[dat9$EVTYPE==evt,]
  temp <- temp[order(temp$COST,decreasing = TRUE),]
  top.5 <- temp$STATE[1:5]
  barplot(temp$COST[1:5], names.arg = top.5, main = evt,
          col = "blue", ylab = "COST")
}