Synopsis

Severe weather events can cause public health issues and property damage. The National Oceanic and Atmospheric Administration’s (NOAA) storm database is a good resource to study and plan for these events. This report analyzes which events are most harmful to human health and mortality and which events have the greatest economic impact (property damage and crop damage). Other analyses could be done, but they were not the purpose of this class.

Data processing

1. Downloading the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database from the URL used in the code below. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

# NOTE(review): readr is attached here, but the loading code below only uses
# base R (download.file / read.csv) -- confirm whether it is still needed.
library(readr)

# Location of the bzip2-compressed CSV export of the NOAA storm database.
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Fetch the archive only when there is no local copy yet, so re-running the
# analysis does not download the same file again. mode = "wb" requests a
# binary transfer so the compressed bytes are written unchanged on platforms
# that distinguish text from binary files.
if (!file.exists("StormData")) {
  download.file(URL, destfile = "StormData", mode = "wb")
}

# read.csv() is pointed at the downloaded (still compressed) file directly.
StormData <- read.csv("StormData", sep = ",", header = TRUE)

# Preview the first rows to check that the columns were parsed as expected.
head(StormData)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

StormDataReduced contains only the columns that are used in this analysis; rows whose event type is recorded as “?” are dropped as well.

# Keep the seven fields the analysis reads and discard the records whose event
# type is recorded as "?". which() drops any NA comparison results, matching
# how subset() treats them.
StormDataReduced <- StormData[
  which(StormData$EVTYPE != "?"),
  c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  )
]

Excluding NAs

# Remove every row that has an NA in any of the retained columns.
StormDataReducedNoNA <- na.omit(StormDataReduced)
# Report how many rows and columns remain after the filter.
dim(StormDataReducedNoNA)
## [1] 902296      7
# Sanity check: count the NA cells still present (0 means the data is clean).
sum(is.na(StormDataReducedNoNA))
## [1] 0

How many different event types are in the database?

# Number of distinct event labels before any clean-up.
length(unique(StormDataReducedNoNA[["EVTYPE"]]))
## [1] 984

Reducing Event Types in the DataBase

There are 984 event types (EVTYPE) in the database. Converting them all to upper case merges the entries that differ only in capitalization:

# Upper-case every event label so that spellings differing only by letter case
# collapse into a single value, then recount the distinct labels.
StormDataReducedNoNA[["EVTYPE"]] <- toupper(StormDataReducedNoNA[["EVTYPE"]])
length(unique(StormDataReducedNoNA[["EVTYPE"]]))
## [1] 897

That still leaves a lot of event types (897), so let's try to consolidate them:

# Collapse the free-text event labels into broad categories.
#
# Each rule pairs a regular expression (the name) with the category label (the
# value) that replaces any entry the expression matches. Rules are applied from
# top to bottom to the progressively rewritten labels, so their order matters:
# an entry already rewritten by an earlier rule is only touched again if its
# new label matches a later pattern (e.g. the STORM rule runs before WIND).
StormDataReducedNoNA$EVTYPE <- local({
  consolidation_rules <- c(
    # AVALANCHE
    ".*AVALANCE.*" = "AVALANCHE",
    # BLIZZARD
    ".*BLIZZARD.*" = "BLIZZARD",
    # CLOUD
    ".*CLOUD.*" = "CLOUD",
    # COLD
    ".*COLD.*" = "COLD",
    ".*FREEZ.*" = "COLD",
    ".*FROST.*" = "COLD",
    ".*ICE.*" = "COLD",
    ".*LOW TEMPERATURE RECORD.*" = "COLD",
    ".*LO.*TEMP.*" = "COLD",
    # DRY
    ".*DRY.*" = "DRY",
    # DUST
    ".*DUST.*" = "DUST",
    # FIRE
    ".*FIRE.*" = "FIRE",
    # FLOOD
    ".*FLOOD.*" = "FLOOD",
    # FOG
    ".*FOG.*" = "FOG",
    # HAIL
    ".*HAIL.*" = "HAIL",
    # HEAT
    ".*HEAT.*" = "HEAT",
    ".*WARM.*" = "HEAT",
    ".*HIGH.*TEMP.*" = "HEAT",
    ".*RECORD HIGH TEMPERATURES.*" = "HEAT",
    # HYPOTHERMIA/EXPOSURE
    ".*HYPOTHERMIA.*" = "HYPOTHERMIA/EXPOSURE",
    # LANDSLIDE
    ".*LANDSLIDE.*" = "LANDSLIDE",
    # LIGHTNING (anchored at the start of the label, including misspellings)
    "^LIGHTNING.*" = "LIGHTNING",
    "^LIGNTNING.*" = "LIGHTNING",
    "^LIGHTING.*" = "LIGHTNING",
    # MICROBURST
    ".*MICROBURST.*" = "MICROBURST",
    # MUDSLIDE
    ".*MUDSLIDE.*" = "MUDSLIDE",
    ".*MUD SLIDE.*" = "MUDSLIDE",
    # RAIN
    ".*RAIN.*" = "RAIN",
    # RIP CURRENT
    ".*RIP CURRENT.*" = "RIP CURRENT",
    # STORM
    ".*STORM.*" = "STORM",
    # SUMMARY
    ".*SUMMARY.*" = "SUMMARY",
    # TORNADO
    ".*TORNADO.*" = "TORNADO",
    ".*TORNDAO.*" = "TORNADO",
    ".*LANDSPOUT.*" = "TORNADO",
    ".*WATERSPOUT.*" = "TORNADO",
    # SURF
    ".*SURF.*" = "SURF",
    # VOLCANIC
    ".*VOLCANIC.*" = "VOLCANIC",
    # WET
    ".*WET.*" = "WET",
    # WIND
    ".*WIND.*" = "WIND",
    # WINTER
    ".*WINTER.*" = "WINTER",
    ".*WINTRY.*" = "WINTER",
    ".*SNOW.*" = "WINTER"
  )

  event_labels <- StormDataReducedNoNA$EVTYPE
  for (rule_index in seq_along(consolidation_rules)) {
    event_labels <- gsub(
      names(consolidation_rules)[rule_index],
      consolidation_rules[[rule_index]],
      event_labels
    )
  }
  event_labels
})

How many unique event types do we have now?

# Number of distinct event labels left after the consolidation rules ran.
length(unique(StormDataReducedNoNA[["EVTYPE"]]))
## [1] 136

Calculating monetary property and crop damage from PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP

Information about property damage consists of two parts: PROPDMG holds the significant digits and PROPDMGEXP holds the multiplier. The same is true of CROPDMG and CROPDMGEXP. PROPDMGEXP and CROPDMGEXP contain ‘K’ for thousand, ‘M’ for million and ‘B’ for billion. We’ll calculate the multiplier for both property damage and crop damage and create two new columns for the total damage: pCost and cCost.

# Translate damage-exponent codes into numeric multipliers.
#
# Arguments:
#   exp  Exponent code(s) as found in PROPDMGEXP / CROPDMGEXP. May be a single
#        value or a whole vector; matching is case-insensitive.
#
# Returns:
#   A numeric vector with one multiplier per element of `exp`:
#     "", "-", "?", "+"  -> 1
#     "0" .. "9"         -> 10^0 .. 10^9
#     "H"                -> 10^2
#     "K"                -> 10^3
#     "M"                -> 10^6
#     "B"                -> 10^9
#   Any other code (including NA) yields NA instead of raising an error.
Multiplier <- function(exp) {
  known_codes <- c(
    "", "-", "?", "+",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    "H", "K", "M", "B"
  )
  multipliers <- c(
    10^0, 10^0, 10^0, 10^0,
    10^(0:9),
    10^2, 10^3, 10^6, 10^9
  )

  # match() is used rather than name-based indexing because an empty string
  # never matches a name in `[`, and "" is a valid code here. Codes that are
  # not in the table match to NA, which then indexes out as NA.
  multipliers[match(toupper(exp), known_codes)]
}

Final step of calculating costs for property and crop damage

# Monetary damage per record: the reported figure times the multiplier for its
# exponent code, divided by 10^9 so the stored values are in billions.
StormDataReducedNoNA$pCost <-
  as.numeric(StormDataReducedNoNA$PROPDMG) *
  sapply(StormDataReducedNoNA$PROPDMGEXP, Multiplier) / 10^9

StormDataReducedNoNA$cCost <-
  as.numeric(StormDataReducedNoNA$CROPDMG) *
  sapply(StormDataReducedNoNA$CROPDMGEXP, Multiplier) / 10^9

Creating a summarized dataset of damage impact costs

# Total damage (property + crop) summed per event type.
CostImpact <- with(
  StormDataReducedNoNA,
  aggregate(
    x = list(DAMAGE_IMPACT = pCost + cCost),
    by = list(EVENT_TYPE = EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)

# Rank the event types from the most to the least costly and show the top ten.
cost_ranking <- order(CostImpact[["DAMAGE_IMPACT"]], decreasing = TRUE)
CostImpact <- CostImpact[cost_ranking, ]
head(CostImpact, n = 10)
##            EVENT_TYPE DAMAGE_IMPACT
## 30              FLOOD     180.58156
## 64  HURRICANE/TYPHOON      71.91371
## 108             STORM      70.44994
## 112           TORNADO      57.42785
## 37               HAIL      20.73720
## 22            DROUGHT      15.01867
## 56          HURRICANE      14.61023
## 15               COLD      12.69944
## 134              WIND      12.00554
## 28               FIRE       8.90491

Results

1. Across the United States, which types of events (as indicated in the EVTYPE) are most harmful with respect to population health?

# Casualties (fatalities + injuries) summed per event type.
HealthImpact <- with(
  StormDataReducedNoNA,
  aggregate(
    x = list(HEALTH_IMPACT = FATALITIES + INJURIES),
    by = list(EVENT_TYPE = EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)

# Rank the event types from the most to the least harmful and show the top ten.
health_ranking <- order(HealthImpact[["HEALTH_IMPACT"]], decreasing = TRUE)
HealthImpact <- HealthImpact[health_ranking, ]
head(HealthImpact, n = 10)
##     EVENT_TYPE HEALTH_IMPACT
## 112    TORNADO         97075
## 38        HEAT         12392
## 30       FLOOD         10127
## 134       WIND          9893
## 70   LIGHTNING          6049
## 108      STORM          4780
## 15        COLD          3100
## 135     WINTER          1924
## 28        FIRE          1698
## 37        HAIL          1512

Presenting results

library(ggplot2)
# Bar chart of the ten event types with the highest combined fatalities and
# injuries. reorder() on the negated total puts the tallest bar first.
HealthImpactChart <- ggplot(
  head(HealthImpact, 10),
  aes(
    x = reorder(EVENT_TYPE, -HEALTH_IMPACT),
    y = HEALTH_IMPACT,
    fill = EVENT_TYPE
  )
) +
  geom_bar(stat = "identity", alpha = 0.5) +
  # The x tick labels are blanked; bars are identified through the fill legend.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Top 10 Most Harmful Weather Events",
    x = "Event Type",
    y = "Health Impact"
  )
HealthImpactChart

Tornadoes are the most harmful events with respect to human health.

2. Across the United States, which types of events have the greatest economic consequences?

Creating a graph for the economic impact of natural disasters

# Bar chart of the ten event types with the largest combined property and crop
# damage. reorder() on the negated total puts the tallest bar first.
CostImpactChart <- ggplot(
  head(CostImpact, 10),
  aes(
    x = reorder(EVENT_TYPE, -DAMAGE_IMPACT),
    y = DAMAGE_IMPACT,
    fill = EVENT_TYPE
  )
) +
  geom_bar(stat = "identity") +
  # The x tick labels are blanked; bars are identified through the fill legend.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Top 10 Weather Events With the Worst Economic Consequences",
    x = "Event Type",
    y = "Economic Impact"
  )
CostImpactChart

Floods are the most economically significant events