Consequences of Severe Weather Events in the US

Synopsis

The goal of this document is to answer two questions: which types of severe weather events are most harmful with respect to population health, and which types of events have the greatest economic consequences. The analysis is based on data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. First we prepare the data, then we aggregate it by type of severe weather event, and finally we sort the aggregated data in descending order. Our analysis shows that, across the United States, tornadoes are the most harmful with respect to population health and floods have the greatest economic consequences.

Data Processing

Before we can do our analysis we must download the data from the Coursera “Reproducible Research” course.

    # Download the compressed storm data set once; an existing local copy is
    # reused so the report can be re-run without hitting the network again.
    if (!file.exists("./StormData.csv.bz2")) {
        url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
        # mode = "wb" writes the bzip2 archive byte-for-byte (required on
        # Windows). The default download method is used because the "curl"
        # method depends on an external curl executable being on the PATH.
        download.file(url, "./StormData.csv.bz2", mode = "wb")
    }

Then we read the downloaded data, which is a comma-separated-value file compressed with bzip2.

    # Parse the bzip2-compressed, comma-separated file through a bzfile()
    # connection; the first line of the file holds the column names.
    storm_data <- read.table(
        file = bzfile("./StormData.csv.bz2"),
        header = TRUE,
        sep = ","
    )

After reading in the data, we check its dimensions and the first few rows of the dataset.

    # Number of rows (observations) and columns (variables) that were read.
    dim(storm_data)
## [1] 902297     37
    # Print the first six rows (the default for head()) to inspect the columns.
    head(storm_data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

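Then we select only the columns that are relevant for our analysis and calculate the property damage by multiplying each PROPDMG value by the magnitude encoded in PROPDMGEXP ("K" for thousands, "M" for millions, "B" for billions):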

    # Keep only the event type, the two casualty counts and the property-damage
    # value together with its exponent code.
    consequences <- storm_data[c("EVTYPE", "FATALITIES", "INJURIES",
                                 "PROPDMG", "PROPDMGEXP")]
    
    # Convert damage-exponent codes into numeric multipliers.
    #
    # x: exponent code(s) as a character vector or factor; a single value or a
    #    whole column may be passed.
    #
    # Returns a numeric vector of the same length as x: 1e3 for "K"/"k",
    # 1e6 for "M"/"m", 1e9 for "B"/"b", and 1 for every other value
    # (including the empty string and NA).
    magnitude <- function(x){
        codes <- c("K", "M", "B")
        multipliers <- c(1000, 1000000, 1000000000)
        # toupper() makes the lookup case-insensitive; match() yields NA for
        # any code that is not K/M/B, which is then mapped to the neutral 1.
        idx <- match(toupper(as.character(x)), codes)
        m <- multipliers[idx]
        m[is.na(idx)] <- 1
        m
    }
    
    # Replace each exponent code with its numeric multiplier. vapply() is used
    # instead of sapply() so the result is always a plain, unnamed numeric
    # vector (sapply() returns list() for empty input and attaches names when
    # given character input). as.character() makes factor and character
    # columns behave the same.
    consequences$PROPDMGEXP <- vapply(as.character(consequences$PROPDMGEXP),
                                      magnitude, numeric(1), USE.NAMES = FALSE)
    # Scale the recorded damage figure by its multiplier.
    consequences$PROPDMG <- consequences$PROPDMG * consequences$PROPDMGEXP

To answer the questions, we need to aggregate the data by type of severe weather event and remove the event types whose totals are zero.

    # Sum each outcome over all records that share the same event type.
    fatalities_by_evtype <- aggregate(FATALITIES ~ EVTYPE, data = consequences,
                                      FUN = sum, na.rm = TRUE)
    injuries_by_evtype <- aggregate(INJURIES ~ EVTYPE, data = consequences,
                                    FUN = sum, na.rm = TRUE)
    propdmg_by_evtype <- aggregate(PROPDMG ~ EVTYPE, data = consequences,
                                   FUN = sum, na.rm = TRUE)

    # Keep only the event types with a non-zero total.
    fatalities_by_evtype <- subset(fatalities_by_evtype, FATALITIES != 0)
    injuries_by_evtype <- subset(injuries_by_evtype, INJURIES != 0)
    propdmg_by_evtype <- subset(propdmg_by_evtype, PROPDMG != 0)

Results

Now we are ready to answer the questions. For each measure, we sort the aggregated data in descending order and show the first five rows.

    # Rank event types from most to fewest fatalities and show the top five.
    fatalities_by_evtype <- fatalities_by_evtype[
        order(fatalities_by_evtype$FATALITIES, decreasing = TRUE),
    ]
    head(fatalities_by_evtype, n = 5)
##             EVTYPE FATALITIES
## 830        TORNADO       5633
## 123 EXCESSIVE HEAT       1903
## 147    FLASH FLOOD        978
## 269           HEAT        937
## 452      LIGHTNING        816
    # Rank event types from most to fewest injuries and show the top five.
    injuries_by_evtype <- injuries_by_evtype[
        order(injuries_by_evtype$INJURIES, decreasing = TRUE),
    ]
    head(injuries_by_evtype, n = 5)
##             EVTYPE INJURIES
## 830        TORNADO    91346
## 854      TSTM WIND     6957
## 164          FLOOD     6789
## 123 EXCESSIVE HEAT     6525
## 452      LIGHTNING     5230
    # Rank event types from highest to lowest total property damage and show
    # the top five.
    propdmg_by_evtype <- propdmg_by_evtype[
        order(propdmg_by_evtype$PROPDMG, decreasing = TRUE),
    ]
    head(propdmg_by_evtype, n = 5)
##                EVTYPE   PROPDMG
## 164             FLOOD 1.447e+11
## 406 HURRICANE/TYPHOON 6.931e+10
## 830           TORNADO 5.694e+10
## 666       STORM SURGE 4.332e+10
## 147       FLASH FLOOD 1.614e+10

So now we can say that, across the United States, tornadoes (TORNADO) are the most harmful with respect to population health and floods (FLOOD) have the greatest economic consequences. We can also illustrate this with bar plots.

    library(ggplot2)
    # Bar chart of total fatalities per event type, restricted to event types
    # with more than 100 fatalities so the x axis stays readable.
    # qplot() and its `stat` argument are deprecated in current ggplot2, so
    # the chart is built with ggplot() + geom_bar(stat = "identity"), which
    # draws the pre-computed totals as bar heights.
    ggplot(fatalities_by_evtype[fatalities_by_evtype$FATALITIES > 100, ],
           aes(x = EVTYPE, y = FATALITIES)) +
        geom_bar(stat = "identity") +
        xlab("Weather event type") +
        ylab("Number of fatalities") +
        theme(axis.text.x = element_text(angle = 90)) +
        labs(title = "Number of fatalities by\n severe weather event type")

plot of chunk unnamed-chunk-7

    # Bar chart of total injuries per event type, restricted to event types
    # with more than 1000 injuries so the x axis stays readable.
    # qplot() and its `stat` argument are deprecated in current ggplot2, so
    # the chart is built with ggplot() + geom_bar(stat = "identity"), which
    # draws the pre-computed totals as bar heights.
    ggplot(injuries_by_evtype[injuries_by_evtype$INJURIES > 1000, ],
           aes(x = EVTYPE, y = INJURIES)) +
        geom_bar(stat = "identity") +
        xlab("Weather event type") +
        ylab("Number of injuries") +
        theme(axis.text.x = element_text(angle = 90)) +
        labs(title = "Number of injuries by\n severe weather event type")

plot of chunk unnamed-chunk-7

    # Bar chart of total property damage per event type, restricted to event
    # types whose total exceeds 5e9 so the x axis stays readable.
    # qplot() and its `stat` argument are deprecated in current ggplot2, so
    # the chart is built with ggplot() + geom_bar(stat = "identity"), which
    # draws the pre-computed totals as bar heights. The axis label and title
    # also correct the "Properity" misspelling.
    ggplot(propdmg_by_evtype[propdmg_by_evtype$PROPDMG > 5000000000, ],
           aes(x = EVTYPE, y = PROPDMG)) +
        geom_bar(stat = "identity") +
        xlab("Weather event type") +
        ylab("Property damage in $") +
        theme(axis.text.x = element_text(angle = 90)) +
        labs(title = "Property damage by\n severe weather event type")

plot of chunk unnamed-chunk-7