Summary

This analysis uses the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. It contains records from 1950 to November 2011 describing major storms and weather events, including estimates of the fatalities, injuries, and property damage they caused.

The objective of the analysis was to determine which types of events are most harmful with respect to population health and which types of events have the greatest economic consequences.

  1. Data Processing - data loading

The data was loaded using the read.csv() function.

# Load the raw NOAA storm data. The compressed CSV is looked up relative to
# the current working directory rather than by calling setwd() on a
# machine-specific absolute path, so the report can be run from any checkout
# that keeps the data file next to this document.
storm_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_file)) {
  stop("Data file not found in ", getwd(), ": ", storm_file, call. = FALSE)
}
storm <- read.csv(storm_file)
head(storm)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6
# Show the column names and types of the loaded data
str(storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
##  $ BGN_TIME  : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
##  $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
##  $ STATE     : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : Factor w/ 35 levels "","  N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_DATE  : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_TIME  : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WFO       : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ZONENAMES : Factor w/ 25112 levels "","                                                                                                               "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
  1. Data processing - variable of interest

The variables of interest were selected: EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP.

Total human harm is defined as the sum of fatalities and injuries. It was grouped by type of event.

The total damage is defined as the sum of total property damage and total crop damage.

The sum was grouped by type of event.

library(plyr)
# Keep only the columns needed for the analysis:
# EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP

storm1 <- storm[c("EVTYPE", "FATALITIES", "INJURIES",
                  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
str(storm1)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Add the column harm: injuries plus fatalities for each recorded event

storm2 <- storm1
storm2$harm <- storm1$INJURIES + storm1$FATALITIES
head(storm2)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO          0       15    25.0          K       0              15
## 2 TORNADO          0        0     2.5          K       0               0
## 3 TORNADO          0        2    25.0          K       0               2
## 4 TORNADO          0        2     2.5          K       0               2
## 5 TORNADO          0        2     2.5          K       0               2
## 6 TORNADO          0        6     2.5          K       0               6
# Goal: damage = PROPDMG * (property multiplier) + CROPDMG * (crop multiplier).
# First list the distinct exponent codes that occur in PROPDMGEXP.
unique(storm2[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
#- ? + 0 1 2 3 4 5 6 7 8 B h H K m M

# Recode the damage-exponent columns into new columns PEXP (property) and
# CEXP (crops): the listed symbols become NA, h/H -> 2, K -> 3, m/M -> 5,
# B -> 9. Values not named in the mapping are left as they are.
storm3 <- local({
  exp_codes <- c(" " = NA, "-" = NA, "?" = NA, "+" = NA,
                 "h" = 2, "H" = 2, "K" = 3, "m" = 5, "M" = 5, "B" = 9)
  recoded <- storm2
  recoded$PEXP <- revalue(storm2$PROPDMGEXP, exp_codes)
  recoded$CEXP <- revalue(storm2$CROPDMGEXP, exp_codes)
  recoded
})
## The following `from` values were not present in `x`:
## The following `from` values were not present in `x`:  , -, +, h, H
# Preview the recoded exponent columns (PEXP, CEXP)
head(storm3)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO          0       15    25.0          K       0              15
## 2 TORNADO          0        0     2.5          K       0               0
## 3 TORNADO          0        2    25.0          K       0               2
## 4 TORNADO          0        2     2.5          K       0               2
## 5 TORNADO          0        2     2.5          K       0               2
## 6 TORNADO          0        6     2.5          K       0               6
##   PEXP CEXP
## 1    3     
## 2    3     
## 3    3     
## 4    3     
## 5    3     
## 6    3
# Translate the raw exponent codes into numeric dollar multipliers.
# The lookup reads the untouched PROPDMGEXP / CROPDMGEXP columns so that a
# digit code n always means 10^n and a letter always means its own magnitude
# (h/H = hundred, k/K = thousand, m/M = million, B = billion).
# Codes with no entry in the table (empty string, "-", "?", "+") give NA,
# i.e. the multiplier is treated as unknown.
exp_multiplier <- c(
  "0" = 1, "1" = 10, "2" = 100, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "h" = 100, "H" = 100, "k" = 1e3, "K" = 1e3,
  "m" = 1e6, "M" = 1e6, "B" = 1e9
)
storm4 <- mutate(storm3,
                 PEXP = unname(exp_multiplier[as.character(PROPDMGEXP)]),
                 CEXP = unname(exp_multiplier[as.character(CROPDMGEXP)])
                )
## The following `from` values were not present in `x`: 1, 4, 6, 7, 8
# Make sure both multiplier columns are plain numeric vectors. Going through
# as.character() first means that, if a column is a factor, its labels are
# converted rather than its internal level codes. Non-numeric labels become NA.
storm4$PEXP <- as.numeric(as.character(storm4$PEXP))
storm4$CEXP <- as.numeric(as.character(storm4$CEXP))
## Warning: NAs introduzidos por coerção
# Preview the numeric multipliers
head(storm4)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO          0       15    25.0          K       0              15
## 2 TORNADO          0        0     2.5          K       0               0
## 3 TORNADO          0        2    25.0          K       0               2
## 4 TORNADO          0        2     2.5          K       0               2
## 5 TORNADO          0        2     2.5          K       0               2
## 6 TORNADO          0        6     2.5          K       0               6
##   PEXP CEXP
## 1 1000   NA
## 2 1000   NA
## 3 1000   NA
## 4 1000   NA
## 5 1000   NA
## 6 1000   NA
# Check the column types after the conversion
str(storm4)
## 'data.frame':    902297 obs. of  10 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ harm      : num  15 0 2 2 2 6 1 0 15 0 ...
##  $ PEXP      : num  1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
##  $ CEXP      : num  NA NA NA NA NA NA NA NA NA NA ...
# Scale the raw damage figures by their multipliers to obtain dollar amounts:
# PRODMG1 for property and CROPDMG1 for crops. A row whose multiplier is NA
# gets NA for that amount. The former unnamed na.omit() argument was removed:
# plyr::mutate() silently ignores unnamed arguments, so it never had an effect.
storm5 <- mutate(storm4, PRODMG1 = PROPDMG * PEXP, CROPDMG1 = CROPDMG * CEXP)
head(storm5)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO          0       15    25.0          K       0              15
## 2 TORNADO          0        0     2.5          K       0               0
## 3 TORNADO          0        2    25.0          K       0               2
## 4 TORNADO          0        2     2.5          K       0               2
## 5 TORNADO          0        2     2.5          K       0               2
## 6 TORNADO          0        6     2.5          K       0               6
##   PEXP CEXP PRODMG1 CROPDMG1
## 1 1000   NA   25000       NA
## 2 1000   NA    2500       NA
## 3 1000   NA   25000       NA
## 4 1000   NA    2500       NA
## 5 1000   NA    2500       NA
## 6 1000   NA    2500       NA
# Total economic damage per event = property damage + crop damage.
# A missing (NA) component counts as zero; with a plain `+` one NA component
# would turn the whole total into NA and the event would drop out of the
# per-type sums. Printed output shown in this document must be regenerated
# after this change.
storm6 <- mutate(storm5,
                 demage = rowSums(cbind(PRODMG1, CROPDMG1), na.rm = TRUE))
head(storm6)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO          0       15    25.0          K       0              15
## 2 TORNADO          0        0     2.5          K       0               0
## 3 TORNADO          0        2    25.0          K       0               2
## 4 TORNADO          0        2     2.5          K       0               2
## 5 TORNADO          0        2     2.5          K       0               2
## 6 TORNADO          0        6     2.5          K       0               6
##   PEXP CEXP PRODMG1 CROPDMG1 demage
## 1 1000   NA   25000       NA     NA
## 2 1000   NA    2500       NA     NA
## 3 1000   NA   25000       NA     NA
## 4 1000   NA    2500       NA     NA
## 5 1000   NA    2500       NA     NA
## 6 1000   NA    2500       NA     NA
# Keep the event type plus the harm and damage measures used for the rankings
storm7 <- subset(storm6,
                 select = c(EVTYPE, FATALITIES, INJURIES,
                            PRODMG1, CROPDMG1, harm, demage))
head(storm7)
##    EVTYPE FATALITIES INJURIES PRODMG1 CROPDMG1 harm demage
## 1 TORNADO          0       15   25000       NA   15     NA
## 2 TORNADO          0        0    2500       NA    0     NA
## 3 TORNADO          0        2   25000       NA    2     NA
## 4 TORNADO          0        2    2500       NA    2     NA
## 5 TORNADO          0        2    2500       NA    2     NA
## 6 TORNADO          0        6    2500       NA    6     NA
# Check the structure of the final analysis table
str(storm7)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PRODMG1   : num  25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
##  $ CROPDMG1  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ harm      : num  15 0 2 2 2 6 1 0 15 0 ...
##  $ demage    : num  NA NA NA NA NA NA NA NA NA NA ...
# Which types of events cause the greatest human harm?

# Total fatalities per event type; keep the five largest totals
fatal <- aggregate(FATALITIES ~ EVTYPE, data = storm7, FUN = sum)
fatal5 <- fatal[order(-fatal[["FATALITIES"]])[1:5], ]
head(fatal5)
##             EVTYPE FATALITIES
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
# Total injuries per event type; keep the five largest totals
injury <- aggregate(INJURIES ~ EVTYPE, data = storm7, FUN = sum)
injury5 <- injury[order(-injury[["INJURIES"]])[1:5], ]
head(injury5)
##             EVTYPE INJURIES
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230
# Total harm (fatalities + injuries) per event type; keep the five largest
harm <- aggregate(harm ~ EVTYPE, data = storm7, FUN = sum)
harm5 <- harm[order(-harm[["harm"]])[1:5], ]
head(harm5)
##             EVTYPE  harm
## 834        TORNADO 96979
## 130 EXCESSIVE HEAT  8428
## 856      TSTM WIND  7461
## 170          FLOOD  7259
## 464      LIGHTNING  6046
# Which types of events have the greatest economic consequences?

# Total property damage per event type; keep the five largest totals
propdm <- aggregate(PRODMG1 ~ EVTYPE, data = storm7, FUN = sum)
propdm5 <- propdm[order(-propdm[["PRODMG1"]])[1:5], ]
head(propdm5)
##                EVTYPE      PRODMG1
## 62              FLOOD 144657709800
## 179 HURRICANE/TYPHOON  69305840000
## 333           TORNADO  57039360480
## 281       STORM SURGE  43323536000
## 50        FLASH FLOOD  16839233510
# Total crop damage per event type; keep the five largest totals
cropdm <- aggregate(CROPDMG1 ~ EVTYPE, data = storm7, FUN = sum)
cropdm5 <- cropdm[order(-cropdm[["CROPDMG1"]])[1:5], ]
head(cropdm5)
##         EVTYPE    CROPDMG1
## 16     DROUGHT 13972566000
## 34       FLOOD  5661968450
## 98 RIVER FLOOD  5029459000
## 85   ICE STORM  5022113500
## 52        HAIL  3025537450
# Total damage (property + crop) per event type; keep the five largest totals
demage <- aggregate(demage ~ EVTYPE, data = storm7, FUN = sum)
demage5 <- demage[order(-demage[["demage"]])[1:5], ]
head(demage5)
##               EVTYPE       demage
## 23             FLOOD 138007444500
## 62 HURRICANE/TYPHOON  29348167800
## 99           TORNADO  16570326150
## 57         HURRICANE  12405268000
## 75       RIVER FLOOD  10108369000
# Express the totals in billions of dollars for plotting. The former unnamed
# na.omit() argument was removed: plyr::mutate() silently ignores unnamed
# arguments, so it never had an effect.
demage5 <- mutate(demage5, demageB = demage / 1e9)
head(demage5)
##               EVTYPE       demage   demageB
## 23             FLOOD 138007444500 138.00744
## 62 HURRICANE/TYPHOON  29348167800  29.34817
## 99           TORNADO  16570326150  16.57033
## 57         HURRICANE  12405268000  12.40527
## 75       RIVER FLOOD  10108369000  10.10837
library(ggplot2)
  1. Results
# Bar chart of the five event types with the most fatalities plus injuries;
# each bar is labelled with its total just above the bar.
ggplot(harm5, aes(x = EVTYPE, y = harm)) +
  geom_col() +
  geom_text(aes(label = harm),
            position = position_dodge(width = 0.9),
            vjust = -0.25,
            size = 3) +
  labs(title = "U.S. top 5 event type - Fatalities and injuries",
       x = "Event type",
       y = "Fatalities and injuries") +
  ylim(0, 100000) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of the five event types with the highest total damage, in billions
# of US dollars. Bar labels are rounded to one decimal so they stay readable,
# and the axis label and title spell "Damage" / "billions" correctly.
ggplot(demage5, aes(y = demageB, x = EVTYPE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(demageB, 1)),
            position = position_dodge(width = 0.9),
            vjust = -0.25,
            size = 3) +
  ylim(0, 150) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Event type",
       y = "Damage (US$ billions)",
       title = "U.S. top 5 event type - Damage in US$ billions")

Floods were responsible for the highest damage. Tornadoes were the main cause of human harm.