Synopsis

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

We found that tornadoes are the most harmful event type for population health, causing 5633 fatalities and 91346 injuries in the USA during 1950-2011. In terms of economic consequences, floods cause the most damage, measured as combined crop and property losses.

Data Processing

The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. The data are provided by the U.S. National Oceanic and Atmospheric Administration (NOAA). URL: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

# Download the compressed data file unless a local copy already exists.
# mode = "wb" requests a binary transfer so the bzip2 archive is written
# byte-for-byte regardless of platform.
if (!file.exists("repdata-data-StormData.csv.bz2")) {
    download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                  destfile = "repdata-data-StormData.csv.bz2",
                  mode = "wb")
}

# Stop with a clear message if the archive is still missing, instead of
# silently leaving StormData undefined for the code that follows.
if (!file.exists("repdata-data-StormData.csv.bz2")) {
    stop("repdata-data-StormData.csv.bz2 not found; the download may have failed",
         call. = FALSE)
}

# read.csv() decompresses on the fly through the bzfile() connection.
# stringsAsFactors = TRUE keeps text columns as factors (the default changed
# to FALSE in R 4.0), matching the column types printed by str() below.
StormData <- read.csv(bzfile("repdata-data-StormData.csv.bz2"),
                      header = TRUE, stringsAsFactors = TRUE)

Explore the dataset

Structure of dataset

# Compactly list every column with its type and first few values.
str(StormData)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
##  $ BGN_TIME  : Factor w/ 3608 levels "000","0000","0001",..: 152 167 2645 1563 2524 3126 122 1563 3126 3126 ...
##  $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
##  $ STATE     : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 826 826 826 826 826 826 826 826 826 826 ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : Factor w/ 35 levels "","  N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_LOCATI: Factor w/ 54429 levels ""," Christiansburg",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_DATE  : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_TIME  : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_LOCATI: Factor w/ 34506 levels ""," CANTON"," TULIA",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","+","-","0",..: 16 16 16 16 16 16 16 16 16 16 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","0","2","?",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WFO       : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ZONENAMES : Factor w/ 25112 levels "","                                                                                                                               "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : Factor w/ 436781 levels "","\t","\t\t",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Dimension of dataset

# Number of rows (observations) and columns (variables).
dim(StormData)
## [1] 902297     37

Column names and some sample content of the dataset

# List the column names.
names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Show the first three rows as a sample of the content.
head(StormData,3)
##   STATE__          BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1 4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1 4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1 2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3

Processing data

These are the 7 columns we need from the data set:

  • EVTYPE - measure of event type (e.g. tornado, flood, etc.)
  • FATALITIES - measure of harm to human health
  • INJURIES - measure of harm to human health
  • PROPDMG - measure of property damage and hence economic damage in USD
  • PROPDMGEXP - measure of magnitude of property damage (e.g. thousands, millions USD, etc.)
  • CROPDMG - measure of crop damage and hence economic damage in USD
  • CROPDMGEXP - measure of magnitude of crop damage (e.g. thousands, millions USD, etc.)

  # Keep only the event type plus the health and damage columns.
  NewStormData <- StormData[, c("EVTYPE",
                                "FATALITIES", "INJURIES",
                                "PROPDMG", "PROPDMGEXP",
                                "CROPDMG", "CROPDMGEXP")]

  # Total fatalities and total injuries for each event type.
  sumofdead <- aggregate(FATALITIES ~ EVTYPE, data = NewStormData, FUN = sum)
  sumofinjuries <- aggregate(INJURIES ~ EVTYPE, data = NewStormData, FUN = sum)

  # Rank the totals from largest to smallest and keep the first ten rows.
  top10sumofdead <- sumofdead[
    order(sumofdead$FATALITIES, decreasing = TRUE),
  ][seq_len(10), ]
  top10sumofinjuries <- sumofinjuries[
    order(sumofinjuries$INJURIES, decreasing = TRUE),
  ][seq_len(10), ]

    Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

    Top 10 events that caused the most fatalities

    top10sumofdead
    ##             EVTYPE FATALITIES
    ## 826        TORNADO       5633
    ## 124 EXCESSIVE HEAT       1903
    ## 151    FLASH FLOOD        978
    ## 271           HEAT        937
    ## 453      LIGHTNING        816
    ## 846      TSTM WIND        504
    ## 167          FLOOD        470
    ## 572    RIP CURRENT        368
    ## 343      HIGH WIND        248
    ## 19       AVALANCHE        224

    Top 10 events that caused the most injuries

    top10sumofinjuries
    ##                EVTYPE INJURIES
    ## 826           TORNADO    91346
    ## 846         TSTM WIND     6957
    ## 167             FLOOD     6789
    ## 124    EXCESSIVE HEAT     6525
    ## 453         LIGHTNING     5230
    ## 271              HEAT     2100
    ## 422         ICE STORM     1975
    ## 151       FLASH FLOOD     1777
    ## 753 THUNDERSTORM WIND     1488
    ## 241              HAIL     1361

    RESULTS: Charts of the top events by fatalities and injuries

    Tornadoes are the leading cause of harm to population health.

    # Two panels side by side; las = 2 rotates the axis labels and the large
    # bottom margin (first element of mar) leaves room for the event names.
    par(mfrow = c(1, 2), mar = c(10, 3, 10, 3), las = 2, cex = 0.8)

    # Top 10 events causing fatalities.
    # Bar heights are fatality totals, so the y axis is labelled as such.
    # rainbow() gives one distinct colour per bar and does not depend on
    # whether EVTYPE is stored as a factor or as character.
    barplot(top10sumofdead$FATALITIES,
            names.arg = top10sumofdead$EVTYPE,
            col = rainbow(nrow(top10sumofdead)),
            main = "Top 10 events causing fatalities",
            ylab = "number of fatalities")

    # Top 10 events causing injuries (bar heights are injury totals).
    barplot(top10sumofinjuries$INJURIES,
            names.arg = top10sumofinjuries$EVTYPE,
            col = rainbow(nrow(top10sumofinjuries)),
            main = "Top 10 events causing injuries",
            ylab = "number of injuries")

    Data processing for economic impact

    Alphabetical characters used to signify magnitude include “H” for hundreds, “K” for thousands, “M” for millions, and “B” for billions.

    # Translate damage-exponent codes into powers of ten.
    #   H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9
    #   a string of digits is used as the exponent itself
    #   anything else ("", "+", "-", "?", NA) -> 0, i.e. the amount is unscaled
    # `code` may be a factor or character vector; the result is a numeric
    # vector of the same length. Mapping unknown codes explicitly avoids the
    # "NAs introduced by coercion" warnings of a blanket as.numeric().
    exp_to_power <- function(code) {
        code <- toupper(as.character(code))
        # Lookup by name: codes that are not H/K/M/B come back as NA.
        power <- unname(c(H = 2, K = 3, M = 6, B = 9)[code])
        is_digits <- grepl("^[0-9]+$", code)
        power[is_digits] <- as.numeric(code[is_digits])
        power[is.na(power)] <- 0
        power
    }

    # Replace the exponent codes with their numeric powers of ten.
    NewStormData$PROPDMGEXP <- exp_to_power(NewStormData$PROPDMGEXP)
    NewStormData$CROPDMGEXP <- exp_to_power(NewStormData$CROPDMGEXP)

    # Damage in USD = reported amount scaled by its power of ten.
    NewStormData$NewPropDMG <- NewStormData$PROPDMG * 10^NewStormData$PROPDMGEXP
    NewStormData$NewCropDMG <- NewStormData$CROPDMG * 10^NewStormData$CROPDMGEXP

    Q2. Across the United States, which types of events have the greatest economic consequences?

    Top 10 events causing the greatest property damage

    # Total property damage per event type, ranked from largest to smallest;
    # keep and display the first ten rows.
    sumofpropertyDMG <- aggregate(NewPropDMG ~ EVTYPE, data = NewStormData, FUN = sum)
    top10sumofpropmg <- sumofpropertyDMG[
        order(sumofpropertyDMG$NewPropDMG, decreasing = TRUE),
    ][seq_len(10), ]
    top10sumofpropmg
    ##                EVTYPE   NewPropDMG
    ## 167             FLOOD 144657709807
    ## 393 HURRICANE/TYPHOON  69305840000
    ## 826           TORNADO  56947380676
    ## 656       STORM SURGE  43323536000
    ## 151       FLASH FLOOD  16822673978
    ## 241              HAIL  15735267513
    ## 385         HURRICANE  11868319010
    ## 839    TROPICAL STORM   7703890550
    ## 962      WINTER STORM   6688497251
    ## 343         HIGH WIND   5270046295

    Top 10 events causing the greatest crop damage

    # Total crop damage per event type, ranked from largest to smallest;
    # keep and display the first ten rows.
    sumofcropDMG <- aggregate(NewCropDMG ~ EVTYPE, data = NewStormData, FUN = sum)
    top10sumofcropmg <- sumofcropDMG[
        order(sumofcropDMG$NewCropDMG, decreasing = TRUE),
    ][seq_len(10), ]
    top10sumofcropmg
    ##                EVTYPE  NewCropDMG
    ## 91            DROUGHT 13972566000
    ## 167             FLOOD  5661968450
    ## 577       RIVER FLOOD  5029459000
    ## 422         ICE STORM  5022113500
    ## 241              HAIL  3025954473
    ## 385         HURRICANE  2741910000
    ## 393 HURRICANE/TYPHOON  2607872800
    ## 151       FLASH FLOOD  1421317100
    ## 132      EXTREME COLD  1292973000
    ## 198      FROST/FREEZE  1094086000

    Top 10 events with the greatest economic consequences (in terms of combined crop and property damage)

    # Combined property and crop damage for every record.
    NewStormData$sumofDamage <- with(NewStormData, NewPropDMG + NewCropDMG)

    # Total combined damage per event type, ranked from largest to smallest;
    # keep and display the first ten rows.
    Damages <- aggregate(sumofDamage ~ EVTYPE, data = NewStormData, FUN = sum)
    top10damage <- Damages[
        order(Damages$sumofDamage, decreasing = TRUE),
    ][seq_len(10), ]
    top10damage
    ##                EVTYPE  sumofDamage
    ## 167             FLOOD 150319678257
    ## 393 HURRICANE/TYPHOON  71913712800
    ## 826           TORNADO  57362333946
    ## 656       STORM SURGE  43323541000
    ## 241              HAIL  18761221986
    ## 151       FLASH FLOOD  18243991078
    ## 91            DROUGHT  15018672000
    ## 385         HURRICANE  14610229010
    ## 577       RIVER FLOOD  10148404500
    ## 422         ICE STORM   8967041360

    RESULTS: Charts of the top events by property damage, crop damage, and total economic consequences

    Floods are the major cause of property damage and have the greatest economic consequences.

    # Three panels side by side; las = 2 rotates the axis labels and the large
    # bottom margin (first element of mar) leaves room for the event names.
    # par() has no "title" parameter, so that argument (which only produced a
    # warning) is not passed.
    par(mfrow = c(1, 3), mar = c(10, 3, 10, 3), las = 2, cex = 0.5)

    # Top events causing Property Damage.
    # Bar heights are dollar totals, so the y axis is labelled as such.
    # Colours are one per bar of this table, not taken from the fatalities table.
    barplot(top10sumofpropmg$NewPropDMG,
            names.arg = top10sumofpropmg$EVTYPE,
            col = rainbow(nrow(top10sumofpropmg)),
            main = "Top events causing Property Damage",
            ylab = "damage (USD)")

    # Top events causing Crop Damage.
    barplot(top10sumofcropmg$NewCropDMG,
            names.arg = top10sumofcropmg$EVTYPE,
            col = rainbow(nrow(top10sumofcropmg)),
            main = "Top events causing Crop Damage",
            ylab = "damage (USD)")

    # Top events causing Most Economic impact (property + crop damage).
    barplot(top10damage$sumofDamage,
            names.arg = top10damage$EVTYPE,
            col = rainbow(nrow(top10damage)),
            main = "Top events causing Most Economic impact",
            ylab = "damage (USD)")

    Conclusion

    We found that tornadoes are the most harmful event type for population health, causing 5633 fatalities and 91346 injuries in the USA during 1950-2011. In terms of economic consequences, floods cause the most damage, measured as combined crop and property losses.