Synopsis

This analysis will explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database. The goal is to answer some basic questions about severe weather events as they relate to public health (human fatalities and injuries) and economic problems (property and crop damage).

The data can be found at:

The documentation can be found at:

Data Processing

     ## File was previously unzipped. The CSV is read from the current working
     ## directory (the project folder) instead of a hard-coded, machine-specific
     ## path, so the analysis can run from any checkout of the project.
     storm_file <- "repdata-data-StormData.csv"
     if (!file.exists(storm_file)) {
        stop("Cannot find '", storm_file, "' in ", getwd(), call. = FALSE)
     }
     ## colClasses = "character" reads every column as text; the numeric columns
     ## are converted with as.numeric() at the point where they are used.
     StormData <- read.csv(storm_file, stringsAsFactors = FALSE,
                           colClasses = "character", na.strings = "NA")

Data Information

  • Now let’s look at some information about our file.
  ## view some information about the data
  ## dim() reports the number of rows (observations) and columns (variables)
    dim(StormData)
## [1] 902297     37
    ## str() lists each column with its type and first few values; every column
    ## shows as chr because the file was read with colClasses = "character"
    str(StormData)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : chr  "1.00" "1.00" "1.00" "1.00" ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : chr  "97.00" "3.00" "57.00" "89.00" ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ COUNTYENDN: chr  "" "" "" "" ...
##  $ END_RANGE : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : chr  "14.00" "2.00" "0.10" "0.00" ...
##  $ WIDTH     : chr  "100.00" "150.00" "123.00" "100.00" ...
##  $ F         : chr  "3" "2" "2" "2" ...
##  $ MAG       : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ FATALITIES: chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ INJURIES  : chr  "15.00" "0.00" "2.00" "2.00" ...
##  $ PROPDMG   : chr  "25.00" "2.50" "25.00" "2.50" ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : chr  "3040.00" "3042.00" "3340.00" "3458.00" ...
##  $ LONGITUDE : chr  "8812.00" "8755.00" "8742.00" "8626.00" ...
##  $ LATITUDE_E: chr  "3051.00" "0.00" "0.00" "0.00" ...
##  $ LONGITUDE_: chr  "8806.00" "0.00" "0.00" "0.00" ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : chr  "1.00" "2.00" "3.00" "4.00" ...
    ## first six rows of the data
    head(StormData)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1    1.00  4/18/1950 0:00:00     0130       CST  97.00     MOBILE    AL
## 2    1.00  4/18/1950 0:00:00     0145       CST   3.00    BALDWIN    AL
## 3    1.00  2/20/1951 0:00:00     1600       CST  57.00    FAYETTE    AL
## 4    1.00   6/8/1951 0:00:00     0900       CST  89.00    MADISON    AL
## 5    1.00 11/15/1951 0:00:00     1500       CST  43.00    CULLMAN    AL
## 6    1.00 11/15/1951 0:00:00     2000       CST  77.00 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO      0.00                                            0.00
## 2 TORNADO      0.00                                            0.00
## 3 TORNADO      0.00                                            0.00
## 4 TORNADO      0.00                                            0.00
## 5 TORNADO      0.00                                            0.00
## 6 TORNADO      0.00                                            0.00
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH  WIDTH F  MAG FATALITIES
## 1                 0.00                     14.00 100.00 3 0.00       0.00
## 2                 0.00                      2.00 150.00 2 0.00       0.00
## 3                 0.00                      0.10 123.00 2 0.00       0.00
## 4                 0.00                      0.00 100.00 2 0.00       0.00
## 5                 0.00                      0.00 150.00 2 0.00       0.00
## 6                 0.00                      1.50 177.00 2 0.00       0.00
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1    15.00   25.00          K    0.00                                    
## 2     0.00    2.50          K    0.00                                    
## 3     2.00   25.00          K    0.00                                    
## 4     2.00    2.50          K    0.00                                    
## 5     2.00    2.50          K    0.00                                    
## 6     6.00    2.50          K    0.00                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1  3040.00   8812.00    3051.00    8806.00           1.00
## 2  3042.00   8755.00       0.00       0.00           2.00
## 3  3340.00   8742.00       0.00       0.00           3.00
## 4  3458.00   8626.00       0.00       0.00           4.00
## 5  3412.00   8642.00       0.00       0.00           5.00
## 6  3450.00   8748.00       0.00       0.00           6.00
    ## column names, in file order
    names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
## are there any missing values
## counts NA cells only; empty strings ("") in the character columns are not NA
    sum(is.na(StormData))   
## [1] 0

Data Analysis - Public Health

Which types of events are most harmful with respect to public health?

FATALITIES

  • Subset StormData to include only the EVTYPE and FATALITIES columns where Fatalities are greater than 0 (StormFatal)
  • Sum FATALITIES by EVTYPE (StormFatalAgg)
  • Change column names of StormFatalAgg to “Event” and “Fatalities”
  • Sort StormFatalAgg by descending Fatalities count
  • Display the first 6 rows of the sorted data (StormFatalSort)
   library(dplyr)
## get data for the Event type and Fatalities and sum by event
## FATALITIES was read as text. stringsAsFactors = FALSE keeps it as text in
## the two-column data frame; without it, R < 4.0 turns the column into a
## factor and as.numeric() then sums the factor's level codes, not the counts.
   StormFatal <- subset(StormData, as.numeric(FATALITIES) > 0)
   StormFatal <- data.frame(StormFatal$EVTYPE, StormFatal$FATALITIES,
                            stringsAsFactors = FALSE)
   StormFatalAgg <- aggregate(as.numeric(StormFatal.FATALITIES) ~ StormFatal.EVTYPE,
                              data = StormFatal, FUN = sum)
   colnames(StormFatalAgg) <- c("Event", "Fatalities")
## Sort by Fatalities descending
   StormFatalSort <- arrange(StormFatalAgg, desc(Fatalities))
## Show the six event types with the most fatalities.
## NOTE(review): the table printed below may predate this fix - re-run to refresh.
   head(StormFatalSort)
##            Event Fatalities
## 1        TORNADO      19335
## 2 EXCESSIVE HEAT       5944
## 3    FLASH FLOOD       4038
## 4          FLOOD       1956
## 5      LIGHTNING       1518
## 6      TSTM WIND       1493

INJURIES

  • Subset StormData to include only EVTYPE and INJURIES columns where Injuries are greater than 0 (StormInj)
  • Sum INJURIES by EVTYPE (StormInjAgg)
  • Change column names of StormInjAgg to “Event” and “Injuries”
  • Sort StormInjAgg by descending Injuries count
  • Display the first 6 rows of the sorted data (StormInjSort)
## get data for the Event type and Injuries and sum by event
## INJURIES was read as text. stringsAsFactors = FALSE keeps it as text in
## the two-column data frame; without it, R < 4.0 turns the column into a
## factor and as.numeric() then sums the factor's level codes, not the counts.
   StormInj <- subset(StormData, as.numeric(INJURIES) > 0)
   StormInj <- data.frame(StormInj$EVTYPE, StormInj$INJURIES,
                          stringsAsFactors = FALSE)
   StormInjAgg <- aggregate(as.numeric(StormInj.INJURIES) ~ StormInj.EVTYPE,
                            data = StormInj, FUN = sum)
   colnames(StormInjAgg) <- c("Event", "Injuries")
## Sort by Injuries descending
   StormInjSort <- arrange(StormInjAgg, desc(Injuries))
## Show the six event types with the most injuries.
## NOTE(review): the table printed below may predate this fix - re-run to refresh.
   head(StormInjSort)
##               Event Injuries
## 1           TORNADO   525450
## 2         TSTM WIND   114907
## 3         LIGHTNING    80405
## 4 THUNDERSTORM WIND    20768
## 5         HIGH WIND    18199
## 6       FLASH FLOOD    16429

Data Analysis - Property and Crop Damage

Which types of events are most harmful with respect to Property Damage and Crop Damage? Although Flood Damage is mentioned in the documentation, there does not seem to be a column or columns for flood damage and it will not be included in this analysis.

PROPERTY DAMAGE

  • Subset StormData to include only rows where PROPDMG is greater than 0 (StormPropDmg)
  • Determine the unique values for the PROPDMGEXP column that will be used as a multiplier of the PROPDMG column
  • Create a new column, PropertyDamage, that will be the result of PROPDMG multiplied by the corresponding value for PROPDMGEXP
    • blank,0,1 - multiply by 1
    • -,+ - multiply by 0
    • n=2 to 7 - multiply by 10^n
    • h, H - multiply by 100
    • k, K - multiply by 1000
    • m, M - multiply by 1000000
    • b, B - multiply by 1000000000
  • Create a data frame of just Event and PropertyDamage
  • Sum PropertyDamage by EVTYPE (StormPropDmgAgg)
  • Change column names of StormPropDmgAgg to “Event” and “PropertyDamage”
  • Sort StormPropDmgAgg by descending PropertyDamage count
  • Display the first 6 rows of the sorted data (StormPropDmgSort)
## Keep only the storm records that report a non-zero property damage figure.
## PROPDMG is stored as text, hence the numeric conversion inside the filter.
StormPropDmg <- StormData[which(as.numeric(StormData$PROPDMG) != 0), ]
## Tabulate the PROPDMGEXP codes; they are the multipliers applied to PROPDMG
table(StormPropDmg[["PROPDMGEXP"]])
## 
##             -      +      0      2      3      4      5      6      7 
##     76      1      5    209      1      1      4     18      3      2 
##      B      h      H      K      m      M 
##     40      1      6 227481      7  11319
## do the multiplication
## PROPDMGEXP holds one code per row, so the multiplier is looked up for the
## whole column at once. (A scalar if/else chain only ever tests the first
## row, and R >= 4.2 rejects an if() condition of length > 1 outright.)
prop_exp_codes <- c("B", "M", "K", "H", "-", "+", "2", "3", "4", "5", "6", "7")
prop_exp_mult  <- c(1e9, 1e6, 1e3, 1e2,   0,   0, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7)
prop_mult <- prop_exp_mult[match(toupper(StormPropDmg$PROPDMGEXP), prop_exp_codes)]
## every other code (blank, "0", "1", ...) leaves PROPDMG unchanged
prop_mult[is.na(prop_mult)] <- 1
StormPropDmg$PropertyDamage <- as.numeric(StormPropDmg$PROPDMG) * prop_mult
## keep just the event type and the damage amount, then total per event type
StormPropDmg <- data.frame(StormPropDmg$EVTYPE, StormPropDmg$PropertyDamage)
StormPropDmgAgg <- aggregate(StormPropDmg.PropertyDamage ~ StormPropDmg.EVTYPE,
                             data = StormPropDmg, FUN = sum)
colnames(StormPropDmgAgg) <- c("Event", "PropertyDamage")
## Sort by Property Damage amount descending
StormPropDmgSort <- arrange(StormPropDmgAgg, desc(PropertyDamage))
## NOTE(review): the table printed below may predate this fix - re-run to refresh.
head(StormPropDmgSort)
##               Event PropertyDamage
## 1           TORNADO     3212258160
## 2       FLASH FLOOD     1420124590
## 3         TSTM WIND     1335965610
## 4             FLOOD      899938480
## 5 THUNDERSTORM WIND      876844170
## 6              HAIL      688693380

CROP DAMAGE

  • Subset StormData to include only rows where CROPDMG is greater than 0 (StormCropDmg)
  • Determine the unique values for the CROPDMGEXP column that will be used as a multiplier of the CROPDMG column
  • Create a new column, CropDamage, that will be the result of CROPDMG multiplied by the corresponding value for CROPDMGEXP
    • blank,0,1 - multiply by 1
    • -,+ - multiply by 0
    • n=2 to 8 - multiply by 10^n
    • h, H - multiply by 100
    • k, K - multiply by 1000
    • m, M - multiply by 1000000
    • b, B - multiply by 1000000000
  • Create a data frame of just Event and CropDamage
  • Sum CropDamage by EVTYPE (StormCropDmgAgg)
  • Change column names of StormCropDmgAgg to “Event” and “CropDamage”
  • Sort StormCropDmgAgg by descending CropDamage count
  • Display the first 6 rows of the sorted data (StormCropDmgSort)
## Keep only the storm records that report a non-zero crop damage figure.
## CROPDMG is stored as text, hence the numeric conversion inside the filter.
StormCropDmg <- StormData[which(as.numeric(StormData$CROPDMG) != 0), ]
## Tabulate the CROPDMGEXP codes; they are the multipliers applied to CROPDMG
table(StormCropDmg[["CROPDMGEXP"]])
## 
##           0     B     k     K     m     M 
##     3    12     7    21 20137     1  1918
## do the multiplication
## CROPDMGEXP holds one code per row, so the multiplier is looked up for the
## whole column at once. (A scalar if/else chain only ever tests the first
## row, and R >= 4.2 rejects an if() condition of length > 1 outright.)
crop_exp_codes <- c("B", "M", "K", "H")
crop_exp_mult  <- c(1e9, 1e6, 1e3, 1e2)
crop_mult <- crop_exp_mult[match(toupper(StormCropDmg$CROPDMGEXP), crop_exp_codes)]
## every other code (blank, "0", ...) leaves CROPDMG unchanged
crop_mult[is.na(crop_mult)] <- 1
StormCropDmg$CropDamage <- as.numeric(StormCropDmg$CROPDMG) * crop_mult

## keep just the event type and the damage amount, then total per event type
StormCropDmg <- data.frame(StormCropDmg$EVTYPE, StormCropDmg$CropDamage)
StormCropDmgAgg <- aggregate(StormCropDmg.CropDamage ~ StormCropDmg.EVTYPE,
                             data = StormCropDmg, FUN = sum)
colnames(StormCropDmgAgg) <- c("Event", "CropDamage")
## Sort by Crop Damage amount descending
StormCropDmgSort <- arrange(StormCropDmgAgg, desc(CropDamage))
## NOTE(review): the table printed below may predate this fix - re-run to refresh.
head(StormCropDmgSort)
##               Event   CropDamage
## 1              HAIL 579596280000
## 2       FLASH FLOOD 179200460000
## 3             FLOOD 168037880000
## 4         TSTM WIND 109202600000
## 5           TORNADO 100018520000
## 6 THUNDERSTORM WIND  66791450000

Results

Since there are so many different types of events, only the Top 20 events will be displayed in the bar charts below.

FATALITIES AND INJURIES

  • Create a data frame from the first 20 rows of the Sorted data frames for Fatalities and Injuries (StormFatalSort and StormInjSort)
  • create two lattice barchart graphics showing the Top 20 causes for Fatalities and Injuries
   library(lattice)
## Fatalities: take the 20 largest totals and build a horizontal bar chart
## with the event types ordered by their fatality count.
## The chart object is stored in Fatal; the assignment itself draws nothing.
   StormFatalTop20 <- head(StormFatalSort, 20)
   Fatal <- barchart(
      reorder(Event, Fatalities) ~ Fatalities,
      main = "Top Twenty Weather-related Causes of Fatalities",
      cex = 0.6,
      data = StormFatalTop20
   )

## Injuries: same construction, stored in Inj.
   StormInjTop20 <- head(StormInjSort, 20)
   Inj <- barchart(
      reorder(Event, Injuries) ~ Injuries,
      main = "Top Twenty Weather-related Causes of Injuries",
      cex = 0.6,
      data = StormInjTop20
   )

The Top Twenty Causes of Fatalities and Injuries from Severe Weather Events. We see that Tornado tops the list for both Fatalities and Injuries.

PROPERTY AND CROP DAMAGE

  • Create a data frame from the first 20 rows of the Sorted data frames for Property Damage and Crop Damage (StormPropDmgSort and StormCropDmgSort)
  • create two lattice barchart graphics showing the Top 20 causes for Property Damage and Crop Damage
## Property damage: take the 20 largest totals and build a horizontal bar
## chart with the event types ordered by their damage amount.
## The chart object is stored in PD; the assignment itself draws nothing.
StormPropDmgTop20 <- head(StormPropDmgSort, 20)
PD <- barchart(
  reorder(Event, PropertyDamage) ~ PropertyDamage,
  main = "Top Twenty Weather-related Causes of Property Damage",
  cex = 0.6,
  data = StormPropDmgTop20
)

## Crop damage: same construction, stored in CD.
StormCropDmgTop20 <- head(StormCropDmgSort, 20)
CD <- barchart(
  reorder(Event, CropDamage) ~ CropDamage,
  main = "Top Twenty Weather-related Causes of Crop Damage",
  cex = 0.6,
  data = StormCropDmgTop20
)

The Top Twenty Causes of Property Damage and Crop Damage from Severe Weather Events.