OVERVIEW:

SYNOPSIS:

OBJECTIVE:

Explore the NOAA Storm Database to help answer important questions about severe weather events.

DATA PROCESSING:

DATA PREP

1. Install packages & load libraries

Install packages …

# load libraries ...

library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.0 (2020-02-14 07:10:20 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.23.0 successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
## 
##     throw
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, load, save
## R.utils v2.9.2 successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
##     warnings
library(rmarkdown)
library(knitr)

2. Loading the data & reading the file

library(readr)
# Import the raw storm data.
# The types of the columns this analysis depends on are pinned explicitly, and
# readr is told to inspect far more than its default 1000 rows when guessing
# the remaining ones. With the default, sparsely populated text columns (for
# example WFO) were guessed as logical and their values were discarded as
# parsing failures.
# NOTE(review): the column specification and parsing-failure log captured
# below were produced before this change; re-knit the report to refresh them.
StormData <- read_csv(
  "StormData.csv",
  col_types = cols(
    EVTYPE = col_character(),
    FATALITIES = col_double(),
    INJURIES = col_double(),
    PROPDMG = col_double(),
    PROPDMGEXP = col_character(),
    CROPDMG = col_double(),
    CROPDMGEXP = col_character()
  ),
  guess_max = 1000000
)
## Warning: Missing column names filled in: 'X38' [38], 'X39' [39], 'X40' [40]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   BGN_DATE = col_character(),
##   TIME_ZONE = col_character(),
##   COUNTYNAME = col_character(),
##   STATE = col_character(),
##   EVTYPE = col_character(),
##   BGN_AZI = col_logical(),
##   BGN_LOCATI = col_logical(),
##   END_DATE = col_logical(),
##   END_TIME = col_logical(),
##   COUNTYENDN = col_logical(),
##   END_AZI = col_logical(),
##   END_LOCATI = col_logical(),
##   PROPDMGEXP = col_character(),
##   CROPDMGEXP = col_character(),
##   WFO = col_logical(),
##   STATEOFFIC = col_logical(),
##   ZONENAMES = col_logical(),
##   REMARKS = col_logical(),
##   X38 = col_logical(),
##   X39 = col_logical()
##   # ... with 1 more columns
## )
## See spec(...) for full column specifications.
## Warning: 5637166 parsing failures.
##  row col           expected actual            file
## 1671 WFO 1/0/T/F/TRUE/FALSE     NG 'StormData.csv'
## 1673 WFO 1/0/T/F/TRUE/FALSE     NG 'StormData.csv'
## 1674 WFO 1/0/T/F/TRUE/FALSE     NG 'StormData.csv'
## 1675 WFO 1/0/T/F/TRUE/FALSE     NG 'StormData.csv'
## 1678 WFO 1/0/T/F/TRUE/FALSE     NG 'StormData.csv'
## .... ... .................. ...... ...............
## See problems(...) for more details.
# Per-column overview of the imported data: type, value range and NA count.
summary(StormData)
##     STATE__       BGN_DATE            BGN_TIME       TIME_ZONE        
##  Min.   : 1.0   Length:903870      Min.   :   0     Length:903870     
##  1st Qu.:19.0   Class :character   1st Qu.:1330     Class :character  
##  Median :30.0   Mode  :character   Median :1630     Mode  :character  
##  Mean   :31.2                      Mean   :1516                       
##  3rd Qu.:45.0                      3rd Qu.:1900                       
##  Max.   :95.0                      Max.   :9999                       
##  NA's   :1716                      NA's   :654961                     
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:903870      Length:903870      Length:903870     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##  NA's   :1716                                                            
##    BGN_RANGE        BGN_AZI        BGN_LOCATI      END_DATE      
##  Min.   :   0.000   Mode:logical   Mode:logical   Mode :logical  
##  1st Qu.:   0.000   NA's:903870    TRUE:1         FALSE:143      
##  Median :   0.000                  NA's:903869    NA's :903727   
##  Mean   :   1.484                                                
##  3rd Qu.:   1.000                                                
##  Max.   :3749.000                                                
##  NA's   :1716                                                    
##   END_TIME         COUNTY_END      COUNTYENDN       END_RANGE       
##  Mode :logical   Min.   : 0.0000   Mode:logical   Min.   :  0.0000  
##  FALSE:17        1st Qu.: 0.0000   NA's:903870    1st Qu.:  0.0000  
##  NA's :903853    Median : 0.0000                  Median :  0.0000  
##                  Mean   : 0.0006                  Mean   :  0.9858  
##                  3rd Qu.: 0.0000                  3rd Qu.:  0.0000  
##                  Max.   :16.0000                  Max.   :925.0000  
##                  NA's   :1573                     NA's   :1716      
##   END_AZI        END_LOCATI          LENGTH              WIDTH         
##  Mode :logical   Mode :logical   Min.   :   0.0000   Min.   :   0.000  
##  FALSE:68        FALSE:68        1st Qu.:   0.0000   1st Qu.:   0.000  
##  TRUE :8         NA's :903802    Median :   0.0000   Median :   0.000  
##  NA's :903794                    Mean   :   0.2299   Mean   :   7.489  
##                                  3rd Qu.:   0.0000   3rd Qu.:   0.000  
##                                  Max.   :2315.0000   Max.   :4400.000  
##                                  NA's   :1641        NA's   :1573      
##        F               MAG             FATALITIES          INJURIES       
##  Min.   : 0.0     Min.   :    0.00   Min.   :  0.0000   Min.   :   0.000  
##  1st Qu.: 0.0     1st Qu.:    0.00   1st Qu.:  0.0000   1st Qu.:   0.000  
##  Median : 1.0     Median :   50.00   Median :  0.0000   Median :   0.000  
##  Mean   : 0.9     Mean   :   46.89   Mean   :  0.0218   Mean   :   0.155  
##  3rd Qu.: 1.0     3rd Qu.:   75.00   3rd Qu.:  0.0000   3rd Qu.:   0.000  
##  Max.   :12.0     Max.   :22000.00   Max.   :800.0000   Max.   :1700.000  
##  NA's   :845068   NA's   :1573       NA's   :1573       NA's   :1716      
##     PROPDMG         PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Min.   :   0.00   Length:903870      Min.   :  0.000   Length:903870     
##  1st Qu.:   0.00   Class :character   1st Qu.:  0.000   Class :character  
##  Median :   0.00   Mode  :character   Median :  0.000   Mode  :character  
##  Mean   :  12.06                      Mean   :  6.349                     
##  3rd Qu.:   0.50                      3rd Qu.:  0.000                     
##  Max.   :5000.00                      Max.   :990.000                     
##  NA's   :1573                         NA's   :1716                        
##    WFO          STATEOFFIC     ZONENAMES         LATITUDE      LONGITUDE     
##  Mode:logical   Mode:logical   Mode:logical   Min.   :   0   Min.   :-14451  
##  TRUE:7166      NA's:903870    NA's:903870    1st Qu.:2802   1st Qu.:  7247  
##  NA's:896704                                  Median :3540   Median :  8707  
##                                               Mean   :2875   Mean   :  6940  
##                                               3rd Qu.:4019   3rd Qu.:  9605  
##                                               Max.   :9706   Max.   : 17124  
##                                               NA's   :1620   NA's   :1573    
##    LATITUDE_E     LONGITUDE_     REMARKS            REFNUM         X38         
##  Min.   :   0   Min.   :-14455   Mode:logical   Min.   :     1   Mode:logical  
##  1st Qu.:   0   1st Qu.:     0   NA's:903870    1st Qu.:225538   NA's:903870   
##  Median :   0   Median :     0                  Median :451224                 
##  Mean   :1451   Mean   :  3511                  Mean   :451163                 
##  3rd Qu.:3549   3rd Qu.:  8734                  3rd Qu.:676760                 
##  Max.   :9706   Max.   :368923                  Max.   :902297                 
##  NA's   :1756   NA's   :1710                    NA's   :1722                   
##    X39            X40         
##  Mode:logical   Mode:logical  
##  NA's:903870    NA's:903870   
##                               
##                               
##                               
##                               
## 
# List the column names available in the imported data set.
names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"     "X38"        "X39"        "X40"

RESULTS:

Questions: 1.0.) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Keep only the event type plus the casualty and damage columns that the
# rest of the analysis uses, then report the size of the reduced data set.
StrmData <- StormData[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG",
                          "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

dim(StrmData)
## [1] 903870      7

1.1.) Variable selection (reducing the data set to only needed columns and variables)

# Confirm that only the seven columns selected for the analysis remain.
names(StrmData)
## [1] "EVTYPE"     "FATALITIES" "INJURIES"   "PROPDMG"    "PROPDMGEXP"
## [6] "CROPDMG"    "CROPDMGEXP"

1.2) Reviewing events that cause the most fatalities (The Top-10 Fatalities by Weather Event)

## Procedure = aggregate fatalities by event type, sort the totals in descending order and keep the top 10

# Total fatalities per event type.
Fatalities <- aggregate(FATALITIES ~ EVTYPE, data = StrmData, FUN = sum)
# Rank the totals from largest to smallest and keep the first ten rows.
Top10_Fatalities <- Fatalities[order(-Fatalities[["FATALITIES"]])[seq_len(10)], ]
Top10_Fatalities
##             EVTYPE FATALITIES
## 839        TORNADO     5593.0
## 125 EXCESSIVE HEAT     1903.0
## 148    FLASH FLOOD      978.0
## 270           HEAT      937.0
## 459      LIGHTNING      816.0
## 517              N      812.0
## 992            WNW      560.5
## 599              S      544.0
## 861      TSTM WIND      504.0
## 530             NW      500.1

1.3) Reviewing events that cause the most injuries (The Top-10 Injuries by Weather Event)

## Procedure = aggregate injuries by event type, sort the totals in descending order and keep the top 10

# Total injuries per event type.
Injuries <- aggregate(INJURIES ~ EVTYPE, data = StrmData, FUN = sum)
# Rank the totals from largest to smallest and keep the first ten rows.
Top10_Injuries <- Injuries[order(-Injuries[["INJURIES"]])[seq_len(10)], ]
Top10_Injuries
##                EVTYPE INJURIES
## 826           TORNADO    90671
## 848         TSTM WIND     6957
## 162             FLOOD     6789
## 122    EXCESSIVE HEAT     6525
## 456         LIGHTNING     5230
## 267              HEAT     2100
## 419         ICE STORM     1975
## 145       FLASH FLOOD     1777
## 752 THUNDERSTORM WIND     1488
## 236              HAIL     1361

1.4) Plot of Top 10 Fatalities & Injuries for Weather Event Types (Population Health Impact )

## Procedure = plot graphs showing the top 10 fatalities and injuries

# Two panels side by side; the large bottom margin leaves room for the
# event-type labels, which las = 2 draws perpendicular to the axis.
par(mfrow = c(1, 2), mar = c(10, 3, 3, 2))

# Left panel: fatalities for the ten deadliest event types.
barplot(
  height = Top10_Fatalities[["FATALITIES"]],
  names.arg = Top10_Fatalities[["EVTYPE"]],
  las = 2,
  col = "purple",
  ylab = "fatalities",
  main = "Top 10 fatalities"
)

# Right panel: injuries for the ten most injurious event types.
barplot(
  height = Top10_Injuries[["INJURIES"]],
  names.arg = Top10_Injuries[["EVTYPE"]],
  las = 2,
  col = "purple",
  ylab = "injuries",
  main = "Top 10 injuries"
)

Figure 1: The weather event responsible for the highest fatalities and injuries is the ‘Tornado’.

2.0) Across the United States, which types of events have the greatest economic consequences?

# To answer the question, an analysis of the weather events responsible for the greatest economic consequences is required.

# We formulate a hypothesis.
# Hypothesis:
  # Economic consequences means damages. The two significant types of damage typically caused by weather events are:
    # 1. Properties
    # 2. Crops

2.1) Data Exploration & Findings…

# Upon reviewing the column names, the property damage(PROPDMG) and crop damage(CROPDMG) columns both have another related column titled 'exponents'(i.e - PROPDMGEXP and CROPDMGEXP respectively).

# As a result, let's convert the exponent columns into numeric data for the calculation of total property and crop damages encountered.

2.2) Defining & Calculating [Property Damage]

## Each property damage exponent code is listed and mapped to a numeric multiplier.
## Invalid codes are excluded by assigning them a multiplier of '0'.
## The property damage value is then calculated by multiplying the property damage by its multiplier.

# Show every distinct exponent code present in PROPDMGEXP.
unique(StrmData$PROPDMGEXP)
##  [1] "K"                                                                                                                                                                                                                  
##  [2] "M"                                                                                                                                                                                                                  
##  [3] NA                                                                                                                                                                                                                   
##  [4] "B"                                                                                                                                                                                                                  
##  [5] "m"                                                                                                                                                                                                                  
##  [6] "+"                                                                                                                                                                                                                  
##  [7] "0"                                                                                                                                                                                                                  
##  [8] "5"                                                                                                                                                                                                                  
##  [9] "6"                                                                                                                                                                                                                  
## [10] "?"                                                                                                                                                                                                                  
## [11] "4"                                                                                                                                                                                                                  
## [12] "2"                                                                                                                                                                                                                  
## [13] "3"                                                                                                                                                                                                                  
## [14] "h"                                                                                                                                                                                                                  
## [15] "7"                                                                                                                                                                                                                  
## [16] "H"                                                                                                                                                                                                                  
## [17] "-"                                                                                                                                                                                                                  
## [18] "1"                                                                                                                                                                                                                  
## [19] "8"                                                                                                                                                                                                                  
## [20] "F0"                                                                                                                                                                                                                 
## [21] "formed 6 miles west-northwest of Kingfisher and also rotated briefly around E3. Its life-span was short"                                                                                                            
## [22] "and there was no damage (F0)."                                                                                                                                                                                      
## [23] "and formed about 6 miles west of Kingfisher while tornado E3 was most intense. This tornado rotated around E3 for a short period of time before dissipating. No damage was observed (F0). Another satellite tornado"
# Assigning values for the property exponent data

# Multiplier for every recognised exponent code: H, K and M (either case) and
# B scale by 1e2, 1e3, 1e6 and 1e9; a digit d scales by 10^d.
prop_multiplier <- c(
  H = 1e2, h = 1e2, K = 1e3, k = 1e3, M = 1e6, m = 1e6, B = 1e9,
  setNames(10^(0:8), 0:8)
)

# Look all codes up in one vectorised step. Unrecognised or missing codes come
# back as NA. Assigning the whole column at once also avoids the
# "uninitialised column" warning raised by piecemeal
# `StrmData$PROPEXP[...] <-` assignments.
StrmData$PROPEXP <- unname(prop_multiplier[as.character(StrmData$PROPDMGEXP)])

# A missing exponent means the damage figure is taken at face value. Blank
# cells are imported as NA, so the earlier comparison against " " never
# matched and these rows were silently dropped from the totals.
StrmData$PROPEXP[is.na(StrmData$PROPDMGEXP)] <- 1

# Assigning '0' to invalid exponent data ("+", "-", "?" and any stray text)
StrmData$PROPEXP[is.na(StrmData$PROPEXP)] <- 0

# Calculating the property damage value
# NOTE(review): totals printed further down were produced with the previous
# mapping; re-knit the report to refresh them.
StrmData$PROPDMGVAL <- StrmData$PROPDMG * StrmData$PROPEXP

2.3) Defining & Calculating [Crop Damage]

## Each crop damage exponent code is listed and mapped to a numeric multiplier.
## Invalid codes are excluded by assigning them a multiplier of '0'.
## The crop damage value is then calculated by multiplying the crop damage by its multiplier.

# Show every distinct exponent code present in CROPDMGEXP.
unique(StrmData$CROPDMGEXP)
##  [1] "K"                                                                                                                                                                                                                  
##  [2] "M"                                                                                                                                                                                                                  
##  [3] NA                                                                                                                                                                                                                   
##  [4] "B"                                                                                                                                                                                                                  
##  [5] "m"                                                                                                                                                                                                                  
##  [6] "+"                                                                                                                                                                                                                  
##  [7] "0"                                                                                                                                                                                                                  
##  [8] "5"                                                                                                                                                                                                                  
##  [9] "6"                                                                                                                                                                                                                  
## [10] "?"                                                                                                                                                                                                                  
## [11] "4"                                                                                                                                                                                                                  
## [12] "2"                                                                                                                                                                                                                  
## [13] "3"                                                                                                                                                                                                                  
## [14] "h"                                                                                                                                                                                                                  
## [15] "7"                                                                                                                                                                                                                  
## [16] "H"                                                                                                                                                                                                                  
## [17] "-"                                                                                                                                                                                                                  
## [18] "1"                                                                                                                                                                                                                  
## [19] "8"                                                                                                                                                                                                                  
## [20] "F0"                                                                                                                                                                                                                 
## [21] "formed 6 miles west-northwest of Kingfisher and also rotated briefly around E3. Its life-span was short"                                                                                                            
## [22] "and there was no damage (F0)."                                                                                                                                                                                      
## [23] "and formed about 6 miles west of Kingfisher while tornado E3 was most intense. This tornado rotated around E3 for a short period of time before dissipating. No damage was observed (F0). Another satellite tornado"
# Assigning values for the crop exponent data

# Multiplier for every recognised exponent code: H, K and M (either case) and
# B scale by 1e2, 1e3, 1e6 and 1e9; a digit d scales by 10^d. This is the same
# mapping used for property damage, so the digit and "h"/"H" codes that the
# listing above shows in CROPDMGEXP are no longer left unmapped.
crop_multiplier <- c(
  H = 1e2, h = 1e2, K = 1e3, k = 1e3, M = 1e6, m = 1e6, B = 1e9,
  setNames(10^(0:8), 0:8)
)

# Look all codes up in one vectorised step. Unrecognised or missing codes come
# back as NA. Assigning the whole column at once also avoids the
# "uninitialised column" warning raised by piecemeal
# `StrmData$CROPEXP[...] <-` assignments.
StrmData$CROPEXP <- unname(crop_multiplier[as.character(StrmData$CROPDMGEXP)])

# A missing exponent means the damage figure is taken at face value. Blank
# cells are imported as NA, so the earlier comparison against " " never
# matched and these rows were silently dropped from the totals.
StrmData$CROPEXP[is.na(StrmData$CROPDMGEXP)] <- 1

# Assigning '0' to invalid exponent data ("+", "-", "?" and any stray text)
StrmData$CROPEXP[is.na(StrmData$CROPEXP)] <- 0

# Calculating the crop damage
# NOTE(review): totals printed further down were produced with the previous
# mapping; re-knit the report to refresh them.
StrmData$CROPDMGVAL <- StrmData$CROPDMG * StrmData$CROPEXP

2.4) Property Damage Summary

## Procedure = aggregate the property damage by the event type and sort the output in descending order

# Total property damage per event type.
prop <- aggregate(PROPDMGVAL ~ EVTYPE, data = StrmData, FUN = sum, na.rm = TRUE)
# Order from costliest to cheapest and keep the ten costliest event types.
prop <- head(prop[order(-prop[["PROPDMGVAL"]]), ], 10)
print(prop)
##                EVTYPE   PROPDMGVAL
## 59              FLOOD 144657709800
## 178 HURRICANE/TYPHOON  69305840000
## 332           TORNADO  56658823514
## 280       STORM SURGE  43323536000
## 47        FLASH FLOOD  16822723772
## 101              HAIL  15734437456
## 170         HURRICANE  11868319010
## 340    TROPICAL STORM   7703890550
## 399      WINTER STORM   6688497251
## 155         HIGH WIND   5270046260

2.5) Crop Damage Summary

## Procedure = aggregate the crop damage by the event type and sort the output in descending order

# Total crop damage per event type.
crop <- aggregate(CROPDMGVAL ~ EVTYPE, data = StrmData, FUN = sum, na.rm = TRUE)
# Order from costliest to cheapest and keep the ten costliest event types.
crop <- head(crop[order(-crop[["CROPDMGVAL"]]), ], 10)
print(crop)
##                EVTYPE   CROPDMGVAL
## 176 HURRICANE/TYPHOON 732768451330
## 58              FLOOD  76645688290
## 329           TORNADO  57347075714
## 46        FLASH FLOOD  29398844922
## 99               HAIL  11219073636
## 168         HURRICANE   9096291000
## 385          WILDFIRE   7180255200
## 153         HIGH WIND   6992210190
## 292 THUNDERSTORM WIND   5506786030
## 395      WINTER STORM   5287832401

2.6) Plot of Top 10 Property & Crop damages by Weather Event Types (Economic Consequences)

# Two panels side by side; the large bottom margin leaves room for the
# event-type labels, which las = 2 draws perpendicular to the axis.
par(mfrow = c(1, 2), mar = c(11, 3, 3, 2))

# Left panel: property damage, rescaled to billions.
barplot(
  height = prop[["PROPDMGVAL"]] / 1e9,
  names.arg = prop[["EVTYPE"]],
  las = 2,
  col = "gold",
  ylab = "Prop.damage(billions)",
  main = "Top10 Prop.Damages"
)

# Right panel: crop damage, rescaled to billions.
barplot(
  height = crop[["CROPDMGVAL"]] / 1e9,
  names.arg = crop[["EVTYPE"]],
  las = 2,
  col = "gold",
  ylab = "Crop damage(billions)",
  main = "Top10 Crop.Damages"
)

Conclusion Summary: