OVERVIEW:
SYNOPSIS:
OBJECTIVE:
Explore the NOAA Storm Database to help answer important questions about severe weather events.
DATA PREP
1.Install packages & Load libraries
Install packages …
# load libraries ...
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.0 (2020-02-14 07:10:20 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.23.0 successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
##
## throw
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, load, save
## R.utils v2.9.2 successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
## warnings
library(rmarkdown)
library(knitr)
2.Loading the data & reading the file
library(readr)
# Read the storm data. readr guesses column types from the first rows only;
# the text columns listed below are empty there, get guessed as logical, and
# every later text value in them then becomes a parsing failure replaced by
# NA. Declaring them as character keeps their content. All remaining columns
# are still guessed by readr.
StormData <- read_csv(
  "StormData.csv",
  col_types = cols(
    BGN_AZI = col_character(),
    BGN_LOCATI = col_character(),
    END_DATE = col_character(),
    END_TIME = col_character(),
    COUNTYENDN = col_character(),
    END_AZI = col_character(),
    END_LOCATI = col_character(),
    WFO = col_character(),
    STATEOFFIC = col_character(),
    ZONENAMES = col_character(),
    REMARKS = col_character()
  )
)
## Warning: Missing column names filled in: 'X38' [38], 'X39' [39], 'X40' [40]
## Parsed with column specification:
## cols(
## .default = col_double(),
## BGN_DATE = col_character(),
## TIME_ZONE = col_character(),
## COUNTYNAME = col_character(),
## STATE = col_character(),
## EVTYPE = col_character(),
## BGN_AZI = col_logical(),
## BGN_LOCATI = col_logical(),
## END_DATE = col_logical(),
## END_TIME = col_logical(),
## COUNTYENDN = col_logical(),
## END_AZI = col_logical(),
## END_LOCATI = col_logical(),
## PROPDMGEXP = col_character(),
## CROPDMGEXP = col_character(),
## WFO = col_logical(),
## STATEOFFIC = col_logical(),
## ZONENAMES = col_logical(),
## REMARKS = col_logical(),
## X38 = col_logical(),
## X39 = col_logical()
## # ... with 1 more columns
## )
## See spec(...) for full column specifications.
## Warning: 5637166 parsing failures.
## row col expected actual file
## 1671 WFO 1/0/T/F/TRUE/FALSE NG 'StormData.csv'
## 1673 WFO 1/0/T/F/TRUE/FALSE NG 'StormData.csv'
## 1674 WFO 1/0/T/F/TRUE/FALSE NG 'StormData.csv'
## 1675 WFO 1/0/T/F/TRUE/FALSE NG 'StormData.csv'
## 1678 WFO 1/0/T/F/TRUE/FALSE NG 'StormData.csv'
## .... ... .................. ...... ...............
## See problems(...) for more details.
# Print per-column summary statistics (type, quartiles, NA counts) as a quick
# sanity check of what was actually parsed from the file.
summary(StormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:903870 Min. : 0 Length:903870
## 1st Qu.:19.0 Class :character 1st Qu.:1330 Class :character
## Median :30.0 Mode :character Median :1630 Mode :character
## Mean :31.2 Mean :1516
## 3rd Qu.:45.0 3rd Qu.:1900
## Max. :95.0 Max. :9999
## NA's :1716 NA's :654961
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:903870 Length:903870 Length:903870
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
## NA's :1716
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Mode:logical Mode:logical Mode :logical
## 1st Qu.: 0.000 NA's:903870 TRUE:1 FALSE:143
## Median : 0.000 NA's:903869 NA's :903727
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
## NA's :1716
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Mode :logical Min. : 0.0000 Mode:logical Min. : 0.0000
## FALSE:17 1st Qu.: 0.0000 NA's:903870 1st Qu.: 0.0000
## NA's :903853 Median : 0.0000 Median : 0.0000
## Mean : 0.0006 Mean : 0.9858
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :16.0000 Max. :925.0000
## NA's :1573 NA's :1716
## END_AZI END_LOCATI LENGTH WIDTH
## Mode :logical Mode :logical Min. : 0.0000 Min. : 0.000
## FALSE:68 FALSE:68 1st Qu.: 0.0000 1st Qu.: 0.000
## TRUE :8 NA's :903802 Median : 0.0000 Median : 0.000
## NA's :903794 Mean : 0.2299 Mean : 7.489
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
## NA's :1641 NA's :1573
## F MAG FATALITIES INJURIES
## Min. : 0.0 Min. : 0.00 Min. : 0.0000 Min. : 0.000
## 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.000
## Median : 1.0 Median : 50.00 Median : 0.0000 Median : 0.000
## Mean : 0.9 Mean : 46.89 Mean : 0.0218 Mean : 0.155
## 3rd Qu.: 1.0 3rd Qu.: 75.00 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :12.0 Max. :22000.00 Max. :800.0000 Max. :1700.000
## NA's :845068 NA's :1573 NA's :1573 NA's :1716
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:903870 Min. : 0.000 Length:903870
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 6.349
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
## NA's :1573 NA's :1716
## WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## Mode:logical Mode:logical Mode:logical Min. : 0 Min. :-14451
## TRUE:7166 NA's:903870 NA's:903870 1st Qu.:2802 1st Qu.: 7247
## NA's:896704 Median :3540 Median : 8707
## Mean :2875 Mean : 6940
## 3rd Qu.:4019 3rd Qu.: 9605
## Max. :9706 Max. : 17124
## NA's :1620 NA's :1573
## LATITUDE_E LONGITUDE_ REMARKS REFNUM X38
## Min. : 0 Min. :-14455 Mode:logical Min. : 1 Mode:logical
## 1st Qu.: 0 1st Qu.: 0 NA's:903870 1st Qu.:225538 NA's:903870
## Median : 0 Median : 0 Median :451224
## Mean :1451 Mean : 3511 Mean :451163
## 3rd Qu.:3549 3rd Qu.: 8734 3rd Qu.:676760
## Max. :9706 Max. :368923 Max. :902297
## NA's :1756 NA's :1710 NA's :1722
## X39 X40
## Mode:logical Mode:logical
## NA's:903870 NA's:903870
##
##
##
##
##
# List the column names of the loaded data to pick the variables needed below.
names(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM" "X38" "X39" "X40"
Questions: 1.0.) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Keep only the event type plus the health and damage columns used in the
# analysis, then report the dimensions of the reduced data set.
StrmData <- StormData[
  ,
  c(
    "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  ),
  drop = FALSE
]
dim(StrmData)
## [1] 903870 7
1.1.) Variable selection (reducing the data set to only needed columns and variables)
# Confirm which columns were kept in the reduced data set.
names(StrmData)
## [1] "EVTYPE" "FATALITIES" "INJURIES" "PROPDMG" "PROPDMGEXP"
## [6] "CROPDMG" "CROPDMGEXP"
1.2) Reviewing events that cause the most fatalities (The Top-10 Fatalities by Weather Event)
## Procedure = aggregate the top 10 fatalities by the event type and sort the output in descending order
# Total fatalities per event type. aggregate()'s formula interface drops rows
# with a missing FATALITIES or EVTYPE value before summing.
Fatalities <- aggregate(FATALITIES ~ EVTYPE, data = StrmData, FUN = sum)
# Rank event types from most to fewest fatalities and keep the first ten.
# head() returns at most 10 rows, whereas indexing with [1:10, ] would pad the
# result with all-NA rows if fewer than 10 event types were present.
Top10_Fatalities <- head(Fatalities[order(-Fatalities$FATALITIES), ], 10)
Top10_Fatalities
## EVTYPE FATALITIES
## 839 TORNADO 5593.0
## 125 EXCESSIVE HEAT 1903.0
## 148 FLASH FLOOD 978.0
## 270 HEAT 937.0
## 459 LIGHTNING 816.0
## 517 N 812.0
## 992 WNW 560.5
## 599 S 544.0
## 861 TSTM WIND 504.0
## 530 NW 500.1
1.3) Reviewing events that cause the most injuries (The Top-10 Injuries by Weather Event)
## Procedure = aggregate the top 10 injuries by the event type and sort the output in descending order
# Total injuries per event type. aggregate()'s formula interface drops rows
# with a missing INJURIES or EVTYPE value before summing.
Injuries <- aggregate(INJURIES ~ EVTYPE, data = StrmData, FUN = sum)
# Rank event types from most to fewest injuries and keep the first ten.
# head() returns at most 10 rows, whereas indexing with [1:10, ] would pad the
# result with all-NA rows if fewer than 10 event types were present.
Top10_Injuries <- head(Injuries[order(-Injuries$INJURIES), ], 10)
Top10_Injuries
## EVTYPE INJURIES
## 826 TORNADO 90671
## 848 TSTM WIND 6957
## 162 FLOOD 6789
## 122 EXCESSIVE HEAT 6525
## 456 LIGHTNING 5230
## 267 HEAT 2100
## 419 ICE STORM 1975
## 145 FLASH FLOOD 1777
## 752 THUNDERSTORM WIND 1488
## 236 HAIL 1361
1.4) Plot of Top 10 Fatalities & Injuries for Weather Event Types (Population Health Impact )
## Procedure = plot graphs showing the top 10 fatalities and injuries
# Two panels side by side; the large bottom margin leaves room for the
# vertically rotated (las = 2) event-type labels.
par(mfrow = c(1, 2), mar = c(10, 3, 3, 2))

# Left panel: fatalities for the ten deadliest event types.
barplot(
  height = Top10_Fatalities$FATALITIES,
  names.arg = Top10_Fatalities$EVTYPE,
  main = "Top 10 fatalities",
  ylab = "fatalities",
  col = "purple",
  las = 2
)

# Right panel: injuries for the ten most injurious event types.
barplot(
  height = Top10_Injuries$INJURIES,
  names.arg = Top10_Injuries$EVTYPE,
  main = "Top 10 injuries",
  ylab = "injuries",
  col = "purple",
  las = 2
)
Figure 1: The weather event responsible for the highest fatalities and injuries is the ‘Tornado’.
2.0) Across the United States, which types of events have the greatest economic consequences?
# To answer the question an analysis of the weather events responsible for the greatest economic consequences is required.
# We formulate a hypothesis.
# Hypothesis:
# Economic consequences means damages. The two significant types of damage typically caused by weather events are:
# 1. Properties
# 2. Crops
2.1) Data Exploration & Findings…
# Upon reviewing the column names, the property damage(PROPDMG) and crop damage(CROPDMG) columns both have another related column titled 'exponents'(i.e - PROPDMGEXP and CROPDMGEXP respectively).
# As a result, let's convert the exponent columns into numeric data for the calculation of total property and crop damages encountered.
2.2) Defining & Calculating [Property Damage]
## Property damage exponents for each level listed out & assigned those values for the property exponent data.
## Invalid data was excluded by assigning the value as '0'.
## Then, the property damage value was calculated by multiplying the property damage and property exponent value.
# List the distinct property-damage exponent codes present in the data so
# that each one can be mapped to a numeric multiplier.
unique(StrmData$PROPDMGEXP)
## [1] "K"
## [2] "M"
## [3] NA
## [4] "B"
## [5] "m"
## [6] "+"
## [7] "0"
## [8] "5"
## [9] "6"
## [10] "?"
## [11] "4"
## [12] "2"
## [13] "3"
## [14] "h"
## [15] "7"
## [16] "H"
## [17] "-"
## [18] "1"
## [19] "8"
## [20] "F0"
## [21] "formed 6 miles west-northwest of Kingfisher and also rotated briefly around E3. Its life-span was short"
## [22] "and there was no damage (F0)."
## [23] "and formed about 6 miles west of Kingfisher while tornado E3 was most intense. This tornado rotated around E3 for a short period of time before dissipating. No damage was observed (F0). Another satellite tornado"
# Translate each property-damage exponent code into a numeric multiplier.
# Letter codes are magnitudes (H = hundred, K = thousand, M = million,
# B = billion); digits 1-8 are powers of ten; "0" means a multiplier of 1.
# The symbols "+", "-" and "?" are invalid and contribute nothing (0).
prop_multiplier <- c(
  "H" = 1e+02, "h" = 1e+02,
  "K" = 1e+03, "k" = 1e+03,
  "M" = 1e+06, "m" = 1e+06,
  "B" = 1e+09,
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000,
  "5" = 1e+05, "6" = 1e+06, "7" = 1e+07, "8" = 1e+08,
  "+" = 0, "-" = 0, "?" = 0
)
prop_code <- trimws(StrmData$PROPDMGEXP)
# A single vectorised lookup creates the column in one step, which avoids the
# "Unknown or uninitialised column" warning raised when assigning into a
# subset of a column that does not exist yet.
StrmData$PROPEXP <- unname(prop_multiplier[prop_code])
# A blank exponent is read from the CSV as NA (never as " "), so it has to be
# detected with is.na(); a blank means the damage figure is used as is.
StrmData$PROPEXP[is.na(prop_code) | prop_code == ""] <- 1
# Any remaining code that is not in the table (e.g. stray text) is invalid and
# is excluded from the totals by giving it a multiplier of 0.
StrmData$PROPEXP[is.na(StrmData$PROPEXP)] <- 0
# Calculating the property damage value
StrmData$PROPDMGVAL <- StrmData$PROPDMG * StrmData$PROPEXP
2.3) Defining & Calculating [Crop Damage]
## Crop damage exponents for each level listed out & assigned those values for the crop exponent data.
## Invalid data was excluded by assigning the value as '0'.
## Then, the crop damage value was calculated by multiplying the crop damage and crop exponent value.
# List the distinct crop-damage exponent codes present in the data so that
# each one can be mapped to a numeric multiplier.
unique(StrmData$CROPDMGEXP)
## [1] "K"
## [2] "M"
## [3] NA
## [4] "B"
## [5] "m"
## [6] "+"
## [7] "0"
## [8] "5"
## [9] "6"
## [10] "?"
## [11] "4"
## [12] "2"
## [13] "3"
## [14] "h"
## [15] "7"
## [16] "H"
## [17] "-"
## [18] "1"
## [19] "8"
## [20] "F0"
## [21] "formed 6 miles west-northwest of Kingfisher and also rotated briefly around E3. Its life-span was short"
## [22] "and there was no damage (F0)."
## [23] "and formed about 6 miles west of Kingfisher while tornado E3 was most intense. This tornado rotated around E3 for a short period of time before dissipating. No damage was observed (F0). Another satellite tornado"
# Translate each crop-damage exponent code into a numeric multiplier.
# Letter codes are magnitudes (H = hundred, K = thousand, M = million,
# B = billion); digits 1-8 are powers of ten; "0" means a multiplier of 1.
# The symbols "+", "-" and "?" are invalid and contribute nothing (0).
# The table covers every code listed by unique(StrmData$CROPDMGEXP), not only
# the most common ones.
crop_multiplier <- c(
  "H" = 1e+02, "h" = 1e+02,
  "K" = 1e+03, "k" = 1e+03,
  "M" = 1e+06, "m" = 1e+06,
  "B" = 1e+09,
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000,
  "5" = 1e+05, "6" = 1e+06, "7" = 1e+07, "8" = 1e+08,
  "+" = 0, "-" = 0, "?" = 0
)
crop_code <- trimws(StrmData$CROPDMGEXP)
# A single vectorised lookup creates the column in one step, which avoids the
# "Unknown or uninitialised column" warning raised when assigning into a
# subset of a column that does not exist yet.
StrmData$CROPEXP <- unname(crop_multiplier[crop_code])
# A blank exponent is read from the CSV as NA (never as " "), so it has to be
# detected with is.na(); a blank means the damage figure is used as is.
StrmData$CROPEXP[is.na(crop_code) | crop_code == ""] <- 1
# Any remaining code that is not in the table (e.g. stray text) is invalid and
# is excluded from the totals by giving it a multiplier of 0.
StrmData$CROPEXP[is.na(StrmData$CROPEXP)] <- 0
# Calculating the crop damage
StrmData$CROPDMGVAL <- StrmData$CROPDMG * StrmData$CROPEXP
2.4) Property Damage Summary
## Procedure = aggregate the property damage by the event type and sort the output in descending order
# Sum the property damage value per event type (na.rm is forwarded to sum()).
prop <- aggregate(
  PROPDMGVAL ~ EVTYPE,
  data = StrmData,
  FUN = sum,
  na.rm = TRUE
)
# Order from largest to smallest total and keep the ten costliest event types.
prop <- head(prop[order(-prop$PROPDMGVAL), ], n = 10)
print(prop)
## EVTYPE PROPDMGVAL
## 59 FLOOD 144657709800
## 178 HURRICANE/TYPHOON 69305840000
## 332 TORNADO 56658823514
## 280 STORM SURGE 43323536000
## 47 FLASH FLOOD 16822723772
## 101 HAIL 15734437456
## 170 HURRICANE 11868319010
## 340 TROPICAL STORM 7703890550
## 399 WINTER STORM 6688497251
## 155 HIGH WIND 5270046260
2.5) Crop Damage Summary
## Procedure = aggregate the crop damage by the event type and sort the output in descending order
# Sum the crop damage value per event type (na.rm is forwarded to sum()).
crop <- aggregate(
  CROPDMGVAL ~ EVTYPE,
  data = StrmData,
  FUN = sum,
  na.rm = TRUE
)
# Order from largest to smallest total and keep the ten costliest event types.
crop <- head(crop[order(-crop$CROPDMGVAL), ], n = 10)
print(crop)
## EVTYPE CROPDMGVAL
## 176 HURRICANE/TYPHOON 732768451330
## 58 FLOOD 76645688290
## 329 TORNADO 57347075714
## 46 FLASH FLOOD 29398844922
## 99 HAIL 11219073636
## 168 HURRICANE 9096291000
## 385 WILDFIRE 7180255200
## 153 HIGH WIND 6992210190
## 292 THUNDERSTORM WIND 5506786030
## 395 WINTER STORM 5287832401
2.6) Plot of Top 10 Property & Crop damages by Weather Event Types (Economic Consequences)
# Two panels side by side; the large bottom margin leaves room for the
# vertically rotated (las = 2) event-type labels.
par(mfrow = c(1, 2), mar = c(11, 3, 3, 2))

# Left panel: property damage of the top ten event types, scaled to billions.
barplot(
  height = prop$PROPDMGVAL / 1e9,
  names.arg = prop$EVTYPE,
  main = "Top10 Prop.Damages",
  ylab = "Prop.damage(billions)",
  col = "gold",
  las = 2
)

# Right panel: crop damage of the top ten event types, scaled to billions.
barplot(
  height = crop$CROPDMGVAL / 1e9,
  names.arg = crop$EVTYPE,
  main = "Top10 Crop.Damages",
  ylab = "Crop damage(billions)",
  col = "gold",
  las = 2
)
Tornadoes are responsible for the maximum number of fatalities and injuries, followed by Excessive Heat for fatalities and Thunderstorm wind for injuries.
Floods are responsible for the maximum property damage, while Hurricanes/Typhoons cause the maximum crop damage according to the crop damage summary in section 2.5. The second-largest contributors are Hurricanes/Typhoons for property damage and Floods for crop damage.