This is the second project assignment for the Reproducible Research course in Coursera’s Data Science specialization track. The purpose of the assignment is to answer questions using data collected from NOAA’s storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The original data are available online (https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2), as are the documentation (https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf) and the FAQ (https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf).
The data used for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
# Install knitr on first use. require() attaches the package when it is
# available and returns FALSE when it is not.
if (!require("knitr", character.only = TRUE)) {
  install.packages("knitr")
}
## Loading required package: knitr
# Install R.utils on first use; it provides bunzip2(), which is used to
# decompress the original data file.
if (!require("R.utils", character.only = TRUE)) {
  install.packages("R.utils")
}
## Loading required package: R.utils
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.5.0 (2016-11-07) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
# Install stringr on first use.
if (!require("stringr", character.only = TRUE)) {
  install.packages("stringr")
}
## Loading required package: stringr
# Install data.table on first use.
if (!require("data.table", character.only = TRUE)) {
  install.packages("data.table")
}
## Loading required package: data.table
# Install ggplot2 on first use.
if (!require("ggplot2", character.only = TRUE)) {
  install.packages("ggplot2")
}
## Loading required package: ggplot2
# Attach the packages used by the analysis. The attachment order is preserved
# because it decides which package wins when two of them export the same name.
library(knitr)
library(R.utils)
library(stringr)
library(data.table)
library(ggplot2)

# Show the code of every chunk and cache chunk results between knits.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Download (once), decompress (once) and load the NOAA storm data.
# Every artefact lives under ./RepDataAssignment2, so re-running the document
# reuses whatever is already on disk instead of downloading again.
dataDir <- "./RepDataAssignment2"
archivePath <- file.path(dataDir, "repdata%2Fdata%2FStormData.csv.bz2")
csvPath <- file.path(dataDir, "stormdata.csv")
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!dir.exists(dataDir)) {
  dir.create(dataDir)
}

if (!file.exists(archivePath)) {
  # The archive is binary: request a binary transfer explicitly so the file is
  # not corrupted by a text-mode download on Windows.
  download.file(fileUrl, destfile = archivePath, mode = "wb")
}

if (!file.exists(csvPath)) {
  # remove = FALSE keeps the downloaded archive next to the extracted CSV.
  bunzip2(archivePath, destname = csvPath, overwrite = TRUE, remove = FALSE)
}

# Strings are kept as character vectors; EVTYPE is cleaned as text later on.
StormData <- read.csv(csvPath, header = TRUE, stringsAsFactors = FALSE)
# Number of rows and columns of the raw data set.
c(nrow(StormData), ncol(StormData))
## [1] 902297 37
# Preview the first six records of the raw data set.
head(StormData, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Column names, types and sample values of the raw data set.
str(object = StormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
The data set StormData contains 902297 observations of 37 variables. However, we don’t need all of them for our exploratory analysis, which aims to answer questions about the impact of meteorological events on humans.
Therefore we’ll limit our focus to the following columns:

* STATE: The state where the storm happened
* EVTYPE: The type of storm event
* FATALITIES: The number of fatalities as a result of the storm event
* INJURIES: The number of injuries as a result of the storm event
* PROPDMG: The total property damage rounded to three significant digits
* PROPDMGEXP: A letter code indicating the magnitude of the PROPDMG dollar amount {“K”,“M”,“B”} for “thousands”, “millions” and “billions” respectively.
* CROPDMG: The total crop damage rounded to three significant digits
* CROPDMGEXP: A letter code indicating the magnitude of the CROPDMG dollar amount {“K”,“M”,“B”} for “thousands”, “millions” and “billions” respectively.
* REFNUM: A unique reference number for the event.
# Keep only the columns needed for the health and economic impact analysis.
TrimmedData <- StormData[
  c(
    "STATE", "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP",
    "REFNUM"
  )
]
# Number of rows and columns after trimming.
c(nrow(TrimmedData), ncol(TrimmedData))
## [1] 902297 9
# Preview the first six records of the trimmed data set.
head(TrimmedData, n = 6L)
## STATE EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 AL TORNADO 0 15 25.0 K 0
## 2 AL TORNADO 0 0 2.5 K 0
## 3 AL TORNADO 0 2 25.0 K 0
## 4 AL TORNADO 0 2 2.5 K 0
## 5 AL TORNADO 0 2 2.5 K 0
## 6 AL TORNADO 0 6 2.5 K 0
## REFNUM
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
# Column names, types and sample values of the trimmed data set.
str(object = TrimmedData)
## 'data.frame': 902297 obs. of 9 variables:
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Total number of missing cells, counted column by column.
sum(vapply(TrimmedData, function(column) sum(is.na(column)), integer(1)))
## [1] 0
There are no missing values, so there is nothing to do here.
# Distinct raw event labels in sorted order; show the first 200 for inspection.
eventtype <- sort(unique(TrimmedData[["EVTYPE"]]))
eventtype[seq_len(200)]
## [1] " HIGH SURF ADVISORY" " COASTAL FLOOD"
## [3] " FLASH FLOOD" " LIGHTNING"
## [5] " TSTM WIND" " TSTM WIND (G45)"
## [7] " WATERSPOUT" " WIND"
## [9] "?" "ABNORMAL WARMTH"
## [11] "ABNORMALLY DRY" "ABNORMALLY WET"
## [13] "ACCUMULATED SNOWFALL" "AGRICULTURAL FREEZE"
## [15] "APACHE COUNTY" "ASTRONOMICAL HIGH TIDE"
## [17] "ASTRONOMICAL LOW TIDE" "AVALANCE"
## [19] "AVALANCHE" "BEACH EROSIN"
## [21] "Beach Erosion" "BEACH EROSION"
## [23] "BEACH EROSION/COASTAL FLOOD" "BEACH FLOOD"
## [25] "BELOW NORMAL PRECIPITATION" "BITTER WIND CHILL"
## [27] "BITTER WIND CHILL TEMPERATURES" "Black Ice"
## [29] "BLACK ICE" "BLIZZARD"
## [31] "BLIZZARD AND EXTREME WIND CHIL" "BLIZZARD AND HEAVY SNOW"
## [33] "Blizzard Summary" "BLIZZARD WEATHER"
## [35] "BLIZZARD/FREEZING RAIN" "BLIZZARD/HEAVY SNOW"
## [37] "BLIZZARD/HIGH WIND" "BLIZZARD/WINTER STORM"
## [39] "BLOW-OUT TIDE" "BLOW-OUT TIDES"
## [41] "BLOWING DUST" "blowing snow"
## [43] "Blowing Snow" "BLOWING SNOW"
## [45] "BLOWING SNOW- EXTREME WIND CHI" "BLOWING SNOW & EXTREME WIND CH"
## [47] "BLOWING SNOW/EXTREME WIND CHIL" "BREAKUP FLOODING"
## [49] "BRUSH FIRE" "BRUSH FIRES"
## [51] "COASTAL FLOODING/EROSION" "COASTAL EROSION"
## [53] "Coastal Flood" "COASTAL FLOOD"
## [55] "coastal flooding" "Coastal Flooding"
## [57] "COASTAL FLOODING" "COASTAL FLOODING/EROSION"
## [59] "Coastal Storm" "COASTAL STORM"
## [61] "COASTAL SURGE" "COASTAL/TIDAL FLOOD"
## [63] "COASTALFLOOD" "COASTALSTORM"
## [65] "Cold" "COLD"
## [67] "COLD AIR FUNNEL" "COLD AIR FUNNELS"
## [69] "COLD AIR TORNADO" "Cold and Frost"
## [71] "COLD AND FROST" "COLD AND SNOW"
## [73] "COLD AND WET CONDITIONS" "Cold Temperature"
## [75] "COLD TEMPERATURES" "COLD WAVE"
## [77] "COLD WEATHER" "COLD WIND CHILL TEMPERATURES"
## [79] "COLD/WIND CHILL" "COLD/WINDS"
## [81] "COOL AND WET" "COOL SPELL"
## [83] "CSTL FLOODING/EROSION" "DAM BREAK"
## [85] "DAM FAILURE" "Damaging Freeze"
## [87] "DAMAGING FREEZE" "DEEP HAIL"
## [89] "DENSE FOG" "DENSE SMOKE"
## [91] "DOWNBURST" "DOWNBURST WINDS"
## [93] "DRIEST MONTH" "Drifting Snow"
## [95] "DROUGHT" "DROUGHT/EXCESSIVE HEAT"
## [97] "DROWNING" "DRY"
## [99] "DRY CONDITIONS" "DRY HOT WEATHER"
## [101] "DRY MICROBURST" "DRY MICROBURST 50"
## [103] "DRY MICROBURST 53" "DRY MICROBURST 58"
## [105] "DRY MICROBURST 61" "DRY MICROBURST 84"
## [107] "DRY MICROBURST WINDS" "DRY MIRCOBURST WINDS"
## [109] "DRY PATTERN" "DRY SPELL"
## [111] "DRY WEATHER" "DRYNESS"
## [113] "DUST DEVEL" "Dust Devil"
## [115] "DUST DEVIL" "DUST DEVIL WATERSPOUT"
## [117] "DUST STORM" "DUST STORM/HIGH WINDS"
## [119] "DUSTSTORM" "EARLY FREEZE"
## [121] "Early Frost" "EARLY FROST"
## [123] "EARLY RAIN" "EARLY SNOW"
## [125] "Early snowfall" "EARLY SNOWFALL"
## [127] "Erosion/Cstl Flood" "EXCESSIVE"
## [129] "Excessive Cold" "EXCESSIVE HEAT"
## [131] "EXCESSIVE HEAT/DROUGHT" "EXCESSIVE PRECIPITATION"
## [133] "EXCESSIVE RAIN" "EXCESSIVE RAINFALL"
## [135] "EXCESSIVE SNOW" "EXCESSIVE WETNESS"
## [137] "EXCESSIVELY DRY" "Extended Cold"
## [139] "Extreme Cold" "EXTREME COLD"
## [141] "EXTREME COLD/WIND CHILL" "EXTREME HEAT"
## [143] "EXTREME WIND CHILL" "EXTREME WIND CHILL/BLOWING SNO"
## [145] "EXTREME WIND CHILLS" "EXTREME WINDCHILL"
## [147] "EXTREME WINDCHILL TEMPERATURES" "EXTREME/RECORD COLD"
## [149] "EXTREMELY WET" "FALLING SNOW/ICE"
## [151] "FIRST FROST" "FIRST SNOW"
## [153] "FLASH FLOOD" "FLASH FLOOD - HEAVY RAIN"
## [155] "FLASH FLOOD FROM ICE JAMS" "FLASH FLOOD LANDSLIDES"
## [157] "FLASH FLOOD WINDS" "FLASH FLOOD/"
## [159] "FLASH FLOOD/ FLOOD" "FLASH FLOOD/ STREET"
## [161] "FLASH FLOOD/FLOOD" "FLASH FLOOD/HEAVY RAIN"
## [163] "FLASH FLOOD/LANDSLIDE" "FLASH FLOODING"
## [165] "FLASH FLOODING/FLOOD" "FLASH FLOODING/THUNDERSTORM WI"
## [167] "FLASH FLOODS" "FLASH FLOOODING"
## [169] "Flood" "FLOOD"
## [171] "FLOOD & HEAVY RAIN" "FLOOD FLASH"
## [173] "FLOOD FLOOD/FLASH" "FLOOD WATCH/"
## [175] "FLOOD/FLASH" "Flood/Flash Flood"
## [177] "FLOOD/FLASH FLOOD" "FLOOD/FLASH FLOODING"
## [179] "FLOOD/FLASH/FLOOD" "FLOOD/FLASHFLOOD"
## [181] "FLOOD/RAIN/WIND" "FLOOD/RAIN/WINDS"
## [183] "FLOOD/RIVER FLOOD" "Flood/Strong Wind"
## [185] "FLOODING" "FLOODING/HEAVY RAIN"
## [187] "FLOODS" "FOG"
## [189] "FOG AND COLD TEMPERATURES" "FOREST FIRES"
## [191] "Freeze" "FREEZE"
## [193] "Freezing drizzle" "Freezing Drizzle"
## [195] "FREEZING DRIZZLE" "FREEZING DRIZZLE AND FREEZING"
## [197] "Freezing Fog" "FREEZING FOG"
## [199] "Freezing rain" "Freezing Rain"
The EVTYPE (event type) variable contains duplicate categories that differ only by letter case. The names are therefore converted to upper case and then turned into a factor, to make the later aggregations easier.
# Upper-case the event labels so that variants differing only by case merge,
# and store the result as a factor.
TrimmedData$EVTYPE <- as.factor(toupper(TrimmedData$EVTYPE))

# Distinct labels in factor-level order; show the first 200 for inspection.
eventtype <- sort(unique(TrimmedData[["EVTYPE"]]))
eventtype[seq_len(200)]
## [1] HIGH SURF ADVISORY COASTAL FLOOD
## [3] FLASH FLOOD LIGHTNING
## [5] TSTM WIND TSTM WIND (G45)
## [7] WATERSPOUT WIND
## [9] ? ABNORMAL WARMTH
## [11] ABNORMALLY DRY ABNORMALLY WET
## [13] ACCUMULATED SNOWFALL AGRICULTURAL FREEZE
## [15] APACHE COUNTY ASTRONOMICAL HIGH TIDE
## [17] ASTRONOMICAL LOW TIDE AVALANCE
## [19] AVALANCHE BEACH EROSIN
## [21] BEACH EROSION BEACH EROSION/COASTAL FLOOD
## [23] BEACH FLOOD BELOW NORMAL PRECIPITATION
## [25] BITTER WIND CHILL BITTER WIND CHILL TEMPERATURES
## [27] BLACK ICE BLIZZARD
## [29] BLIZZARD AND EXTREME WIND CHIL BLIZZARD AND HEAVY SNOW
## [31] BLIZZARD SUMMARY BLIZZARD WEATHER
## [33] BLIZZARD/FREEZING RAIN BLIZZARD/HEAVY SNOW
## [35] BLIZZARD/HIGH WIND BLIZZARD/WINTER STORM
## [37] BLOW-OUT TIDE BLOW-OUT TIDES
## [39] BLOWING DUST BLOWING SNOW
## [41] BLOWING SNOW- EXTREME WIND CHI BLOWING SNOW & EXTREME WIND CH
## [43] BLOWING SNOW/EXTREME WIND CHIL BREAKUP FLOODING
## [45] BRUSH FIRE BRUSH FIRES
## [47] COASTAL FLOODING/EROSION COASTAL EROSION
## [49] COASTAL FLOOD COASTAL FLOODING
## [51] COASTAL FLOODING/EROSION COASTAL STORM
## [53] COASTAL SURGE COASTAL/TIDAL FLOOD
## [55] COASTALFLOOD COASTALSTORM
## [57] COLD COLD AIR FUNNEL
## [59] COLD AIR FUNNELS COLD AIR TORNADO
## [61] COLD AND FROST COLD AND SNOW
## [63] COLD AND WET CONDITIONS COLD TEMPERATURE
## [65] COLD TEMPERATURES COLD WAVE
## [67] COLD WEATHER COLD WIND CHILL TEMPERATURES
## [69] COLD/WIND CHILL COLD/WINDS
## [71] COOL AND WET COOL SPELL
## [73] CSTL FLOODING/EROSION DAM BREAK
## [75] DAM FAILURE DAMAGING FREEZE
## [77] DEEP HAIL DENSE FOG
## [79] DENSE SMOKE DOWNBURST
## [81] DOWNBURST WINDS DRIEST MONTH
## [83] DRIFTING SNOW DROUGHT
## [85] DROUGHT/EXCESSIVE HEAT DROWNING
## [87] DRY DRY CONDITIONS
## [89] DRY HOT WEATHER DRY MICROBURST
## [91] DRY MICROBURST 50 DRY MICROBURST 53
## [93] DRY MICROBURST 58 DRY MICROBURST 61
## [95] DRY MICROBURST 84 DRY MICROBURST WINDS
## [97] DRY MIRCOBURST WINDS DRY PATTERN
## [99] DRY SPELL DRY WEATHER
## [101] DRYNESS DUST DEVEL
## [103] DUST DEVIL DUST DEVIL WATERSPOUT
## [105] DUST STORM DUST STORM/HIGH WINDS
## [107] DUSTSTORM EARLY FREEZE
## [109] EARLY FROST EARLY RAIN
## [111] EARLY SNOW EARLY SNOWFALL
## [113] EROSION/CSTL FLOOD EXCESSIVE
## [115] EXCESSIVE COLD EXCESSIVE HEAT
## [117] EXCESSIVE HEAT/DROUGHT EXCESSIVE PRECIPITATION
## [119] EXCESSIVE RAIN EXCESSIVE RAINFALL
## [121] EXCESSIVE SNOW EXCESSIVE WETNESS
## [123] EXCESSIVELY DRY EXTENDED COLD
## [125] EXTREME COLD EXTREME COLD/WIND CHILL
## [127] EXTREME HEAT EXTREME WIND CHILL
## [129] EXTREME WIND CHILL/BLOWING SNO EXTREME WIND CHILLS
## [131] EXTREME WINDCHILL EXTREME WINDCHILL TEMPERATURES
## [133] EXTREME/RECORD COLD EXTREMELY WET
## [135] FALLING SNOW/ICE FIRST FROST
## [137] FIRST SNOW FLASH FLOOD
## [139] FLASH FLOOD - HEAVY RAIN FLASH FLOOD FROM ICE JAMS
## [141] FLASH FLOOD LANDSLIDES FLASH FLOOD WINDS
## [143] FLASH FLOOD/ FLASH FLOOD/ FLOOD
## [145] FLASH FLOOD/ STREET FLASH FLOOD/FLOOD
## [147] FLASH FLOOD/HEAVY RAIN FLASH FLOOD/LANDSLIDE
## [149] FLASH FLOODING FLASH FLOODING/FLOOD
## [151] FLASH FLOODING/THUNDERSTORM WI FLASH FLOODS
## [153] FLASH FLOOODING FLOOD
## [155] FLOOD & HEAVY RAIN FLOOD FLASH
## [157] FLOOD FLOOD/FLASH FLOOD WATCH/
## [159] FLOOD/FLASH FLOOD/FLASH FLOOD
## [161] FLOOD/FLASH FLOODING FLOOD/FLASH/FLOOD
## [163] FLOOD/FLASHFLOOD FLOOD/RAIN/WIND
## [165] FLOOD/RAIN/WINDS FLOOD/RIVER FLOOD
## [167] FLOOD/STRONG WIND FLOODING
## [169] FLOODING/HEAVY RAIN FLOODS
## [171] FOG FOG AND COLD TEMPERATURES
## [173] FOREST FIRES FREEZE
## [175] FREEZING DRIZZLE FREEZING DRIZZLE AND FREEZING
## [177] FREEZING FOG FREEZING RAIN
## [179] FREEZING RAIN AND SLEET FREEZING RAIN AND SNOW
## [181] FREEZING RAIN SLEET AND FREEZING RAIN SLEET AND LIGHT
## [183] FREEZING RAIN/SLEET FREEZING RAIN/SNOW
## [185] FREEZING SPRAY FROST
## [187] FROST/FREEZE FROST\\FREEZE
## [189] FUNNEL FUNNEL CLOUD
## [191] FUNNEL CLOUD. FUNNEL CLOUD/HAIL
## [193] FUNNEL CLOUDS FUNNELS
## [195] GLAZE GLAZE ICE
## [197] GLAZE/ICE STORM GRADIENT WIND
## [199] GRADIENT WINDS GRASS FIRES
## 898 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
We have 898 event types. Many of them have nearly identical names and denote the same event type, as documented on page 6 of the storm data documentation. Some of these near-duplicates are therefore merged into a single category, and a few misspellings are corrected.
# Collapse the free-text EVTYPE labels into a small set of broad categories.
#
# `evtype_rules` maps a category name to a regular expression. The rules are
# applied in the order listed, and every label matching a rule's pattern is
# overwritten with the category name. No category name matches the pattern of
# a later rule for a different category, so the first matching rule wins
# (e.g. "FLASH FLOODING/THUNDERSTORM WI" ends up as FLOOD).
#
# "TSTM ..." and "STRONG WINDS" labels are deliberately left untouched before
# the rules run. Rewriting them to "MARINE THUNDERSTORM WIND" /
# "ASTROMICAL LOW TIDE" made the MARINE rule swallow land thunderstorm winds
# and strong winds; they now reach the THUNDERSTORM and HIGH WIND rules.
# "MARINE TSTM WIND" still lands in MARINE because that rule runs first.
#
# NOTE(review): the "WAVE" alternative of the MARINE rule also captures
# "COLD WAVE" before the COLD rule can see it -- confirm this is intended.
evtype_rules <- c(
  "FLOOD"             = "FLOOD|EXCESSIVE WETNESS",
  "HURRICANE"         = "HURRICANE|TYPHOON|TROPICAL",
  "TORNADO"           = "TORN|WATERSPROUT|FUNNEL|SPOUT|MICRO",
  "HAIL"              = "HAIL",
  # "URBAN/SML STREAM FLD" is treated as heavy rain, as before.
  "HEAVY RAIN"        = "RAIN|HEAVY PRECIP|URBAN/SML STREAM FLD",
  "WILDFIRE"          = "FIRE|SMOKE",
  "HEAT"              = "HEAT|RECORD HIGH TEMPERATURE|RECORD HIGH|HIGH TEMPERATURE RECORD|RECORD WARMTH",
  "MARINE"            = "SEICHE|Marine|SEAS|SWELLS|ASTRONOMICAL LOW TIDE|ASTRONOMICAL HIGH TIDE|SURF|RIP CURRENT|HIGH SEAS| HIGH TIDES|HIGH SURF|MARINE MISHAP|STORM SURGE|COASTAL|WAVE|BEACH|TIDE",
  "AVALANCHE"         = "AVALANCHE|AVALANCE",
  "LANDSLIDE"         = "LANDSLIDE|LANDSLUMP|MUD SLIDE|MUDSLIDE|ROCK SLIDE",
  "OTHER"             = "OTHER",
  "DAM BREAK"         = "DAM BREAK|DAM FAILURE",
  "TSUNAMI"           = "TSUNAMI",
  "VOLCANIC ACTIVITY" = "VOLCANIC ASH",
  "FOG"               = "FOG|WALL CLOUD",
  "DROUGHT"           = "DRY|DROUGHT|DUST STORM|DUST",
  "THUNDERSTORM"      = "THUNDERSTORM|TSTM|LIGHTNING|LIGHTING|LIGNTNING",
  "WINTER STORM"      = "MIXED|MIX|SNOW|WINT|ICE|GLAZE|BLIZZARD|FREEZ|SLEET",
  "COLD"              = "CHILL|FROST|HYPOTHERMIA|ICY|COLD|RECORD LOW|LOW TEMPERATURE",
  "HIGH WIND"         = "WIND|GUST"
)

# Apply `rules` (named character vector: category -> regex) to `labels` in
# order and return the recategorised labels. Labels matching no rule are
# returned unchanged.
categorize_evtype <- function(labels, rules) {
  for (category in names(rules)) {
    labels[grepl(rules[[category]], labels, ignore.case = TRUE)] <- category
  }
  labels
}

# Strip the leading/trailing blanks seen in labels such as "   HIGH SURF ADVISORY".
evtype_labels <- str_trim(as.character(TrimmedData$EVTYPE))

# Each rule depends only on the label text, so the rules are evaluated once
# per distinct label and the result is mapped back onto every row.
evtype_distinct <- unique(evtype_labels)
evtype_category <- categorize_evtype(evtype_distinct, evtype_rules)
TrimmedData$EVTYPE <- as.factor(
  evtype_category[match(evtype_labels, evtype_distinct)]
)
# Distinct categories remaining after cleaning, in factor-level order.
# Indexing positions 1..200 pads the listing with NA when fewer than 200
# categories remain.
eventtype <- sort(unique(TrimmedData[["EVTYPE"]]))
eventtype[seq_len(200)]
## [1] ? ABNORMAL WARMTH
## [3] ABNORMALLY WET APACHE COUNTY
## [5] AVALANCHE BELOW NORMAL PRECIPITATION
## [7] COLD COOL AND WET
## [9] COOL SPELL DAM BREAK
## [11] DOWNBURST DRIEST MONTH
## [13] DROUGHT DROWNING
## [15] EXCESSIVE EXCESSIVE PRECIPITATION
## [17] EXTREMELY WET FLASH FLOOODING
## [19] FLOOD FOG
## [21] HAIL HEAT
## [23] HEAVY RAIN HEAVY SHOWER
## [25] HEAVY SHOWERS HIGH
## [27] HIGH WATER HIGH WIND
## [29] HOT PATTERN HOT SPELL
## [31] HOT WEATHER HURRICANE
## [33] HYPERTHERMIA/EXPOSURE LANDSLIDE
## [35] MARINE METRO STORM, MAY 26
## [37] MILD PATTERN MONTHLY PRECIPITATION
## [39] MONTHLY TEMPERATURE NO SEVERE WEATHER
## [41] NONE NORMAL PRECIPITATION
## [43] NORTHERN LIGHTS OTHER
## [45] PROLONG WARMTH RAPIDLY RISING WATER
## [47] RECORD COOL RECORD PRECIPITATION
## [49] RECORD TEMPERATURE RECORD TEMPERATURES
## [51] RECORD WARM RECORD WARM TEMPS.
## [53] RED FLAG CRITERIA REMNANTS OF FLOYD
## [55] SEVERE TURBULENCE SMALL STREAM
## [57] SMALL STREAM AND SML STREAM FLD
## [59] SOUTHEAST SUMMARY JAN 17
## [61] SUMMARY JULY 23-24 SUMMARY JUNE 18-19
## [63] SUMMARY JUNE 5-6 SUMMARY JUNE 6
## [65] SUMMARY OF APRIL 12 SUMMARY OF APRIL 13
## [67] SUMMARY OF APRIL 21 SUMMARY OF APRIL 27
## [69] SUMMARY OF APRIL 3RD SUMMARY OF JULY 11
## [71] SUMMARY OF JULY 2 SUMMARY OF JULY 22
## [73] SUMMARY OF JULY 26 SUMMARY OF JULY 29
## [75] SUMMARY OF JULY 3 SUMMARY OF JUNE 10
## [77] SUMMARY OF JUNE 11 SUMMARY OF JUNE 12
## [79] SUMMARY OF JUNE 13 SUMMARY OF JUNE 15
## [81] SUMMARY OF JUNE 16 SUMMARY OF JUNE 18
## [83] SUMMARY OF JUNE 23 SUMMARY OF JUNE 24
## [85] SUMMARY OF JUNE 3 SUMMARY OF JUNE 30
## [87] SUMMARY OF JUNE 4 SUMMARY OF JUNE 6
## [89] SUMMARY OF MARCH 14 SUMMARY OF MARCH 23
## [91] SUMMARY OF MARCH 24 SUMMARY OF MARCH 24-25
## [93] SUMMARY OF MARCH 27 SUMMARY OF MARCH 29
## [95] SUMMARY OF MAY 10 SUMMARY OF MAY 13
## [97] SUMMARY OF MAY 14 SUMMARY OF MAY 22
## [99] SUMMARY OF MAY 22 AM SUMMARY OF MAY 22 PM
## [101] SUMMARY OF MAY 26 AM SUMMARY OF MAY 26 PM
## [103] SUMMARY OF MAY 31 AM SUMMARY OF MAY 31 PM
## [105] SUMMARY OF MAY 9-10 SUMMARY SEPT. 25-26
## [107] SUMMARY SEPTEMBER 20 SUMMARY SEPTEMBER 23
## [109] SUMMARY SEPTEMBER 3 SUMMARY SEPTEMBER 4
## [111] SUMMARY: NOV. 16 SUMMARY: NOV. 6-7
## [113] SUMMARY: OCT. 20-21 SUMMARY: OCTOBER 31
## [115] SUMMARY: SEPT. 18 TEMPERATURE RECORD
## [117] THUNDERSTORM TORNADO
## [119] TSUNAMI UNUSUAL WARMTH
## [121] UNUSUALLY WARM URBAN AND SMALL
## [123] URBAN AND SMALL STREAM URBAN SMALL
## [125] URBAN/SMALL URBAN/SMALL STREAM
## [127] URBAN/SMALL STRM FLDG VERY WARM
## [129] VOG VOLCANIC ACTIVITY
## [131] VOLCANIC ERUPTION WARM WEATHER
## [133] WET MICOBURST WET MONTH
## [135] WET WEATHER WET YEAR
## [137] WILDFIRE WINTER STORM
## [139] WND <NA>
## [141] <NA> <NA>
## [143] <NA> <NA>
## [145] <NA> <NA>
## [147] <NA> <NA>
## [149] <NA> <NA>
## [151] <NA> <NA>
## [153] <NA> <NA>
## [155] <NA> <NA>
## [157] <NA> <NA>
## [159] <NA> <NA>
## [161] <NA> <NA>
## [163] <NA> <NA>
## [165] <NA> <NA>
## [167] <NA> <NA>
## [169] <NA> <NA>
## [171] <NA> <NA>
## [173] <NA> <NA>
## [175] <NA> <NA>
## [177] <NA> <NA>
## [179] <NA> <NA>
## [181] <NA> <NA>
## [183] <NA> <NA>
## [185] <NA> <NA>
## [187] <NA> <NA>
## [189] <NA> <NA>
## [191] <NA> <NA>
## [193] <NA> <NA>
## [195] <NA> <NA>
## [197] <NA> <NA>
## [199] <NA> <NA>
## 139 Levels: ? ABNORMAL WARMTH ABNORMALLY WET APACHE COUNTY ... WND
We now have “only” 139 event types. In some cases, the event type is recorded as “Summary of XXX” for a particular day. Fully cleaning EVTYPE is outside the scope of this analysis; hopefully these remaining bad records will not affect the analyses that follow.
Need to aggregate the fatalities.
# Total fatalities per event type, as a data.table.
Fatalities <- as.data.table(
  aggregate(FATALITIES ~ EVTYPE, data = TrimmedData, FUN = sum)
)
# Drop event types without any fatality, then rank from most to least deadly.
Fatalities <- Fatalities[FATALITIES > 0][order(-FATALITIES)]
Keep only the 10 event types with the most fatalities and draw a bar chart.
# Ten event types with the most fatalities (Fatalities is already sorted in
# decreasing order). head() is used instead of [1:10, ] so that a table with
# fewer than ten rows is not padded with NA rows.
top10 <- head(Fatalities, 10)

# Horizontal bar chart of the fatality counts. reorder() sorts the bars by
# FATALITIES; without it ggplot2 lays the bars out in the alphabetical order
# of the EVTYPE factor levels and the ranking is lost.
ggplot(
  data = top10,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES, fill = FATALITIES)
) +
  geom_bar(stat = "identity") +
  xlab("Events") +
  ylab("Number of Fatalities") +
  ggtitle("Fatalities caused by Events (only the top 10) ") +
  coord_flip() +
  theme(legend.position = "none")
The graph shows that tornadoes are the deadliest event type over all the years.
Need to aggregate the Injuries
# Total injuries per event type, as a data.table.
Injuries <- as.data.table(
  aggregate(INJURIES ~ EVTYPE, data = TrimmedData, FUN = sum)
)
# Drop event types without any injury, then rank from most to least harmful.
Injuries <- Injuries[INJURIES > 0][order(-INJURIES)]
Show only the 10 event types with the most injuries and draw a bar chart.
# Ten event types with the most injuries (Injuries is already sorted in
# decreasing order). head() is used instead of [1:10, ] so that a table with
# fewer than ten rows is not padded with NA rows.
top10injuries <- head(Injuries, 10)

# Horizontal bar chart of the injury counts. reorder() sorts the bars by
# INJURIES; without it ggplot2 lays the bars out in the alphabetical order of
# the EVTYPE factor levels and the ranking is lost.
ggplot(
  data = top10injuries,
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES, fill = INJURIES)
) +
  geom_bar(stat = "identity") +
  xlab("Events") +
  ylab("Number of Injuries") +
  ggtitle("Injuries caused by Events (only the top 10) ") +
  coord_flip() +
  theme(legend.position = "none")
Tornado is again the most harmful event causing injuries according to the graph above.
Here, we investigate the total economic cost caused by each type of event, to identify those that caused the largest costs. To do so, the two types of economic cost, property damage and crop damage, are summed over the years. According to the codebook, the letter “K” stands for thousands, “M” for millions and “B” for billions. These letters appear in both upper and lower case, so both forms must be taken into account. We also have to compute the damage costs by multiplying each amount by ten raised to the “factor” associated with its “symbol”.
# Exponent codes found in PROPDMGEXP / CROPDMGEXP and the power of ten each
# one stands for. Blank, "+", "-" and "?" map to 10^0, digits to themselves,
# h/H to 2, k/K to 3, m/M to 6 and b/B to 9.
exp_symbols <- c("", "+", "-", "?", 0:9, "h", "H", "k", "K", "m", "M", "b", "B")
exp_powers <- c(rep(0, 4), 0:9, 2, 2, 3, 3, 6, 6, 9, 9)

# Power of ten for every record; match() is used because the blank code ""
# cannot be looked up by name.
prop_power <- exp_powers[match(TrimmedData$PROPDMGEXP, exp_symbols)]
crop_power <- exp_powers[match(TrimmedData$CROPDMGEXP, exp_symbols)]

# Property and crop costs in dollars, and their sum.
TrimmedData$damage.prop <- TrimmedData$PROPDMG * 10^prop_power
TrimmedData$damage.crop <- TrimmedData$CROPDMG * 10^crop_power
TrimmedData$damage <- TrimmedData$damage.prop + TrimmedData$damage.crop

# Total cost per event type, expressed in billions for easier plotting and
# sorted from the most to the least costly.
damage <- aggregate(damage ~ EVTYPE, data = TrimmedData, FUN = sum)
damage$bilion <- damage$damage / 1e9
damage <- damage[order(damage$bilion, decreasing = TRUE), ]

# Re-level EVTYPE in cost order so plots follow the ranking instead of the
# alphabet.
damage$EVTYPE <- factor(damage$EVTYPE, levels = damage$EVTYPE)
Show only the 10 costliest event types and draw a bar chart.
# First ten rows of the cost table, which is sorted from most to least costly.
top10cost <- damage[seq_len(10), ]

# Horizontal bar chart of the total cost (in billions of dollars) per event type.
ggplot(data = top10cost, aes(x = EVTYPE, y = bilion, fill = bilion)) +
  geom_bar(stat = "Identity") +
  labs(
    x = "Events",
    y = "Economic costs in billion $",
    title = "Economic costs caused by Events (only the top 10) "
  ) +
  coord_flip() +
  theme(legend.position = "none")
This time, flood is the event type that caused the largest economic damage, as shown in the bar chart above.
Over the years from 1950 to 2011, Tornado is the event that has most affected human health, both in terms of fatalities and injuries. In contrast, Flood is the event which caused the largest economic damage, with Tornado “only” in 3rd place.