In this analysis, the NOAA Storm Database is used to answer some basic questions about severe weather events. The study explores the criticality of different event types based on three parameters: fatalities, injuries, and economic damage. It identifies the top five event types for each parameter and compares them with the others. The results are presented as bar plots of the top five events.
The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. After the file is downloaded from the website, it is decompressed and read into the R environment.
# Download the compressed NOAA storm data only when no local copy exists, so
# repeated runs do not fetch the large archive again.
# mode = "wb" requests a binary transfer explicitly; a text-mode transfer can
# corrupt a bzip2 archive on Windows.
if (!file.exists("repdata_data_StormData.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "repdata_data_StormData.csv.bz2",
    mode = "wb"
  )
}
# Reading data: bzfile() decompresses the archive on the fly for read.csv()
StormData <- read.csv(bzfile("repdata_data_StormData.csv.bz2"))
Key variables used for the analysis are:
EVTYPE: type of the event; FATALITIES: number of fatalities from the event; INJURIES: number of injuries from the event; PROPDMG: property damage measured; CROPDMG: crop damage measured; PROPDMGEXP: property damage exponent (thousands, millions, billions, etc.); CROPDMGEXP: crop damage exponent (thousands, millions, billions, etc.). The last two variables mentioned above do not contain clean data, as shown below.
# Distinct raw exponent codes for property damage, then for crop damage
unique(StormData[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Fold both exponent columns to upper case so "k"/"K", "m"/"M" and "h"/"H"
# are treated as the same code
StormData[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  StormData[c("PROPDMGEXP", "CROPDMGEXP")],
  toupper
)
unique(StormData[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "+" "0" "5" "6" "?" "4" "2" "3" "H" "7" "-" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] "" "M" "K" "B" "?" "0" "2"
# Blank and symbol codes carry no magnitude information; recode them to "0"
# (i.e. a multiplier of 10^0)
StormData$PROPDMGEXP <- replace(
  StormData$PROPDMGEXP,
  StormData$PROPDMGEXP %in% c("", "+", "-", "?"),
  "0"
)
StormData$CROPDMGEXP <- replace(
  StormData$CROPDMGEXP,
  StormData$CROPDMGEXP %in% c("", "+", "-", "?"),
  "0"
)
unique(StormData[["PROPDMGEXP"]])
## [1] "K" "M" "0" "B" "5" "6" "4" "2" "3" "H" "7" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] "0" "M" "K" "B" "2"
# Translate the letter codes into their power of ten:
# B(illion) -> 9, M(illion) -> 6, K (thousand) -> 3, H(undred) -> 2.
# Codes that are already digits are left untouched.
StormData[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  StormData[c("PROPDMGEXP", "CROPDMGEXP")],
  function(codes) {
    powers <- c(B = "9", M = "6", K = "3", H = "2")
    is_letter <- codes %in% names(powers)
    codes[is_letter] <- unname(powers[codes[is_letter]])
    codes
  }
)
unique(StormData[["PROPDMGEXP"]])
## [1] "3" "6" "0" "9" "5" "4" "2" "7" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] "0" "6" "3" "9" "2"
# Scale each damage figure by ten raised to its (now numeric-looking) exponent
StormData[["PROPDMGTOTAL"]] <- with(
  StormData,
  PROPDMG * (10 ^ as.numeric(PROPDMGEXP))
)
StormData[["CROPDMGTOTAL"]] <- with(
  StormData,
  CROPDMG * (10 ^ as.numeric(CROPDMGEXP))
)
# calculate total damage as property damage plus crop damage
StormData[["DMGTOTAL"]] <- with(StormData, PROPDMGTOTAL + CROPDMGTOTAL)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# One row per event type with summed fatalities, injuries, property damage,
# crop damage and total damage
SummStormData <- summarise(
  group_by(StormData, EVTYPE),
  SUMFATALITIES = sum(FATALITIES),
  SUMINJURIES = sum(INJURIES),
  SUMPROPDMG = sum(PROPDMGTOTAL),
  SUMCROPDMG = sum(CROPDMGTOTAL),
  TOTALDMG = sum(DMGTOTAL)
)
head(SummStormData, n = 6L)
## # A tibble: 6 × 6
## EVTYPE SUMFATALITIES SUMINJURIES SUMPROPDMG SUMCROPDMG TOTALDMG
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 " HIGH SURF ADVISO… 0 0 200000 0 200000
## 2 " COASTAL FLOOD" 0 0 0 0 0
## 3 " FLASH FLOOD" 0 0 50000 0 50000
## 4 " LIGHTNING" 0 0 0 0 0
## 5 " TSTM WIND" 0 0 8100000 0 8100000
## 6 " TSTM WIND (G45)" 0 0 8000 0 8000
# Rank event types by total fatalities (largest first) and keep the first six
SummStormDataFatality <- SummStormData %>%
  arrange(desc(SUMFATALITIES))
FatalityData <- SummStormDataFatality %>%
  head(n = 6L)
FatalityData
## # A tibble: 6 × 6
## EVTYPE SUMFATALITIES SUMINJURIES SUMPROPDMG SUMCROPDMG TOTALDMG
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 56947380676. 414953270 57362333946.
## 2 EXCESSIVE HEAT 1903 6525 7753700 492402000 500155700
## 3 FLASH FLOOD 978 1777 16822673978. 1421317100 18243991078.
## 4 HEAT 937 2100 1797000 401461500 403258500
## 5 LIGHTNING 816 5230 930379430. 12092090 942471520.
## 6 TSTM WIND 504 6957 4484928495 554007350 5038935845
Creating a plot of the top 5 event types by number of fatalities.
# Bar plot of the five event types with the most fatalities.
# reorder() sorts the bars from largest to smallest; without it ggplot2 places
# the event types alphabetically and the ranking is not visible.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(
  head(FatalityData, 5),
  aes(x = reorder(EVTYPE, -SUMFATALITIES), y = SUMFATALITIES)
) +
  geom_col() +
  xlab("Event Type") +
  ylab("Number of Fatalities") +
  ggtitle("Fatalities by Event type")
We can see that tornadoes caused the most fatalities.
# Rank event types by total injuries (largest first) and keep the first six
SummStormDataInjuries <- SummStormData %>%
  arrange(desc(SUMINJURIES))
InjuriesData <- SummStormDataInjuries %>%
  head(n = 6L)
InjuriesData
## # A tibble: 6 × 6
## EVTYPE SUMFATALITIES SUMINJURIES SUMPROPDMG SUMCROPDMG TOTALDMG
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 56947380676. 414953270 5.74e10
## 2 TSTM WIND 504 6957 4484928495 554007350 5.04e 9
## 3 FLOOD 470 6789 144657709807 5661968450 1.50e11
## 4 EXCESSIVE HEAT 1903 6525 7753700 492402000 5.00e 8
## 5 LIGHTNING 816 5230 930379430. 12092090 9.42e 8
## 6 HEAT 937 2100 1797000 401461500 4.03e 8
Creating a plot of the top 5 event types by number of injuries.
# Bar plot of the five event types with the most injuries.
# reorder() sorts the bars from largest to smallest; without it ggplot2 places
# the event types alphabetically and the ranking is not visible.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(
  head(InjuriesData, 5),
  aes(x = reorder(EVTYPE, -SUMINJURIES), y = SUMINJURIES)
) +
  geom_col() +
  xlab("Event Type") +
  ylab("Number of Injuries") +
  ggtitle("Injuries by Event type")
We can see that tornadoes caused the most injuries.
# Rank event types by total economic damage (largest first) and keep the
# first six
SummStormDataDamage <- SummStormData %>%
  arrange(desc(TOTALDMG))
DamageData <- SummStormDataDamage %>%
  head(n = 6L)
DamageData
## # A tibble: 6 × 6
## EVTYPE SUMFATALITIES SUMINJURIES SUMPROPDMG SUMCROPDMG TOTALDMG
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FLOOD 470 6789 144657709807 5661968450 1.50e11
## 2 HURRICANE/TYPHOON 64 1275 69305840000 2607872800 7.19e10
## 3 TORNADO 5633 91346 56947380676. 414953270 5.74e10
## 4 STORM SURGE 13 38 43323536000 5000 4.33e10
## 5 HAIL 15 1361 15735267513. 3025954473 1.88e10
## 6 FLASH FLOOD 978 1777 16822673978. 1421317100 1.82e10
Creating a plot of the top 5 event types by total damage.
# Bar plot of the five event types with the largest total (property + crop)
# damage.
# reorder() sorts the bars from largest to smallest; without it ggplot2 places
# the event types alphabetically and the ranking is not visible.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(
  head(DamageData, 5),
  aes(x = reorder(EVTYPE, -TOTALDMG), y = TOTALDMG)
) +
  geom_col() +
  xlab("Event Type") +
  ylab("Total Damage") +
  ggtitle("Total damage by Event type")
We can see that floods caused the most economic damage.
Across the three parameters, floods are responsible for the most economic damage, while tornadoes cause the most injuries and fatalities.