# Global knitr chunk option: echo each chunk's source code in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The storm database published by NOAA was obtained and analysed. The data has 37 variables and covers events from 1950 until November 2011. Weather events are harmful because they can cause deaths or injuries to people who would otherwise have led normal lives. In this dataset, we need to find out how many deaths and injuries occurred for every event type, grouping them together irrespective of the year or the place in which they occurred. These values are aggregated and then compared with one another to check which event type had the greatest effect in causing deaths and injuries.
Likewise, we need to group the expenses incurred because of the events; these values need to be aggregated, compared, and analysed.
By performing the above analysis, it was found that tornadoes cause the most deaths and injuries, and ice storms (obviously, more money is involved in clearing ice) cause the most economic damage.
The storm data file was downloaded, and the data was extracted and loaded to perform the analysis. Because the data is large, the download and the load into memory were done with the cache option.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(stringr)
library(utils)
library(ggplot2)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.4.3
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
# Fetch the compressed NOAA storm data only when no local copy exists, so
# repeated runs skip the download.
storm_file <- "FStormData.csv.bz2"
if (!file.exists(storm_file)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = storm_file,
    mode = "wb" # binary transfer, so the bzip2 archive is never rewritten as text
  )
}
# read.csv() decompresses on the fly through the bzfile() connection.
stormdata <- read.csv(bzfile(storm_file))
# Show every column's type together with its first few values.
str(stormdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# List the column names.
names(stormdata)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Number of rows and columns.
dim(stormdata)
## [1] 902297 37
# EVTYPE is a factor, so summary() tabulates how often each event type occurs.
summary(stormdata$EVTYPE)
## HAIL TSTM WIND THUNDERSTORM WIND
## 288661 219940 82563
## TORNADO FLASH FLOOD FLOOD
## 60652 54277 25326
## THUNDERSTORM WINDS HIGH WIND LIGHTNING
## 20843 20212 15754
## HEAVY SNOW HEAVY RAIN WINTER STORM
## 15708 11723 11433
## WINTER WEATHER FUNNEL CLOUD MARINE TSTM WIND
## 7026 6839 6175
## MARINE THUNDERSTORM WIND WATERSPOUT STRONG WIND
## 5812 3796 3566
## URBAN/SML STREAM FLD WILDFIRE BLIZZARD
## 3392 2761 2719
## DROUGHT ICE STORM EXCESSIVE HEAT
## 2488 2006 1678
## HIGH WINDS WILD/FOREST FIRE FROST/FREEZE
## 1533 1457 1342
## DENSE FOG WINTER WEATHER/MIX TSTM WIND/HAIL
## 1293 1104 1028
## EXTREME COLD/WIND CHILL HEAT HIGH SURF
## 1002 767 725
## TROPICAL STORM FLASH FLOODING EXTREME COLD
## 690 682 655
## COASTAL FLOOD LAKE-EFFECT SNOW FLOOD/FLASH FLOOD
## 650 636 624
## LANDSLIDE SNOW COLD/WIND CHILL
## 600 587 539
## FOG RIP CURRENT MARINE HAIL
## 538 470 442
## DUST STORM AVALANCHE WIND
## 427 386 340
## RIP CURRENTS STORM SURGE FREEZING RAIN
## 304 261 250
## URBAN FLOOD HEAVY SURF/HIGH SURF EXTREME WINDCHILL
## 249 228 204
## STRONG WINDS DRY MICROBURST ASTRONOMICAL LOW TIDE
## 196 186 174
## HURRICANE RIVER FLOOD LIGHT SNOW
## 174 173 154
## STORM SURGE/TIDE RECORD WARMTH COASTAL FLOODING
## 148 146 143
## DUST DEVIL MARINE HIGH WIND UNSEASONABLY WARM
## 141 135 126
## FLOODING ASTRONOMICAL HIGH TIDE MODERATE SNOWFALL
## 120 103 101
## URBAN FLOODING WINTRY MIX HURRICANE/TYPHOON
## 98 90 88
## FUNNEL CLOUDS HEAVY SURF RECORD HEAT
## 87 84 81
## FREEZE HEAT WAVE COLD
## 74 74 72
## RECORD COLD ICE THUNDERSTORM WINDS HAIL
## 64 61 61
## TROPICAL DEPRESSION SLEET UNSEASONABLY DRY
## 60 59 56
## FROST GUSTY WINDS THUNDERSTORM WINDSS
## 53 53 51
## MARINE STRONG WIND OTHER SMALL HAIL
## 48 48 47
## FUNNEL FREEZING FOG THUNDERSTORM
## 46 45 45
## Temperature record TSTM WIND (G45) Coastal Flooding
## 43 39 38
## WATERSPOUTS MONTHLY PRECIPITATION WINDS
## 37 36 36
## (Other)
## 2940
# Distribution (quartiles, mean, extremes) of FATALITIES across all records.
summary(stormdata$FATALITIES)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.0168 0.0000 583.0000
# Distribution (quartiles, mean, extremes) of INJURIES across all records.
summary(stormdata$INJURIES)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1557 0.0000 1700.0000
# Distribution of the PROPDMG values across all records.
# NOTE(review): these values are presumably scaled by PROPDMGEXP - confirm.
summary(stormdata$PROPDMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 12.06 0.50 5000.00
# PROPDMGEXP is a factor, so summary() counts the records carrying each code.
# NOTE(review): codes such as K/M/B presumably mark thousands/millions/billions
# for PROPDMG - confirm against the NOAA documentation.
summary(stormdata$PROPDMGEXP)
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
# Distribution of the CROPDMG values across all records.
# NOTE(review): these values are presumably scaled by CROPDMGEXP - confirm.
summary(stormdata$CROPDMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 1.527 0.000 990.000
# CROPDMGEXP is a factor, so summary() counts the records carrying each code.
# NOTE(review): codes such as K/M/B presumably mark thousands/millions/billions
# for CROPDMG - confirm against the NOAA documentation.
summary(stormdata$CROPDMGEXP)
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
In the dataset, the loss of human life caused by each event is recorded along with the injuries caused by that event.
The sums of fatalities and injuries need to be consolidated. For each event type listed in
EVTYPE, the INJURIES and FATALITIES are grouped together in one separate table. Then the following consideration is applied: only event types with more than 100 fatalities (or, for the second table, more than 100 injuries) are kept.
# Total fatalities and injuries per event type, ordered from most to fewest
# fatalities and restricted to event types with more than 100 fatalities.
eventdata_Injfat <- stormdata %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  ) %>%
  arrange(desc(FATALITIES)) %>%
  filter(FATALITIES != 0 | INJURIES != 0, FATALITIES > 100)
# Print the resulting table.
eventdata_Injfat
## # A tibble: 20 x 3
## EVTYPE FATALITIES INJURIES
## <fctr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
## 11 WINTER STORM 206 1321
## 12 RIP CURRENTS 204 297
## 13 HEAT WAVE 172 309
## 14 EXTREME COLD 160 231
## 15 THUNDERSTORM WIND 133 1488
## 16 HEAVY SNOW 127 1021
## 17 EXTREME COLD/WIND CHILL 125 24
## 18 STRONG WIND 103 280
## 19 BLIZZARD 101 805
## 20 HIGH SURF 101 152
# Bar chart of total fatalities per event type, bars ordered tallest first and
# x-axis labels rotated so the event names stay readable.
Fp <- ggplot(
  eventdata_Injfat,
  aes(x = reorder(EVTYPE, -FATALITIES, FUN = sum), y = FATALITIES)
) +
  geom_col(colour = "black", fill = "gray80", width = 0.8) +
  expand_limits(y = seq(0, 6000, by = 1000)) +
  labs(
    title = "Fatalities >100 for Events",
    x = "Event Types ",
    y = "Fatalities sum for Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Fp  (shown later, side by side with the injuries chart)
# Rebuild the per-event totals, this time keeping event types with more than
# 100 injuries. Rows remain ordered by fatalities, not by injuries.
eventdata_Injfat <- stormdata %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  ) %>%
  arrange(desc(FATALITIES)) %>%
  filter(FATALITIES != 0 | INJURIES != 0, INJURIES > 100)
# Print the resulting table.
eventdata_Injfat
## # A tibble: 37 x 3
## EVTYPE FATALITIES INJURIES
## <fctr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
## # ... with 27 more rows
# Bar chart of total injuries per event type, bars ordered tallest first.
Ip <- ggplot(
  eventdata_Injfat,
  aes(x = reorder(EVTYPE, -INJURIES, FUN = sum), y = INJURIES)
) +
  geom_col(colour = "black", fill = "blue", width = 0.8) +
  expand_limits(y = seq(0, 100000, by = 5000)) +
  labs(
    title = "Injuries >100 for Events",
    x = "Event Types ",
    y = "Injuries sum for Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Ip
# Show the fatalities (A) and injuries (B) charts side by side.
ggarrange(Fp, Ip, ncol = 2, nrow = 1, labels = c("A", "B"))
The following events cause the most fatalities:
TORNADO, EXCESSIVE HEAT, FLASH FLOOD, HEAT, LIGHTNING, TSTM WIND
The following events cause the most injuries: TORNADO, TSTM WIND, FLOOD, EXCESSIVE HEAT, LIGHTNING
For this, I have taken the property damage and crop damage summaries. I summed the damage expenses and filtered for the expenses that cost billions, as this money would be spent from government funds. I have also summarised the damages in millions in another plot.
# Billion-scale damage per event type.
#
# A record is considered when either exponent column is "B", but each damage
# column is summed only over the rows whose OWN exponent is "B". Summing the
# whole column would count amounts recorded with a different exponent on the
# same row as if they were billions.
#
# NOTE: the table and conclusions printed in this report were produced before
# this correction and must be regenerated by re-knitting.
eventdata_eco <- stormdata %>%
  filter(PROPDMGEXP == "B" | CROPDMGEXP == "B") %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPDMG = sum(PROPDMG[PROPDMGEXP == "B"]),
    CROPDMG = sum(CROPDMG[CROPDMGEXP == "B"])
  ) %>%
  arrange(desc(PROPDMG)) %>%
  filter(!(PROPDMG == 0 & CROPDMG == 0))
# Print the resulting table.
eventdata_eco
## # A tibble: 23 x 3
## EVTYPE PROPDMG CROPDMG
## <fctr> <dbl> <dbl>
## 1 ICE STORM 500.00 5.00
## 2 FLOOD 122.50 33.50
## 3 HURRICANE/TYPHOON 65.50 729.71
## 4 STORM SURGE 42.56 0.00
## 5 HURRICANE 5.70 801.00
## 6 TORNADO 5.30 0.00
## 7 TROPICAL STORM 5.15 0.00
## 8 RIVER FLOOD 5.00 5.00
## 9 WINTER STORM 5.00 0.00
## 10 STORM SURGE/TIDE 4.00 0.00
## # ... with 13 more rows
# Bar chart of billion-scale property damage per event type, tallest bar first.
bbp <- ggplot(
  eventdata_eco,
  aes(x = reorder(EVTYPE, -(PROPDMG), FUN = sum), y = PROPDMG)
) +
  geom_col(colour = "black", fill = "grey80", width = 0.8) +
  expand_limits(y = seq(0, 150, by = 10)) +
  labs(
    title = "Property Damage in Billions for Events",
    x = "Event Types ",
    y = "Property Damage sum (in B)"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# bbp
# Bar chart of billion-scale crop damage per event type, tallest bar first.
bbp1 <- ggplot(
  eventdata_eco,
  aes(x = reorder(EVTYPE, -(CROPDMG), FUN = sum), y = CROPDMG)
) +
  geom_col(colour = "black", fill = "green", width = 0.8) +
  expand_limits(y = seq(100, 800, by = 100)) +
  labs(
    title = "Crop Damage in Billions for Events",
    x = "Event Types ",
    y = "Crop Damage sum (in B)"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# bbp1
# Show the property (A) and crop (B) damage charts side by side.
ggarrange(bbp, bbp1, ncol = 2, nrow = 1, labels = c("A", "B"))
The following events cause the most property damage in billions: ICE STORM, FLOOD, HURRICANE/TYPHOON, STORM SURGE
The following events cause the most crop damage in billions:
HURRICANE, HURRICANE/TYPHOON, FLOOD, HIGH WINDS
Now, let us check the damages done in millions:
# Million-scale damage per event type.
#
# A record is considered when either exponent column is "M" or "m", but each
# damage column is summed only over the rows whose OWN exponent is "M"/"m".
# Summing the whole column would count amounts recorded with a different
# exponent on the same row as if they were millions.
#
# Only event types whose property or crop total exceeds 1000 are kept.
#
# NOTE: the table and conclusions printed in this report were produced before
# this correction and must be regenerated by re-knitting.
eventdata_eco_m <- stormdata %>%
  filter(PROPDMGEXP %in% c("M", "m") | CROPDMGEXP %in% c("M", "m")) %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPDMG = sum(PROPDMG[PROPDMGEXP %in% c("M", "m")]),
    CROPDMG = sum(CROPDMG[CROPDMGEXP %in% c("M", "m")])
  ) %>%
  arrange(desc(PROPDMG)) %>%
  filter(PROPDMG > 1000 | CROPDMG > 1000)
# Print the resulting table.
eventdata_eco_m
## # A tibble: 24 x 3
## EVTYPE PROPDMG CROPDMG
## <fctr> <dbl> <dbl>
## 1 TORNADO 57068.68 28424.41
## 2 HAIL 54962.36 16809.83
## 3 FLOOD 47124.28 56655.93
## 4 FLASH FLOOD 29468.73 39047.86
## 5 TSTM WIND 18510.27 7890.75
## 6 THUNDERSTORM WIND 8936.03 5508.40
## 7 HIGH WIND 6674.00 7305.01
## 8 HURRICANE 6573.67 2739.31
## 9 HURRICANE/TYPHOON 4705.62 3494.97
## 10 WILDFIRE 4610.34 873.40
## # ... with 14 more rows
# Bar chart of million-scale property damage per event type, tallest bar first.
mbp1 <- ggplot(
  eventdata_eco_m,
  aes(x = reorder(EVTYPE, -PROPDMG, FUN = sum), y = PROPDMG)
) +
  geom_col(colour = "black", fill = "gray80", width = 0.8) +
  expand_limits(y = seq(0, 500, by = 10)) +
  labs(
    title = "Property Damage in Millions for Events",
    x = "Event Types ",
    y = "Property Damage sum (in M)"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# mbp1
# Bar chart of million-scale crop damage per event type, tallest bar first.
# The y-axis label previously read "Corp Damage"; it now reads "Crop Damage".
mbp2 <- ggplot(
  eventdata_eco_m,
  aes(x = reorder(EVTYPE, -CROPDMG, FUN = sum), y = CROPDMG)
) +
  geom_col(colour = "black", fill = "green", width = 0.8) +
  expand_limits(y = seq(0, 500, by = 10)) +
  labs(
    title = "Crop Damage in Millions for Events",
    x = "Event Types ",
    y = "Crop Damage sum (in M)"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# mbp2
# Show the property (A) and crop (B) damage charts side by side.
ggarrange(mbp1, mbp2, ncol = 2, nrow = 1, labels = c("A", "B"))
The following events cause the most property damage in millions: TORNADO, HAIL, FLOOD, FLASH FLOOD
The following events cause the most crop damage in millions: FLOOD, FLASH FLOOD, TORNADO and HAIL
Events like TORNADO, EXCESSIVE HEAT and TSTM WIND are the most harmful to the human population.
Events like ICE STORM, FLOOD, HURRICANE, STORM SURGE, TORNADO and HAIL cause major economic consequences.