Synopsis: This analysis explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to determine the underlying trends in the data. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage, from 1950 to 2011. Based on the current analysis, Tornado, Heat, Flood and Lightning caused the most fatalities across the USA. Tornado, Flood, Thunderstorm wind and Heat caused more injuries than other event types. Flood, Hurricane, Tornado and Hail caused the most property damage, whereas Drought, Flood, Ice Storm and Hurricane had a significant impact on crop damage across the USA. We can therefore draw the following conclusions from these observations:
Across the United States, the event types Tornado, Flood, Heat, Thunderstorm wind and Lightning are the most harmful with respect to population health. Across the United States, the event types Flood, Hurricane, Tornado, Hail, Ice Storm and Drought have the greatest economic consequences.
library(R.utils)
library(dplyr)
library(ggplot2)
# Switch to the project folder that holds the compressed NOAA extract.
# NOTE(review): this absolute, user-specific path only exists on the original
# author's machine; consider running from the project root instead.
setwd("C:/Users/soudey/Documents/Data Science/NOAA storm data research")

# Decompress the archive next to itself. `remove = FALSE` keeps the .bz2 file
# and `skip = TRUE` leaves an already-extracted CSV untouched.
bunzip2(
  filename = "repdata_data_StormData.csv.bz2",
  destname = "repdata_data_StormData.csv",
  remove = FALSE,
  skip = TRUE
)
## [1] "repdata_data_StormData.csv"
## attr(,"temporary")
## [1] FALSE

# Read the full storm table, then report its size and first six rows.
storm_data <- read.csv(file = "repdata_data_StormData.csv")
dim(storm_data)
## [1] 902297 37
head(storm_data, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
After loading the data, the next step is to consolidate the different catastrophic events by event type in order to remove ambiguity in the data.
# Collapse variant spellings of the same event onto one canonical label so
# that the per-event totals computed later are not split across near-duplicates.
#
# Fix: the thunderstorm variants used to be mapped to the misspelled label
# "TUNDERSTORM WIND", which kept them apart from the rows already recorded as
# "THUNDERSTORM WIND". They now share that existing, correctly spelled label.
# Rendered outputs captured before this fix still show the misspelled label and
# a separate "THUNDERSTORM WIND" row; re-knit the report to refresh them.
evtype_aliases <- c(
  "TSTM WIND"          = "THUNDERSTORM WIND",
  "THUNDERTORM WINDS"  = "THUNDERSTORM WIND",
  "THUNDERSTORM WINDS" = "THUNDERSTORM WIND",
  "FLASH FLOOD"        = "FLOOD",
  "RIVER FLOOD"        = "FLOOD",
  "HURRICANE/TYPHOON"  = "HURRICANE",
  "EXCESSIVE HEAT"     = "HEAT"
)

# Work on plain strings: assigning a label into a factor column only succeeds
# when that label is already one of the factor's levels.
storm_data$EVTYPE <- as.character(storm_data$EVTYPE)

# No canonical label is itself an alias, so a single lookup pass gives the
# same result as applying the replacements one after another.
is_alias <- storm_data$EVTYPE %in% names(evtype_aliases)
storm_data$EVTYPE[is_alias] <-
  unname(evtype_aliases[storm_data$EVTYPE[is_alias]])

head(unique(storm_data$EVTYPE))
## [1] TORNADO TUNDERSTORM WIND HAIL
## [4] FREEZING RAIN SNOW ICE STORM/FLASH FLOOD
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
This section shows the number of fatalities caused by natural calamities.
library(dplyr)
# Total fatalities per event type, keeping only event types with at least one
# fatality, ranked from deadliest to least deadly.
fatal_accidents <- aggregate(FATALITIES ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(FATALITIES > 0) %>%
  arrange(desc(FATALITIES))

# Show the ten deadliest event types.
head(fatal_accidents, n = 10)
## EVTYPE FATALITIES
## 1 TORNADO 5633
## 2 HEAT 2840
## 3 FLOOD 1450
## 4 LIGHTNING 816
## 5 TUNDERSTORM WIND 569
## 6 RIP CURRENT 368
## 7 HIGH WIND 248
## 8 AVALANCHE 224
## 9 WINTER STORM 206
## 10 RIP CURRENTS 204
This section shows the number of people injured by natural calamities.
# Total injuries per event type, keeping only event types with at least one
# injury, ranked from most to fewest injuries.
injuries_accidents <- aggregate(INJURIES ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(INJURIES > 0) %>%
  arrange(desc(INJURIES))

# Show the ten event types with the most injuries.
head(injuries_accidents, n = 10)
## EVTYPE INJURIES
## 1 TORNADO 91346
## 2 HEAT 8625
## 3 FLOOD 8568
## 4 TUNDERSTORM WIND 7865
## 5 LIGHTNING 5230
## 6 ICE STORM 1975
## 7 THUNDERSTORM WIND 1488
## 8 HAIL 1361
## 9 HURRICANE 1321
## 10 WINTER STORM 1321
Based on the NOAA documentation, we need to transform the property and crop damage quantities into dollar amounts. Estimates should be rounded to three significant digits, followed by an alphabetical character signifying the magnitude of the number, i.e., 1.55B for $1,550,000,000. Alphabetical characters used to signify magnitude include “K” for thousands, “M” for millions, and “B” for billions.
# Inspect which magnitude codes actually occur in each exponent column.
View(unique(storm_data$PROPDMGEXP))
View(unique(storm_data$CROPDMGEXP))

# Convert the damage figures to dollars in place by multiplying each amount by
# the factor its magnitude code stands for. Rows carrying any code that is not
# listed below are left unchanged.
# NOTE(review): the columns are overwritten, so running this chunk twice
# scales the amounts twice.
prop_multipliers <- c(K = 1000, m = 1000000, M = 1000000, B = 1000000000)
for (symbol in names(prop_multipliers)) {
  hit <- storm_data$PROPDMGEXP == symbol
  storm_data$PROPDMG[hit] <- storm_data$PROPDMG[hit] * prop_multipliers[[symbol]]
}

crop_multipliers <- c(
  K = 1000, k = 1000, m = 1000000, M = 1000000, B = 1000000000
)
for (symbol in names(crop_multipliers)) {
  hit <- storm_data$CROPDMGEXP == symbol
  storm_data$CROPDMG[hit] <- storm_data$CROPDMG[hit] * crop_multipliers[[symbol]]
}
This section shows the total amount of property damage caused by natural calamities.
# Total property damage per event type, keeping only event types with a
# non-zero total, ranked from most to least costly.
property_damage <- aggregate(PROPDMG ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(PROPDMG > 0) %>%
  arrange(desc(PROPDMG))

# Show the ten costliest event types for property.
head(property_damage, n = 10)
## EVTYPE PROPDMG
## 1 FLOOD 165917467374
## 2 HURRICANE 81174159010
## 3 TORNADO 56937160779
## 4 STORM SURGE 43323536000
## 5 HAIL 15732267048
## 6 TROPICAL STORM 7703890550
## 7 WINTER STORM 6688497251
## 8 TUNDERSTORM WIND 6221568518
## 9 HIGH WIND 5270046295
## 10 WILDFIRE 4765114000
This section shows the total amount of crop damage caused by natural calamities.
# Total crop damage per event type, keeping only event types with a non-zero
# total, ranked from most to least costly.
crop_damage <- aggregate(CROPDMG ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(CROPDMG > 0) %>%
  arrange(desc(CROPDMG))

# Show the ten costliest event types for crops.
head(crop_damage, n = 10)
## EVTYPE CROPDMG
## 1 DROUGHT 13972566000
## 2 FLOOD 12112744550
## 3 HURRICANE 5349782800
## 4 ICE STORM 5022113500
## 5 HAIL 3025954473
## 6 EXTREME COLD 1292973000
## 7 FROST/FREEZE 1094086000
## 8 HEAT 893863500
## 9 TUNDERSTORM WIND 744662138
## 10 HEAVY RAIN 733399800
Fatalities per event type
library(ggplot2)
# Horizontal bar chart of the ten event types with the most fatalities.
# head() is used instead of [1:10, ] so that a table with fewer than ten rows
# does not get padded with NA rows.
ggplot(
  data = head(fatal_accidents, 10),
  aes(x = factor(EVTYPE), y = FATALITIES, fill = EVTYPE)
) +
  geom_bar(stat = "identity", width = 0.3) +
  coord_flip() +
  # Axis titles follow the aesthetic, not the screen position: after
  # coord_flip() the x aesthetic (event type) is drawn on the vertical axis
  # and the y aesthetic (fatalities) on the horizontal one.
  xlab("Natural Calamities") +
  ylab("No. of Fatalities") +
  ggtitle("Total no. of fatalities per event type across USA")
Injuries per event type
library(ggplot2)
# Horizontal bar chart of the ten event types with the most injuries.
# head() is used instead of [1:10, ] so that a table with fewer than ten rows
# does not get padded with NA rows.
ggplot(
  data = head(injuries_accidents, 10),
  aes(x = factor(EVTYPE), y = INJURIES, fill = EVTYPE)
) +
  geom_bar(stat = "identity", width = 0.3) +
  coord_flip() +
  # Axis titles follow the aesthetic, not the screen position: after
  # coord_flip() the x aesthetic (event type) is drawn on the vertical axis
  # and the y aesthetic (injuries) on the horizontal one.
  xlab("Natural Calamities") +
  ylab("No. of injuries") +
  ggtitle("Total no. of injuries per event type across USA")
Property Damage per event type
library(ggplot2)
# Horizontal bar chart of the ten event types with the largest total property
# damage. head() is used instead of [1:10, ] so that a table with fewer than
# ten rows does not get padded with NA rows.
ggplot(
  data = head(property_damage, 10),
  aes(x = factor(EVTYPE), y = PROPDMG, fill = EVTYPE)
) +
  geom_bar(stat = "identity", width = 0.3) +
  coord_flip() +
  # Axis titles follow the aesthetic, not the screen position: after
  # coord_flip() the x aesthetic (event type) is drawn on the vertical axis
  # and the y aesthetic (damage amount) on the horizontal one.
  xlab("Natural Calamities") +
  ylab("Total amount of property damage") +
  ggtitle("Total amount of property damage per event type across USA")
Crop Damage per event type
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.