Severe weather causes both health and financial losses, with consequences for individuals and for their communities as a whole. Some of these losses are preventable while others can only be treated after the fact, and death, the gravest result of severe weather, is in some cases preventable and in others an almost unavoidable eventuality.
Here we look for patterns in the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, with the understanding that blizzards, tornadoes, and hurricanes take a great toll on life and property.
Fatality and injury patterns suggest that floods are the biggest threat to health, while patterns of property and crop damage suggest that hurricanes are the main driver of economic loss.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
## Warning: package 'data.table' was built under R version 4.1.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
# Source archive for the NOAA storm data set.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Keep the archive relative to the working directory so the analysis can be
# re-run on any machine, not only one that has a particular user folder.
destfile <- "StormData.csv.bz2"
# Reuse an existing copy instead of downloading again on every run;
# mode = "wb" keeps the compressed archive byte-exact on Windows.
if (!file.exists(destfile)) {
  download.file(url, destfile, mode = "wb")
}
# read.csv() reads the compressed file directly.
StormData <- read.csv(destfile, header = TRUE, sep = ",")
str(StormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Columns kept for the analysis: the event type, the two casualty counts, and
# the amount/unit-code pairs for property and crop damage.
colHeaders <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
File01 <- StormData[colHeaders]
head(File01)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
Keeping only events with a known type and non-zero fatalities, injuries, property damage, and crop damage
# Keep an event only when its type is not "?" AND all four impact columns are
# greater than zero; an event that is missing any one of them is discarded.
# which() also drops rows for which the condition evaluates to NA.
File02 <- File01[
  which(with(
    File01,
    EVTYPE != "?" &
      FATALITIES > 0 &
      INJURIES > 0 &
      PROPDMG > 0 &
      CROPDMG > 0
  )),
]
head(File02)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 187910 TORNADO 6 130 5 M 5
## 188605 TORNADO 22 150 500 K 50
## 191345 WINTER STORM 3 5 5 M 5
## 192361 WINTER STORM HIGH WINDS 1 15 60 M 5
## 192372 WINTER STORMS 10 17 500 K 500
## 195017 TROPICAL STORM GORDON 8 43 500 K 500
## CROPDMGEXP
## 187910 K
## 188605 M
## 191345 M
## 192361 M
## 192372 K
## 195017 K
Examining EVTYPE
# Count the retained events under each raw event label.
table(File02[["EVTYPE"]])
##
## BLIZZARD EXCESSIVE HEAT FLASH FLOOD
## 3 1 11
## FLOOD HEAT HEAT WAVE DROUGHT
## 14 2 1
## HEAVY SNOW HIGH WIND HIGH WINDS
## 2 7 1
## HURRICANE HURRICANE/TYPHOON ICE STORM
## 1 2 1
## THUNDERSTORM WIND THUNDERSTORM WINDS TORNADO
## 1 3 40
## TROPICAL STORM TROPICAL STORM GORDON TSTM WIND
## 4 1 4
## TSUNAMI WILDFIRE WINTER STORM
## 1 5 3
## WINTER STORM HIGH WINDS WINTER STORMS
## 1 1
# Collapse the raw event labels into a few broad groups. The rules run in the
# order listed and each rule sees the output of the ones before it, so a label
# containing several keywords is claimed by the earliest matching rule (for
# example "WINTER STORM HIGH WINDS" ends up in "WIND", not "WINTER STORMS").
File02$EVTYPE <- local({
  # keyword found anywhere in the label -> group name assigned to the label
  rules <- c(
    HEAT = "HEAT",
    FLOOD = "FLOOD",
    WIND = "WIND",
    BLIZZARD = "WINTER STORMS",
    SNOW = "WINTER STORMS",
    ICE = "WINTER STORMS",
    WINTER = "WINTER STORMS",
    HURRICANE = "HURRICANE",
    TROPICAL = "HURRICANE",
    TSUNAMI = "HURRICANE"
  )
  Reduce(
    function(labels, keyword) {
      labels[grepl(keyword, labels, fixed = TRUE)] <- rules[[keyword]]
      labels
    },
    names(rules),
    as.character(File02$EVTYPE)
  )
})
table(File02$EVTYPE)
##
## FLOOD HEAT HURRICANE TORNADO WILDFIRE
## 25 4 9 40 5
## WIND WINTER STORMS
## 17 10
# Translate the damage unit codes into numeric multipliers
# (K = 10^3, M = 10^6, B = 10^9).
#
# The whole code is matched against a lookup table after trimming and
# upper-casing it, so "k"/"m"/"b" are recognised as well and a letter is never
# substituted inside a longer value. Codes that are not in the table keep the
# earlier treatment: numeric text is used as the multiplier as-is, anything
# else becomes NA.
exp_to_multiplier <- function(code) {
  code <- toupper(trimws(as.character(code)))
  multiplier <- unname(c(K = 10^3, M = 10^6, B = 10^9)[code])
  unknown <- is.na(multiplier)
  # Non-numeric leftovers are expected to turn into NA here, so the coercion
  # warning for exactly this step is silenced.
  multiplier[unknown] <- suppressWarnings(as.numeric(code[unknown]))
  multiplier
}
File02$PROPDMGEXP <- exp_to_multiplier(File02$PROPDMGEXP)
File02$CROPDMGEXP <- exp_to_multiplier(File02$CROPDMGEXP)
Change to numeric
# Coerce both multiplier columns to numeric in one assignment.
File02[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  File02[c("PROPDMGEXP", "CROPDMGEXP")],
  as.numeric
)
Creating new Columns
# Damage amounts: the reported figure scaled by its unit multiplier.
File02[["PROPDMGAC"]] <- with(File02, PROPDMG * PROPDMGEXP)
File02[["CROPDMGACT"]] <- with(File02, CROPDMG * CROPDMGEXP)
# Total health impact (fatalities plus injuries) per event group,
# listed from the largest total to the smallest.
File03 <- with(
  File02,
  aggregate(
    list(H_IMPACT = FATALITIES + INJURIES),
    by = list(EVENT = EVTYPE),
    FUN = sum
  )
)
File03 <- File03[order(-File03$H_IMPACT), ]
head(File03)
## EVENT H_IMPACT
## 1 FLOOD 2796
## 7 WINTER STORMS 2077
## 4 TORNADO 1820
## 3 HURRICANE 1404
## 2 HEAT 425
## 6 WIND 196
# Total damages (property plus crop amounts) per event group,
# listed from the largest total to the smallest.
File04 <- with(
  File02,
  aggregate(
    list(T_DAMAGES = PROPDMGAC + CROPDMGACT),
    by = list(EVENT = EVTYPE),
    FUN = sum
  )
)
File04 <- File04[order(-File04$T_DAMAGES), ]
head(File04)
## EVENT T_DAMAGES
## 3 HURRICANE 14194485000
## 6 WIND 1482028500
## 5 WILDFIRE 1240270000
## 4 TORNADO 1145427000
## 2 HEAT 493545000
## 1 FLOOD 339128000
Plots of Weather Events most costly to health
# Dot plot of the combined fatality and injury count per event group, with
# the groups ordered along the x-axis by that count.
ggplot(File03, aes(x = reorder(EVENT, H_IMPACT), y = H_IMPACT, color = EVENT)) +
  geom_point() +
  xlab("Weather Events") +
  ylab("Sum of Fatalities and Injuries") +
  ggtitle("Weather Events most costly to health")
Plots of Weather Events most costly to property
# Dot plot of the combined property and crop damage per event group, with
# the groups ordered along the x-axis by that total.
ggplot(
  data = File04,
  mapping = aes(x = reorder(EVENT, T_DAMAGES), y = T_DAMAGES, color = EVENT)
) +
  geom_point() +
  labs(
    x = "Weather Events",
    y = "Sum of Property and Crop Damages",
    title = "Weather Events most costly to Property"
  )
## Conclusion