# Global knitr chunk defaults: echo the code, show results, cache chunks.
knitr::opts_chunk$set(echo = TRUE, results = "show", cache = TRUE)
# Large scipen penalty so numbers are printed in fixed notation.
options(scipen = 999)
library(ggplot2)
library (grid)
library (gridExtra)
# NOTE(review): these look like leftover knitr chunk options; as written they
# only create two global logical variables. Confirm they are still needed.
include <- TRUE
eval <- TRUE
Data from the NOAA Storm Database were used to investigate the effect of weather events on health and the economy in the United States of America. The data were cleaned to group weather events into the 55 event categories described by NOAA. For the impact on health, the numbers of injuries and fatalities were considered, while for the economic impact, damage to property and crops was considered. The analysis revealed that the event with the greatest impact on injuries is tornado, whereas excessive heat causes the most deaths. With regard to the impact on the economy, droughts cause the most damage to crops and hurricanes to property.
The data were obtained from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and are available for download. For more information regarding the data, see the Storm Data Documentation and FAQ.
# Local file name of the compressed data set and the location it is fetched
# from.
f <- "StormData.csv.bz2"
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Download only when the file is not already present in the working directory.
if (!file.exists(f)) {
  download.file(url, f)
}
Storm Data is an official publication of NOAA which documents:
The occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce;
Rare, unusual weather phenomena that generate media attention, such as snow flurries in South Florida or the San Diego coastal area; and
Other significant meteorological events, such as record maximum or minimum temperatures or precipitation that occur in connection with another event.
The data set has 37 variables and 902297 observations. However, for our analysis we are only interested in a subset:
# Read the comma-separated file (read.table can read the .bz2 directly).
data <- read.table(file = f, sep = ",", header = TRUE, na.strings = "NA")
# Keep columns 2, 8 and 23 to 28 only.
# NOTE(review): selection is positional; later code expects BGN_DATE, EVTYPE,
# FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP to be
# among them - confirm against the file header.
sub_storm <- data[, c(2, 8, 23:28)]
Record collection started in 1950 and the data displays entries until 2011. However, early records have a reduced number of entries compared to latest ones (see plot below). Hence, the analysis will be carried out from 1995 to 2011.
# Convert the begin date to Date (month/day/year layout).
sub_storm$BGN_DATE <- as.Date(sub_storm$BGN_DATE, format = "%m/%d/%Y")
# Store the year as a plain character vector; formatting the Date column
# directly avoids embedding a data frame inside the Year column.
sub_storm$Year <- format(sub_storm$BGN_DATE, "%Y")
# Number of records per year, to show how sparse the early years are.
plot(
  table(sub_storm$Year),
  xlab = "Year",
  ylab = "Number of records",
  main = "Number of Weather Event Records in USA per year"
)
# Restrict the analysis to 1995 onwards. The comparison is inclusive so that
# events that began on 1 January 1995 are not dropped.
sub_storm <- subset(sub_storm, sub_storm$BGN_DATE >= as.Date("1995-01-01"))
Events that affected health and economy have a number of fatalities, injuries, property and crop damage. Hence we discard records which did not affect health and economy.
# Keep only records that report at least one fatality or injury, or a
# positive property or crop damage value.
has_impact <- with(
  sub_storm,
  FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
)
sub_storm <- sub_storm[which(has_impact), ]
We noticed that there are 985 event types registered in the data set. However, the documentation restricts the event types to 55. The data set variable EVTYPE was cleaned by grouping the events into the 55 categories. This was a time-consuming task which was quite subjective in some instances. Cleaning the data in this way could have affected the outcome of the results.
# Normalise EVTYPE and fold the raw labels into the target categories.
#
# Each list element is named after the target category and holds the raw
# (upper-cased, whitespace-trimmed) labels that are replaced by it. Labels
# that are not listed are left unchanged.
#
# Changes compared with the previous chain of `==` assignments:
#   * "UNSEASONABLY WARM" and "UNSEASONABLY WARM AND DRY" now map to
#     "EXCESSIVE HEAT"; they used to be assigned the misspelt label
#     "EXCESIVE HEAT", which created a separate category.
#   * Labels are trimmed before matching, so variants that differ only in
#     leading/trailing whitespace fall into the same group regardless of how
#     many spaces they carry.
#   * Labels that were first mapped to "EXTREME COLD" and then re-mapped to
#     "EXTREME COLD/WIND CHILL" are listed under the final category directly,
#     so the result no longer depends on statement order.
#
# NOTE(review): several groupings are judgement calls kept as they were, e.g.
# "COLD WAVE" under "SNEAKER WAVE", "GRASS FIRES"/"BRUSH FIRE" under
# "EXCESSIVE HEAT", landslides under "AVALANCHE" and "WHIRLWIND" under
# "TROPICAL STORM". Confirm against the NOAA event definitions.
evtype_groups <- list(
  "COASTAL FLOOD" = c(
    "COASTAL FLOODING", "COASTAL FLOODING/EROSION", "TIDAL FLOODING",
    "EROSION/CSTL FLOOD"
  ),
  "RIP CURRENT" = c(
    "RIP CURRENTS", "RIP CURRENTS/HEAVY SURF", "COASTAL EROSION"
  ),
  "WILDFIRE" = c("WILD FIRES", "WILD/FOREST FIRE", "WILD/FOREST FIRES"),
  "DROUGHT" = c("DROUGHT/EXCESSIVE HEAT", "HEAT WAVE DROUGHT"),
  "DUST DEVIL" = c("DUST DEVIL WATERSPOUT", "BLOWING DUST"),
  "FLASH FLOOD" = c(
    "FLASH FLOODING", "FLOOD/FLASH FLOOD", "FLASH FLOOD WINDS",
    "FLASH FLOODS", "FLASH FLOOD - HEAVY RAIN", "FLASH FLOOD/ STREET",
    "FLASH FLOOD/FLOOD", "FLOOD/FLASH/FLOOD", "FLOOD/FLASH"
  ),
  "LAKESHORE FLOOD" = c("LAKE FLOOD"),
  "FLOOD" = c(
    "BREAKUP FLOODING", "RIVER FLOOD", "FLOODING/HEAVY RAIN",
    "FLOOD/RAIN/WINDS", "URBAN FLOOD", "URBAN/SMALL STREAM FLOOD", "FLOODS",
    "RURAL FLOOD", "URBAN FLOODING", "FLOODING", "ICE JAM FLOODING",
    "RIVER FLOODING", "FLOOD & HEAVY RAIN", "URBAN AND SMALL STREAM FLOODIN",
    "MUD SLIDES URBAN FLOODING", "ICE JAM FLOOD (MINOR",
    "RIVER AND STREAM FLOOD", "URBAN/SML STREAM FLD"
  ),
  "FROST/FREEZE" = c(
    "FROST", "EARLY FROST", "AGRICULTURAL FREEZE", "HARD FREEZE", "FREEZE"
  ),
  "HAIL" = c(
    "HAIL 150", "HAIL 200", "HAIL 175", "HAIL DAMAGE", "GUSTY WIND/HAIL",
    "HAIL 075", "HAIL 0.75", "HAIL 275", "HAIL 100", "HAIL 450", "HAIL 125",
    "HAIL 75", "HAILSTORM", "SMALL HAIL"
  ),
  "EXCESSIVE HEAT" = c(
    "EXTREME HEAT", "RECORD HEAT", "HEAT WAVE", "GRASS FIRES", "BRUSH FIRE",
    "UNSEASONABLY WARM AND DRY", "UNSEASONABLY WARM"
  ),
  "HEAVY RAIN" = c(
    "HEAVY RAINS", "HEAVY RAIN/SEVERE WEATHER", "HEAVY RAIN AND FLOOD",
    "HEAVY RAIN/HIGH SURF", "UNSEASONAL RAIN", "HVY RAIN", "RAIN",
    "MIXED PRECIP", "EXCESSIVE WETNESS", "MIXED PRECIPITATION",
    "EXCESSIVE RAINFALL", "TORRENTIAL RAINFALL", "DOWNBURST"
  ),
  "HEAVY SNOW" = c(
    "HEAVY SNOW SQUALLS", "HEAVY SNOW AND STRONG WINDS",
    "HEAVY SNOW AND HIGH WINDS", "HEAVY SNOW-SQUALLS", "HEAVY SNOW SHOWER",
    "HEAVY SNOW/HIGH WINDS & FLOOD", "HEAVY SNOW/ICE", "BLOWING SNOW",
    "SNOW SQUALLS", "SNOW", "LIGHT SNOW", "EXCESSIVE SNOW", "LIGHT SNOWFALL",
    "THUNDERSNOW", "LATE SEASON SNOW", "FREEZING RAIN/SNOW", "RAIN/SNOW",
    "SNOW FREEZING RAIN", "SNOW SQUALL"
  ),
  "HIGH SURF" = c(
    "HEAVY SURF COASTAL FLOODING", "ROUGH SURF", "HIGH SURF ADVISORY",
    "HAZARDOUS SURF", "HEAVY SURF", "HEAVY SURF AND WIND",
    "HEAVY SURF/HIGH SURF", "HIGH SEAS"
  ),
  "HIGH WIND" = c(
    "HIGH WIND DAMAGE", "HIGH WINDS", "HIGH WINDS HEAVY RAINS",
    "HIGH WIND (G40)", "WIND", "WINDS", "GUSTY WIND", "GUSTY WIND/HVY RAIN",
    "GUSTNADO", "NON-SEVERE WIND DAMAGE", "WIND DAMAGE", "GUSTY WINDS",
    "MICROBURST", "DRY MICROBURST"
  ),
  "HURRICANE/TYPHOON" = c(
    "TYPHOON", "HURRICANE OPAL/HIGH WINDS", "HURRICANE FELIX",
    "HURRICANE ERIN", "HURRICANE OPAL", "HURRICANE-GENERATED SWELLS",
    "HURRICANE EDOUARD", "HURRICANE"
  ),
  "WINTER WEATHER" = c(
    "WINTER WEATHER MIX", "WINTER WEATHER/MIX", "WINTRY MIX"
  ),
  "WINTER STORM" = c("WINTER STORM HIGH WINDS"),
  "TORNADO" = c(
    "TORNADO F0", "WATERSPOUT/ TORNADO", "TORNADO F3", "WATERSPOUT TORNADO",
    "TORNADO F1", "WATERSPOUT/TORNADO", "TORNADO F2", "WATERSPOUT-TORNADO",
    "LANDSPOUT"
  ),
  "TROPICAL STORM" = c(
    "TROPICAL STORM JERRY", "TROPICAL STORM DEAN", "WHIRLWIND"
  ),
  "LIGHTNING" = c(
    "LIGHTNING THUNDERSTORM WINDS", "LIGNTNING", "LIGHTNING FIRE",
    "LIGHTNING INJURY", "LIGHTNING WAUSEON", "LIGHTNING AND HEAVY RAIN",
    "LIGHTNING AND THUNDERSTORM WIN", "LIGHTNING."
  ),
  "THUNDERSTORM WIND" = c(
    "TSTM WIND", "TSTM WIND 40", "TSTM WIND AND LIGHTNING", "TSTM WIND 55",
    "TSTM WIND/HAIL", "TSTM WIND (41)", "TSTM WIND (G45)",
    "TSTM WIND DAMAGE", "TSTM WIND (G40)", "TSTM WIND (G35)",
    "TSTM WIND G58", "TSTM WINDS", "TSTM WIND 65)", "TSTM WIND 45",
    "TSTM WIND G45", "WET MICROBURST",
    "THUNDERSTORM WINDS LIGHTNING", "THUNDERSTORM WIND/LIGHTNING",
    "THUNDERSTORM WINDS", "THUNDERSTORM WINDS HAIL", "SEVERE THUNDERSTORM",
    "THUNDERSTORM WINDS G60", "THUNDERSTORM WIND 98 MPH",
    "SEVERE THUNDERSTORM WINDS", "THUNDERTORM WINDS",
    "THUNDERSTORM WIND 65MPH", "THUNDERSTORM", "THUNDERSTORM DAMAGE TO",
    "THUNDERSTORM WINDS AND", "THUNERSTORM WINDS", "THUNDERSTORM WIND (G40)",
    "THUNDERSTORMS WINDS", "THUNDERSTORM WIND G55",
    "THUNDERSTORM WIND TREES", "THUNDERSTORM WIND 65 MPH",
    "THUNDERESTORM WINDS", "THUNDERSTORM WINDS/ FLOOD", "THUNDERSTORM WINS",
    "THUNDERSTORM WINDSS", "TUNDERSTORM WIND", "THUNDERSTORM WIND 60 MPH",
    "THUNDERSTORM WINDSHAIL", "THUNDERSTORM WINDS 63 MPH",
    "THUNDERSTORM WIND G52", "SEVERE THUNDERSTORMS",
    "THUNDERSTORM WIND/ TREES", "THUNDERSTORM WIND/ TREE",
    "THUNDERSTORM WIND/AWNING", "THUNDERSTORMW", "THUNDERSTORM WINDS/HAIL",
    "THUNDERSTORM WIND.", "THUNDERSTORM WINDS53", "THUNDEERSTORM WINDS",
    "THUNDERSTORM WIND G60", "THUNDERSTORMS WIND"
  ),
  "STRONG WIND" = c(
    "STRONG WINDS", "STORM FORCE WINDS", "GUSTY WIND/RAIN", "NON-TSTM WIND",
    "WIND STORM", "GRADIENT WIND", "NON TSTM WIND"
  ),
  "STORM SURGE/TIDE" = c(
    "STORM SURGE", "ROUGH SEAS", "BEACH EROSION", "ASTRONOMICAL HIGH TIDE"
  ),
  "SNEAKER WAVE" = c(
    "HIGH WATER", "COLD WAVE", "ROGUE WAVE", "RAPIDLY RISING WATER",
    "WIND AND WAVE", "HIGH WAVES", "HEAVY SEAS", "HIGH SWELLS",
    "HEAVY SWELLS"
  ),
  "SLEET" = c("SNOW/SLEET/FREEZING RAIN"),
  "MARINE TROPICAL STORM" = c("COASTAL STORM", "COASTALSTORM"),
  "MARINE THUNDERSTORM WIND" = c("MARINE TSTM WIND"),
  "EXTREME COLD/WIND CHILL" = c(
    "EXTREME WIND CHILL", "EXTREME WINDCHILL", "COLD/WIND CHILL",
    "COLD", "RECORD COLD", "EXTENDED COLD", "UNSEASONABLY COLD",
    "COLD TEMPERATURE", "COLD AND WET CONDITIONS", "COLD WEATHER",
    "COLD AND SNOW", "UNSEASONABLE COLD",
    "EXTREME COLD", "GLAZE", "DAMAGING FREEZE",
    "FREEZING RAIN", "FREEZING DRIZZLE", "FREEZING SPRAY",
    "LIGHT FREEZING RAIN"
  ),
  "LAKE-EFFECT SNOW" = c("LAKE EFFECT SNOW"),
  "ICE STORM" = c(
    "BLACK ICE", "ICY ROADS", "SNOW AND ICE", "ICE ROADS", "ICE ON ROAD",
    "GLAZE ICE", "ICE/STRONG WINDS", "FALLING SNOW/ICE", "ICE", "SNOW/ICE"
  ),
  "AVALANCHE" = c(
    "LANDSLIDE", "LANDSLIDES", "MUDSLIDE", "MUDSLIDES", "MUD SLIDE",
    "LANDSLUMP", "ROCK SLIDE"
  ),
  "DENSE FOG" = c("FOG"),
  "MARINE STRONG WIND" = c("MARINE MISHAP", "MARINE ACCIDENT"),
  "HEAT" = c("WARM WEATHER"),
  "OTHER" = c(
    "DROWNING", "HYPERTHERMIA/EXPOSURE", "HYPOTHERMIA/EXPOSURE", "DAM BREAK"
  )
)

# Flatten the groups into two parallel vectors: raw label -> target category.
evtype_from <- unlist(evtype_groups, use.names = FALSE)
evtype_to <- rep(names(evtype_groups), lengths(evtype_groups))

# Upper-case and trim, then replace every label found in the lookup in one
# vectorised pass. match() never yields TRUE/NA masks, so missing EVTYPE
# values cannot break the assignment.
sub_storm$EVTYPE <- trimws(toupper(sub_storm$EVTYPE))
evtype_idx <- match(sub_storm$EVTYPE, evtype_from)
evtype_hit <- !is.na(evtype_idx)
sub_storm$EVTYPE[evtype_hit] <- evtype_to[evtype_idx[evtype_hit]]
Assessing the impact on health implies weather events that caused human injuries:
# Records with at least one injury; the event type becomes a factor so it can
# be used as a grouping variable.
storm_inj <- subset(sub_storm, INJURIES > 0)
storm_inj$EVTYPE <- as.factor(storm_inj$EVTYPE)
We group together all the events and calculate the number of injuries from 1995 to 2011 across USA.
# Total injuries per event type. aggregate() returns the grouping column
# ("Category") followed by the summed value ("x"); both are renamed.
inj <- aggregate(
  storm_inj$INJURIES,
  by = list(Category = storm_inj$EVTYPE),
  FUN = sum
)
inj <- setNames(inj, c("Event.Type", "No.injuries"))
In order to find out the five events that have produced the most number of injuries:
# Rank event types by decreasing injury count.
inj_rank <- order(-inj$No.injuries)
# Order the factor levels the same way so plots keep the ranking on the axis.
inj$Event.Type <- factor(inj$Event.Type, levels = inj$Event.Type[inj_rank])
# Five highest-ranked rows and their event types.
x <- inj[inj_rank, ][seq_len(5), ]
topinjuries <- x[, 1]
We found that the top five weather events that caused the most injuries in the USA from 1995 until 2011 are TORNADO, EXCESSIVE HEAT, FLOOD, THUNDERSTORM WIND, LIGHTNING.
The impact of weather events in health is also quantified by the number of fatalities.
# Records with at least one fatality; the event type becomes a factor so it
# can be used as a grouping variable.
storm_fat <- subset(sub_storm, FATALITIES > 0)
storm_fat$EVTYPE <- as.factor(storm_fat$EVTYPE)
We group together all the events and calculate the number of fatalities from 1995 to 2011 across USA.
# Total fatalities per event type. aggregate() returns the grouping column
# ("Category") followed by the summed value ("x"); both are renamed.
fat <- aggregate(
  storm_fat$FATALITIES,
  by = list(Category = storm_fat$EVTYPE),
  FUN = sum
)
fat <- setNames(fat, c("Event.Type", "No.fatalities"))
In order to find the five events that have produced the largest number of fatalities, we order them by decreasing number of fatalities:
# Rank event types by decreasing fatality count.
fat_rank <- order(-fat$No.fatalities)
# Order the factor levels the same way so plots keep the ranking on the axis.
fat$Event.Type <- factor(fat$Event.Type, levels = fat$Event.Type[fat_rank])
# Five highest-ranked rows and their event types.
y <- fat[fat_rank, ][seq_len(5), ]
topfatalities <- y[, 1]
We can then plot the top five events that have caused the most impact on health since 1995 up to 2011 across the USA.
# Bar charts of the top five event types by injuries (p1) and fatalities (p2).
# Columns are referenced by bare name inside aes(); ggplot2 resolves them in
# the data frame passed to ggplot(), which avoids the discouraged `df$col`
# form.
p1 <- ggplot(x, aes(x = Event.Type, y = No.injuries)) +
  theme_bw() +
  xlab("Event type") +
  ylab("Number of injuries") +
  ggtitle("Number of total injuries") +
  geom_bar(colour = "black", fill = "#DD8888", width = 0.8, stat = "identity")
p2 <- ggplot(y, aes(x = Event.Type, y = No.fatalities)) +
  theme_bw() +
  xlab("Event type") +
  ylab("Number of fatalities") +
  ggtitle("Number of total fatalities") +
  geom_bar(colour = "black", fill = "#DD8888", width = 0.8, stat = "identity")
# Stack both panels under a common title (typo "eventes" corrected).
grid.arrange(
  p1, p2,
  ncol = 1,
  top = "Impact of weather events on health in the USA from 1995 to 2011"
)
Assessing the impact on economy when the events have caused damage on crops:
# Records with a positive crop damage value; the event type becomes a factor
# so it can be used as a grouping variable.
storm_crop <- subset(sub_storm, CROPDMG > 0.0)
storm_crop$EVTYPE <- as.factor(storm_crop$EVTYPE)
The damage on crops has been recorded in billions, millions, thousands and hundreds of dollars. We need to multiply the cost value by the corresponding multiplier.
# Turn the magnitude code in CROPDMGEXP into a numeric multiplier and compute
# the crop damage in dollars.
#   H = hundreds, K = thousands, M = millions, B = billions.
# Any other code (blank, digits, symbols) is treated as a multiplier of 1, so
# that an unrecognised code can no longer turn into NA and wipe out the total
# of a whole event type when the values are summed.
# NOTE(review): treating unrecognised codes as 1 is an assumption; adjust if
# the data documentation specifies otherwise.
crop_multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
storm_crop$CROPDMGEXP <- toupper(storm_crop$CROPDMGEXP)
crop_factor <- unname(crop_multipliers[storm_crop$CROPDMGEXP])
crop_factor[is.na(crop_factor)] <- 1
# CROPDMGEXP keeps holding the numeric multiplier, as before.
storm_crop$CROPDMGEXP <- crop_factor
storm_crop$totcrop <- storm_crop$CROPDMG * storm_crop$CROPDMGEXP
We then group together the damage on crops by event:
# Total crop damage per event type. aggregate() returns the grouping column
# ("Category") followed by the summed value ("x"); both are renamed.
crop <- aggregate(
  storm_crop$totcrop,
  by = list(Category = storm_crop$EVTYPE),
  FUN = sum
)
crop <- setNames(crop, c("Event.Type", "Crop.Damage"))
In order to find out the five events that have produced the most damage on crops we order them in decreasing order.
# Rank event types by decreasing crop damage.
crop_rank <- order(-crop$Crop.Damage)
# Order the factor levels the same way so plots keep the ranking on the axis.
crop$Event.Type <- factor(crop$Event.Type, levels = crop$Event.Type[crop_rank])
# Five highest-ranked rows and their event types.
a <- crop[crop_rank, ][seq_len(5), ]
topcrops <- a[, 1]
The weather events that had the most impact on crop damage are: DROUGHT, FLOOD, HURRICANE/TYPHOON, HAIL, EXTREME COLD/WIND CHILL.
Damage on property is directly expressed in the PROPDMG variable.
# Records with a positive property damage value; the event type becomes a
# factor so it can be used as a grouping variable.
storm_prop <- subset(sub_storm, PROPDMG > 0.0)
storm_prop$EVTYPE <- as.factor(storm_prop$EVTYPE)
We group all the records of the damage according to the type of event.
# Rebuild the property-damage subset from sub_storm: rows with a positive
# PROPDMG, with the event type stored as a factor.
storm_prop <- subset(sub_storm, PROPDMG > 0.0)
storm_prop$EVTYPE <- as.factor(storm_prop$EVTYPE)
The damage in property is provided in the PROPDMG variable, which needs to be multiplied by the multiplier encoded in PROPDMGEXP according to the magnitude (billions, millions, etc.)
# Turn the magnitude code in PROPDMGEXP into a numeric multiplier and compute
# the property damage in dollars.
#   H = hundreds, K = thousands, M = millions, B = billions.
# Every other code ("-", "+", digits, blank, ...) is treated as a multiplier
# of 1.
#
# Fixes compared with the previous chain of assignments:
#   * "H" was first overwritten with "1", so the later "H" -> 100 rule never
#     applied; hundreds are now scaled by 100.
#   * A second rule mapping the symbol/digit codes to 1000 could never match,
#     because those codes had already been replaced by "1"; the effective
#     behaviour (multiplier 1) is kept and the dead rule is dropped.
#   * Unrecognised or blank codes no longer become NA, which would otherwise
#     make the summed damage of the affected event type NA.
# NOTE(review): treating unrecognised codes as 1 is an assumption; adjust if
# the data documentation specifies otherwise.
prop_multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
storm_prop$PROPDMGEXP <- toupper(storm_prop$PROPDMGEXP)
prop_factor <- unname(prop_multipliers[storm_prop$PROPDMGEXP])
prop_factor[is.na(prop_factor)] <- 1
# PROPDMGEXP keeps holding the numeric multiplier, as before.
storm_prop$PROPDMGEXP <- prop_factor
storm_prop$totprop <- storm_prop$PROPDMG * storm_prop$PROPDMGEXP
We group all the events that have produced damage on property.
# Total property damage per event type. aggregate() returns the grouping
# column ("Category") followed by the summed value ("x"); both are renamed.
prop <- aggregate(
  storm_prop$totprop,
  by = list(Category = storm_prop$EVTYPE),
  FUN = sum
)
prop <- setNames(prop, c("Event.Type", "Property.Damage"))
We can arrange the events in decreasing order to obtain the top five events that have had the most effect on property damage.
# Rank event types by decreasing property damage.
prop_rank <- order(-prop$Property.Damage)
# Order the factor levels the same way so plots keep the ranking on the axis.
prop$Event.Type <- factor(prop$Event.Type, levels = prop$Event.Type[prop_rank])
# Five highest-ranked rows and their event types.
b <- prop[prop_rank, ][seq_len(5), ]
topproperty <- b[, 1]
The weather events that have caused the most damage in properties are HURRICANE/TYPHOON, STORM SURGE/TIDE, TORNADO, WILDFIRE, TROPICAL STORM
A graph showing the impact of weather events in economics in the USA is shown below.
# Bar charts of the top five event types by crop damage (p3) and property
# damage (p4), both expressed in billions of dollars.
# Columns are referenced by bare name inside aes(); ggplot2 resolves them in
# the data frame passed to ggplot(), which avoids the discouraged `df$col`
# form.
p3 <- ggplot(a, aes(x = Event.Type, y = Crop.Damage / 1000000000)) +
  theme_bw() +
  xlab("Event type") +
  ylab("Crop Damage, BUSD") +
  ggtitle("Crop damage across the USA per weather event since 1995") +
  geom_bar(colour = "black", fill = "#DD8888", width = 0.8, stat = "identity")
# The property panel previously reused the crop title; it now names the
# quantity it actually shows.
p4 <- ggplot(b, aes(x = Event.Type, y = Property.Damage / 1000000000)) +
  theme_bw() +
  xlab("Event type") +
  ylab("Property Damage, BUSD") +
  ggtitle("Property damage across the USA per weather event since 1995") +
  geom_bar(colour = "black", fill = "#DD8888", width = 0.8, stat = "identity")
# Stack both panels under a common title (typo "eventes" corrected).
grid.arrange(
  p3, p4,
  ncol = 1,
  top = "Impact of weather events in USA economy from 1995 to 2011"
)
Data from the NOAA Storm Database were used to investigate the effect of weather conditions on health and the economy across the USA from 1995 to 2011. After processing the data and grouping it into the 55 defined weather events, it was found that the events that produced the highest number of injuries were TORNADO, EXCESSIVE HEAT, FLOOD, THUNDERSTORM WIND, LIGHTNING. The events that produced the highest number of fatalities were EXCESSIVE HEAT, TORNADO, FLASH FLOOD, HEAT, LIGHTNING.
With regards to impact on economy, this was quantified by assessing the damage on crops and property. The events that have caused the most damage on crops are: DROUGHT, FLOOD, HURRICANE/TYPHOON, HAIL, EXTREME COLD/WIND CHILL whereas HURRICANE/TYPHOON, STORM SURGE/TIDE, TORNADO, WILDFIRE, TROPICAL STORM have caused the most damage on property.
The results may be quite dependent on the grouping of events according to the 55 categories defined by NOAA.