The objective of this analysis is to observe the impact of various weather hazards, such as storms and tornadoes, on the people and economy of the USA. The data is obtained from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database has various parameters. However, we will restrict our analysis to categorizing the various weather events into generic categories and evaluating the impact on people by measuring fatalities (i.e. deaths) and injuries. We will also evaluate the economic loss to the economy of the United States of America caused by the destruction of property and crops by weather events.
Set the working directory to the directory having the file repdata_data_StormData.csv.bz2 downloaded from the database, and read the file using read.csv.
The dataset that was read is stored in the storm_data data frame.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Directory holding the raw NOAA file. It defaults to the current working
# directory; set the STORM_DATA_DIR environment variable to read the file
# from somewhere else instead of hard-coding a machine-specific setwd().
storm_data_dir <- Sys.getenv("STORM_DATA_DIR", unset = ".")
if (!nzchar(storm_data_dir)) {
  storm_data_dir <- "."
}
storm_data_file <- file.path(storm_data_dir, "repdata_data_StormData.csv.bz2")

# Fail early with a clear message rather than with read.csv()'s generic
# "cannot open file" error.
if (!file.exists(storm_data_file)) {
  stop("Storm data file not found: ", storm_data_file, call. = FALSE)
}

# The file is passed to read.csv() still bzip2-compressed, as before.
storm_data <- read.csv(storm_data_file)
# Keep the event type, magnitude, casualty and damage columns, and use
# toupper() to upper-case the exponent codes of property damage and crop
# damage.
storm_data_processed <- mutate(
  select(
    storm_data,
    EVTYPE, MAG, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ),
  PROPDMGEXP = toupper(PROPDMGEXP),
  CROPDMGEXP = toupper(CROPDMGEXP)
)
The exponent codes are converted with as.numeric, and the resulting NA values are then set to zero. table is used to show the tabular representation.
# Turn the property-damage exponent codes into numeric powers of ten.
# Letter codes are translated through a lookup table, codes that already
# parse as numbers are kept, and everything else becomes 0.
# A vectorised lookup is used instead of `df[cond, ]$col <- value`, which
# stops with an error as soon as one of the codes has no matching row.
prop_exp_lookup <- c(H = 2, K = 3, M = 6, B = 9)
prop_exp_raw <- as.character(storm_data_processed$PROPDMGEXP)
prop_exp_is_letter <- prop_exp_raw %in% names(prop_exp_lookup)

# Only this one coercion is silenced: codes that are not numbers are expected
# to become NA here and are handled right below.
prop_exp <- suppressWarnings(as.numeric(prop_exp_raw))
prop_exp[prop_exp_is_letter] <-
  unname(prop_exp_lookup[prop_exp_raw[prop_exp_is_letter]])
prop_exp[is.na(prop_exp)] <- 0

storm_data_processed$PROPDMGEXP <- prop_exp
table(storm_data_processed$PROPDMGEXP)
##
## 0 1 2 3 4 5 6 7 8 9
## 466164 25 20 424669 4 28 11341 5 1 40
# Turn the crop-damage exponent codes into numeric powers of ten.
# Letter codes are translated through a lookup table, codes that already
# parse as numbers are kept, and everything else becomes 0.
# A vectorised lookup is used instead of `df[cond, ]$col <- value`, which
# stops with an error as soon as one of the codes has no matching row.
crop_exp_lookup <- c(K = 3, M = 6, B = 9)
crop_exp_raw <- as.character(storm_data_processed$CROPDMGEXP)
crop_exp_is_letter <- crop_exp_raw %in% names(crop_exp_lookup)

# Only this one coercion is silenced: codes that are not numbers are expected
# to become NA here and are handled right below.
crop_exp <- suppressWarnings(as.numeric(crop_exp_raw))
crop_exp[crop_exp_is_letter] <-
  unname(crop_exp_lookup[crop_exp_raw[crop_exp_is_letter]])
crop_exp[is.na(crop_exp)] <- 0

storm_data_processed$CROPDMGEXP <- crop_exp
table(storm_data_processed$CROPDMGEXP)
##
## 0 2 3 6 9
## 618439 1 281853 1995 9
# Prop_damage and Crop_damage are obtained by multiplying the property damage
# and crop damage by 10 raised to their respective exponent. Fatalities,
# injuries and both damage amounts are then summed per EVTYPE.
storm_data_summ <- storm_data_processed %>%
  mutate(
    Prop_damage = PROPDMG * (10^PROPDMGEXP),
    Crop_damage = CROPDMG * (10^CROPDMGEXP)
  ) %>%
  select(EVTYPE, FATALITIES, INJURIES, Prop_damage, Crop_damage) %>%
  group_by(EVTYPE) %>%
  # The function is passed directly; funs() is deprecated since dplyr 0.8.0.
  summarise_all(sum)
# grepl() is used, ignoring case, to find which entries of the EVTYPE column
# match a keyword. The resulting logical index is stored in mvc, and mvc is
# then used to assign a generic category name to the matching events.
#
# The steps run in order and each one sees the labels written by the earlier
# steps, so their sequence must be preserved.

# TRUE for every EVTYPE that contains `pattern`, ignoring case. The column is
# read at call time, so the result reflects the relabelling done so far.
evtype_has <- function(pattern) {
  grepl(pattern, storm_data_summ$EVTYPE, ignore.case = TRUE)
}

# TRUE where at least one of the given patterns matches.
evtype_has_any <- function(...) {
  Reduce(`|`, lapply(c(...), evtype_has))
}

# TRUE where every one of the given patterns matches.
evtype_has_all <- function(...) {
  Reduce(`&`, lapply(c(...), evtype_has))
}

mvc <- evtype_has_all("WINTER", "WEATHER") |
  (evtype_has("MIX") & evtype_has_any("WINTRY", "WINTERY"))
storm_data_summ$EVTYPE[mvc] <- "WINTER WEATHER"

mvc <- evtype_has_any("AVALANCHE", "AVALANCE")
storm_data_summ$EVTYPE[mvc] <- "AVALANCHE"

mvc <- evtype_has_any("HURRICANE", "WATERSPOUT")
storm_data_summ$EVTYPE[mvc] <- "HURRICANE"

# Also matches several misspelled variants of the word.
mvc <- evtype_has_any(
  "THUNDERSTORM", "THUNDERSTROM", "THUNDERTORM",
  "THUNDERTSORM", "THUNERSTORM", "THUNDESTORM"
)
storm_data_summ$EVTYPE[mvc] <- "THUNDERSTORM"

mvc <- evtype_has_any("DRY", "DROUGHT")
storm_data_summ$EVTYPE[mvc] <- "DRY CONDITIONS"

mvc <- evtype_has_any("TORNDAO", "TORNADO", "LANDSPOUT")
storm_data_summ$EVTYPE[mvc] <- "TORNADOS"

mvc <- (evtype_has("COAST") &
  evtype_has_any("FLOOD", "SURGE", "EROSION", "STORM")) |
  evtype_has_all("STORM", "SURGE") |
  evtype_has_all("BEACH", "EROSION")
# Plain string, like every other label in this section. The previous
# as.factor() wrapper risked storing the factor's integer code instead of
# the label when EVTYPE is a character vector.
storm_data_summ$EVTYPE[mvc] <- "COASTAL FLOOD"

mvc <- evtype_has_any("HAIL", "GUSTNADO", "FUNNEL CLOUD", "DUST")
storm_data_summ$EVTYPE[mvc] <- "HAIL"

mvc <- evtype_has_any(
  "RAIN", "PRECIP", "EXCESSIVE RAINFALL",
  "HEAVY MIX", "HEAVY SHOWER", "HEAVY SWELLS",
  "MICROBURST", "DOWNBURST", "EXCESSIVE WETNESS"
)
storm_data_summ$EVTYPE[mvc] <- "HEAVY RAIN"

mvc <- evtype_has_all("TIDAL", "FLOOD")
storm_data_summ$EVTYPE[mvc] <- "TIDAL FLOODING"

mvc <- evtype_has("URBAN")
storm_data_summ$EVTYPE[mvc] <- "URBAN FLOOD"

mvc <- evtype_has_any(
  "COLD", "COOL", "FREEZ", "FROST", "LOW TEMPERATURE", "HYPOTHERMIA"
)
storm_data_summ$EVTYPE[mvc] <- "COLD"

mvc <- evtype_has_all("FLASH", "FLOOD") | evtype_has("DAM BREAK")
storm_data_summ$EVTYPE[mvc] <- "FLASH FLOOD"

mvc <- evtype_has("HIGH") &
  evtype_has_any("SURF", "WATER", "SEA", "SWELL", "WAVE")
storm_data_summ$EVTYPE[mvc] <- "HIGH SURF"

# Further sea-state events that are folded into the same HIGH SURF category.
mvc <- evtype_has_all("RIP", "CURRENT") |
  (evtype_has("TIDE") & evtype_has_any("ASTRONOMICAL", "HIGH")) |
  evtype_has_all("ROGUE", "WAVE") |
  (evtype_has("HEAVY") & evtype_has_any("SEA", "SURF")) |
  evtype_has_all("HAZARDOUS", "SURF") |
  (evtype_has("ROUGH") & evtype_has_any("WAVE", "SEA", "SURF"))
storm_data_summ$EVTYPE[mvc] <- "HIGH SURF"

mvc <- evtype_has_all("HEAVY", "SNOW") | evtype_has("BLIZZARD")
storm_data_summ$EVTYPE[mvc] <- "HEAVY SNOW"

# NOTE(review): events abbreviated as TSTM get their own label, separate from
# the THUNDERSTORM category assigned above - confirm this split is intended.
mvc <- evtype_has("TSTM")
storm_data_summ$EVTYPE[mvc] <- "TSTM WIND"

mvc <- evtype_has("FIRE") &
  evtype_has_any("WILD", "BRUSH", "FOREST", "GRASS")
storm_data_summ$EVTYPE[mvc] <- "WILDFIRE"

mvc <- evtype_has_all("WINTER", "STORM")
storm_data_summ$EVTYPE[mvc] <- "WINTER STORM"

mvc <- evtype_has_any("HEAT", "HYPERTHERMIA", "WARM")
storm_data_summ$EVTYPE[mvc] <- "HEAT WAVE"

# Snow events that are not already covered by HEAVY SNOW.
mvc <- evtype_has("SNOW") & !evtype_has("HEAVY")
storm_data_summ$EVTYPE[mvc] <- "SNOW"

mvc <- evtype_has_any("ICE", "SLEET", "GLAZE", "ICY")
storm_data_summ$EVTYPE[mvc] <- "ICE"

# Remaining floods, i.e. those not labelled coastal, flash, tidal or urban.
mvc <- evtype_has("FLOOD") &
  !evtype_has_any("COASTAL", "FLASH", "TIDAL", "URBAN")
storm_data_summ$EVTYPE[mvc] <- "FLOOD/RIVER FLOOD"

# Remaining wind events, i.e. those not labelled TSTM WIND above.
mvc <- (evtype_has("WIND") & !evtype_has("TSTM")) |
  evtype_has("SEVERE TURBULENCE")
storm_data_summ$EVTYPE[mvc] <- "WIND"

mvc <- evtype_has("FOG")
storm_data_summ$EVTYPE[mvc] <- "FOG"

mvc <- evtype_has_any("LIGHTNING", "LIGHTING", "LIGNTNING")
storm_data_summ$EVTYPE[mvc] <- "LIGHTNING"

mvc <- evtype_has_any("LANDSLIDE", "ROCK SLIDE")
storm_data_summ$EVTYPE[mvc] <- "LANDSLIDE"

mvc <- evtype_has_any("Mudslide", "Landslump", "MUD SLIDE")
storm_data_summ$EVTYPE[mvc] <- "MUD SLIDES"

mvc <- evtype_has("TROPICAL")
storm_data_summ$EVTYPE[mvc] <- "TROPICAL STORM"

mvc <- evtype_has("MARINE")
storm_data_summ$EVTYPE[mvc] <- "Marine Accident"

mvc <- evtype_has_any("VOLCANIC ASH", "SMOKE")
storm_data_summ$EVTYPE[mvc] <- "SMOKE"

mvc <- evtype_has("OTHER")
storm_data_summ$EVTYPE[mvc] <- "OTHER"
# group_by is done on the EVTYPE column again so that events which now share
# a generic name are summed together. Event types with no fatalities,
# injuries, crop damage or property damage are dropped, and the result is
# stored in the storm_data_person data frame.
storm_data_person <- storm_data_summ %>%
  group_by(EVTYPE) %>%
  # The function is passed directly; funs() is deprecated since dplyr 0.8.0.
  summarise_all(sum) %>%
  filter(FATALITIES > 0 | INJURIES > 0 | Crop_damage > 0 | Prop_damage > 0)

# Each remaining event type should appear exactly once.
table(as.factor(as.character(storm_data_person$EVTYPE)))
##
## ? APACHE COUNTY AVALANCHE
## 1 1 1
## COASTAL FLOOD COLD DROWNING
## 1 1 1
## DRY CONDITIONS FLASH FLOOD FLOOD/RIVER FLOOD
## 1 1 1
## FOG HAIL HEAT WAVE
## 1 1 1
## HEAVY RAIN HEAVY SNOW HIGH
## 1 1 1
## HIGH SURF HURRICANE ICE
## 1 1 1
## LANDSLIDE LIGHTNING Marine Accident
## 1 1 1
## MUD SLIDES OTHER RAPIDLY RISING WATER
## 1 1 1
## SEICHE SMOKE SNOW
## 1 1 1
## THUNDERSTORM TIDAL FLOODING TORNADOS
## 1 1 1
## TROPICAL STORM TSTM WIND TSUNAMI
## 1 1 1
## TYPHOON URBAN FLOOD WILDFIRE
## 1 1 1
## WIND WINTER STORM WINTER WEATHER
## 1 1 1
The following steps are followed: - The ggplot2 library is used. - The top 10 event types by fatalities and by injuries are selected in descending order, and the graphic objects are stored in g1 and g2. - The gridExtra library is used to plot the g1 and g2 objects in the same grid. - grid.arrange is used with nrow set to 1 to draw both plots in the same row.
library(ggplot2)
# Bar chart of the ten event types with the most fatalities, tallest first,
# with the count printed on top of each bar.
top_fatalities <- arrange(storm_data_person, desc(FATALITIES))[1:10, ]
g1 <- ggplot(
  top_fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "sky blue") +
  geom_text(aes(label = FATALITIES), vjust = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "FATALITIES",
    title = "Number of fatalities (Death) by top 10 Weather Events"
  )

# Same chart for the ten event types with the most injuries.
top_injuries <- arrange(storm_data_person, desc(INJURIES))[1:10, ]
g2 <- ggplot(
  top_injuries,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "sky blue") +
  geom_text(aes(label = INJURIES), vjust = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Injuries",
    title = "Number of injuries by top 10 Weather Events"
  )
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Draw the fatalities chart (g1) and the injuries chart (g2) in a single row.
grid.arrange(g1,g2, nrow=1)
The following steps are followed: - The top 10 event types by property damage and by crop damage are selected in descending order, and the graphic objects are stored in g3 and g4. - options(scipen = 999) is used to remove the exponential labelling on the y axis. - The g3 and g4 objects are plotted.
# Avoid exponential notation on the y axis labels.
options(scipen = 999)

# Bar chart of the ten event types with the highest property damage,
# tallest first.
top_prop_damage <- arrange(storm_data_person, desc(Prop_damage))[1:10, ]
g3 <- ggplot(
  top_prop_damage,
  aes(x = reorder(EVTYPE, -Prop_damage), y = Prop_damage)
) +
  geom_bar(stat = "identity", fill = "sky blue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Property damage",
    title = "Amount of Property Damage by top 10 Weather Events"
  )
g3
# Bar chart of the ten event types with the highest crop damage, tallest
# first.
g4 <- ggplot(arrange(storm_data_person, desc(Crop_damage))[1:10, ],
             aes(x = reorder(EVTYPE, -Crop_damage), y = Crop_damage)) +
  geom_bar(stat = "identity", fill = "sky blue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Event Type") +
  ylab("Crop Damages") +
  # This chart shows crop damage; the title previously said "Property".
  ggtitle("Amount of Crop Damages by top 10 Weather Events")
g4