Data collected by the NOAA can be used to set priorities when allocating budget for disaster prevention and recovery. This analysis identifies the weather event types that are most harmful in terms of human and economic impact. Human impact is measured by the number of injuries and fatalities, while economic impact is measured by the property damage caused by each weather event type.
Reading the csv
# Download the NOAA storm data archive into the current working directory
# (only if it is not already there) and load it.
# The file that is read is the same file that was downloaded; no hard-coded,
# machine-specific working directory is required.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "stormdata.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive byte-exact on every platform.
  download.file(url = storm_url, destfile = storm_file, mode = "wb")
}
# read.csv() reads the bzip2-compressed file directly.
data <- read.csv(storm_file, header = TRUE)
# read.csv() already returns a data frame, so no conversion is needed.
dframe <- data
1b. Preprocessing
load needed libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.1
## Warning: package 'tibble' was built under R version 4.4.1
## Warning: package 'tidyr' was built under R version 4.4.1
## Warning: package 'readr' was built under R version 4.4.1
## Warning: package 'purrr' was built under R version 4.4.1
## Warning: package 'stringr' was built under R version 4.4.1
## Warning: package 'forcats' was built under R version 4.4.1
## Warning: package 'lubridate' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(chron)
## Warning: package 'chron' was built under R version 4.4.1
##
## Attaching package: 'chron'
##
## The following objects are masked from 'package:lubridate':
##
## days, hours, minutes, seconds, years
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.1
To determine the event type that causes the most harm, the total number of fatalities and injuries per event type is calculated first.
# Total fatalities and injuries per event type.
# Result: `simpframe`, one row per distinct EVTYPE (in order of first
# appearance in `dframe`) with columns evtype, fatalities and injuries.
# `evtype` and `eventtypes` stay defined because the cost calculation further
# down reuses them.
evtype <- unique(dframe$EVTYPE)
eventtypes <- length(evtype)

# rowsum() adds up both columns for every event type in one vectorized pass,
# replacing a row-by-row loop that rescanned the result table for each record.
harm_totals <- rowsum(
  cbind(fatalities = dframe$FATALITIES, injuries = dframe$INJURIES),
  group = dframe$EVTYPE,
  reorder = FALSE
)

# Align the per-group sums explicitly with the order of `evtype`.
row_order <- match(as.character(evtype), rownames(harm_totals))
fatalities <- unname(harm_totals[row_order, "fatalities"])
injuries <- unname(harm_totals[row_order, "injuries"])
simpframe <- data.frame(evtype, fatalities, injuries)
To determine the most harmful event type in economic terms, the total property damage (in USD) caused by each event type is calculated.
# Total property damage in USD per event type.
# Result: `simpframe2`, one row per entry of `evtype` with columns evtype and
# cost.
#
# PROPDMG holds the damage figure and PROPDMGEXP its magnitude code:
# K = thousands, M = millions, B = billions. Each code is applied exactly once
# per record. (A chain of separate `if` statements whose only `else` belongs
# to the "B" test would reset every K and M record to the unscaled value.)
# Codes are matched case-insensitively; any other code, including an empty
# one, leaves PROPDMG unscaled.
unit_multiplier <- c(K = 1e3, M = 1e6, B = 1e9)
exp_code <- toupper(as.character(dframe$PROPDMGEXP))
multiplier <- unname(unit_multiplier[exp_code])
multiplier[is.na(multiplier)] <- 1
damage_usd <- dframe$PROPDMG * multiplier

# Sum the scaled damage per event type in one vectorized pass and align the
# result with the order of `evtype`.
cost_totals <- rowsum(damage_usd, group = dframe$EVTYPE, reorder = FALSE)
cost <- unname(cost_totals[match(as.character(evtype), rownames(cost_totals)), 1])
simpframe2 <- data.frame(evtype, cost)
The most harmful weather event type, measured by total fatalities, is:
# Event type with the highest total number of fatalities.
simpframe$evtype[which.max(simpframe$fatalities)]
## [1] "TORNADO"
# The ten event types with the most fatalities.
tophits <- slice_max(simpframe, fatalities, n = 10)
# `names.arg` is spelled out instead of relying on partial matching of `name`.
# las = 2 draws the long event-type labels perpendicular to the axis and
# cex.names shrinks them, so the labels do not overlap.
barplot(
  height = tophits$fatalities,
  names.arg = tophits$evtype,
  las = 2,
  cex.names = 0.6,
  main = "Top 10 weather event types by fatalities",
  ylab = "Fatalities"
)
This bar plot shows the 10 weather event types with the most fatalities
and their respective fatality counts.
The most economically damaging weather event is:
# Event type with the highest total property damage.
simpframe2$evtype[which.max(simpframe2$cost)]
## [1] "FLOOD"
# The ten event types with the highest total property damage.
tophits <- slice_max(simpframe2, cost, n = 10)
# `names.arg` is spelled out instead of relying on partial matching of `name`.
# las = 2 draws the long event-type labels perpendicular to the axis and
# cex.names shrinks them, so the labels do not overlap.
barplot(
  height = tophits$cost,
  names.arg = tophits$evtype,
  las = 2,
  cex.names = 0.6,
  main = "Top 10 weather event types by property damage",
  ylab = "Property damage (USD)"
)
This bar plot shows the 10 most economically damaging weather event types
and their respective property damage in USD.