Synopsis: The data were downloaded from the course website URL and read into R with no issues. Minimal adjustment and modification were needed to prepare the data for analysis. The primary questions to answer were: (1) What are the top 10 weather events in terms of the number of injuries and fatalities? Analysis result: Tornadoes were by far the most destructive weather event in terms of injuries and fatalities, with 91346 injuries and 5633 fatalities. (2) What are the top 10 weather events in terms of economic impact? Analysis result: Tornadoes were by far the most destructive weather event in terms of economic impact, with over $300 million in property and crop damage.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(knitr)
library(R.utils)
## Warning: package 'R.utils' was built under R version 4.0.3
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 4.0.3
## Loading required package: R.methodsS3
## Warning: package 'R.methodsS3' was built under R version 4.0.3
## R.methodsS3 v1.8.1 (2020-08-26 16:20:06 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.24.0 (2020-08-26 16:11:58 UTC) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
##
## throw
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, load, save
## R.utils v2.10.1 (2020-08-26 22:50:31 UTC) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
## warnings
# Fetch the compressed storm data set once (skipped when the archive is
# already present in the working directory), then load it as a data frame.
storm_archive <- "StormData.csv.bz2"
if (!file.exists(storm_archive)) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url, destfile = storm_archive, method = "curl")
}
# read.csv() reads the .bz2 archive directly; text columns stay character.
StormData <- read.csv(storm_archive, stringsAsFactors = FALSE)
# List the column names of the raw storm data frame.
colnames(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Report the number of rows and columns in the raw storm data frame.
dim(StormData)
## [1] 902297 37
# Show each column's type together with a preview of its first values.
str(StormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Set the knitr chunk default so that code is echoed in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# Total the fatalities and injuries for every event type, keep the ten event
# types with the largest combined count, and reshape to long form (one row per
# event type and casualty type) for plotting.
#
# Result columns: EVTYPE, CType ("FATALITIES" or "INJURIES"), Value (count).
Fatalities_by_Storm_Type <- StormData %>%
  group_by(EVTYPE) %>%
  # na.rm = TRUE keeps a single missing record from turning a total into NA.
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(FATALITIES + INJURIES)) %>%
  slice_head(n = 10) %>%
  # pivot_longer() is the maintained replacement for the superseded gather().
  pivot_longer(
    c(FATALITIES, INJURIES),
    names_to = "CType",
    values_to = "Value"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Stacked bar chart of the top-10 casualty table: one bar per event type,
# filled by casualty type. reorder() on -Value places event types with larger
# values first on the x axis.
Casualties <- ggplot(
  Fatalities_by_Storm_Type,
  aes(x = reorder(EVTYPE, -Value), y = Value, fill = CType)
) +
  geom_bar(stat = "identity", colour = "blue") +
  # Rotate the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 10 Events By Casualties",
    x = "Type of event",
    y = "Counts",
    fill = "Type"
  )
print(Casualties)
# Create property and crop damage (in dollars) for use in the analysis, then
# aggregate the damage per event type.

# Translate damage-exponent codes into numeric multipliers.
#
# exponent: character vector of exponent codes (e.g. PROPDMGEXP, CROPDMGEXP).
# Returns a numeric vector of the same length:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9 (case-insensitive),
#   a single digit d -> 10^d,
#   any other code (including the empty string) -> 1.
damage_multiplier <- function(exponent) {
  code <- toupper(exponent)
  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  multiplier <- rep(1, length(code))

  is_digit <- code %in% as.character(0:9)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  is_letter <- code %in% names(letter_scale)
  multiplier[is_letter] <- unname(letter_scale[code[is_letter]])

  multiplier
}

Property_Damage_Exponent <- StormData$PROPDMGEXP
Crop_Damage_Exponent <- StormData$CROPDMGEXP

# Scale the recorded mantissas by their exponent codes to get dollar amounts.
Property_Damage <- StormData$PROPDMG * damage_multiplier(Property_Damage_Exponent)
Crop_Damage <- StormData$CROPDMG * damage_multiplier(Crop_Damage_Exponent)

# NOTE: StormData is overwritten with the top-10 summary table (columns
# EVTYPE, TYPE, VALUE); the plotting code that follows reads it under this
# name.
StormData <- StormData %>%
  # Replace the raw mantissas with the exponent-scaled dollar amounts so the
  # totals below reflect the actual damage.
  mutate(
    PROPDMG = Property_Damage,
    CROPDMG = Crop_Damage
  ) %>%
  # Dropping the columns with exponents
  select(-c(CROPDMGEXP, PROPDMGEXP)) %>%
  # Grouping by event type
  group_by(EVTYPE) %>%
  # Aggregating property damage and crop damage, expressed in millions
  summarise(
    PROPDMG = sum(PROPDMG, na.rm = TRUE) / 1e6,
    CROPDMG = sum(CROPDMG, na.rm = TRUE) / 1e6
  ) %>%
  # Sorting by sum of property damage and crop damage in descending order
  arrange(desc(PROPDMG + CROPDMG)) %>%
  # Taking first 10 records
  slice_head(n = 10) %>%
  # Melting crop/property damage by type for plotting
  pivot_longer(
    c(CROPDMG, PROPDMG),
    names_to = "TYPE",
    values_to = "VALUE"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Stacked bar chart of the top-10 economic table: one bar per event type,
# filled by damage type. reorder() on -VALUE places event types with larger
# values first on the x axis.
Economic_Plot <- ggplot(
  StormData,
  aes(x = reorder(EVTYPE, -VALUE), y = VALUE, fill = TYPE)
) +
  geom_bar(stat = "identity", colour = "green") +
  # Rotate the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0)) +
  labs(
    title = "Top 10 Types of Events By Economic Consequences",
    x = "Type of event",
    y = "Value (in Millions)",
    fill = "Type"
  )
print(Economic_Plot)