The data was downloaded to the report repository
# Fetch the compressed storm data only when no local copy exists yet
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("StormData.bz2")) {
  download.file(url = fileURL, destfile = "StormData.bz2", method = "curl")
}
Steps involved in cleaning up of the data include,
# Read the bz2-compressed CSV; read.csv already defaults to header = TRUE, sep = ","
dat <- read.csv("StormData.bz2")
This data set has dimensions 902297, 37 and contains the following variables
# Print the structure of the raw data: column names, types and leading values
str(dat)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Lower-case column names are used for the rest of the processing
names(dat) <- tolower(names(dat))
if (!nzchar(system.file(package = "dplyr"))) {
  install.packages("dplyr")
}
library(dplyr, quietly = TRUE, warn.conflicts = FALSE)
# Parse the begin date, keep events from 1996 onwards and retain only the
# columns needed later (identifier, event type, date, state fips, health and
# damage figures)
stormdat <- dat %>%
  mutate(
    bgn_date = as.Date(bgn_date, format = "%m/%d/%Y"),
    # Converting all event types to lower case for easier comparison later
    evtype = tolower(evtype)
  ) %>%
  filter(bgn_date >= "1996-01-01") %>%
  select(
    refnum, evtype, bgn_date, state__,
    fatalities, injuries,
    propdmg, propdmgexp, cropdmg, cropdmgexp
  )
Dimensions of this data set : 653530, 10
# Converts damage exponent codes into dollar multipliers.
#
# exponentCode: character vector of exponent codes.
# Returns a numeric vector of the same length: 10^3 for "K", 10^6 for "M",
# 10^9 for "B" and 0 for every other code (including blank and NA).
#
# The lookup is keyed by the code itself, so it no longer depends on the order
# in which the codes happen to appear in the data.
# NOTE(review): the previous version assigned the multipliers by position in
# unique(); this assumes those positions were "K", "", "M", "B" (and "0" for
# property damage) - confirm against the data.
damageMultiplier <- function(exponentCode) {
  multipliers <- c(K = 10^3, M = 10^6, B = 10^9)
  out <- unname(multipliers[as.character(exponentCode)])
  # Codes without an entry in the lookup carry no dollar value
  out[is.na(out)] <- 0
  out
}

# Creating the exponential to number conversion code
propdamagecode <- data.frame(code = unique(stormdat$propdmgexp))
propdamagecode$key <- damageMultiplier(propdamagecode$code)
cropdamagecode <- data.frame(code = unique(stormdat$cropdmgexp))
cropdamagecode$key <- damageMultiplier(cropdamagecode$code)

# Changing exponential columns to numbers (vectorised, one lookup per column)
stormdat$propdmgexp <- damageMultiplier(stormdat$propdmgexp)
stormdat$cropdmgexp <- damageMultiplier(stormdat$cropdmgexp)

# Creating new columns for actual damage burden in dollars and removing the old ones
stormdat <- stormdat %>%
  mutate(
    propertyDamageBurden = propdmg * propdmgexp,
    cropDamageBurden = cropdmg * cropdmgexp
  ) %>%
  select(
    refnum, evtype, bgn_date, state__,
    fatalities, injuries,
    propertyDamageBurden, cropDamageBurden
  )

# Keeping only the events that caused at least one fatality, injury or
# some property or crop damage
stormdat <- stormdat %>%
  filter(
    fatalities != 0 |
      injuries != 0 |
      propertyDamageBurden != 0 |
      cropDamageBurden != 0
  )
New dimensions of the data set : 201318, 8
Correction of typos/mistakes in the event type column : This was done in the following steps,
if (!nzchar(system.file(package = "rJava"))) {
  install.packages("rJava")
}
library(rJava)
# Installing (from GitHub, when missing) and loading the tabulizer package,
# which allows tables to be extracted from PDF files
if (!nzchar(system.file(package = "tabulizer"))) {
  remotes::install_github(
    c("ropensci/tabulizerjars", "ropensci/tabulizer"),
    INSTALL_opts = "--no-multiarch"
  )
}
library(tabulizer)
# Extracting the table containing the official event list from page 6
PDFfile <- "https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf"
tabs <- extract_tables(PDFfile, pages = 6)
# Dropping the first three rows of the first table and flattening its
# columns 1 and 3 into a single vector
eventlist <- as.vector(tabs[[1]][-(1:3), c(1, 3)])
# Distinct raw (lower-cased) event names present in the data
uniqueEvents <- unique(stormdat$evtype)
# Expanding the "tstm" abbreviation to "thunderstorm"
correctedUniqueEvents <- gsub("tstm", "thunderstorm", uniqueEvents)

# Rebuilds one "thunderstorm wind" event name from its space-separated words,
# removing the wind strength included with the event name.
#
# words: character vector holding the words of a single event name.
# Returns a single string:
#   * names starting with "thunderstorm" keep their first two words,
#   * names starting with a blank (leading space) keep words two and three,
#   * any other name keeps at most its first three words.
normaliseThunderstormWind <- function(words) {
  if (words[1] == "thunderstorm") {
    words <- words[1:2]
  } else if (words[1] == "") {
    words <- words[2:3]
  }
  words <- words[!is.na(words)]
  paste(words[seq_len(min(3, length(words)))], collapse = " ")
}

# Getting the corrected event names that contain "thunderstorm wind"
isThunderstormWind <- grepl("thunderstorm wind", correctedUniqueEvents)
thunderstormWindList <- strsplit(correctedUniqueEvents[isThunderstormWind], " ")

# vapply always returns a character vector with one name per input; sapply
# would collapse the result into a matrix whenever every name reduced to the
# same number of words, which would break the replacement below
thunderstormWindCorrected <- vapply(
  thunderstormWindList,
  normaliseThunderstormWind,
  character(1),
  USE.NAMES = FALSE
)

# stringr is installed and loaded here as before; nothing in this block calls it
if (system.file(package = "stringr") == "") {
  install.packages("stringr")
}
library(stringr)

# Replacing with the corrected names
correctedUniqueEvents[isThunderstormWind] <- thunderstormWindCorrected

# Manual corrections for names the approximate matching would not resolve
correctedUniqueEvents <- gsub("heavy surf", "high surf", correctedUniqueEvents)
correctedUniqueEvents <- gsub("hurricane edouard", "hurricane", correctedUniqueEvents)
if (!nzchar(system.file(package = "stringdist"))) {
  install.packages("stringdist")
}
library(stringdist)
# For every corrected event name, the position of its approximate match in the
# official event type list (OSA distance, at most 8 edits); NA when no entry
# is within that distance
# NOTE(review): the event names were lower-cased earlier while the official
# names appear to be title-cased, so each capital letter presumably counts as
# an edit - confirm whether matching against tolower(eventlist) was intended
matchingIndex <- amatch(
  correctedUniqueEvents,
  eventlist,
  method = "osa",
  maxDist = 8
)
# Swapping each event name for its official counterpart
correctedUniqueEvents <- eventlist[matchingIndex]
# Names without a match fall back to the uncorrected raw value so that no
# data is lost
indNA <- which(is.na(correctedUniqueEvents))
correctedUniqueEvents[indNA] <- uniqueEvents[indNA]
Several values for the maxDist argument were tried, but the value 8 managed to achieve a good balance of NAs (22.58%) and corrections.
# Creating a replacement code: one row per raw event name (code) together with
# its corrected name (key)
eventsCode <- data.frame(code = uniqueEvents, key = correctedUniqueEvents)
# Replacing the values with one vectorised lookup. match() gives the row of
# eventsCode for every event at once and yields NA for a name that is not in
# the table, instead of scanning the whole table once per row of stormdat
stormdat$correctedEventType <- eventsCode$key[match(stormdat$evtype, eventsCode$code)]
Thus, the number of unique event types was reduced from 186 to 90, which is closer to the number of event types in the official list (48).
Changing the state FIPS column to represent the corresponding regions of the country : The state FIPS codes given can be used to generate a column of regions where each event took place. The regions used here are South, West, Northeast, Midwest, Territories, and Maritime Areas. The state FIPS to state name conversion was obtained from the Census Website. The state name to region conversion was obtained from the following GitHub repository, Github Repo, which obtained its information from the Census Region Division Map.
# Downloading and reading in the state fips to state name conversion file
url <- "https://www2.census.gov/geo/docs/reference/state.txt"
if (!file.exists("state_fips.txt")) {
  download.file(url, "state_fips.txt", quiet = TRUE)
}
stateFips <- read.table("state_fips.txt", header = TRUE, sep = "|")
stateFips <- stateFips %>%
  select(STATE, STATE_NAME)
names(stateFips) <- tolower(names(stateFips))

# Reading in the state name to region conversion file
url <- "https://raw.githubusercontent.com/cphalpert/census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv"
if (!file.exists("regions.csv")) {
  download.file(url, "regions.csv", quiet = TRUE)
}
regionsbyState <- read.csv("regions.csv", header = TRUE)
regionsbyState <- regionsbyState %>%
  select(State, Region)

# Extracting the unique state fips in the data set
uniqueStateFips <- unique(stormdat$state__)

# Matching the state fips in our data set with the official list
fipsRow <- match(uniqueStateFips, stateFips$state)
# The NAs in this represent the maritime areas
indNA <- which(is.na(fipsRow))

# State name for every fips code (NA where the code is not in the official
# list). Indexing with the match result directly avoids the negative-index
# form states[-indNA], which selects nothing at all when indNA is empty
states <- stateFips$state_name[fipsRow]

# Convert all the values into regions
regions <- regionsbyState$Region[match(states, regionsbyState$State)]
# A known state name without a region belongs to the territories ...
regions[is.na(regions)] <- "Territories"
# ... while a fips code without a state name is a maritime area
regions[is.na(states)] <- "Maritime Areas"

# Adding a region column to each event
regionCode <- data.frame(code = uniqueStateFips, key = as.vector(regions))
index <- match(stormdat$state__, regionCode$code)
stormdat$region <- regionCode$key[index]
Final processing step : Removing the old event type column and the state fips column and keeping the corrected event types and regions only, refnum is kept as a reference for any values to the original data set and the beginning date is converted to year the event took place
stormdat <- stormdat %>%
mutate(year = format(bgn_date, "%Y")) %>%
select(correctedEventType, year, region,
fatalities, injuries,
propertyDamageBurden, cropDamageBurden, refnum)
Thus, the cleaned data set on which the analysis in this report is done is stormdat
# Print the structure of the cleaned data set
str(stormdat)
## 'data.frame': 201318 obs. of 8 variables:
## $ correctedEventType : chr "Winter Storm" "Tornado" "Thunderstorm Wind" "Thunderstorm Wind" ...
## $ year : chr "1996" "1996" "1996" "1996" ...
## $ region : chr "South" "South" "South" "South" ...
## $ fatalities : num 0 0 0 0 0 0 0 0 0 0 ...
## $ injuries : num 0 0 0 0 0 0 0 0 0 0 ...
## $ propertyDamageBurden: num 380000 100000 3000 5000 2000 400000 12000 8000 12000 75000 ...
## $ cropDamageBurden : num 38000 0 0 0 0 0 0 0 0 0 ...
## $ refnum : num 248768 248769 248770 248771 248772 ...
Code Book for this data set
The following analysis was done to arrive at the results,
Creating a summary data frame which summarizes, for each event type, the total number of fatalities and injuries, the total property damage and crop damage burden (both in billions of US dollars) and the frequency of the event type. Mean values of fatalities and injuries are also added to provide additional information on events which occur very rarely but have devastating effects. The total burden on population health and on the economy (in billions of US dollars) is also added.
# One row per corrected event type: health totals and means, damage totals in
# billions of dollars (rounded to two decimals) and the number of events
stormDataSummary <- stormdat %>%
  summarise(
    totalFatalities = sum(fatalities),
    meanFatalities = round(mean(fatalities), digits = 2),
    totalInjuries = sum(injuries),
    meanInjuries = round(mean(injuries), digits = 2),
    totalHealthBurden = totalFatalities + totalInjuries,
    totalPropertyDamageBurden = round(sum(propertyDamageBurden) / 1e9, digits = 2),
    totalCropDamageBurden = round(sum(cropDamageBurden) / 1e9, digits = 2),
    # Sum of the two already-rounded damage totals
    totalEconomicBurden = totalPropertyDamageBurden + totalCropDamageBurden,
    eventFrequency = n(),
    .by = correctedEventType
  )
# This will be used to figure out the top events
The top event types for effect on population health and on the economy were selected by first getting the top 5 events for each sub-category of effect (fatalities, injuries, property damage expenses, crop damage expenses) and then combining the lists within each category, to make sure the effect is not being confounded by the sub-categories.
# Five event types with the most fatalities
fatalityIndex <- order(stormDataSummary$totalFatalities, decreasing = TRUE)[seq_len(5)]
top5Fatalities <- stormDataSummary$correctedEventType[fatalityIndex]
# Five event types with the most injuries
injuryIndex <- order(stormDataSummary$totalInjuries, decreasing = TRUE)[seq_len(5)]
top5Injuries <- stormDataSummary$correctedEventType[injuryIndex]
# Union of both lists; "Tsunami" is appended by hand on top of the ranking
topEventsbyHealth <- c(unique(c(top5Fatalities, top5Injuries)), "Tsunami")
# Rows of the event data and of the summary that belong to those event types
healthIndex <- which(stormdat$correctedEventType %in% topEventsbyHealth)
ind1 <- which(stormDataSummary$correctedEventType %in% topEventsbyHealth)
# Health columns of the summary, ordered from highest to lowest total burden
topHealthEventsSummary <- stormDataSummary[ind1, ] %>%
  select(
    correctedEventType, totalHealthBurden,
    totalFatalities, totalInjuries,
    meanFatalities, meanInjuries, eventFrequency
  ) %>%
  arrange(desc(totalHealthBurden))
# This will be used later to create a table
# Yearly fatalities and injuries for the top health events, stacked into one
# long data frame with a healthEffect column telling the two statistics apart
healthRows <- stormdat[healthIndex, ]
totalFatalities <- healthRows %>%
  summarise(
    total = sum(fatalities),
    .by = c(correctedEventType, year)
  ) %>%
  mutate(healthEffect = "Fatalities")
totalInjuries <- healthRows %>%
  summarise(
    total = sum(injuries),
    .by = c(correctedEventType, year)
  ) %>%
  mutate(healthEffect = "Direct Injuries")
# Combine the summaries
totalHealthEffect <- rbind(totalFatalities, totalInjuries)
# Factor levels follow the order of the summary table, i.e. highest total
# health burden first
totalHealthEffect$correctedEventType <- factor(
  totalHealthEffect$correctedEventType,
  levels = topHealthEventsSummary$correctedEventType
)
# This will be used later to create a plot
# Five event types with the highest property damage
propertyIndex <- order(stormDataSummary$totalPropertyDamageBurden, decreasing = TRUE)[seq_len(5)]
top5propertyDamage <- stormDataSummary$correctedEventType[propertyIndex]
# Five event types with the highest crop damage
cropIndex <- order(stormDataSummary$totalCropDamageBurden, decreasing = TRUE)[seq_len(5)]
top5cropDamage <- stormDataSummary$correctedEventType[cropIndex]
# Union of both lists
topEventsbyEconomy <- unique(c(top5propertyDamage, top5cropDamage))
# Rows of the event data and of the summary that belong to those event types
economyIndex <- which(stormdat$correctedEventType %in% topEventsbyEconomy)
ind2 <- which(stormDataSummary$correctedEventType %in% topEventsbyEconomy)
# Economic columns of the summary, ordered from highest to lowest total burden
topEconomyEventsSummary <- stormDataSummary[ind2, ] %>%
  select(
    correctedEventType, totalPropertyDamageBurden,
    totalCropDamageBurden, totalEconomicBurden,
    eventFrequency
  ) %>%
  arrange(desc(totalEconomicBurden))
# This will be used later to create a table
# Yearly property and crop damage (in millions of dollars, two decimals) for
# the top economic events, stacked into one long data frame with an
# economyEffect column telling the two statistics apart
economyRows <- stormdat[economyIndex, ]
totalpropertyDamage <- economyRows %>%
  summarise(
    total = round(sum(propertyDamageBurden) / 1e6, digits = 2),
    .by = c(correctedEventType, year)
  ) %>%
  mutate(economyEffect = "Property Damage")
totalcropDamage <- economyRows %>%
  summarise(
    total = round(sum(cropDamageBurden) / 1e6, digits = 2),
    .by = c(correctedEventType, year)
  ) %>%
  mutate(economyEffect = "Crop Damage")
# Combine the summaries
totalEconomyEffect <- rbind(totalpropertyDamage, totalcropDamage)
# Factor levels follow the order of the summary table, i.e. highest total
# economic burden first
totalEconomyEffect$correctedEventType <- factor(
  totalEconomyEffect$correctedEventType,
  levels = topEconomyEventsSummary$correctedEventType
)
# This will be used later to create a plot
# Event types that are in the top list for health or for the economy
topEvents <- unique(c(topEventsbyHealth, topEventsbyEconomy))
topIndex <- which(stormdat$correctedEventType %in% topEvents)
# Number of events per event type, year and region
frequencyByRegionYear <- stormdat[topIndex, ] %>%
  summarise(
    frequency = n(),
    .by = c(correctedEventType, year, region)
  )
# This will be used later to create a plot
# Number of events per year and event type
frequencyByYear <- stormdat[topIndex, ] %>%
  summarise(
    frequency = n(),
    .by = c(year, correctedEventType)
  )
# This will be used to draw some conclusions later
# Installing if necessary and loading required packages
if(system.file(package = "tibble") == "") install.packages("tibble", quiet = TRUE)
if(system.file(package = "gt") == "") install.packages("gt", quiet = TRUE)
suppressPackageStartupMessages(library(gt, warn.conflicts = FALSE, quietly = TRUE))
## Warning: package 'gt' was built under R version 4.3.1
library(tibble)
# Creating a table object for top type of events most harmful for population health
table1 <- gt(as_tibble(topHealthEventsSummary)) %>%
  tab_header(
    title = md("**Top types of Events most harmful with respect to Population Health**"),
    subtitle = md("*January 1996 to 2011*")
  ) %>%
  tab_source_note(
    source_note = md("Reference : Storm Data collected by *U.S. National Oceanic and Atmospheric Administration (NOAA)*")
  ) %>%
  # Grouping the total and mean columns of each statistic under one heading
  tab_spanner(
    label = md("**Fatalities**"),
    columns = c(totalFatalities, meanFatalities)
  ) %>%
  tab_spanner(
    label = md("**Direct Injuries**"),
    columns = c(totalInjuries, meanInjuries)
  ) %>%
  cols_label(
    correctedEventType = md("**Event Type**"),
    totalHealthBurden = md("**Total Health Burden**"),
    totalFatalities = md("*Total*"),
    totalInjuries = md("*Total*"),
    meanFatalities = md("*Mean*"),
    meanInjuries = md("*Mean*"),
    eventFrequency = md("**Frequency**")
  ) %>%
  # Solid borders around every body cell, spanner and column label.
  # The argument is spelled out as `rows`; the earlier `row` only worked
  # through partial argument matching
  tab_style(
    style = cell_borders(sides = "all", style = "solid"),
    locations = list(
      cells_body(columns = everything(), rows = everything()),
      cells_column_spanners(spanners = everything()),
      cells_column_labels(columns = everything())
    )
  )
# Printing the table
table1
| Top types of Events most harmful with respect to Population Health | ||||||
| January 1996 to 2011 | ||||||
| Event Type | Total Health Burden | Fatalities | Direct Injuries | Frequency | ||
|---|---|---|---|---|---|---|
| Total | Mean | Total | Mean | |||
| Tornado | 22204 | 1515 | 0.12 | 20689 | 1.67 | 12384 |
| Flood | 8309 | 514 | 0.05 | 7795 | 0.80 | 9743 |
| Excessive Heat | 8190 | 1797 | 2.53 | 6393 | 9.00 | 710 |
| Thunderstorm Wind | 5509 | 379 | 0.00 | 5130 | 0.05 | 105374 |
| Lightning | 4796 | 653 | 0.06 | 4143 | 0.37 | 11294 |
| Flash Flood | 2562 | 888 | 0.05 | 1674 | 0.09 | 19094 |
| Rip Current | 1045 | 542 | 0.90 | 503 | 0.83 | 603 |
| Tsunami | 162 | 33 | 2.36 | 129 | 9.21 | 14 |
| Reference : Storm Data collected by U.S. National Oceanic and Atmospheric Administration (NOAA) | ||||||
Conclusions :
if (!nzchar(system.file(package = "ggplot2"))) {
  install.packages("ggplot2", quiet = TRUE)
}
if (!nzchar(system.file(package = "lemon"))) {
  install.packages("lemon", quiet = TRUE)
}
library(ggplot2)
suppressPackageStartupMessages(library(lemon))
## Warning: package 'lemon' was built under R version 4.3.1
# First plot: one stacked bar per year, bar length = total health effect,
# fill colour = fatalities or injuries
plot1 <- ggplot(
  totalHealthEffect,
  aes(x = year, y = total, fill = healthEffect)
) +
  geom_col() +
  # One panel per event type, each with its own axis ranges
  facet_wrap(~correctedEventType, scales = "free") +
  # Years run along the vertical axis after the flip
  coord_flip() +
  # Descriptive title, axis labels and data source
  labs(
    title = "Fatalities and Injuries for each top types of events for each year",
    x = "Years",
    y = "Counts",
    fill = "",
    caption = "Reference : Storm Data collected by U.S. National Oceanic and Atmospheric Administration (NOAA)"
  ) +
  # Black and white theme with enlarged text
  theme_bw(base_size = 40) +
  # Even larger title, axis titles and legend text; italic caption
  theme(
    title = element_text(size = 60),
    legend.text = element_text(size = 60),
    plot.caption = element_text(face = "italic")
  )
# Printing the plot with the legend moved into the panel slot "panel-3-3"
reposition_legend(plot1, panel = "panel-3-3", position = "center")
Conclusions :
# Creating a table object for top type of events most harmful for economy
table2 <- gt(as_tibble(topEconomyEventsSummary)) %>%
  tab_header(
    title = md("**Top types of Events with greatest Economic Consequences**"),
    subtitle = md("*January 1996 to 2011*")
  ) %>%
  tab_source_note(
    source_note = md("Reference : Storm Data collected by *U.S. National Oceanic and Atmospheric Administration (NOAA)*")
  ) %>%
  # Grouping the three expense columns under one heading that states the unit
  tab_spanner(
    label = md("*(Expenses in billions of U.S. Dollars)*"),
    columns = c(totalPropertyDamageBurden, totalCropDamageBurden,
                totalEconomicBurden)
  ) %>%
  cols_label(
    correctedEventType = md("**Event Type**"),
    totalPropertyDamageBurden = md("**Property Damage**"),
    totalCropDamageBurden = md("**Crop Damage**"),
    totalEconomicBurden = md("**Total**"),
    eventFrequency = md("**Frequency**")
  ) %>%
  # Solid borders around every body cell, spanner and column label.
  # The argument is spelled out as `rows`; the earlier `row` only worked
  # through partial argument matching
  tab_style(
    style = cell_borders(sides = "all", style = "solid"),
    locations = list(
      cells_body(columns = everything(), rows = everything()),
      cells_column_spanners(spanners = everything()),
      cells_column_labels(columns = everything())
    )
  )
# Printing the table
table2
| Top types of Events with greatest Economic Consequences | ||||
| January 1996 to 2011 | ||||
| Event Type | (Expenses in billions of U.S. Dollars) | Frequency | ||
|---|---|---|---|---|
| Property Damage | Crop Damage | Total | ||
| Flood | 144.56 | 4.98 | 149.54 | 9743 |
| Hurricane (Typhoon) | 69.31 | 2.61 | 71.92 | 72 |
| Storm Surge/Tide | 47.83 | 0.00 | 47.83 | 216 |
| Tornado | 24.62 | 0.28 | 24.90 | 12384 |
| Hail | 14.60 | 2.48 | 17.08 | 22683 |
| Flash Flood | 15.24 | 1.34 | 16.58 | 19094 |
| Seiche | 11.81 | 2.74 | 14.55 | 136 |
| Drought | 1.05 | 13.37 | 14.42 | 265 |
| Reference : Storm Data collected by U.S. National Oceanic and Atmospheric Administration (NOAA) | ||||
Conclusions :
# Second plot: one stacked bar per year, bar length = total economic effect,
# fill colour = property or crop damage
plot2 <- ggplot(
  totalEconomyEffect,
  aes(x = year, y = total, fill = economyEffect)
) +
  geom_col() +
  # One panel per event type, each with its own axis ranges
  facet_wrap(~correctedEventType, scales = "free") +
  # Years run along the vertical axis after the flip
  coord_flip() +
  # Descriptive title, axis labels and data source
  labs(
    title = "Expenses on Property and Crop Damage for each top types of events for each year",
    x = "Years",
    y = "Expenses (in Millions of U.S. Dollars)",
    fill = "",
    caption = "Reference : Storm Data collected by U.S. National Oceanic and Atmospheric Administration (NOAA)"
  ) +
  # Black and white theme with enlarged text
  theme_bw(base_size = 40) +
  # Even larger title, axis titles and legend text; italic caption
  theme(
    title = element_text(size = 60),
    legend.text = element_text(size = 60),
    plot.caption = element_text(face = "italic")
  )
# Printing the plot with the legend moved into the panel slot "panel-3-3"
reposition_legend(plot2, panel = "panel-3-3", position = "center")
Conclusions :
# Third plot: one stacked bar per year, bar length = number of events,
# fill colour = region of the country
if (!nzchar(system.file(package = "RColorBrewer"))) {
  install.packages("RColorBrewer")
}
library(RColorBrewer)
plot3 <- ggplot(
  frequencyByRegionYear,
  aes(x = year, y = frequency, fill = region)
) +
  geom_col() +
  # One panel per event type, each with its own axis ranges
  facet_wrap(~correctedEventType, scales = "free") +
  # Years run along the vertical axis after the flip
  coord_flip() +
  # "Dark2" palette for the region fill colours
  scale_fill_brewer(palette = "Dark2") +
  # Descriptive title, axis labels and data source
  labs(
    title = "Frequency of each top types of events by year and region of the country",
    x = "Years",
    y = "Counts",
    fill = "Region",
    caption = "Reference : Storm Data collected by U.S. National Oceanic and Atmospheric Administration (NOAA)"
  ) +
  # Black and white theme with enlarged text
  theme_bw(base_size = 45) +
  # Even larger title, axis titles and legend text; italic caption
  theme(
    title = element_text(size = 65),
    legend.text = element_text(size = 65),
    plot.caption = element_text(face = "italic")
  )
# Printing the plot with the legend moved into the panel slot "panel-4-4"
reposition_legend(plot3, panel = "panel-4-4", position = "center")
Conclusions :
The regions used to divide the areas in this plot were taken from the Census Region and Divisions of the United States
The limitations of this report include,
The U.S. National Oceanic and Atmospheric Administration (NOAA) maintains a storm database which records information on the location and timing of such events, the “magnitude” of such events which are measured differently for different types of events, estimates on fatalities, injuries, property and crop damages, any other remarks unique to the event.
The data was made available by the instructors of the Reproducible Research course through the following link: Storm Data. The database covers events from 1950 to November 2011. The documentation explaining the variables is available here:
State FIPS to state name conversion was obtained from the Census Website
State name to region conversion was obtained from the following GitHub repository, Github Repo, which obtained its information from the Census Region Division Map
Written in an R Markdown file in R version 4.3.0 (2023-04-21 ucrt) using RStudio IDE
Packages used for this report,
Creation Date of Rmarkdown file : 2023-06-27 22:42:11.726139
Last Modified Date of Rmarkdown file : 2023-07-01 17:26:01.310686