This report explores Oklahoma tornado data such as paths and density through the use of map visaulizations. Due to an abundance of data, data from 2016 to 2021 was selected for the exploration. The below libraries are used for handling file importation, data tidying/transformation, and visualization.
First, county, tornado points, and tornado paths .shp files are read and filtered for data ranging from 2016 to 2021. Then, the points and paths are subset to only include essential fields for ease of reference.
# read in .shp files
county <- st_read(here("data/Chapter5", "ok_counties.shp"), quiet = TRUE)
point <- st_read(here("data/Chapter5", "ok_tornado_point.shp"), quiet = TRUE)
path <- st_read(here("data/Chapter5", "ok_tornado_path.shp"), quiet = TRUE)
# Subset tornado data
# 2016 to 2021 points
points16_21 <- point %>%
# filter between 2016 and 2021
filter(yr >= 2016 & yr <= 2021) %>%
# subset cols
select(om, yr, date)
# 2016 to 2021 paths
paths16_21 <- path %>%
# filter between 2016 and 2021
filter(yr >= 2016 & yr <= 2021) %>%
# subset cols
select(om, yr, date)
# summarize by yr
tornadoyears = points16_21 %>%
# group by year
group_by(yr) %>%
# calculate count
summarise(n = n())
In order to visualize the Oklahoma tornado data set, first a map of Oklahoma counties are drawn with the tornado points overlayed on top. The points correspond to the location of a recorded tornado event. The color of each point corresponds to the year in which that tornado occurred.
# create map of tornado points
mp_points <- ggplot() +
geom_sf(data = county,
# no fill color
fill = NA,
# grey boundaries
color = "grey",
linewidth = .75) +
geom_sf(data = paths16_21,
# format year as factor
aes(color = as.factor(yr)),
linewidth = 1.25) +
scale_color_brewer(name = "Year", palette = "Dark2") +
theme_void()
# create map of tornado paths
mp_paths <- ggplot() +
geom_sf(data = county,
fill = NA,
color = "grey",
linewidth = .75) +
geom_sf(data = points16_21,
aes(color = as.factor(yr)),
size = 1.5) +
scale_color_brewer(name = "Year", palette = "Dark2") +
coord_sf(datum = NA) +
theme_void()
With the tornado point and path maps created, the maps are then combined together using the plot_grid() function. On first glance, tornadoes were most active in 2019 with 148 reported tornadoes, followed by 2017 and 2021 with 85 and 63 respectively.
# create composite map of tornado paths and points
mp_pathspoints <- plot_grid(mp_paths, mp_points,
# set map grid labels
labels = c("Points", "Paths"),
# set label size
label_size = 10,
# set # of cols in grid
ncol = 1,
# set alignment
align = "v") +
# set composite map title
ggtitle("Oklahoma Tornados 2016 to 2024") +
# center title
theme(plot.title = element_text(hjust = 0.5, face = "bold", size=12))
# view composite map
print(mp_pathspoints)
Next, the density of tornado events by county and year are calculated. The summary results are visualized using a faceted plot. Tornado density is calculated based on the number of tornadoes per 10,000 square kilometers for each county. Based on this information, tornado density in Oklahoma has ranged from 0.168 to 5.861, with a mean density of 0.9595 based on 217 recorded tornadoes from 2016 to 2021.
# summarize tornadoes by year and county
yearcounty <- st_join(points16_21, county) %>%
st_drop_geometry() %>%
group_by(GEOID, yr) %>%
summarise(tornado_n = n(), .groups = "drop")
# join summary to county for plot
countymap <- county %>%
# left join on GEOID
left_join(yearcounty, by = "GEOID") %>%
# calculate density
mutate(
# fill NA values as 0
tornado_n = ifelse(is.na(tornado_n), 0, tornado_n),
# calculate polygon area
area = st_area(.) %>% drop_units(),
# calculate tornado density per 10000 KM
Density = 10^6 * 10^3 * tornado_n / area
) %>%
# filter out NA year rows
filter(!is.na(yr))
# view summary statistics for tornado density
summary(countymap$Density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1680 0.4528 0.6590 0.9595 1.2355 5.8611
The tornado density results are visualized each year by county below. Overall, Tulsa County had the highest tornado density in 2019, with 5.86 tornadoes per 10,000 KM2
# create faceted map showing county-level tornado density each year
mp_density <- ggplot(countymap) +
# add empty county map for background
geom_sf(data = county,
fill = NA,
color = "black",
linewidth = .26) +
# add county density
geom_sf(aes(fill = Density),
color = "black",
linewidth = 0.25) +
scale_fill_distiller(
# set name
name = "Tornadoes per 10,000 km2",
# set palette
palette = "YlOrRd",
# set direction
direction = 1,
# adjust bar look
guide = guide_colorbar(barwidth=12,barheight=.75)
) +
# facet by year
facet_wrap(~yr) +
theme_void() +
# set title properties
ggtitle("Oklahoma Counties Tornado Density 2016 to 2021") +
# set plot element themes
# set title centered, bold, and size
theme(
# set title properties
plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
# set facet text bold
strip.text = element_text(face = "bold"),
# legend pos on bottom
legend.position = "bottom",
# set legend text properties
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 9)
)
# print map
print(mp_density)
Finally, four choropleth maps are created based on tornado density for 3, 4, 5, and 6 classes. A composite map is generated to compare and contrast how the number of classes affects the interpretation of the map.
#
# create map for 3 classess
# set number of classes
numclas <- 3
# set breaks
qbrks <- seq(0, 1, length.out = numclas + 1)
# add fill field
countymap <- countymap %>%
mutate(tdens_c3 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
# create map
mp_class3 <- ggplot(data = countymap) +
geom_sf(aes(fill = tdens_c3)) +
scale_fill_brewer(palette = "YlOrRd") +
theme_void() +
theme(legend.position = "right") +
# remove legend title
guides(fill = guide_legend(title = NULL))
#### Repeat for rest.... ####
# create map for 4 classess
# set number of classes
numclas <- 4
qbrks <- seq(0, 1, length.out = numclas + 1)
countymap <- countymap %>%
mutate(tdens_c4 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
mp_class4 <- ggplot(data = countymap) +
geom_sf(aes(fill = tdens_c4)) +
scale_fill_brewer(palette = "YlOrRd") +
theme_void() +
theme(legend.position = "right") +
guides(fill = guide_legend(title = NULL))
# create map for 5 classess
# set number of classes
numclas <- 5
qbrks <- seq(0, 1, length.out = numclas + 1)
countymap <- countymap %>%
mutate(tdens_c5 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
mp_class5 <- ggplot(data = countymap) +
geom_sf(aes(fill = tdens_c5)) +
scale_fill_brewer(palette = "YlOrRd") +
theme_void() +
theme(legend.position = "right") +
guides(fill = guide_legend(title = NULL))
# create map for 6 classess
# set number of classes
numclas <- 6
qbrks <- seq(0, 1, length.out = numclas + 1)
countymap <- countymap %>%
mutate(tdens_c6 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
mp_class6 <- ggplot(data = countymap) +
geom_sf(aes(fill = tdens_c6)) +
scale_fill_brewer(palette = "YlOrRd") +
theme_void() +
theme(legend.position = "right") +
guides(fill = guide_legend(title = NULL))
# create composite map of choropleth maps
mp_choropleth <- plot_grid(mp_class3,
mp_class4,
mp_class5,
mp_class6,
# set map grid labels
labels = c("3 Classes", "4 Classes", "5 Classes", "6 Classes"),
# set label size
label_size = 12,
# set # of cols in grid
ncol = 2,
# set alignment
align = "v") +
# set composite map title
ggtitle("Oklahoma Tornados 2016 to 2024") +
# center title
theme(plot.title = element_text(hjust = 0.5, face = "bold", size=12))+
guides(fill = guide_legend(title = NULL))
# view composite map
print(mp_choropleth)
Based on the number of classes, there are certainly variations in each choropleth map illustrated above. A greater number of classes results in finer granularity which distinguishes tornado density between counties. Whereas a smaller number of classes results in larger groups.