The visualizations in this dashboard represent a summary of global shark attacks from 1901 to 2023. The plots include the top 10 countries, the top 10 shark species, and the number of attacks by year. This is a very limited view of the data, and it does not distinguish between confirmed and unconfirmed shark attacks.
# I paste some code in here, maybe to identify all of the libraries I need to use and then to read in the data and to report some details about the data.
library(flexdashboard)
library(lubridate)
library(dplyr)
library(scales)
library(ggthemes)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(data.table)
library(plotly)
library(igraph)
# Read in the dataset.
# `file =` is used instead of the positional `input` argument because the path
# contains a space: with `input`, fread() may fall back to treating the string
# as a shell command when the file cannot be found, whereas `file =` fails
# with a plain "file does not exist" error.
# Both the literal text "NA" and empty fields are read as missing values.
df <- fread(
  file = "C:/Users/dkwlk/OneDrive/Documents/Nates Docs/Loyola_MSDS/2026_Spring/DS-736_Data-Visualizations/2-Global_Shark_Incidents/global_shark_attacks.csv",
  na.strings = c("NA", "")
)
# df
# Keep only rows that have no missing value in any column.
# NOTE(review): this discards every incident lacking even one field, so all
# counts computed below are based on complete records only -- confirm that
# this is the intended population.
df <- na.omit(df)
# I paste some code in here if needed. This might be manipulation of the data after reading it in, to remove bad data, for example.
# Tally the incidents per country into a plain data frame (columns: country, n)
countrycount <- df %>%
  count(country) %>%
  data.frame()
# Rank the tally so the countries with the most incidents come first
countrycount <- countrycount[
  with(countrycount, order(n, decreasing = TRUE)),
]
# Activities recorded for incidents in the ten countries with the most attacks.
# The country set is read from the ranked `countrycount` table instead of a
# hand-typed list of names, so it always matches the "top 10" that is plotted.
top10 <- df[df$country %in% head(countrycount$country, 10), "activity"]
# Count how often each activity occurs in those countries
df_top10 <- count(top10, activity)
# Sort the activities from most to least frequent
df_top10 <- df_top10[order(df_top10$n, decreasing = TRUE), ]
# Drop rows whose activity is missing; the result stays a table so the counts
# are kept next to the activity names
df_top10 <- df_top10[!is.na(df_top10$activity), ]
# Bar plot: the ten countries with the most recorded attacks.
# `countrycount` is already sorted in decreasing order of n, so its first ten
# rows are the top ten; head() is used so that a table with fewer than ten
# countries does not yield NA rows. reorder() orders the bars by count and
# coord_flip() places the countries on the vertical axis.
ggplot(head(countrycount, 10), aes(x = reorder(country, n), y = n)) +
  geom_bar(color = "darkblue", fill = "lightblue", stat = "identity") +
  coord_flip() +
  labs(
    title = "Historical Global Shark Attacks by Country (Top 10)",
    x = "Country",
    y = "Number of Attacks",
    caption = " Shark Attacks Over 122 Years"
  ) +
  # theme_hc() is a complete theme and replaces any theme() settings added
  # before it, so the title centering must come after it to take effect.
  theme_hc() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(labels = comma) +
  # Print each count in white just inside the end of its bar
  geom_text(aes(label = n), hjust = 1.5, colour = "white")
## Summary This plot highlights the top 10 countries that have had the most
shark attacks over a 122-year period.
# Tally the incidents per shark species into a plain data frame
# (columns: species, n)
speciescount <- df %>%
  count(species) %>%
  data.frame()
# Rank the tally so the most frequently recorded species come first
speciescount <- speciescount[
  with(speciescount, order(n, decreasing = TRUE)),
]
# Discard any row that still holds a missing value
speciescount <- na.omit(speciescount)
# Bar plot: attack counts for ten shark species, bars ordered by count and
# drawn horizontally via coord_flip().
# NOTE(review): rows 3-12 of the ranked table are plotted, i.e. the two most
# frequent entries are skipped -- presumably because they are not real species
# labels; confirm against the data before relying on the "Top 10" title.
ggplot(speciescount[3:12, ], aes(x = reorder(species, n), y = n)) +
  geom_bar(color = "darkblue", fill = "lightblue", stat = "identity") +
  coord_flip() +
  labs(
    title = "Historical Global Shark Attacks by Species (Top 10)",
    x = "Species",
    y = "Number of Attacks",
    caption = " Global Shark Attacks Over 122 Years"
  ) +
  # theme_hc() is a complete theme and replaces any theme() settings added
  # before it, so the title centering must come after it to take effect.
  theme_hc() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(labels = comma) +
  # Print each count in white just inside the end of its bar
  geom_text(aes(label = n), hjust = 1.5, colour = "white")
## Summary This plot highlights the top 10 shark species involved in attacks
over a 122-year period.
# Attacks per calendar year.
# The date text is parsed as month/day/year, the year is extracted, and the
# rows are counted per year. Rows containing NA (for example a year that
# could not be derived from the date) are removed at the end of the chain.
years_df <- df %>%
  select(date) %>%
  mutate(year = year(mdy(date))) %>%
  group_by(year) %>%
  summarise(n = length(date), .groups = "keep") %>%
  data.frame() %>%
  na.omit()
# Every year from the earliest to the latest value of df$year. Kept for
# optional use as x-axis breaks/labels; the scale call that would use it is
# left disabled below.
x_axis_labels <- seq(min(df$year), max(df$year))
# x_axis_labels
# scale_x_continuous(labels = x_axis_labels, breaks = x_axis_labels)

# Plot annotations: the rows of years_df with the fewest and the most attacks
hi_lo <- years_df %>%
  filter(n %in% range(n)) %>%
  data.frame()
# Line chart of the yearly attack counts with the extreme years annotated.
ggplot(years_df, aes(x = year, y = n)) +
  # --- layers (kept in drawing order) ---
  geom_line(color = "black", size = 1) +
  geom_point(shape = 21, size = 4, color = "red", fill = "white") +
  # Solid red markers on the lowest and highest years
  geom_point(
    data = hi_lo,
    aes(x = year, y = n),
    shape = 21, size = 4, fill = "red", color = "red"
  ) +
  # Label only the extreme years; every other point gets an empty label
  geom_label_repel(
    aes(label = ifelse(n == max(n) | n == min(n), scales::comma(n), "")),
    box.padding = 1,
    point.padding = 2,
    size = 4,
    color = "grey50",
    segment.color = "darkblue"
  ) +
  # Shaded band over 2012-2017
  annotate(
    "rect",
    xmin = 2012, xmax = 2017, ymin = 0, ymax = 150,
    alpha = .1, fill = "blue"
  ) +
  # --- scales ---
  scale_x_continuous(
    labels = seq(min(years_df$year), max(years_df$year), by = 5),
    breaks = seq(min(years_df$year), max(years_df$year), by = 5),
    minor_breaks = NULL
  ) +
  scale_y_continuous(labels = comma) +
  # --- labels and theme ---
  labs(
    x = "Year",
    y = "Shark Attacks Count",
    title = "Historical Global Shark Attacks by Year w/Annotations",
    caption = "Shark Attacks Over 122 Years"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
## Summary This line plot highlights a peak in shark attacks in
2015.
# Attacks per year and month for 2002 through 2023.
# Rows are first restricted by the data set's own `year` column; month
# (abbreviated name) and year are then derived from the parsed date text and
# the rows are counted per year/month pair. Rows containing NA are removed at
# the end of the chain.
months_df <- df %>%
  filter(year >= 2002, year <= 2023) %>%
  select(date) %>%
  mutate(
    months = months(mdy(date), abbreviate = TRUE),
    year = year(mdy(date))
  ) %>%
  group_by(year, months) %>%
  summarise(n = length(date), .groups = "keep") %>%
  data.frame() %>%
  na.omit()
# Make year a discrete variable by converting it to a factor
months_df$year <- factor(months_df$year)
# Month abbreviations in calendar order (a plain factor would sort them
# alphabetically)
mymonths <- c(
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
)
# Month of every row of months_df as a factor in calendar order.
# `levels` is written out in full so the call does not rely on partial
# argument matching.
month_order <- factor(months_df$months, levels = mymonths)
levels(months_df$year) # returns the year as factors
## [1] "2002" "2003" "2004" "2005" "2006" "2007" "2008" "2009" "2010" "2011"
## [11] "2012" "2013" "2014" "2015" "2016" "2017" "2018" "2019" "2020" "2021"
## [21] "2022" "2023"
# Reverse the years in the plot to show the current year first
as.numeric(levels(months_df$year)) # change the factor levels back to numeric
## [1] 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
## [16] 2017 2018 2019 2020 2021 2022 2023
# Earliest (x) and latest (y) year present in the data
x <- min(as.numeric(levels(months_df$year)))
y <- max(as.numeric(levels(months_df$year)))
# Re-level the year factor from the latest year down to the earliest
months_df$year <- factor(months_df$year, levels = seq(y, x, by = -1))
# Faceted bar charts: monthly attack counts, one panel per year.
# The x aesthetic is built from the `months` column of the plotted data
# (ordered by `mymonths`) rather than from a separate vector, so it cannot
# fall out of step with the rows of months_df.
ggplot(
  months_df,
  aes(x = factor(months, levels = mymonths), y = n, fill = year)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Multiple Bar Charts - Historical Global Shark Attacks by Year and Month",
    x = "Months of the Year",
    y = "Shark Attack Counts",
    fill = "Year",
    caption = " Global Shark Attacks from 2002 - 2023"
  ) +
  # No colour scale is added here: the bars map `fill`, not `colour`, and
  # `year` is a factor, so a continuous colour scale would have no effect.
  facet_wrap(~year, ncol = 5, nrow = 5)
## Summary This plot highlights the shark attacks by year and month for
22 years (2002 to 2023).
From this worldwide data set covering the last century, the United States experienced the most attacks, at 1228. On average, that is about 12 shark attacks per year, while Australia experiences about 5 shark attacks per year on average and South Africa roughly 3. The visualization also indicates that the top three species involved in attacks are White sharks, Blacktip sharks, and Tiger sharks. The data suggest that it is important to be careful when you are in the ocean or at the beach and to stay aware of your surroundings for the presence of these fish.