library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom        1.0.5     ✔ rsample      1.2.0
## ✔ dials        1.2.0     ✔ tune         1.1.2
## ✔ infer        1.0.6     ✔ workflows    1.1.3
## ✔ modeldata    1.3.0     ✔ workflowsets 1.0.1
## ✔ parsnip      1.1.1     ✔ yardstick    1.3.0
## ✔ recipes      1.0.9     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Search for functions across packages at https://www.tidymodels.org/find/
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
## 
## Attaching package: 'openintro'
## 
## The following object is masked from 'package:modeldata':
## 
##     ames
library(RSQLite)
library(DBI)
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 4.3.3
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(dplyr)
library(readr)
library (lubridate)
# ---- Raw inputs -------------------------------------------------------------
# Each source table is read from a local CSV export with base read.csv().
# Later steps refer to dotted column names (e.g. Common.Name), which matches
# read.csv()'s default name checking — presumably the headers contain spaces.

# Stranding records.
harbor_strandings <- read.csv(
  file = "~/Downloads/BassConnectionsSearches - Strandings_Nov6_VF.csv"
)

# iNaturalist observation export.
harbor_inaturalist <- read.csv(
  file = "~/Documents/csv files for Bass Connections/observations-702305.csv"
)

# Museum specimen searches (two separate exports).
harbor_museumNCSM <- read.csv(
  file = "~/Downloads/Museum Searches - NCSM.csv"
)
harbor_museumVertnet <- read.csv(
  file = "~/Downloads/Museum Searches - IDigBio_Vertnet.csv"
)

# Virginia Aquarium dataset.
harbor_VaAq <- read.csv(
  file = "~/Downloads/Virginia Aquarium Dataset - Sheet1.csv"
)

# Records extracted from the literature review.
harbor_literature <- read.csv(
  file = "~/Downloads/Data from Lit Review Extraction - NC only.csv"
)

# Coded newspaper search results.
harbor_newspaper <- read.csv(
  file = "~/Downloads/NewspaperSearches - Final_Results_Analysis.csv"
)
# ---- Strandings -------------------------------------------------------------
# Drop identifier/location columns, keep only harbor seal rows from NC, and
# label each row with its decade of observation (e.g. 1994 -> "1990s").
harbor_strandings1 <- harbor_strandings %>%
  select(
    -c(
      National.Database.Number, Field.Number, Body.of.Water, Locality.Detail,
      Latitude, Latitude.Units, Longitude, Longitude.Units
    )
  ) %>%
  filter(Common.Name == "Seal, harbor" & State == "NC") %>%
  mutate(decade = paste0(10 * floor(Year.of.Observation / 10), "s"))

# Slimmed-down copy used for pooling: the taxonomy, place and date columns are
# removed now that the rows are filtered and the decade label exists.
strandings <- select(
  harbor_strandings1,
  -c(
    Common.Name, Species, Genus, County, State,
    Observation.Date, Year.of.Observation, Month.of.Observation,
    Day.of.Observation
  )
)
# ---- iNaturalist ------------------------------------------------------------
# Remove the export's bookkeeping, user, media and location columns, then
# derive the observation year and its decade label from `observed_on`.
# NOTE(review): no species/state filter is applied to this table — presumably
# the export was already restricted to the records of interest; confirm.
harbor_inaturalist1 <- harbor_inaturalist %>%
  select(
    -c(
      id, uuid, observed_on_string, time_observed_at, time_zone,
      user_id, user_login, user_name, created_at, updated_at,
      quality_grade, url, license, image_url, sound_url, tag_list,
      description, num_identification_agreements,
      num_identification_disagreements, captive_cultivated,
      oauth_application_id, place_guess, latitude, longitude,
      positional_accuracy, private_place_guess, private_latitude,
      private_longitude, public_positional_accuracy, geoprivacy,
      taxon_geoprivacy, coordinates_obscured, positioning_method,
      positioning_device, iconic_taxon_name, taxon_id
    )
  ) %>%
  mutate(
    year = year(observed_on),
    decade = paste0(10 * floor(year / 10), "s")
  )

# Version used for pooling: date, name and helper `year` columns removed.
inaturalist <- select(
  harbor_inaturalist1,
  -c(observed_on, species_guess, scientific_name, common_name, year)
)
# ---- Museum records: NCSM ---------------------------------------------------
# Drop catalogue/location columns, keep North Carolina Phoca vitulina records
# (including the richardii name) that have a non-empty year, and overwrite the
# year of the 10th remaining row with "1830".
# NOTE(review): the override is positional, so it depends on the row order of
# the CSV after filtering — confirm it still targets the intended specimen.
harbor_museumNCSM1 <- harbor_museumNCSM %>%
  select(
    -c(
      countryCode, locality, decimalLatitude, decimalLongitude, eventDate,
      taxonKey, speciesKey, basisOfRecord, institutionCode, collectionCode,
      catalogNumber, recordNumber, license, rightsHolder
    )
  ) %>%
  filter(
    verbatimScientificName %in% c("Phoca vitulina", "Phoca vitulina richardii"),
    stateProvince == "North Carolina",
    year != ""
  ) %>%
  mutate(year = ifelse(row_number() == 10, "1830", year))

# The override above yields text years, so convert to numeric before the
# decade arithmetic.
harbor_museumNCSM1[["year"]] <- as.numeric(harbor_museumNCSM1[["year"]])

harbor_museumNCSM1 <- mutate(
  harbor_museumNCSM1,
  decade = paste0(10 * floor(year / 10), "s")
)

# Version used for pooling: name, state and date-part columns removed.
NCSM <- select(
  harbor_museumNCSM1,
  -c(verbatimScientificName, stateProvince, day, month, year)
)
# ---- Museum records: iDigBio / VertNet --------------------------------------
# Drop institution/location columns, keep North Carolina Phoca vitulina
# records (including the concolor name) whose collection date is present and
# is not one of the placeholder strings, then set the 6th remaining row's date
# to "1884-01-01" and derive year and decade from the date.
# NOTE(review): the date override is positional and depends on the row order
# after filtering — confirm it still targets the intended specimen.
harbor_museumVertnet1 <- harbor_museumVertnet %>%
  select(
    -c(X, Institution, Basis.of.Record, Locality, Latitude, Longitude)
  ) %>%
  filter(
    Scientific.Name %in% c("Phoca vitulina", "Phoca vitulina concolor"),
    State == "North Carolina",
    !is.na(Date.Collected),
    !Date.Collected %in% c("No Data", "N/A")
  ) %>%
  mutate(
    Date.Collected = ifelse(row_number() == 6, "1884-01-01", Date.Collected),
    year = year(Date.Collected),
    decade = paste0(10 * floor(year / 10), "s")
  )

# Version used for pooling: name, date, state and helper `year` removed.
Vertnet <- select(
  harbor_museumVertnet1,
  -c(Scientific.Name, Date.Collected, State, year)
)
# ---- Virginia Aquarium ------------------------------------------------------
# Drop metadata columns plus the unnamed spreadsheet columns (X, X.1 ... X.27),
# keep Harbor Seal rows whose timestamp is not the "##########" placeholder,
# parse the timestamp as month/day/year hour:minute and derive the decade.
harbor_VaAq1 <- harbor_VaAq %>%
  select(
    -c(
      dataset_id, row_id, latitude, longitude, species_name, scientific_name,
      itis_tsn, group_size, series_id, timezone, ds_type, platform, provider,
      lprecision, tprecision, oceano, notes, last_mod
    ),
    -X,
    # all_of() keeps the strictness of naming each column: a missing one errors.
    -all_of(paste0("X.", 1:27))
  ) %>%
  filter(date_time != "##########" & common_name == "Harbor Seal") %>%
  mutate(
    date_time = mdy_hm(date_time),
    decade = paste0(10 * floor(year(date_time) / 10), "s")
  )

# Version used for pooling: species name and timestamp removed.
VaAq1 <- select(harbor_VaAq1, -c(common_name, date_time))
# ---- Literature review ------------------------------------------------------
# Drop location, morphometric and citation columns, then label each record
# with its decade of observation. No row filter is applied to this table.
harbor_literature1 <- harbor_literature %>%
  select(
    -c(
      Latitude, Longitude, Locality.Detail, Sex, Age.Class,
      Length, Length.Units, Length.actual.estimate,
      Weight, Weight.Units, Weight.actual.estimate,
      Dead.Alive, Other.Notes, Source, Page
    )
  ) %>%
  mutate(decade = paste0(10 * floor(Year.of.Observation / 10), "s"))

# Version used for pooling: taxonomy, place and date-part columns removed.
literature <- select(
  harbor_literature1,
  -c(
    Common.Name, Genus, Species, County, State,
    Year.of.Observation, Month.of.Observation, Day.of.Observation
  )
)
# ---- Newspaper searches -----------------------------------------------------
# Drop coder/article metadata, exclude rows coded as "Gray Seal", overwrite
# the dates of seven specific rows, then parse Date as month/day/year and
# derive the decade label.
# NOTE(review): the date overrides are positional (row index after the
# filter), so they depend on the CSV's row order — confirm they still point
# at the intended articles whenever the spreadsheet changes.
harbor_newspaper1 <- harbor_newspaper %>%
  select(
    -c(
      Coder, Result.URL, Newspaper.Title, Latitude..Longitude, Perception,
      Seal.Alive.or.Dead, Interesting.Quote, Additional.Notes
    )
  ) %>%
  filter(Seal.Species != "Gray Seal") %>%
  mutate(
    # Each override touches a different row, so their order is irrelevant.
    Date = ifelse(row_number() == 11, "1/27/1888", Date),
    Date = ifelse(row_number() == 12, "3/14/1889", Date),
    Date = ifelse(row_number() == 39, "2/15/1887", Date),
    Date = ifelse(row_number() == 40, "3/8/1889", Date),
    Date = ifelse(row_number() == 41, "1/9/1891", Date),
    Date = ifelse(row_number() == 51, "9/28/1944", Date),
    Date = ifelse(row_number() == 66, "01/01/1993", Date),
    Date = mdy(Date),
    decade = paste0(10 * floor(year(Date) / 10), "s")
  )

# Version used for pooling: date, location and species columns removed.
newspaper <- select(harbor_newspaper1, -c(Date, Location, Seal.Species))
# ---- Pool all sources -------------------------------------------------------
# Stack the two museum tables and the two stranding-type tables, then stack
# every source into one table tagged with a `source` label.
museum <- bind_rows(NCSM, Vertnet)
stranding <- bind_rows(strandings, VaAq1)

combined_data <- bind_rows(
  mutate(museum, source = "Museum"),
  mutate(newspaper, source = "Newspaper"),
  mutate(literature, source = "Literature"),
  mutate(inaturalist, source = "iNaturalist"),
  mutate(stranding, source = "Stranding")
)

# Append two "Precolonial" archaeology rows by hand (the original script added
# the same row twice).
# NOTE(review): presumably two distinct archaeological records — confirm the
# duplication is intentional.
combined_data <- add_row(
  combined_data,
  decade = rep("Precolonial", 2),
  source = rep("Archaeology", 2)
)

# Plot order for the x axis: "Precolonial" first, then 1830s through 2020s.
decade_levels <- c("Precolonial", paste0(seq(1830, 2020, 10), "s"))

# factor() silently turns any label that is not in `levels` into NA. That
# includes decades outside 1830s-2020s and the literal "NAs" that paste0()
# produces when a year is missing. Report such labels instead of letting them
# disappear unnoticed; the resulting data is unchanged.
unmatched_decades <- setdiff(
  unique(combined_data$decade),
  c(decade_levels, NA)
)
if (length(unmatched_decades) > 0) {
  warning(
    "Decade labels not in the plotted range will become NA: ",
    paste(unmatched_decades, collapse = ", "),
    call. = FALSE
  )
}

combined_data$decade <- factor(combined_data$decade, levels = decade_levels)
# ---- Figure: records per decade by source -----------------------------------
# Stacked bar chart counting rows per decade, with bars filled by source.
# Axis labels are rotated 45 degrees so the decade labels do not overlap.
combined_data %>%
  ggplot(mapping = aes(x = decade, fill = source)) +
  geom_bar() +
  # theme() must follow theme_minimal(), which would otherwise reset it.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Decade", y = "Count", fill = "Source")