setwd(“C:/Users/HomePC/OneDrive/Desktop/R project”) # Install necessary libraries (if not already installed) install.packages(“tidyverse”) # For data manipulation and visualization install.packages(“readxl”) # For reading Excel files # Load the required libraries library(tidyverse) library(readxl)

List all sheets in the Excel file

excel_sheets(“Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release.xlsx”) # Load a specific sheet from the Excel file shipwreck_data <- read_excel(“Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release.xlsx”, sheet = 1)

Preview the first few rows of the data

head(shipwreck_data)

List all files in the current working directory

list.files() # Load the Excel file using the correct name shipwreck_data <- read_excel(“Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”)

Preview the first few rows of the dataset

head(shipwreck_data)

List all sheet names in the Excel file

excel_sheets(“Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”) # Load the main dataset from the “GeoDatabase” sheet shipwreck_data <- read_excel(“Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”)

Preview the first few rows of the dataset

head(shipwreck_data) # Load the GeoDatabase sheet into R geo_data <- read_excel(“Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”)

Preview the first few rows of the data

head(geo_data) # Clean column names for better usability library(janitor) geo_data <- geo_data %>% clean_names()

Preview cleaned data

head(geo_data)

Clean and standardize column names

geo_data <- geo_data %>% clean_names() # Converts names to lowercase and replaces spaces/special characters with underscores

View the cleaned column names

colnames(geo_data) # Preview the first few rows of the cleaned data head(geo_data) # Remove columns with empty or missing names geo_data <- geo_data %>% select(where(~ !all(is.na(.))))

Preview the cleaned dataset

head(geo_data) # Reload the dataset, skipping the first row if necessary geo_data <- read_excel( “Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”, skip = 1 # Adjust this number based on where the actual data starts )

Clean column names

library(janitor) geo_data <- geo_data %>% clean_names()

Preview the cleaned column names and data

colnames(geo_data) head(geo_data) # Remove columns with all NA values or irrelevant names geo_data <- geo_data %>% select(where(~ !all(is.na(.)))) # Keeps columns with at least one non-NA value

Preview cleaned data

head(geo_data) # Reload the data, skipping more rows geo_data <- read_excel( “Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”, skip = 2 # Skip the first two rows )

Clean column names

geo_data <- geo_data %>% clean_names()

Preview the cleaned column names

colnames(geo_data) # Remove columns with all NA values or irrelevant names geo_data <- geo_data %>% select(where(~ !all(is.na(.)))) # Keeps columns with at least one non-NA value

Preview the dataset again

head(geo_data) # Reload the dataset and skip additional rows geo_data <- read_excel( “Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”, skip = 3 # Adjust to find the row where actual data starts )

Clean column names

geo_data <- geo_data %>% clean_names()

Preview the cleaned data

head(geo_data) # Remove irrelevant or empty columns geo_data <- geo_data %>% select(where(~ !all(is.na(.)) & !grepl(“^\.\.\.”, colnames(geo_data))))

Preview the dataset again

head(geo_data) # Rename columns manually if necessary geo_data <- geo_data %>% rename( name = “name”, # Replace with correct column names latitude = “lat”, # Replace as needed longitude = “long” # Replace as needed ) # Display all column names colnames(geo_data)

Reload the dataset, skipping more rows to reach actual headers

geo_data <- read_excel( “Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”, skip = 4 # Adjust the number of rows skipped if necessary )

Preview the first few rows

head(geo_data) # Load the data without headers to inspect its structure geo_data_raw <- read_excel( “Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”, col_names = FALSE # Prevent R from using the first row as headers )

Preview the first few rows

head(geo_data_raw) # Reload the dataset, skipping the rows we identified as extra geo_data <- read_excel( “Geodatabase of Shipwrecks OXRep-DARMC-MAPS 07-2020-for-release (1).xlsx”, sheet = “GeoDatabase”, skip = 2 # Adjust this based on where the actual data starts )

Clean column names for better usability

library(janitor) geo_data <- geo_data %>% clean_names()

Preview the cleaned dataset

head(geo_data) # Remove empty or irrelevant columns geo_data <- geo_data %>% select(where(~ !all(is.na(.)))) # Keeps only columns with data

Preview the updated dataset

head(geo_data) # Check the structure of the dataset str(geo_data) # Remove columns with all NA values or empty names geo_data <- geo_data %>% select(where(~ !all(is.na(.)))) # Keeps only columns with at least one non-NA value

Preview the dataset after cleaning

head(geo_data) # Check for missing values in key columns colSums(is.na(geo_data)) # Remove all rows with missing values geo_data_clean <- geo_data %>% drop_na()

Preview the cleaned dataset

head(geo_data_clean)

Check the number of rows after cleaning

nrow(geo_data_clean) # Remove all rows with missing values geo_data_clean <- geo_data %>% drop_na() # Preview the cleaned dataset head(geo_data_clean)

Check the number of rows after cleaning

nrow(geo_data_clean)

Remove all rows with missing values

geo_data_clean <- geo_data %>% drop_na() # Preview the cleaned dataset head(geo_data_clean)

Check the number of rows after cleaning

nrow(geo_data_clean)

Remove rows with missing values in key columns

geo_data_clean <- geo_data %>% filter(!is.na(...7) & !is.na(...8) & !is.na(...13) & !is.na(...14))

Preview the cleaned dataset

head(geo_data_clean)

Check the number of rows after cleaning

nrow(geo_data_clean) # Preview the cleaned dataset head(geo_data_clean)

Check the number of rows after cleaning

nrow(geo_data_clean) # View column names to decide which ones are essential colnames(geo_data) # Filter rows where key columns are not missing geo_data_clean <- geo_data %>% filter(!is.na(…7) & !is.na(…8)) # Replace …7 and …8 with actual column names for latitude and longitude

Preview the cleaned dataset

head(geo_data_clean)

Check the number of rows

nrow(geo_data_clean)

Save the cleaned dataset

write_csv(geo_data_clean, “cleaned_geo_data.csv”) # Plot a histogram of latitude values ggplot(data = geo_data_clean, aes(x = …7)) + # Replace ...7 with the actual column name for latitude geom_histogram(binwidth = 1, fill = “blue”, color = “black”) + labs(title = “Histogram of Latitude Values”, x = “Latitude”, y = “Count”) + theme_minimal() geo_data_clean <- geo_data_clean %>% mutate(latitude = as.numeric(...7)) # Replace ...7 with the correct column name for latitude geo_data_clean %>% filter(!is.na(...7) & is.na(as.numeric(...7))) %>% select(...7) # Replace ...7 with your actual column name for latitude

unique(geo_data_clean$...7)

Replace `...7` with your actual column name

unique(geo_data_clean$...7) geo_data_clean <- geo_data_clean %>% filter(!is.na(as.numeric(...7)))

geo_data_clean %>% filter(is.na(as.numeric(...7))) %>% select(...7) %>% unique() geo_data_clean <- geo_data_clean %>% filter(!is.na(as.numeric(...7))) geo_data_clean <- geo_data_clean %>% mutate(...7 = ifelse(is.na(as.numeric(...7)), NA, as.numeric(...7))) geo_data_clean <- geo_data_clean %>% filter(!is.na(...7)) ggplot(data = geo_data_clean, aes(x = as.numeric(...7))) + geom_histogram(binwidth = 1, fill = “blue”, color = “black”) + labs(title = “Histogram of Latitude Values”, x = “Latitude”, y = “Count”) + theme_minimal() # Ensure the columns are numeric geo_data_clean <- geo_data_clean %>% mutate( Depth = as.numeric(...17), # Replace ...17 with actual column name for depth Start_Date = as.numeric(...13) # Replace ...13 with actual column name for start date )

Remove rows with missing values in key columns

geo_data_clean <- geo_data_clean %>% filter(!is.na(Depth) & !is.na(Start_Date))

Plot scatter plot

colnames(geo_data_clean) # Inspect column names to identify the correct ones colnames(geo_data_clean)

Replace with correct column names for depth and start date

geo_data_clean <- geo_data_clean %>% mutate( Depth = as.numeric(...17), # Replace ...17 with the actual column name for depth Start_Date = as.numeric(...13) # Replace ...13 with the actual column name for start date )

Remove rows with missing values in key columns

geo_data_clean <- geo_data_clean %>% filter(!is.na(Depth) & !is.na(Start_Date))

Plot scatter plot

ggplot(data = geo_data_clean, aes(x = Start_Date, y = Depth)) + geom_point(color = “blue”, alpha = 0.6) + labs(title = “Depth vs. Start Date”, x = “Start Date”, y = “Depth”) + theme_minimal() unique(geo_data_clean$`...13`) # Check unique values for column ...13 unique(geo_data_clean$...17) # Check unique values for column …17 colnames(geo_data_clean) geo_data_clean <- geo_data_clean %>% rename( Start_Date = ...13, # Replace with the correct column index for Start Date Depth = ...17 # Replace with the correct column index for Depth ) colnames(geo_data_clean) geo_data_clean <- geo_data_clean %>% filter(!is.na(Start_Date) & !is.na(Depth)) ggplot(data = geo_data_clean, aes(x = Start_Date, y = Depth)) + geom_point(color = “blue”, alpha = 0.6) + labs(title = “Depth vs. Start Date”, x = “Start Date”, y = “Depth”) + theme_minimal() ggplot(data = geo_data_clean, aes(x = Start_Date, y = Depth)) + geom_point(color = “blue”, alpha = 0.6) + labs(title = “Depth vs. Start Date by Region”, x = “Start Date (Years)”, y = “Depth (Meters)”) + theme_minimal() + facet_wrap(~ Region)

colnames(geo_data_clean) ggplot(data = geo_data_clean, aes(x = Start_Date, y = Depth)) + geom_point(color = “blue”, alpha = 0.6) + labs(title = “Depth vs. Start Date by Region”, x = “Start Date (Years)”, y = “Depth (Meters)”) + theme_minimal() + facet_wrap(~ Region) # Replace Region with the correct column name ggplot(data = geo_data_clean, aes(x = Start_Date, y = Depth)) + geom_point(color = “blue”, alpha = 0.6) + labs(title = “Depth vs. Start Date by Region”, x = “Start Date (Years)”, y = “Depth (Meters)”) + theme_minimal() + facet_wrap(~ ...48) # Replace ...48 with the actual column name colnames(geo_data_clean) geo_data_clean <- geo_data_clean %>% rename(Region = ...48) # Replace ...48 with the correct column name unique(geo_data_clean$Region) ggplot(data = geo_data_clean, aes(x = Start_Date, y = Depth)) + geom_point(color = “blue”, alpha = 0.6) + labs(title = “Depth vs. Start Date by Region”, x = “Start Date (Years)”, y = “Depth (Meters)”) + theme_minimal() + facet_wrap(~ Region) geo_data_clean <- geo_data_clean %>% filter(!is.na(Region)) facet_wrap(~ Region, nrow = 2) # Example for 2 rows theme(axis.text.x = element_text(angle = 45, hjust = 1)) geom_point(aes(color = Region), alpha = 0.6) # Save the cleaned data to a CSV file in the working directory write.csv(geo_data_clean, “cleaned_geo_data.csv”, row.names = FALSE)

List all sheets in the Excel file

Preview the first few rows of the data

List all files in the current working directory

Preview the first few rows of the dataset

List all sheet names in the Excel file

Preview the first few rows of the dataset

Preview the first few rows of the data

Preview cleaned data

Clean and standardize column names

View the cleaned column names

Preview the cleaned dataset

Clean column names

Preview the cleaned column names and data

Preview cleaned data

Clean column names

Preview the cleaned column names

Preview the dataset again

Clean column names

Preview the cleaned data

Preview the dataset again

Reload the dataset, skipping more rows to reach actual headers

Preview the first few rows

Preview the first few rows

Clean column names for better usability

Preview the cleaned dataset

Preview the updated dataset

Preview the dataset after cleaning

Preview the cleaned dataset

Check the number of rows after cleaning

Check the number of rows after cleaning

Remove all rows with missing values

Check the number of rows after cleaning

Remove rows with missing values in key columns

Preview the cleaned dataset

Check the number of rows after cleaning

Check the number of rows after cleaning

Preview the cleaned dataset

Check the number of rows

Save the cleaned dataset

Replace ...7 with your actual column name

Remove rows with missing values in key columns

Plot scatter plot

Replace with correct column names for depth and start date

Remove rows with missing values in key columns

Plot scatter plot

Replace `...7` with your actual column name