Author

Christina Ebrahim

Published

May 15, 2025

Setting up the packages

Reading in the data

Show the code
# Load in CSV  
ocean_acd <- read.csv("C:\\Users\\Admin\\OneDrive\\Documents\\Data & Vis in R\\Final_R_proj\\ocean-acidity_cln.csv")

Plots for pCO2

Show the code
## pCO2 
ho2plot<-
  ggplot(ocean_acd,aes(Hawaii.Year, Hawaii.pCO2))+
  
  geom_point(shape = 21, color = 'darkgray', fill = 'darkblue',
             size= 3)  +
  
  # Labels
  labs(title = "Hawaii pCO2 from 1989-2022",
       x = "Years",
       y = "pC02")+
  geom_smooth(method = "lm", color="red")



cIo2plot <-
  ggplot(ocean_acd,aes(Canary.Islands.Year, Canary.Islands.pCO2))+
  
  geom_point(shape = 21, color = 'darkgray', fill = 'darkblue', size = 3)  +
  
  # Labels
  labs(title = "Canary Islands pCO2 from 1955-2009",
       x = "Years",
       y = "pC02")+
  geom_smooth(method = "lm", color="red")





b02plot <-
  ggplot(ocean_acd,aes(Bermuda.Year, Bermuda.pCO2))+ 
  
  geom_point(shape = 21, color = 'darkgray', fill = 'darkblue', size = 3) +
  
  #Labels
  labs(title = "Bermuda pCO2 from 1983-2015",
       x = "Years",
       y = "pC02")+
  geom_smooth(method = "lm", color="red")


co2plot<- 
  ggplot(ocean_acd,aes(Cariaco.Year..pCO2.,Cariaco.pCO2))+
  
  geom_point(shape = 21, color = 'darkgray', fill = 'darkblue', size = 3)  +
  
  # Labels
  labs(title = "Bermuda pCO2 from 1955 - 2017",
       x = "Years",
       y = "pC02")+
  geom_smooth(method = "lm", color="red")


plot_grid(ho2plot,cIo2plot,b02plot,co2plot)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 85 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 85 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 272 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 272 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 220 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 220 rows containing missing values or values outside the scale range
(`geom_point()`).

Plots for pH

Show the code
#pH plots 
h_plot <-
ggplot(ocean_acd,aes(Hawaii.Year, Hawaii.pH))+ 
  
geom_point(shape = 21, color = 'darkorchid4', fill = 'lightpink2',
           size= 3)  +
  
# Labels
labs(title = "Hawaii pH from 1989 - 2022",
       x = "Years",
       y = "pH")+
  geom_smooth(method = "lm") 



cI_plot <-
ggplot(ocean_acd,aes(Canary.Islands.Year, Canary.Islands.pH))+ 
  
  geom_point(shape = 21, color = 'darkorchid4', fill = 'lightpink2', size = 3)  +
  
  # Labels
  labs(title = "Canary Islands pH from 1995 - 2009",
       x = "Years",
       y = "pH")+
  geom_smooth(method = "lm") 





b_plot<-
ggplot(ocean_acd,aes(Bermuda.Year, Bermuda.pH))+ 
  
  geom_point(shape = 21, color = 'darkorchid4', fill = 'lightpink2', size = 3)  +
  
  # Labels
  labs(title = "Bermuda pH from 1983 - 2015",
       x = "Years",
       y = "pH")+
  geom_smooth(method = "lm") 


c_plot <- 
ggplot(ocean_acd,aes(Cariaco.Year..pH.,Cariaco.pH))+
  
  geom_point(shape = 21, color = 'darkorchid4', fill = 'lightpink2', size = 3)  +
  
  # Labels
  labs(title = "Cariaco pH from 1995 - 2017",
       x = "Years",
       y = "pH")+
  geom_smooth(method = "lm")


plot_grid(h_plot,cI_plot,b_plot,c_plot)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 85 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 85 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 272 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 272 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 205 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 205 rows containing missing values or values outside the scale range
(`geom_point()`).

Plots for pH Vs pCO2

Show the code
#pH vs pCO plots 
h_vs_plot <-
  ggplot(ocean_acd,aes(Hawaii.pCO2, Hawaii.pH))+ 
  
  geom_point(shape = 21, color = 'darkgreen', fill = 'darkseagreen',
             size= 3)  +
  
  # Labels
  labs(title = "Hawaii pCO2 compared to pH from 1989 - 2022",
       x = "pCO2",
       y = "pH")+
  geom_smooth(method = "lm") 



cI_vs_plot <-
  ggplot(ocean_acd,aes(Canary.Islands.pCO2, Canary.Islands.pH))+ 
  
  geom_point(shape = 21, color = 'darkgreen', fill = 'darkseagreen', size = 3)  +
  
  # Labels
  labs(title = "Canary Islands pCO2 compared to pH from 1995 - 2009",
       x = "pCO2",
       y = "pH")+
  geom_smooth(method = "lm") 





b_vs_plot<-
  ggplot(ocean_acd,aes(Bermuda.pCO2, Bermuda.pH))+ 
  
  geom_point(shape = 21, color = 'darkgreen', fill = 'darkseagreen', size = 3)  +
  
  # Labels
  labs(title = "Bermuda pCO2 compared to pH from 1983 - 2015",
       x = "pCO2",
       y = "pH")+
  geom_smooth(method = "lm") 




## Need to compare the same years for Cariaco, requires some calculations 
#because the years were not the same for pH and pCO2.

# First split them into separate data frames for easy of calculations
pH_data <- ocean_acd %>%
  select(year = `Cariaco.Year..pH.`, pH = `Cariaco.pH`) %>%
  mutate(year = floor(year))  # use floor to convert decimal years to integer years

pCO2_data <- ocean_acd %>%
  select(year = `Cariaco.Year..pCO2.`, pCO2 = `Cariaco.pCO2`) %>%
  mutate(year = floor(year)) #using floor again for the other data frame  

# Calculate average by year
avg_pH <- pH_data %>%
  group_by(year) %>%
  summarise(avg_pH = mean(pH, na.rm = TRUE))

avg_pCO2 <- pCO2_data %>%
  group_by(year) %>%
  summarise(avg_pCO2 = mean(pCO2, na.rm = TRUE))

# Merge the data frames
Cariaco_yearly <- full_join(avg_pH, avg_pCO2, by = "year") %>%
  arrange(year)

# Double check it worked
print(Cariaco_yearly)
# A tibble: 24 × 3
    year avg_pH avg_pCO2
   <dbl>  <dbl>    <dbl>
 1  1995   8.06     401.
 2  1996   8.07     398.
 3  1997   8.07     383.
 4  1998   8.08     396.
 5  1999   8.08     398.
 6  2000   8.07     399.
 7  2001   8.06     377.
 8  2002   8.08     389.
 9  2003   8.08     382.
10  2004   8.09     377.
# ℹ 14 more rows
Show the code
c_vs_plot <- 
  ggplot(Cariaco_yearly,aes(avg_pCO2,avg_pH))+
  
  geom_point(shape = 21, color = 'darkgreen', fill = 'darkseagreen', size = 3)  +
  
  # Labels
  labs(title = "Cariaco pCO2 compared to pH from 1995 - 2017",
       x = "pCO2",
       y = "pH")+
  geom_smooth(method = "lm")


plot_grid(h_vs_plot,cI_vs_plot,b_vs_plot,c_vs_plot)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 85 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 85 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 272 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 272 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

Show the code
##slightly less a correlation in the Cariaco basin because pH and pCO2 
#were not from the exact years, used averages

Clean for pH Vs Time

Show the code
#hawaii 
hawaii_year <- ocean_acd %>%
  select(Hawaii.Year, Hawaii.pH) %>%  
  mutate(Hawaii.Year = floor(Hawaii.Year)) %>%  # Convert to whole numbers
  group_by(Hawaii.Year) %>%
  summarise(avg_pH = mean(Hawaii.pH, na.rm = TRUE)) %>%
  arrange(Hawaii.Year)%>% 
  rename(Year= Hawaii.Year)%>%
  filter(Year %in% c(1995, 2000, 2005, 2009))

canary_year <- ocean_acd %>%
  select(Canary.Islands.Year, Canary.Islands.pH) %>%  
  mutate(Canary.Islands.Year = floor(Canary.Islands.Year)) %>%  
  group_by(Canary.Islands.Year) %>%
  summarise(avg_pH = mean(Canary.Islands.pH, na.rm = TRUE)) %>%
  arrange(Canary.Islands.Year)%>%
  rename(Year= Canary.Islands.Year)%>%
  filter(Year %in% c(1995, 2000, 2005, 2009))

bermuda_year <- ocean_acd %>%
  select(Bermuda.Year, Bermuda.pH) %>%  
  mutate(Bermuda.Year = floor(Bermuda.Year)) %>%  
  group_by(Bermuda.Year) %>%
  summarise(avg_pH = mean(Bermuda.pH, na.rm = TRUE)) %>%
  arrange(Bermuda.Year)%>% 
  rename(Year= Bermuda.Year)%>%
  filter(Year %in% c(1995, 2000, 2005, 2009))

cariaco_year <- ocean_acd %>%
  select(Cariaco.Year..pH., Cariaco.pH) %>%  
  mutate(Cariaco.Year..pH. = floor(Cariaco.Year..pH.)) %>%  
  group_by(Cariaco.Year..pH.) %>%
  summarise(avg_pH = mean(Cariaco.pH, na.rm = TRUE)) %>%
  arrange(Cariaco.Year..pH.)%>% 
  rename(Year= Cariaco.Year..pH.)%>%
  filter(Year %in% c(1995, 2000, 2005, 2009))


# First join Hawaii and Canary Islands
step1 <- full_join(
  hawaii_year,
  canary_year,
  by = "Year",
  suffix = c("_Hawaii", "_Canary")  # add to make it clear which location 
)

# Then join with Bermuda
step2 <- full_join(
  step1,
  bermuda_year,
  by = "Year"
) %>% rename(avg_pH_Bermuda = avg_pH)  # Explicit rename for Bermuda

# Finally join with Cariaco
all_years <- full_join(
  step2,
  cariaco_year,
  by = "Year"
) %>% rename(avg_pH_Cariaco = avg_pH)  # Explicit rename for Cariaco



#Then flip so the years are the column names 

years_flipped <- all_years %>%
  pivot_longer(
    cols = -Year,                     # Keep 'Year' as identifier
    names_to = "Location",            # Current column names become values
    values_to = "avg_pH"              # pH values stay as values
  ) %>%
  pivot_wider(
    names_from = Year,                # Years become new column names
    values_from = avg_pH              # pH values fill the cells
  ) %>%
  mutate(Location = gsub("_pH", "", Location))  # Clean location names

Mapping Locations

Show the code
#Create a data frame with the 4 locations to map spatial relationship 

pH_location <- data.frame(
  long= c(-156.938810, -16.865106, -64.760224, 10.5),
  lat= c(21.107735,28.341451,32.305925,-64.667),
  name= c("Hawaii","Canary Islands","Bermuda","Cariaco Basin"),
  Location= c("avg_Hawaii","avg_Canary","avg_Bermuda","avg_Cariaco")
)


# Join to the Averged data  
combined_data <- pH_location %>%
  full_join(years_flipped, by = "Location")


###Mapping 

#Make another column with the popup content using HTML

combined_data <- combined_data %>%
  mutate(
    popup_content = paste(
      "<strong>Location: </strong>", name, "<br>",
      "<strong>Coordinates: </strong>", 
      round(lat, 4), ", ", round(long, 4), "<br>",
      "<strong>pH (1995): </strong>", `1995`, "<br>",
      "<strong>pH (2000): </strong>", `2000`, "<br>",
      "<strong>pH (2005): </strong>", `2005`, "<br>",
      "<strong>pH (2009): </strong>", `2009`, "<br>",
      sep = ""
    )
  )

##Map with the PopUps 
leaflet(combined_data)%>%
  addProviderTiles("CartoDB.Positron", group= "CartoDB.Positron")%>%
  addProviderTiles("Esri.WorldImagery", group= "Esri.WorldImagery")%>%
  addCircleMarkers(
      lng = ~long, lat = ~lat, 
      popup = ~popup_content, 
      color = "blue",
      )%>%
  addLayersControl(baseGroups = c("CartoDB.Positron","Esri.WorldImagery"), position = "topright")%>%
  setView(lng = -77.484421,lat= -11.844999, zoom = 2)%>%
  addLegend(colors= "blue",
            labels= "Locations Of Study",
            position= "bottomleft")

Mapping Locations Change Overtime

Show the code
#Mapping Change in pH over time using leaflet

# Define color palette (blue = high pH, red = low pH)
pal <- colorNumeric(
  palette = "RdYlBu",  # Red-Yellow-Blue (reversed for pH)
  domain = c(8.0, 8.2),  # Adjust based on your pH range
  reverse = TRUE  # So blue = high, red = low
)

# Create a map with separate layers for each year
leaflet(combined_data) %>%
  addProviderTiles("CartoDB.Positron") %>%
  
# Layer for 1995
  addCircleMarkers(
    data = combined_data,
    lng = ~long,
    lat = ~lat,
    radius = 8,
    color = ~pal(`1995`),  # Color by 1995 pH
    popup = paste(
      "<strong>1995 pH:</strong>", combined_data$`1995`, "<br>",
      "<strong>Location:</strong>", combined_data$name
    ),
    group = "1995"
  ) %>%
  
  # Layer for 2000
  addCircleMarkers(
    data = combined_data,
    lng = ~long,
    lat = ~lat,
    radius = 8,
    color = ~pal(`2000`),  # Color by 2000 pH
    popup = paste(
      "<strong>2000 pH:</strong>", combined_data$`2000`, "<br>",
      "<strong>Location:</strong>", combined_data$name
    ),
    group = "2000"
  ) %>%
  
  # Layer for 2005
  addCircleMarkers(
    data = combined_data,
    lng = ~long,
    lat = ~lat,
    radius = 8,
    color = ~pal(`2005`),  # Color by 2005 pH
    popup = paste(
      "<strong>2005 pH:</strong>", combined_data$`2005`, "<br>",
      "<strong>Location:</strong>", combined_data$name
    ),
    group = "2005"
  ) %>%
  
  # Layer for 2009
  addCircleMarkers(
    data = combined_data,
    lng = ~long,
    lat = ~lat,
    radius = 8,
    color = ~pal(`2009`),  # Color by 2009 pH
    popup = paste(
      "<strong>2009 pH:</strong>", combined_data$`2009`, "<br>",
      "<strong>Location:</strong>", combined_data$name
    ),
    group = "2009"
  ) %>%
  
  # Add layer control
  addLayersControl(
    overlayGroups = c("1995", "2000", "2005", "2009"),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  
  # Add legend
  addLegend(
    pal = pal,
    values = c(8.0, 8.2),  # Adjust to your pH range
    title = "pH (More Acidic = More Blue)",
    position = "bottomright"
  ) %>%
  
  # Set view
  setView(lng = -77.484421, lat = -11.844999, zoom = 2)