#update

# Task 1 ----

 # Read data files
# Attach the packages used by tasks 1 and 2 before their first use: the
# pipelines and the density map below call dplyr, ggplot2 and ggrepel
# functions, but the script's other library() calls only appear further down.
library(dplyr)
library(ggplot2)
library(ggrepel)

# Input tables, read from the working directory
Coastal_cities_capstone <- read.csv("coast_100000.csv")
microplastic_data_capstone <- read.csv("Marine Microplastic Concentrations.csv")
# NOTE(review): absolute, user-specific path -- this line fails on any other
# machine. `data` also shares its name with utils::data() and is not used in
# the visible part of the script; confirm whether it is still needed.
data <- read.csv("C:/Users/dwvil/Documents/SPRING 2025/CAPSTONE/Microplastic maps/Capstone Edit_AOMI_grid_data.csv")

# Cities whose pop_max exceeds 10 million. lon/lat are copies of the
# coordinate columns under the names the plotting layers refer to.
coastal_megacities <- Coastal_cities_capstone %>%
  filter(pop_max > 10000000) %>%
  mutate(
    lon = longitude,
    lat = latitude
  )

# Task 2 ----

#NOAA BASED DENSITY MAP


# City table restricted to rows with complete coordinates and a pop_max
# above 10 million; `population` stores pop_max converted to numeric.
cities_clean <- Coastal_cities_capstone %>%
  filter(
    !(is.na(longitude) | is.na(latitude) | is.na(pop_max)),
    as.numeric(pop_max) > 10000000
  ) %>%
  mutate(population = as.numeric(pop_max))

# Clean microplastic data with MEASUREMEN focus
# MEASUREMEN is coerced to numeric *before* it is compared with 0, so a column
# read as text is not compared as strings, and values that fail to parse
# (NA after coercion) are dropped instead of slipping past the filter.
microplastic_clean <- microplastic_data_capstone %>%
  mutate(MEASUREMEN = as.numeric(MEASUREMEN)) %>%
  filter(
    !is.na(Latitude),
    !is.na(Longitude),
    !is.na(MEASUREMEN),
    MEASUREMEN > 0
  ) %>%
  mutate(
    # Order-of-magnitude bins; cut() intervals are right-closed by default,
    # so the first bin is (0, 0.01].
    concentration_cat = cut(
      MEASUREMEN,
      breaks = c(0, 0.01, 0.1, 1, 10, 100, 1000, 10000, Inf),
      labels = c("0-0.01", "0.01-0.1", "0.1-1", "1-10",
                 "10-100", "100-1k", "1k-10k", "10k+")
    ),
    # NOTE(review): as.Date() is called without a `format`; confirm the Date
    # column is stored as YYYY-MM-DD (or YYYY/MM/DD), otherwise the parsed
    # year can be wrong or the call can fail.
    year = as.numeric(format(as.Date(Date), "%Y"))
  )


















# Country outlines used as the backdrop of the density map
world_map <- map_data("world")

# Cities that receive a text label: the 15 largest by pop_max
noaa_label_cities <- coastal_megacities %>%
  arrange(-pop_max) %>%
  head(15)

# Caption text reporting the observed concentration range
noaa_measure_range <- range(microplastic_clean$MEASUREMEN, na.rm = TRUE)
noaa_caption <- paste(
  "Data range:",
  format(noaa_measure_range[1], scientific = FALSE, digits = 3), "-",
  format(noaa_measure_range[2], scientific = FALSE, big.mark = ","), "particles/m³\n",
  "Log10 scale shows exponential concentration differences",
  "\nCities: Top 15 by population"
)

# NOAA density map: grey land polygons, measurements coloured and sized by
# concentration (log10 colour scale), hollow circles for the megacities.
ggplot() +
  # Land masses
  geom_polygon(
    data = world_map,
    mapping = aes(x = long, y = lat, group = group),
    fill = "lightgray",
    color = "white",
    linewidth = 0.1
  ) +
  # One point per measurement
  geom_point(
    data = microplastic_clean,
    mapping = aes(x = Longitude, y = Latitude,
                  color = MEASUREMEN, size = MEASUREMEN),
    shape = 16,
    alpha = 0.7
  ) +
  # Megacity markers (outline only)
  geom_point(
    data = coastal_megacities,
    mapping = aes(x = lon, y = lat),
    shape = 21,
    size = 3,
    stroke = 0.8,
    color = "black",
    fill = NA
  ) +
  # Names of the labelled cities
  geom_text_repel(
    data = noaa_label_cities,
    mapping = aes(x = lon, y = lat, label = nameascii),
    size = 3,
    force = 0.1,
    box.padding = 0.3,
    min.segment.length = 0.1,
    segment.color = "grey40"
  ) +
  # Blue-to-red gradient on a log10 axis; values outside `limits` are not
  # given a gradient colour
  scale_color_gradientn(
    name = expression(bold("Microplastics (Particles/m³)")),
    colors = c("#4575b4", "#74add1", "#e0f3f8", "#fee090", "#f46d43", "#d73027"),
    trans = "log10",
    limits = c(0.000676, 8e5),
    breaks = c(0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 1e5),
    labels = c("0.001", "0.01", "0.1", "1", "10", "100", "1k", "10k", "100k")
  ) +
  # Point size follows concentration but gets no legend of its own
  scale_size_continuous(range = c(1, 12), guide = "none") +
  # Whole-globe extent with a fixed aspect ratio
  coord_fixed(ratio = 1.3, xlim = c(-180, 180), ylim = c(-90, 90)) +
  labs(
    title = "Global Microplastic Density (NOAA)",
    subtitle = "Coastal megacities (>10M population) marked",
    caption = noaa_caption,
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, margin = margin(b = 10)),
    plot.caption = element_text(face = "italic", hjust = 0.5),
    panel.background = element_rect(fill = "aliceblue"),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    legend.key.height = unit(1.5, "cm")
  )

# Task 3 ----

#NOAA
library(sf)
library(spdep)
## Warning: package 'spdep' was built under R version 4.4.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.4.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(dplyr)
library(ggrepel)
library(rnaturalearth)
## Warning: package 'rnaturalearth' was built under R version 4.4.3
# 1. Prepare microplastic data
# st_as_sf() stops with an error when a coordinate is missing, so rows
# without a longitude or latitude are removed before the conversion.
microplastic_sf <- microplastic_data_capstone %>%
  filter(!is.na(Longitude), !is.na(Latitude)) %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)

# Remove NA values and filter positive measurements.
# This replaces the earlier data-frame version of microplastic_clean with an
# sf point layer (coordinates now live in the geometry column).
microplastic_clean <- microplastic_sf %>%
  filter(!is.na(MEASUREMEN), MEASUREMEN > 0)

# 2. Perform hotspot analysis (local G statistic on 8 nearest neighbours)
coords <- st_coordinates(microplastic_clean)

# The coordinates are longitude/latitude degrees (crs 4326). longlat = TRUE
# makes the neighbour search use great-circle distances instead of Euclidean
# distances in degrees, which distort with latitude and break at +/-180.
# knearneigh() warns when the data contain identical points.
knn_neighbors <- knn2nb(knearneigh(coords, k = 8, longlat = TRUE))

# localG() returns Gi* only when every point belongs to its own
# neighbourhood; without include.self() it returns the plain Gi statistic.
# style = "B" gives binary (0/1) weights.
weights <- nb2listw(include.self(knn_neighbors), style = "B")
gi_star <- localG(microplastic_clean$MEASUREMEN, weights)

# 3. Label every sample by the sign and magnitude of its local G z-score,
#    then keep only the samples classed as hotspot or coldspot
significant_hotspots <- microplastic_clean %>%
  mutate(
    gi_score = as.numeric(gi_star),
    # Two-sided p-value of the z-score
    p_value = 2 * pnorm(abs(gi_score), lower.tail = FALSE),
    # Direction of the cluster at the 5% level
    hotspot = case_when(
      p_value < 0.05 & gi_score > 1.96 ~ "Hotspot (High)",
      p_value < 0.05 & gi_score < -1.96 ~ "Coldspot (Low)",
      TRUE ~ "Not Significant"
    ),
    # |z| above 2.58 is reported as 99%, everything else as 95%
    confidence = if_else(
      abs(gi_score) > 2.58,
      "99% Confidence",
      "95% Confidence",
      missing = "95% Confidence"
    ),
    # Combined key used for colouring and legends
    hotspot_conf = paste(hotspot, confidence, sep = " ")
  ) %>%
  filter(hotspot %in% c("Hotspot (High)", "Coldspot (Low)"))

# 4. Prepare coastal cities data
# Cities with pop_max above 10 million as an sf point layer (crs 4326).
# Rows without coordinates are dropped first, because st_as_sf() stops on
# missing coordinates. lon/lat are copied before the conversion, which
# removes the original coordinate columns; the text-label layer reads them.
coastal_megacities <- Coastal_cities_capstone %>%
  filter(
    !is.na(longitude),
    !is.na(latitude),
    pop_max > 10000000
  ) %>%
  mutate(
    lon = longitude,
    lat = latitude
  ) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# 5. Define color scheme
# Colours keyed by the hotspot_conf values produced in the classification
hotspot_colors <- c(
  "Hotspot (High) 99% Confidence" = "#8B0000",  # Dark red
  "Hotspot (High) 95% Confidence" = "#FF0000",  # Bright red
  "Coldspot (Low) 99% Confidence" = "#00008B",  # Dark blue
  "Coldspot (Low) 95% Confidence" = "#0000FF"   # Bright blue
)

# Legend text keyed by the same hotspot_conf values. The labels are matched
# to categories by name: an unnamed label vector is applied positionally to
# the breaks, which by default follow the sorted data values (coldspots
# first), so the legend text ended up on the wrong colours and the scale
# failed when fewer than four categories were present.
hotspot_labels <- c(
  "Hotspot (High) 99% Confidence" = "High Density (99% Conf)",
  "Hotspot (High) 95% Confidence" = "High Density (95% Conf)",
  "Coldspot (Low) 99% Confidence" = "Low Density (99% Conf)",
  "Coldspot (Low) 95% Confidence" = "Low Density (95% Conf)"
)

# 6. Create the plot
ggplot() +
  # Base world map with brown continents
  geom_sf(data = ne_countries(scale = "medium", returnclass = "sf"),
          fill = "#d2b48c", color = "white", linewidth = 0.2) +

  # Significant hotspots
  geom_sf(
    data = significant_hotspots,
    aes(color = hotspot_conf),
    size = 3,
    shape = 16
  ) +

  # Coastal megacities (hollow circles)
  geom_sf(
    data = coastal_megacities,
    color = "black",
    fill = NA,
    shape = 21,
    size = 3,
    stroke = 0.8,
    show.legend = FALSE
  ) +

  # City labels (top 15 by population)
  geom_text_repel(
    data = coastal_megacities %>% arrange(-pop_max) %>% head(15),
    aes(x = lon, y = lat, label = nameascii),
    size = 3.2,
    force = 0.15,
    min.segment.length = 0.15,
    segment.color = "grey40"
  ) +

  # Color scale: explicit breaks fix the legend order (hotspots first) and
  # the named labels keep each text attached to its own category
  scale_color_manual(
    values = hotspot_colors,
    breaks = names(hotspot_labels),
    labels = hotspot_labels,
    name = "Hotspot Significance"
  ) +

  # Map labels
  labs(
    title = "Statistically Significant Microplastic Hotspots near Coastal Megacities (NOAA)",
    subtitle = "Showing statistically significant clusters (95% and 99% confidence)",
    x = "Longitude",
    y = "Latitude",
    caption = paste("NOAA Marine Microplastics Data |",
                    nrow(significant_hotspots), "significant locations shown")
  ) +

  theme_minimal() +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title = element_text(face = "bold")
  )

# Task 4 ----

library(leaflet)
## Warning: package 'leaflet' was built under R version 4.4.3
library(sf)

# Prepare the data ---------------------------------------------------------

# Flatten an sf point layer into a plain data frame: the layer's columns
# followed by the coordinate columns returned by st_coordinates() (the map
# layers below read them as X and Y).
sf_points_to_df <- function(layer) {
  attributes_df <- as.data.frame(layer)
  coordinates_df <- as.data.frame(st_coordinates(layer))
  cbind(attributes_df, coordinates_df)
}

# One flat table per map layer
microplastic_df <- sf_points_to_df(microplastic_clean)
significant_hotspots_df <- sf_points_to_df(significant_hotspots)
coastal_megacities_df <- sf_points_to_df(coastal_megacities)

# Create color palettes ---------------------------------------------------

# For microplastic measurements: continuous palette over log10(MEASUREMEN);
# values that cannot be mapped are drawn transparent
mp_pal <- colorNumeric(
  palette = c("#4575b4", "#74add1", "#e0f3f8", "#fee090", "#f46d43", "#d73027"),
  domain = log10(microplastic_df$MEASUREMEN),
  na.color = "transparent"
)

# For hotspots (categorical).
# The levels are listed explicitly, in the same order as the palette.
# colorFactor() otherwise assigns colours to the sorted values of the domain
# ("Coldspot ..." before "Hotspot ..."), which painted coldspots red and
# hotspots blue, and shifted colours whenever a category was absent.
hotspot_pal <- colorFactor(
  palette = c("#8B0000", "#FF0000", "#00008B", "#0000FF"),
  domain = NULL,
  levels = c(
    "Hotspot (High) 99% Confidence",  # dark red
    "Hotspot (High) 95% Confidence",  # bright red
    "Coldspot (Low) 99% Confidence",  # dark blue
    "Coldspot (Low) 95% Confidence"   # bright blue
  )
)

# Create the map ----------------------------------------------------------

# Basemap
combined_map <- addProviderTiles(leaflet(), providers$CartoDB.Positron)

# Layer: every microplastic measurement, coloured by log10 concentration
combined_map <- addCircleMarkers(
  combined_map,
  data = microplastic_df,
  lng = ~X, lat = ~Y,
  radius = 4,
  color = ~mp_pal(log10(MEASUREMEN)),
  fillOpacity = 0.6,
  stroke = FALSE,
  popup = ~paste0(
    "<b>Microplastics:</b> ", round(MEASUREMEN, 2), " particles/m³<br>",
    "<b>Location:</b> ", round(X, 4), ", ", round(Y, 4)
  ),
  group = "Microplastic Measurements"
)

# Layer: statistically significant hotspots and coldspots (larger markers)
combined_map <- addCircleMarkers(
  combined_map,
  data = significant_hotspots_df,
  lng = ~X, lat = ~Y,
  radius = 8,
  color = ~hotspot_pal(hotspot_conf),
  fillOpacity = 0.8,
  stroke = TRUE,
  weight = 1,
  popup = ~paste0(
    "<b>Hotspot Concentration:</b> ", round(MEASUREMEN, 2), "<br>",
    "<b>Significance:</b> ", hotspot_conf, "<br>",
    "<b>Coordinates:</b> ", round(X, 4), ", ", round(Y, 4)
  ),
  group = "Significant Hotspots"
)

# Layer: coastal megacities (white fill, black outline)
combined_map <- addCircleMarkers(
  combined_map,
  data = coastal_megacities_df,
  lng = ~X, lat = ~Y,
  radius = 5,
  color = "black",
  fillColor = "white",
  fillOpacity = 0.8,
  weight = 1.5,
  popup = ~paste0(
    "<b>City:</b> ", nameascii, "<br>",
    "<b>Population:</b> ", format(pop_max, big.mark = ",")
  ),
  group = "Coastal Megacities"
)

# Legend for the concentration palette; tick labels are converted back from
# log10 to the original scale
combined_map <- addLegend(
  combined_map,
  position = "bottomright",
  pal = mp_pal,
  values = log10(microplastic_df$MEASUREMEN),
  title = "Log10 Microplastics (particles/m³)",
  labFormat = labelFormat(transform = function(x) round(10^x, 2)),
  opacity = 1,
  group = "Microplastic Measurements"
)

# Legend for the hotspot categories
combined_map <- addLegend(
  combined_map,
  position = "bottomleft",
  pal = hotspot_pal,
  values = significant_hotspots_df$hotspot_conf,
  title = "Hotspot Significance",
  opacity = 1,
  group = "Significant Hotspots"
)

# Layer toggles, kept expanded
combined_map <- addLayersControl(
  combined_map,
  overlayGroups = c("Microplastic Measurements", "Significant Hotspots", "Coastal Megacities"),
  options = layersControlOptions(collapsed = FALSE)
)

# Initial view and scale bar
combined_map <- setView(combined_map, lng = 0, lat = 30, zoom = 2)
combined_map <- addScaleBar(combined_map, position = "bottomleft")

# Display the map
combined_map