library(groundhog)
set.groundhog.folder("C:\\Groundhog_R\\R4.4.1_2024-05-14")
GroundhogDay <- '2024-05-14'

groundhog.library("MASS",GroundhogDay)
groundhog.library("tidyverse",GroundhogDay)
groundhog.library("vegan",GroundhogDay)
groundhog.library("ade4",GroundhogDay)
groundhog.library("wesanderson",GroundhogDay)
groundhog.library("ggiraph",GroundhogDay)
groundhog.library("sf",GroundhogDay)
groundhog.library("ggspatial",GroundhogDay)
groundhog.library("scales",GroundhogDay)
# Get the data
setwd("D:\\Dropbox\\100_PROJECTS\\2025_AlphaEarth_FenceLineContrasts")
read_csv("Embeddings_2017_2024_FLC_paired (1).csv")->dat

dat%>%
  mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)->
  dat

dat %>%
  mutate(.before=A00,
    tooltip = sprintf("%s\n(%.5f, %.5f)", PointName, latitude, longitude),
    # Open Google Maps centered on the point at zoom level 16
    onclick = sprintf(
  'window.open("https://www.google.com/maps?q=%s@%f,%f")',
  utils::URLencode(PointName, reserved = TRUE), latitude, longitude))%>%
  mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)->
  dat

Mapping Thicket Degradation

The aim of this analysis is to map the degree of thicket degradation across the landscape. To do this, we plotted over 200 fenceline contrasts, where one side retains relatively intact thicket and the other side has experienced clear degradation. Importantly, we never treat the “intact” side as truly pristine—rather, it represents a working baseline. This is advantageous: if these “intact” sites fall somewhere between degraded thicket and pristine reference thicket, it indicates that our approach is capturing a realistic continuum of degradation.

Use of AlphaEarth Channels

For environmental predictors, we use the AlphaEarth channels. These are derived from Google’s AlphaEarth embeddings, which are produced by a deep learning model trained on multi-spectral satellite imagery. The channels have been pre-processed to remove noise and encode broad environmental patterns such as land cover, vegetation structure, soil characteristics, and seasonal dynamics. In effect, they act as a compressed but information-rich representation of the raw satellite data, enabling us to work with major ecological gradients without the need for heavy pre-processing.

Ordination Approach

We began with a Principal Components Analysis (PCA), chosen for its simplicity and interpretability. The results were striking:

Degraded thicket and relatively intact thicket separate strongly along the PCA axis.

Semi-degraded or marginal sites fall in intermediate positions, exactly as expected for vegetation in transition.

This clear separation demonstrates that the AlphaEarth channels are sensitive to degradation state, and that PCA captures the continuum from intact → semi-degraded → degraded thicket.

Interactive PCA

The PCA plot is shown below. Each point represents a fenceline site, and points are clickable: selecting a point opens the corresponding location in Google Maps, allowing direct inspection of the site in its spatial context.

# Generate the PCA
dat%>%
  select(-Type,-PointName,-year,-latitude,-longitude,-tooltip,-onclick)%>%
  #select(1:10)%>%
  dudi.pca(nf=3, scannf = FALSE)->
  pca

var_explained <- pca$eig / sum(pca$eig) * 100

# labels for the first two axes
xlab <- sprintf("Axis 1 (%.1f%%)", var_explained[1])
ylab <- sprintf("Axis 2 (%.1f%%)", var_explained[2])  
# dat%>%
#   select(-PointName,-year,-latitude,-longitude)%>%
#   select(1:10)%>%
#   metaMDS(try = 999)->
#   nmds

# pca$li%>%
#   as_tibble()%>%
#   bind_cols(dat%>%select(1:6))%>%
#   ggplot(aes(Axis1,Axis2,colour=Type))+
#   geom_point()+
#   facet_wrap(~year)

# pca$li%>%
#   as_tibble()%>%
#   bind_cols(dat%>%select(1:6))%>%
#   ggplot(aes(Axis1,Axis2,group=paste0(PointName),colour=Type))+
#   geom_line()+
#   theme(legend.position="none")
# 
# 
# pca$li%>%
#   as_tibble()%>%
#   bind_cols(dat%>%select(1:4))%>%
#   filter(year%in%c(2020,2024))%>%
#   ggplot(aes(Axis1,Axis2,group=paste0(PointName),colour=Type))+
#   geom_line()+
#   theme(legend.position="none")
#   facet_wrap(~year)
# 
# 
# pca$li%>%
#   as_tibble()%>%
#   bind_cols(dat%>%select(1:4))%>%
#   filter(year%in%c(2020))
#   
#   
# pal <- wes_palette("Zissou1", 100, type = "continuous")
# pca$li%>%
#   as_tibble()%>%
#   bind_cols(dat%>%select(1:5))%>%
#   filter(year%in%c(2020))%>%
#   ggplot(aes(Axis1,Axis2,group=paste0(PointName),shape=Type,colour=longitude))+
#   geom_point(size=3)+
#   scale_colour_gradientn(colours = pal)
#   theme(legend.position="none")

  
(pca$li%>%
  as_tibble()%>%
  bind_cols(dat%>%select(1:7))%>%
  mutate(PointPair=substr(PointName,nchar(PointName)-2,nchar(PointName)))->tutu)%>%
  filter(year%in%c(2020))%>%
  ggplot(aes(Axis1,Axis2,group=paste0(PointName)))+
  labs(x = xlab, y = ylab)+
  geom_point(data=tutu,aes(Axis1,Axis2),size=1,colour="darkgrey")+
  geom_line(aes(group=PointPair))+
  #geom_text(aes(colour=Type,label=PointPair),size=2)+
  geom_point_interactive(
    aes(Axis1,Axis2,tooltip = tooltip, data_id = PointName, onclick = onclick,colour=Type),
    size = 3
  )+coord_equal()->p
  
girafe(ggobj = p)  

PCA (across all years) of AlphaEarth Channels across a fenceline contrast for the year 2020 (height of the drought). Dark grey points represent all localites across all of the years

# dat%>%
#   filter(year==2017)%>%
#   mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)%>%
#   ggplot(aes(Type,A05))+
#   geom_point()+
#   geom_violin()
# 
# dat%>%
#   mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)%>%
#   pivot_longer(cols=8:71,names_to="channel",values_to="value")%>%
#   ggplot(aes(Type,value))+
#   geom_violin()+
#   facet_wrap(~channel)
# variance explained by each axis
pca$li%>%
  as_tibble()%>%
  bind_cols(dat%>%select(1:7))%>%
  mutate(PointPair=substr(PointName,nchar(PointName)-2,nchar(PointName)))%>%
  filter(year%in%c(2024))%>%
  ggplot(aes(Axis1,Axis2,group=paste0(PointName)))+
  labs(x = xlab, y = ylab)+
  geom_line(aes(group=PointPair))+
  geom_point(size=4,colour="white")+
  #geom_text(aes(colour=Type,label=PointPair),size=2)+
  geom_point_interactive(
    aes(Axis1,Axis2,tooltip = tooltip, data_id = PointName, onclick = onclick,colour=Type),
    size = 3
  )->p
  
girafe(ggobj = p)  

PCA (across all years) of AlphaEarth Channels across a fenceline contrast for the year 2024 (post-drought)

pca$li%>%
  as_tibble()%>%
  bind_cols(dat%>%select(1:4))%>%
  filter(year%in%c(2020,2024))%>%
  ggplot(aes(Axis1,Axis2,group=paste0(PointName),colour=Type))+
  geom_line()
PCA (across all years) of AlphaEarth Channels across a fenceline contrast linking the same locations at year 2020 and 2024

PCA (across all years) of AlphaEarth Channels across a fenceline contrast linking the same locations at year 2020 and 2024

  #theme(legend.position="none")
scores <- pca$li %>%
  as_tibble() %>%
  rename(PC1 = Axis1, PC2 = Axis2)

var_exp <- pca$eig / sum(pca$eig) * 100
pc1_lab <- sprintf("PC1 (%.1f%%)", var_exp[1])

df <- dat %>%
  select(PointName,lon = longitude, lat = latitude, year,Type,onclick,tooltip) %>%
  bind_cols(scores)%>%
  pivot_longer(cols=c(PC1,PC2),names_to="pcaxes",values_to="pcvalues")

lon_range <- range(df$lon, na.rm = TRUE) + c(-0.2, 0.2)  # add margin
lat_range <- range(df$lat, na.rm = TRUE) + c(-0.2, 0.2)  

df%>%
  filter(year==2020)%>%
  ggplot(aes(lon, lat, colour = pcval)) + 
  borders("world", colour = "grey70") + 
  coord_quickmap(xlim = lon_range, ylim = lat_range) +
   geom_point_interactive(
    aes(colour = pcvalues, tooltip = tooltip, data_id = PointName, onclick = onclick),
    size = 2, alpha = 0.8
  )+
  scale_colour_gradientn(
  colours = rev(c("darkgreen", "lightgreen", "white", "orange", "red", "darkred")),
  values  = scales::rescale(c(min(df$pcvalues), quantile(df$pcvalues, 0.1),
                              median(df$pcvalues),
                              quantile(df$pcvalues, 0.9), max(df$pcvalues))))+
  facet_grid(Type~pcaxes)->pp
girafe(ggobj = pp)

Spatial distribution of the PC1 and PC2 axes for 2020

Spatial distribution of the PC1 and PC2 axes for 2024

Selecting the best intact and worst degraded sites using LDA

# Averaging PCA scores across years per point
dat %>%
  select(PointName,Type,latitude,longitude,year)%>%
  bind_cols(scores)%>%
  group_by(PointName,Type,latitude,longitude)%>%
  summarise(PC1=mean(PC1),PC2=mean(PC2))->
  df

lda_fit <- lda(Type ~ PC1 + PC2, data = df)
# Project the sites onto the LDA axis
lda_scores <- predict(lda_fit)$x[,1]
df$LDA1 <- lda_scores

# Identify extremes
best_intact <- df %>%
  filter(Type == "Intact") %>%
  arrange(desc(LDA1)) %>%
  slice_head(n = 10)

worst_degraded <- df %>%
  filter(Type == "Degraded") %>%
  arrange(LDA1) %>%
  slice_head(n = 10)

# df%>%
#   ggplot(aes(LDA1,fill=Type))+geom_histogram()
# So going to take points above 2 and below -2

df%>%
  mutate(train_intact=ifelse(LDA1>1.5,TRUE,FALSE),
         train_degraded=ifelse(LDA1<(-1.5),TRUE,FALSE))->
  df_final

df_final%>%
  ggplot()+
  geom_point(aes(PC1,PC2,colour=Type))+
  geom_point(data=df_final%>%filter(train_intact),aes(PC1,PC2),colour="blue",size=2)+
  geom_point(data=df_final%>%filter(train_degraded),aes(PC1,PC2),colour="red",size=2)

#df_final%>%write.csv("Intact.vs.degraded.endpoints.csv")

Below is the distribution of the points - unfortunately the distribution is too biased!

ggplot() + 
  borders("world", colour = "grey70") + 
  coord_quickmap(xlim = lon_range, ylim = lat_range) +
  geom_point(data=df_final%>%filter(train_intact),aes(longitude, latitude),colour="green")+
  geom_point(data=df_final%>%filter(train_degraded),aes(longitude, latitude),colour="red")

Attempt 2

Moving onto finding pairs where the intact is >1 and degraded < -1 along the LDA.

df_final%>%
  mutate(Name2=substr(PointName,nchar(PointName)-2,nchar(PointName)))->
  toto
toto%>%  
  group_by(Name2)%>%summarise(n=n())%>%filter(n==2)%>%
  select(Name2)%>%
  left_join(toto)%>%
  select(Name2,Type,LDA1)%>%
  filter(Name2!="081")%>%#Something up with this point
  pivot_wider(values_from = LDA1,names_from=Type)->
  tyty

tyty%>%
  filter(Intact>1,Degraded<(-1))%>%
  left_join(toto,by="Name2")->
  tete

toto%>%
  ggplot(aes(PC1,PC2))+
  geom_point()+
  geom_line(data=tete,aes(PC1,PC2,group=Name2))+
  geom_point(data=tete,aes(PC1,PC2,colour=Type),size=2)

ggplot() + 
  borders("world", colour = "grey70") + 
  coord_quickmap(xlim = lon_range, ylim = lat_range) +
  geom_point(data=tete%>%filter(train_intact),aes(longitude, latitude),colour="black")