library(groundhog)
set.groundhog.folder("C:\\Groundhog_R\\R4.4.1_2024-05-14")
GroundhogDay <- '2024-05-14'
groundhog.library("MASS",GroundhogDay)
groundhog.library("tidyverse",GroundhogDay)
groundhog.library("vegan",GroundhogDay)
groundhog.library("ade4",GroundhogDay)
groundhog.library("wesanderson",GroundhogDay)
groundhog.library("ggiraph",GroundhogDay)
groundhog.library("sf",GroundhogDay)
groundhog.library("ggspatial",GroundhogDay)
groundhog.library("scales",GroundhogDay)
# Get the data
setwd("D:\\Dropbox\\100_PROJECTS\\2025_AlphaEarth_FenceLineContrasts")
read_csv("Embeddings_2017_2024_FLC_paired (1).csv")->dat
dat%>%
mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)->
dat
dat %>%
mutate(.before=A00,
tooltip = sprintf("%s\n(%.5f, %.5f)", PointName, latitude, longitude),
# Open Google Maps centered on the point at zoom level 16
onclick = sprintf(
'window.open("https://www.google.com/maps?q=%s@%f,%f")',
utils::URLencode(PointName, reserved = TRUE), latitude, longitude))%>%
mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)->
dat
The aim of this analysis is to map the degree of thicket degradation across the landscape. To do this, we plotted over 200 fenceline contrasts, where one side retains relatively intact thicket and the other side has experienced clear degradation. Importantly, we never treat the “intact” side as truly pristine—rather, it represents a working baseline. This is advantageous: if these “intact” sites fall somewhere between degraded thicket and pristine reference thicket, it indicates that our approach is capturing a realistic continuum of degradation.
For environmental predictors, we use the AlphaEarth channels. These are derived from Google’s AlphaEarth embeddings, which are produced by a deep learning model trained on multi-spectral satellite imagery. The channels have been pre-processed to remove noise and encode broad environmental patterns such as land cover, vegetation structure, soil characteristics, and seasonal dynamics. In effect, they act as a compressed but information-rich representation of the raw satellite data, enabling us to work with major ecological gradients without the need for heavy pre-processing.
We began with a Principal Components Analysis (PCA), chosen for its simplicity and interpretability. The results were striking:
Degraded thicket and relatively intact thicket separate strongly along the PCA axis.
Semi-degraded or marginal sites fall in intermediate positions, exactly as expected for vegetation in transition.
This clear separation demonstrates that the AlphaEarth channels are sensitive to degradation state, and that PCA captures the continuum from intact → semi-degraded → degraded thicket.
The PCA plot is shown below. Each point represents a fenceline site, and points are clickable: selecting a point opens the corresponding location in Google Maps, allowing direct inspection of the site in its spatial context.
# Generate the PCA
dat%>%
select(-Type,-PointName,-year,-latitude,-longitude,-tooltip,-onclick)%>%
#select(1:10)%>%
dudi.pca(nf=3, scannf = FALSE)->
pca
var_explained <- pca$eig / sum(pca$eig) * 100
# labels for the first two axes
xlab <- sprintf("Axis 1 (%.1f%%)", var_explained[1])
ylab <- sprintf("Axis 2 (%.1f%%)", var_explained[2])
# dat%>%
# select(-PointName,-year,-latitude,-longitude)%>%
# select(1:10)%>%
# metaMDS(try = 999)->
# nmds
# pca$li%>%
# as_tibble()%>%
# bind_cols(dat%>%select(1:6))%>%
# ggplot(aes(Axis1,Axis2,colour=Type))+
# geom_point()+
# facet_wrap(~year)
# pca$li%>%
# as_tibble()%>%
# bind_cols(dat%>%select(1:6))%>%
# ggplot(aes(Axis1,Axis2,group=paste0(PointName),colour=Type))+
# geom_line()+
# theme(legend.position="none")
#
#
# pca$li%>%
# as_tibble()%>%
# bind_cols(dat%>%select(1:4))%>%
# filter(year%in%c(2020,2024))%>%
# ggplot(aes(Axis1,Axis2,group=paste0(PointName),colour=Type))+
# geom_line()+
# theme(legend.position="none")
# facet_wrap(~year)
#
#
# pca$li%>%
# as_tibble()%>%
# bind_cols(dat%>%select(1:4))%>%
# filter(year%in%c(2020))
#
#
# pal <- wes_palette("Zissou1", 100, type = "continuous")
# pca$li%>%
# as_tibble()%>%
# bind_cols(dat%>%select(1:5))%>%
# filter(year%in%c(2020))%>%
# ggplot(aes(Axis1,Axis2,group=paste0(PointName),shape=Type,colour=longitude))+
# geom_point(size=3)+
# scale_colour_gradientn(colours = pal)
# theme(legend.position="none")
(pca$li%>%
as_tibble()%>%
bind_cols(dat%>%select(1:7))%>%
mutate(PointPair=substr(PointName,nchar(PointName)-2,nchar(PointName)))->tutu)%>%
filter(year%in%c(2020))%>%
ggplot(aes(Axis1,Axis2,group=paste0(PointName)))+
labs(x = xlab, y = ylab)+
geom_point(data=tutu,aes(Axis1,Axis2),size=1,colour="darkgrey")+
geom_line(aes(group=PointPair))+
#geom_text(aes(colour=Type,label=PointPair),size=2)+
geom_point_interactive(
aes(Axis1,Axis2,tooltip = tooltip, data_id = PointName, onclick = onclick,colour=Type),
size = 3
)+coord_equal()->p
girafe(ggobj = p)
PCA (across all years) of AlphaEarth Channels across a fenceline contrast for the year 2020 (height of the drought). Dark grey points represent all localites across all of the years
# dat%>%
# filter(year==2017)%>%
# mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)%>%
# ggplot(aes(Type,A05))+
# geom_point()+
# geom_violin()
#
# dat%>%
# mutate(Type=ifelse(grepl("ITT",PointName),"Intact","Degraded"),.before=PointName)%>%
# pivot_longer(cols=8:71,names_to="channel",values_to="value")%>%
# ggplot(aes(Type,value))+
# geom_violin()+
# facet_wrap(~channel)
# variance explained by each axis
pca$li%>%
as_tibble()%>%
bind_cols(dat%>%select(1:7))%>%
mutate(PointPair=substr(PointName,nchar(PointName)-2,nchar(PointName)))%>%
filter(year%in%c(2024))%>%
ggplot(aes(Axis1,Axis2,group=paste0(PointName)))+
labs(x = xlab, y = ylab)+
geom_line(aes(group=PointPair))+
geom_point(size=4,colour="white")+
#geom_text(aes(colour=Type,label=PointPair),size=2)+
geom_point_interactive(
aes(Axis1,Axis2,tooltip = tooltip, data_id = PointName, onclick = onclick,colour=Type),
size = 3
)->p
girafe(ggobj = p)
PCA (across all years) of AlphaEarth Channels across a fenceline contrast for the year 2024 (post-drought)
An interesting observation is that sites with higher PCA scores (on the main degradation axis) correspond to south-facing slopes. This indicates that the ordination is capturing not only differences between intact, semi-degraded, and degraded thicket, but also underlying topographic influences on degradation. South-facing slopes in this region typically have cooler microclimates, reduced solar radiation, and different soil moisture regimes, which may influence both the degree of vegetation persistence and the trajectory of degradation.
FLC005 appears to be in the wrong place - it is in the intact thicket.
pca$li%>%
as_tibble()%>%
bind_cols(dat%>%select(1:4))%>%
filter(year%in%c(2020,2024))%>%
ggplot(aes(Axis1,Axis2,group=paste0(PointName),colour=Type))+
geom_line()
PCA (across all years) of AlphaEarth Channels across a fenceline contrast linking the same locations at year 2020 and 2024
#theme(legend.position="none")
scores <- pca$li %>%
as_tibble() %>%
rename(PC1 = Axis1, PC2 = Axis2)
var_exp <- pca$eig / sum(pca$eig) * 100
pc1_lab <- sprintf("PC1 (%.1f%%)", var_exp[1])
df <- dat %>%
select(PointName,lon = longitude, lat = latitude, year,Type,onclick,tooltip) %>%
bind_cols(scores)%>%
pivot_longer(cols=c(PC1,PC2),names_to="pcaxes",values_to="pcvalues")
lon_range <- range(df$lon, na.rm = TRUE) + c(-0.2, 0.2) # add margin
lat_range <- range(df$lat, na.rm = TRUE) + c(-0.2, 0.2)
df%>%
filter(year==2020)%>%
ggplot(aes(lon, lat, colour = pcval)) +
borders("world", colour = "grey70") +
coord_quickmap(xlim = lon_range, ylim = lat_range) +
geom_point_interactive(
aes(colour = pcvalues, tooltip = tooltip, data_id = PointName, onclick = onclick),
size = 2, alpha = 0.8
)+
scale_colour_gradientn(
colours = rev(c("darkgreen", "lightgreen", "white", "orange", "red", "darkred")),
values = scales::rescale(c(min(df$pcvalues), quantile(df$pcvalues, 0.1),
median(df$pcvalues),
quantile(df$pcvalues, 0.9), max(df$pcvalues))))+
facet_grid(Type~pcaxes)->pp
girafe(ggobj = pp)
Spatial distribution of the PC1 and PC2 axes for 2020
Spatial distribution of the PC1 and PC2 axes for 2024
# Averaging PCA scores across years per point
dat %>%
select(PointName,Type,latitude,longitude,year)%>%
bind_cols(scores)%>%
group_by(PointName,Type,latitude,longitude)%>%
summarise(PC1=mean(PC1),PC2=mean(PC2))->
df
lda_fit <- lda(Type ~ PC1 + PC2, data = df)
# Project the sites onto the LDA axis
lda_scores <- predict(lda_fit)$x[,1]
df$LDA1 <- lda_scores
# Identify extremes
best_intact <- df %>%
filter(Type == "Intact") %>%
arrange(desc(LDA1)) %>%
slice_head(n = 10)
worst_degraded <- df %>%
filter(Type == "Degraded") %>%
arrange(LDA1) %>%
slice_head(n = 10)
# df%>%
# ggplot(aes(LDA1,fill=Type))+geom_histogram()
# So going to take points above 2 and below -2
df%>%
mutate(train_intact=ifelse(LDA1>1.5,TRUE,FALSE),
train_degraded=ifelse(LDA1<(-1.5),TRUE,FALSE))->
df_final
df_final%>%
ggplot()+
geom_point(aes(PC1,PC2,colour=Type))+
geom_point(data=df_final%>%filter(train_intact),aes(PC1,PC2),colour="blue",size=2)+
geom_point(data=df_final%>%filter(train_degraded),aes(PC1,PC2),colour="red",size=2)
#df_final%>%write.csv("Intact.vs.degraded.endpoints.csv")
Below is the distribution of the points - unfortunately the distribution is too biased!
ggplot() +
borders("world", colour = "grey70") +
coord_quickmap(xlim = lon_range, ylim = lat_range) +
geom_point(data=df_final%>%filter(train_intact),aes(longitude, latitude),colour="green")+
geom_point(data=df_final%>%filter(train_degraded),aes(longitude, latitude),colour="red")
Moving onto finding pairs where the intact is >1 and degraded < -1 along the LDA.
df_final%>%
mutate(Name2=substr(PointName,nchar(PointName)-2,nchar(PointName)))->
toto
toto%>%
group_by(Name2)%>%summarise(n=n())%>%filter(n==2)%>%
select(Name2)%>%
left_join(toto)%>%
select(Name2,Type,LDA1)%>%
filter(Name2!="081")%>%#Something up with this point
pivot_wider(values_from = LDA1,names_from=Type)->
tyty
tyty%>%
filter(Intact>1,Degraded<(-1))%>%
left_join(toto,by="Name2")->
tete
toto%>%
ggplot(aes(PC1,PC2))+
geom_point()+
geom_line(data=tete,aes(PC1,PC2,group=Name2))+
geom_point(data=tete,aes(PC1,PC2,colour=Type),size=2)
ggplot() +
borders("world", colour = "grey70") +
coord_quickmap(xlim = lon_range, ylim = lat_range) +
geom_point(data=tete%>%filter(train_intact),aes(longitude, latitude),colour="black")