Define Pseudo Observation Polygons

Find suitable area for defining pseudo observation polygons

In this document, we use the 14 GIA forward model solutions to find regions where the observations are certain and close to zero. The 14 GIA forward model solutions are given at 1 degree resolution with the mean trend and estimated errors over the period 2005-1015.

In the following we explore the data sets individually as well as in an ensemble approach.

0 Load useful packages and the data

library(ggplot2)
library(gridExtra)


### Load the 14 GIA data 
data_path <- "Z:/WP2-SolidEarth/GIAforwardModels/textfiles/"
file_names <- system(paste("ls", data_path), intern = TRUE)[1:14]
GIA_name <- unlist(strsplit(file_names, split = ".txt"))
GIA_priors <- list()
for (i in 1:14){
GIA_priors[[i]] <- GIA_temp <- read.table(paste0(data_path, file_names[i]), header = T)
}

1 Criteria based on statistical tests

Assume the GIA solutions are GIA plus Gaussian noise and the standard errors reflect the magnitude of the Gaussian noise. We define a point GIA value to be significantly deviated from zero if it is larger than 2 times the corresponding standard errors. Hence the “zero region” will be those with GIA values smaller than 2 times the standard errors. We plot such “zero region” for the 14 GIA solutions and their ensemble means and standard errors.

## setup base
world_map <- map_data("world2")
map_zero1 <- list()


## Individual data sets
for (i in 1:14){
  GIA_temp <- GIA_priors[[i]]
GIA_temp$zero <- abs(GIA_temp$trend) < GIA_temp$std * 2

map_temp1 <- ggplot(data=GIA_temp) + geom_raster(aes(x = x_center, y = y_center, fill = trend)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_temp1 <- map_temp1 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle(paste(i, GIA_name[i], "-- trend"))


map_temp2 <- ggplot(data=GIA_temp) + geom_raster(aes(x = x_center, y = y_center, fill = std)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_temp2 <- map_temp2 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle("standard error")

map_temp3 <- ggplot(data=GIA_temp) + geom_raster(aes(x = x_center, y = y_center, fill = zero)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_temp3 <- map_temp3 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle(paste(i, GIA_name[i], "-- zero region"))
map_zero1[[i]] <- map_temp3

## Plot the result
print(map_temp3)

}

Then do the same for the ensemble mean of the 14 data sets.

## create the ensemble data sets
GIA_ensemble <- GIA_priors[[1]]
GIA_ensemble$trend <- rowMeans(sapply(GIA_priors, "[[", "trend"))
GIA_ensemble$sd <- sqrt((rowSums(sapply(GIA_priors, function(x) (x$std)^2)))/14)
GIA_ensemble$zero <- abs(GIA_ensemble$trend) < GIA_ensemble$std*2

## Plot the result
map_ens1 <- ggplot(data=GIA_ensemble) + geom_raster(aes(x = x_center, y = y_center, fill = trend)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_ens1 <- map_ens1 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle("ensemble -- trend")

map_ens2 <- ggplot(data=GIA_ensemble) + geom_raster(aes(x = x_center, y = y_center, fill = std)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_ens2 <- map_ens2 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle("standard error")

map_ens3 <- ggplot(data=GIA_ensemble) + geom_raster(aes(x = x_center, y = y_center, fill = zero)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_ens3 <- map_ens3 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle("zero region")

## Plot the result
map_ens1

map_ens2

map_ens3

2 Criteria based on absolute values

The plots based on the statistical criterion is not satisfactory, so we change to use a simple rule based on absolute value smaller than a given threshold, say 0.5mm/year.

## Individual data sets
for (i in 1:14){
GIA_temp <- GIA_priors[[i]]
GIA_temp$zero <- abs(GIA_temp$trend) < 0.5

map_temp3 <- ggplot(data=GIA_temp) + geom_raster(aes(x = x_center, y = y_center, fill = zero)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-89.5,89.5),  expand = c(0, 0)) 

map_temp3 <- map_temp3 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle(paste(i, GIA_name[i], "-- zero region"))
map_zero1[[i+14]] <- map_temp3

## Plot the result
print(map_temp3)

}

Then the same for the ensemble mean.

GIA_ensemble$zero2 <- abs(GIA_ensemble$trend) < 0.5

## Plot the result
map_ens3.2 <- ggplot(data=GIA_ensemble) + geom_raster(aes(x = x_center, y = y_center, fill = zero2)) + 
  coord_fixed() + xlab("Longitude") + ylab("Latitude") + 
  scale_x_continuous(limits=c(0,359),  expand = c(0, 0)) + scale_y_continuous(limits=c(-90,90),  expand = c(0, 0)) 

map_ens3.2 <- map_ens3.2 + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), 
                                      colour="grey", fill = NA, alpha = 0.5) + ggtitle("ensemble zero region 2")

## Plot the result
map_ens3.2

The ensemble mean using the absolute value criteria looks reasonable for defining the zero value polygons. Now we convert the pixels in the zero stable area into polygons.

## create a Spatialpixels object
GIA_ens <- GIA_ensemble[,c(1:3,16)]
library(sp)
library(raster)
coordinates(GIA_ens) <- c("x_center", "y_center")
gridded(GIA_ens) <- TRUE
GIA_ensR <- raster(GIA_ens, layer = 2) # 2 is the column no of zero2
GIA_zeroPoly <- rasterToPolygons(GIA_ensR, fun=function(zero2) {zero2 == TRUE}, dissolve=TRUE )

## Loading required namespace: rgeos

plot(GIA_zeroPoly, col = "blue", main = "zero certain regions")

Note that there are holes in the large polygons and check if they are correctly labeled.

sapply(GIA_zeroPoly@polygons[[1]]@Polygons, function(x) slot(x, "hole"))

##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
## [56]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

We can convert the spatialPolygonDataFrame object to a data frame that can be plotted by ggplot2.

library(broom)
GIA_zeroDF <- tidy(GIA_zeroPoly)

## Regions defined for each Polygons

zeropoly_map <- ggplot()+ coord_fixed()+ geom_polygon(data = GIA_zeroDF, aes(x=long, y=lat, group = group, fill = !hole),colour="black") 
print(zeropoly_map)

Exclude the GPS area

Given the zero value region, we also want to exclude the region that include real GPS readings. Now we plot the latest GPS data for inspection.

GPS_data <- read.table("Z:/WP2-SolidEarth/GPS/NGL/GPS_version03/GPSdataset_v03c_QC_20170809_comb.txt",  header = T)
GPS_data$lon <- ifelse(GPS_data$lon < 0, GPS_data$lon + 359, GPS_data$lon)
map_ensGPS <- map_ens3.2 + geom_point(data=GPS_data, aes(x=lon, y=lat), pch=19, 
                                    col = "red", fill = "red", alpha=0.7) + ggtitle("ensemble zero region 2 with GPS")
map_ensGPS

Define the locations of the pseudo observations

With the GPS added in the plots, it is messy to define polygons for the pseudo observations. One simple way to create the locations is to generate a regular mesh over the entire sphere, then remove vertices outside the zero value region and points that has GPS reading or too close to a GPS reading.

First, generate vertices that evenly distributed over the entire globe. The distance between the vertices is about the same as the correlation length of the of the residual process, which is about 400-500km apart. This is about spreading 1000 points using the Fibonacci spiral on a sphere.

## xyz -- Generate equally distributed points around a sphere -- Fibonacci spiral
fiboSphere1 <- function(n = 1000L) {
  
  phi <- (sqrt(5) + 1) / 2  # golden ratio
  i <- seq(-(n-1), (n-1),2)
  theta <- 2 * pi * i / phi
  sphi <- i/n
  cphi <- sqrt((n+i) * (n-i))/n
  
  x <- cphi * sin(theta)
  y <- cphi * cos(theta)
  z <- sphi
  return(cbind(x,y,z))
}

##lonlat -- Generate equally distributed points around a sphere -- Fibonacci spiral
fiboSphere2 <- function(n = 1000L) {
  phi <- (sqrt(5) + 1) / 2 - 1 # golden ratio
  ga <- phi * 2 * pi           # golden angle
  
  i <- 1L:n
  lon <- ga * i / (2 * pi)
  lon <- 2 * pi * (lon - floor(lon))
  lon <- ifelse(lon <= pi, lon, lon - 2 * pi)/pi*180
  lat <- asin(-1 + 2 * i / n)/pi*180
  
  cbind(lon = lon, lat = lat)
}

fibopoints <- fiboSphere2(n = 1000) 
fibopoints <- data.frame(fibopoints)
fibopoints$lon <- ifelse(fibopoints$lon < 0, fibopoints$lon+359, fibopoints$lon)
## Plot these points 
map_fibo <- map_ensGPS + geom_point(data=data.frame(fibopoints), aes(x=lon, y=lat), pch=19, 
                                    col = "blue", alpha=0.5)  + ggtitle("ensemble zero region 2 with GPS and fibo points")
map_fibo

Then remove points that are not in the zero region polygons.

fibsp <- SpatialPoints(fibopoints, proj4string = CRS("+proj=longlat"))
zeroPolys <- SpatialPolygons(GIA_zeroPoly@polygons, proj4string = CRS("+proj=longlat"))
fibin <- over(fibsp, zeroPolys)
fibin <- ifelse(is.na(fibin), FALSE, TRUE)
## plot these points
fibpoint_in <- data.frame(fibopoints[fibin, ])
map_fibo2 <- zeropoly_map + geom_point(data=fibpoint_in, aes(x=lon, y=lat), pch=19, 
                                    col = "blue", alpha=0.5)  + ggtitle("fibpoints in the zero value regions")
map_fibo2

Then remove points that are too close to the GPS observations (say within the correlation length 400-500km , that is about 5 degree in longlat).

fibin2 <- apply(fibpoint_in, 1, function(x) all(apply(GPS_data[,2:3], 1, function(y) dist(rbind(y, x)) > 5)))
fibpoint_in2 <- fibpoint_in[fibin2,]

map_fibo3 <- zeropoly_map + geom_point(data=fibpoint_in2, aes(x=lon, y=lat), pch=19, 
                                    col = "blue", alpha=0.5)  +
  geom_point(data=GPS_data, aes(x=lon, y=lat), pch=19, 
                                    col = "red", fill = "red", alpha=0.7) +
  ggtitle("fibpoints in the zero value regions away from GPS")
map_fibo3

Finally, assemble the pseudo observation and real observation into the same data set. The pseudo observations are simulated from a zero mean Gaussian distribution with sd error half of the smallest real GPS error.

pseudo_error <- min(GPS_data$std)/2
nps <- nrow(fibpoint_in2)
pseudo_obs <- rnorm(nps, sd = pseudo_error)
pseudo_data <- data.frame(ID = rep("pseudo", nps), lon = fibpoint_in2$lon, lat = fibpoint_in2$lat, 
                          trend = pseudo_obs, std = rep(pseudo_error, nps))
obs_all <- rbind(GPS_data, pseudo_data)
obs_all$source <- ifelse(obs_all$ID == "pseudo", "pseudo", "real")

map_final <- ggplot() + coord_fixed() + geom_polygon(data=world_map, aes(x=long, y=lat, group=group), colour="black", fill = NA) + 
  geom_polygon(data = GIA_zeroDF, aes(x=long, y=lat, group = group), colour="red", fill = NA) +
  geom_point(data=obs_all, aes(x=lon, y=lat, colour = source), pch=19) + 
  ggtitle("Final map of real and pseudo observations.")

map_final

Plot with another projection.

library(mapproj)

## Loading required package: maps

map_final + coord_map(projection = "orthographic")