Libraries:
suppressPackageStartupMessages({
library(dplyr)
library(terra)
library(raster)
library(sf)
library(ggplot2)
})
## Warning: package 'sf' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
Preparing pathways:
root <- "C:/Users/hudso/OneDrive/Documents/02. WUR/10. THESIS"
rproject_path <- file.path(root, '06 r_project')
raw_samples_path <- file.path(root, '04 data/01 raw/samples_vRAW')
proc_samples_path <- file.path(root, '04 data/03 processed_samples')
boun_path <- file.path(root, '04 data/02 processed_rasters/boundaries')
Loading raw samples:
nbis_raw <- read.csv(file.path(raw_samples_path, "nbis_vRAW.csv"), header = T)
Subsetting the essential information and renaming columns:
nbis <- nbis_raw[, c("latitude", "longitude", "year", "season", "land_use",
"Structure.Index..MEAN.", "Enrichment.Index..MEAN.", "Channel.Index..MEAN.")]
names(nbis) <- c("lat_4326", "long_4326", "year", "season", "landuse", "si", "ei", "ci")
head(nbis)
## lat_4326 long_4326 year season landuse si ei ci
## 1 46.25719 7.022394 2016 Summer natural grassland 72.73 43.24 37.50
## 2 46.26847 7.032504 2016 Summer natural grassland 64.49 54.22 55.56
## 3 46.27363 7.048331 2016 Summer natural grassland 88.99 88.79 3.16
## 4 46.28558 7.095852 2016 Summer natural grassland 76.26 50.91 85.71
## 5 46.28509 7.124600 2016 Summer natural grassland 55.47 53.17 58.21
## 6 46.29094 7.112580 2016 Summer natural grassland 39.84 54.52 46.96
Calculating the standard deviation of SI in samples with exactly
the same latitude:
for (i in 1:nrow(nbis)){
lat <- nbis$lat_4326[i]
df_same_lat <- subset(nbis, nbis$lat_4326==lat)
if (nrow(df_same_lat)>1) {
nbis$std_si[i] = sd(df_same_lat$si)
} else {
nbis$std_si[i] = NA
}
}
nrow(nbis) # 2936 samples
## [1] 2936
Removing samples with standard deviation too high (arbitrary choice = 11):
nbis <- nbis[nbis$std_si <= 11 | is.na(nbis$std_si), ]
nrow(nbis) # 1742 samples
## [1] 1742
Creating a layer containing rows (band 1) and columns (band 2)
to be used to extract this information for each sample point.
nrows <- 15965
ncols <- 13282
Creating a template raster:
r <- raster(nrows=15965, ncols=13282, res=c(250,250), xmn=2635500, xmx=5956000, ymn=1425000, ymx=5416250, crs = 3035) # rows layer
c <- raster(nrows=15965, ncols=13282, res=c(250,250), xmn=2635500, xmx=5956000, ymn=1425000, ymx=5416250, crs = 3035) # columns layer
values(r) <- rep(1:nrows, each=ncols) # giving the values of rows
values(c) <- rep(1:ncols, times=nrows) # giving the values of cols
Stacking both rasters:
raster_stack <- stack(r, c)
names(raster_stack) <- c("rows", "cols")
Loading boundary and sample data:
boundary_sf <- st_read(file.path(boun_path, "geoBoundariesCGAZ_Europe_Dissolved_v04.shp"))
## Reading layer `geoBoundariesCGAZ_Europe_Dissolved_v04' from data source
## `C:\Users\hudso\OneDrive\Documents\02. WUR\10. THESIS\04 data\02 processed_rasters\boundaries\geoBoundariesCGAZ_Europe_Dissolved_v04.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 2635657 ymin: 1425026 xmax: 5955764 ymax: 5416023
## Projected CRS: ETRS89-extended / LAEA Europe
Removing samples with empty latitudes/longitudes:
idx_del <- which(is.na(nbis$lat_4326))
nbis <- nbis[-idx_del,]
nrow(nbis) # 1740 samples
## [1] 1740
Making a copy of coordinates to be transformed into geometry:
nbis$latitude <- nbis$lat_4326
nbis$longitude <- nbis$long_4326
Convert to sf object (EPSG:4326 is the CRS for lat/long WGS84):
nbis_4326 <- st_as_sf(nbis, coords = c("longitude", "latitude"), crs = 4326)
Transform to EPSG:3035 (ETRS89 / LAEA Europe):
nbis_sf <- st_transform(nbis_4326, crs = 3035)
Registering also coordinates in CRS 3035:
nbis_sf$x_3035 <- st_coordinates(nbis_sf)[, 1] # X coordinates (longitude)
nbis_sf$y_3035 <- st_coordinates(nbis_sf)[, 2] # Y coordinates (latitude)
Using the function st_intersection to restrict the samples only in the study area:
# Original data (before intersection)
nbis_sf_original <- nbis_sf
# Perform the intersection
nbis_sf <- st_intersection(nbis_sf, boundary_sf)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# Create the plot
plot_combined <- ggplot() +
# Plot nbis_sf before intersection in gray
geom_sf(data = nbis_sf_original, color = "gray50", alpha = 0.6) +
# Plot nbis_sf after intersection in red
geom_sf(data = nbis_sf, color = "red", alpha = 0.6) +
# Plot the boundary
geom_sf(data = boundary_sf, fill = NA, color = "gray80", size = 0.8) +
labs(title = "NBIS Before (Gray) and After (Red) Intersection",
fill = "Legend") +
theme_minimal()
# Display the plot
print(plot_combined)
nrow(nbis_sf) # 1656 samples
## [1] 1656
Extracting information of rows and columns from the stacked raster created:
nbis_rows_cols <- extract(raster_stack, nbis_sf)
nbis_df <- st_drop_geometry(nbis_sf)
nbis_df <- nbis_df[,c("lat_4326", "long_4326", "x_3035", "y_3035", "year", "season", "landuse", "si", "ei", "ci")]
nbis_rows_cols <- cbind(nbis_df, nbis_rows_cols)
Creating a pixel_group which is a code for pixels sharing an unique combination of row and col:
df <- nbis_rows_cols %>%
mutate(pixel_group = as.integer(factor(paste(rows, cols))))
Calculating average for each NBI for samples in the same pixel_group:
df <- df %>%
group_by(pixel_group) %>%
mutate(avg_si = mean(si, na.rm = TRUE)) %>%
mutate(avg_ei = mean(ei, na.rm = TRUE)) %>%
mutate(avg_ci = mean(ci, na.rm = TRUE)) %>%
ungroup()
Calculating sd for each NBI for samples in the same pixel_group:
df <- df %>%
group_by(pixel_group) %>%
mutate(sd_si = sd(si, na.rm = TRUE)) %>%
mutate(sd_ei = sd(ei, na.rm = TRUE)) %>%
mutate(sd_ci = sd(ci, na.rm = TRUE)) %>%
ungroup()
Counting repetitions. Pixel_group seen first (or just once) will be assigned 1
df <- df %>%
group_by(pixel_group) %>%
mutate(count_pxgroup = n()) %>% # Count the number of rows in each group and store it in 'count_same'
ungroup()
Selecting pixels not repeated (i.e., count_pxgroup = 1). The new column agg_pixel receives a pixel_group for not repeated (or at least seen first than the others repetitions). The remaining pixel groups receive NA.
df <- df %>%
group_by(pixel_group) %>%
mutate(agg_pixel = ifelse(row_number() == 1, pixel_group, NA)) %>%
ungroup()
df <- data.frame(df)
Removing NA values from agg_pixel (i.e., samples repeated in the same pixel):
df_non_na <- df[!is.na(df$agg_pixel),]
df_non_na <- df_non_na[!is.na(df_non_na$avg_si),]
nrow(df_non_na) # 1327 samples
## [1] 1327
Subsetting the data frame for only relevant information for the next steps and renaming them:
final_df <- df_non_na[,c("lat_4326", "long_4326", "x_3035", "y_3035", "year", "season", "landuse", "avg_si", "avg_ei", "avg_ci")]
names(final_df) <- c("lat_4326", "long_4326", "x_3035", "y_3035", "year_cat", "season_cat", "landuse_cat", "si", "ei", "ci")
Creating sample_id:
final_df$sample_id <- 1:nrow(final_df)
Moving sample_id to the first column:
final_df <- final_df[,c("sample_id", "lat_4326", "long_4326", "x_3035", "y_3035", "year_cat", "season_cat", "landuse_cat", "si", "ei", "ci")]
#write.csv(final_df, file.path(proc_samples_path, "01_nbis_allrows_basic_cols_v01.csv"), row.names=FALSE)