tidy Habitat Suitability and Distribution Models Section7.4

About

I rewrite th R codes of the book Habitat Suitability and Distribution Models Japanese version in a tidy way, using sf and stars package.

Overview of section 7.4

例えばこれから新しいサンプルを野外で取る際に、どの場所でどれだけサンプルを取るか、といったサンプリングデザインは極めて重要である。ここでは主要なサンプリングデザインをいくつか紹介する。

7.4.1

Abstract

まずはじめに、使うデータを準備する。環境データとしてWorldClimeから取得した生物気候データを用いる。２つの生物気候を用いて対象領域をいくつかのクラスターに分割する（本文では層別化と呼んでいた。）

# Read a shapefile of U.S.A. 
usa <- read_sf("data/vector/usa/USA_states.shp") %>%
  filter(STATE_NAME != "Alaska" & STATE_NAME != "Hawaii")

# Read raster files of bio-climatic data
bio12 <- raster::raster("data/raster/bioclim/current/grd/bio12") %>%
  st_as_stars() %>%
  st_crop(usa)

## Warning in CPL_read_gdal(as.character(x), as.character(options),
## as.character(driver), : GDAL Error 1: Unhandled datatype=FLT4S

## trying to read file: /home/okamoto/hsdm/data/raster/bioclim/current/grd/bio12.grd

## although coordinates are longitude/latitude, st_union assumes that they are planar

## although coordinates are longitude/latitude, st_intersects assumes that they are planar

bio3 <- raster::raster("data/raster/bioclim/current/grd/bio3") %>%
  st_as_stars() %>%
  st_crop(usa)

## Warning in CPL_read_gdal(as.character(x), as.character(options),
## as.character(driver), : GDAL Error 1: Unhandled datatype=FLT4S

## trying to read file: /home/okamoto/hsdm/data/raster/bioclim/current/grd/bio3.grd

## although coordinates are longitude/latitude, st_union assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

# In the original code, each bio climatic data are separated in 9 categories and just concatenated together as 11, 12, ... 98, 99. This method is really uncool, then I apply k-means clustering.

# Because na.omit() is not working with stars objects, I vectorize them temporarily. # I use tidymodels::kmeans()
bio <- c(bio3, bio12) %>%
  st_as_sf(as_points = T) %>%
  na.omit() %>%
  mutate(
    cluster = kmeans(
      as_tibble(.) %>% select(bio3, bio12),
      centers = 10
    )$cluster
  )

# Rasterize again
bio <- c(
  select(bio, bio3) %>% 
    st_rasterize(template = bio3),
  select(bio, bio12) %>% 
    st_rasterize(template = bio3),
  select(bio, cluster) %>% 
    st_rasterize(template = bio3)
) %>%
  mutate(cluster = as.factor(cluster))

# plot results
ggplot() +
  geom_stars(data = select(bio,bio3)) +
  labs(title = "bio3") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio,bio12)) +
  labs(title = "bio12") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster)) +
  labs(title = "k-means clusters") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

bio %>% 
  as_tibble() %>%
  filter(is.na(bio3)==F & is.na(bio12)==F) %>%
  ggplot(mapping = aes(x = bio3, y = bio12)) +
  geom_hex() +
  labs(title = "heat map of bio3 and bio12") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

7.4.2

Abstract

最も簡単な方法は、対象地の領域だけを決めてそこからサンプリングする方法である。サンプリングの方法には - Random エリア内でランダムにサンプリングする - Regular エリア内に等間隔に点を配置する - Stratified エリアを等面積の同じサイズのパッチに分割し、パッチ内でランダムに点を取る。

の3つがある。

# Some sampling methods for simply sampling points in a region.

# Random sampling
sampled_random <- bio %>% 
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "random")

# Regularized sampling
sampled_regular <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "regular")

# Hexagonal sampling
sampled_hex <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "hexagonal")

# Stratified sampling, using spatstat::rstrat()
sampled_strat <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "strat", nx = 20, ny = 15) %>%
  `st_crs<-`(32614)

## Registered S3 method overwritten by 'spatstat':
##   method     from
##   print.boxx cli

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_random, 
          mapping = aes()) +
  labs(title = "Random Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_regular, mapping = aes()) +
  labs(title = "Regular Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_hex, mapping = aes()) +
  labs(title = "Hexagonal Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_strat, mapping = aes()) +
  labs(title = "Stratified Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

7.4.3

Abstract

次に、環境のクラスターを考慮したサンプリングを行う。ここでは、各クラスターごとに等数の点をサンプルする方法（Equal sampling）、クラスターの面積に比例した数をサンプルする方法（Propotional sampling）の２つを紹介する。

# First, sample many points from bio raster. Then sample some points from them with two strategies.

regular100000 <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100000, type = "regular")

bio_points <- regular100000 %>%
  st_transform(st_crs(bio3)) %>%
  st_as_sf() %>%
  mutate(
    bio3 = st_extract(bio, .)$bio3,
    bio12 = st_extract(bio, .)$bio12,
    cluster = st_extract(bio, .)$cluster
    ) %>%
  na.omit()

# Sample equal numbers of points by each cluster.
equal_sampling <- bio_points %>%
  group_by(cluster) %>%
  sample_n(10)

# Sample points in proportion to the area of each cluster
propotional_sampling <- bio_points %>%
  group_by(cluster) %>%
  sample_frac(0.001)

ggplot() + 
  geom_stars(data = select(bio, cluster)) +
  geom_sf(data = select(propotional_sampling, bio3)) +
  labs(title = "Propotional Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster)) +
  geom_sf(data = select(equal_sampling, bio3)) +
  labs(title = "Equal Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

equal_sampling %>%
  ggplot(mapping = aes(x = cluster)) +
  geom_bar() + 
  labs(title = "Equal Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

propotional_sampling %>%
  ggplot(mapping = aes(x = cluster)) +
  geom_bar() + 
  labs(title = "Propotional Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

7.4.4

Abstract

最後に、何かしらの線に沿ったサンプリングを行う。これは、道路や河川、人為的に設置したライントランセクトに沿ってサンプリングを行うような状況を想定している。このコードに関していえば、rasterとspを使ったほうがきれいに書ける（sp::spsample()）がよくできているのと、ラスターデータのダウンスケーリングはrasterパッケージのほうが楽)。

# Read, crop, and downsample the DEM.
dem <- raster::raster("data/raster/topo/GTOPO30.tif") %>%
  raster::`crs<-`(value = raster::crs(as_Spatial(usa))) %>%
  raster::crop(as_Spatial(usa)) %>%
  raster::mask(as_Spatial(usa)) %>%
  raster::aggregate(10, fun=mean) %>%
  st_as_stars()

# Contour line per 1000m a.s.l.
cont <- st_contour(dem, breaks=0:4*1000,
                   contour_lines = T)

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont, mapping = aes()) +
  labs(title = "Contour line") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

# Contour line of 1000m a.s.l. 
cont1000 <- cont %>%
  filter(GTOPO30 == "[0,1e+03]") 
 
ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  labs(title = "Contour line 1000m a.s.l.") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

# Sample points randomly along the contour line.
cont_random <- cont1000 %>% 
  st_combine() %>%
  st_transform(32614) %>%
  st_sample(size = 100, type = "random")

# Sample points in equal interval along the contour line
cont_regular <- cont1000 %>%
  st_combine() %>%
  st_transform(32614) %>%
  st_sample(100, type = "regular")

# Sample points equal numbers per line segments.
cont_strat <-  cont1000 %>%
  st_transform(32614) %>%
  st_line_sample(1, type = "regular")

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  geom_sf(cont_random, mapping = aes(), color = "white") +
  labs(title = "Random along the contour line") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  geom_sf(cont_regular, mapping = aes(), color = "white") +
  labs(title = "Equal interval (Regular)") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  geom_sf(cont_strat, mapping = aes(), color = "white") +
  labs(title = "Equal numbers per line segment (Stratified)") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))