About

I rewrite th R codes of the book Habitat Suitability and Distribution Models Japanese version in a tidy way, using sf and stars package.

Overview of section 7.4

例えばこれから新しいサンプルを野外で取る際に、どの場所でどれだけサンプルを取るか、 といったサンプリングデザインは極めて重要である。ここでは主要なサンプリングデザインを いくつか紹介する。

7.4.1

Abstract

まずはじめに、使うデータを準備する。環境データとしてWorldClimeから取得した生物気候データ を用いる。2つの生物気候を用いて対象領域をいくつかのクラスターに分割する(本文では層別化と呼んでいた。)

# Read a shapefile of U.S.A. 
usa <- read_sf("data/vector/usa/USA_states.shp") %>%
  filter(STATE_NAME != "Alaska" & STATE_NAME != "Hawaii")

# Read raster files of bio-climatic data
bio12 <- raster::raster("data/raster/bioclim/current/grd/bio12") %>%
  st_as_stars() %>%
  st_crop(usa)
## Warning in CPL_read_gdal(as.character(x), as.character(options),
## as.character(driver), : GDAL Error 1: Unhandled datatype=FLT4S
## trying to read file: /home/okamoto/hsdm/data/raster/bioclim/current/grd/bio12.grd
## although coordinates are longitude/latitude, st_union assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
bio3 <- raster::raster("data/raster/bioclim/current/grd/bio3") %>%
  st_as_stars() %>%
  st_crop(usa)
## Warning in CPL_read_gdal(as.character(x), as.character(options),
## as.character(driver), : GDAL Error 1: Unhandled datatype=FLT4S
## trying to read file: /home/okamoto/hsdm/data/raster/bioclim/current/grd/bio3.grd
## although coordinates are longitude/latitude, st_union assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# In the original code, each bio climatic data are separated in 9 categories and just concatenated together as 11, 12, ... 98, 99. This method is really uncool, then I apply k-means clustering.

# Because na.omit() is not working with stars objects, I vectorize them temporarily. # I use tidymodels::kmeans()
bio <- c(bio3, bio12) %>%
  st_as_sf(as_points = T) %>%
  na.omit() %>%
  mutate(
    cluster = kmeans(
      as_tibble(.) %>% select(bio3, bio12),
      centers = 10
    )$cluster
  )

# Rasterize again
bio <- c(
  select(bio, bio3) %>% 
    st_rasterize(template = bio3),
  select(bio, bio12) %>% 
    st_rasterize(template = bio3),
  select(bio, cluster) %>% 
    st_rasterize(template = bio3)
) %>%
  mutate(cluster = as.factor(cluster))

# plot results
ggplot() +
  geom_stars(data = select(bio,bio3)) +
  labs(title = "bio3") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio,bio12)) +
  labs(title = "bio12") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster)) +
  labs(title = "k-means clusters") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

bio %>% 
  as_tibble() %>%
  filter(is.na(bio3)==F & is.na(bio12)==F) %>%
  ggplot(mapping = aes(x = bio3, y = bio12)) +
  geom_hex() +
  labs(title = "heat map of bio3 and bio12") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

7.4.2

Abstract

最も簡単な方法は、対象地の領域だけを決めてそこからサンプリングする方法である。 サンプリングの方法には - Random エリア内でランダムにサンプリングする - Regular エリア内に等間隔に点を配置する - Stratified エリアを等面積の同じサイズのパッチに分割し、パッチ内でランダムに点を取る。

の3つがある。

# Some sampling methods for simply sampling points in a region.

# Random sampling
sampled_random <- bio %>% 
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "random")

# Regularized sampling
sampled_regular <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "regular")

# Hexagonal sampling
sampled_hex <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "hexagonal")

# Stratified sampling, using spatstat::rstrat()
sampled_strat <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100, type = "strat", nx = 20, ny = 15) %>%
  `st_crs<-`(32614)
## Registered S3 method overwritten by 'spatstat':
##   method     from
##   print.boxx cli
ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_random, 
          mapping = aes()) +
  labs(title = "Random Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_regular, mapping = aes()) +
  labs(title = "Regular Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_hex, mapping = aes()) +
  labs(title = "Hexagonal Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster) %>%
               st_transform(st_crs(sampled_random))) +
  geom_sf(data = sampled_strat, mapping = aes()) +
  labs(title = "Stratified Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

7.4.3

Abstract

次に、環境のクラスターを考慮したサンプリングを行う。 ここでは、各クラスターごとに等数の点をサンプルする方法(Equal sampling)、 クラスターの面積に比例した数をサンプルする方法(Propotional sampling)の2つを紹介する。

# First, sample many points from bio raster. Then sample some points from them with two strategies.

regular100000 <- bio %>%
  st_transform(32614) %>%
  st_as_sf() %>%
  st_sample(100000, type = "regular")

bio_points <- regular100000 %>%
  st_transform(st_crs(bio3)) %>%
  st_as_sf() %>%
  mutate(
    bio3 = st_extract(bio, .)$bio3,
    bio12 = st_extract(bio, .)$bio12,
    cluster = st_extract(bio, .)$cluster
    ) %>%
  na.omit()

# Sample equal numbers of points by each cluster.
equal_sampling <- bio_points %>%
  group_by(cluster) %>%
  sample_n(10)

# Sample points in proportion to the area of each cluster
propotional_sampling <- bio_points %>%
  group_by(cluster) %>%
  sample_frac(0.001)

ggplot() + 
  geom_stars(data = select(bio, cluster)) +
  geom_sf(data = select(propotional_sampling, bio3)) +
  labs(title = "Propotional Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = select(bio, cluster)) +
  geom_sf(data = select(equal_sampling, bio3)) +
  labs(title = "Equal Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

equal_sampling %>%
  ggplot(mapping = aes(x = cluster)) +
  geom_bar() + 
  labs(title = "Equal Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

propotional_sampling %>%
  ggplot(mapping = aes(x = cluster)) +
  geom_bar() + 
  labs(title = "Propotional Sampling") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

7.4.4

Abstract

最後に、何かしらの線に沿ったサンプリングを行う。これは、道路や河川、人為的に設置したライントランセクトに 沿ってサンプリングを行うような状況を想定している。 このコードに関していえば、rasterspを使ったほうがきれいに書ける(sp::spsample()) がよくできているのと、ラスターデータのダウンスケーリングはrasterパッケージのほうが楽)。

# Read, crop, and downsample the DEM.
dem <- raster::raster("data/raster/topo/GTOPO30.tif") %>%
  raster::`crs<-`(value = raster::crs(as_Spatial(usa))) %>%
  raster::crop(as_Spatial(usa)) %>%
  raster::mask(as_Spatial(usa)) %>%
  raster::aggregate(10, fun=mean) %>%
  st_as_stars()

# Contour line per 1000m a.s.l.
cont <- st_contour(dem, breaks=0:4*1000,
                   contour_lines = T)

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont, mapping = aes()) +
  labs(title = "Contour line") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

# Contour line of 1000m a.s.l. 
cont1000 <- cont %>%
  filter(GTOPO30 == "[0,1e+03]") 
 
ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  labs(title = "Contour line 1000m a.s.l.") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

# Sample points randomly along the contour line.
cont_random <- cont1000 %>% 
  st_combine() %>%
  st_transform(32614) %>%
  st_sample(size = 100, type = "random")

# Sample points in equal interval along the contour line
cont_regular <- cont1000 %>%
  st_combine() %>%
  st_transform(32614) %>%
  st_sample(100, type = "regular")

# Sample points equal numbers per line segments.
cont_strat <-  cont1000 %>%
  st_transform(32614) %>%
  st_line_sample(1, type = "regular")

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  geom_sf(cont_random, mapping = aes(), color = "white") +
  labs(title = "Random along the contour line") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  geom_sf(cont_regular, mapping = aes(), color = "white") +
  labs(title = "Equal interval (Regular)") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))

ggplot() + 
  geom_stars(data = dem) + 
  geom_sf(data = cont1000, mapping = aes()) +
  geom_sf(cont_strat, mapping = aes(), color = "white") +
  labs(title = "Equal numbers per line segment (Stratified)") +
  theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))