I rewrite th R codes of the book Habitat Suitability and Distribution Models Japanese version in a tidy way, using sf and stars package.
例えばこれから新しいサンプルを野外で取る際に、どの場所でどれだけサンプルを取るか、 といったサンプリングデザインは極めて重要である。ここでは主要なサンプリングデザインを いくつか紹介する。
まずはじめに、使うデータを準備する。環境データとしてWorldClimeから取得した生物気候データ を用いる。2つの生物気候を用いて対象領域をいくつかのクラスターに分割する(本文では層別化と呼んでいた。)
# Read a shapefile of U.S.A.
usa <- read_sf("data/vector/usa/USA_states.shp") %>%
filter(STATE_NAME != "Alaska" & STATE_NAME != "Hawaii")
# Read raster files of bio-climatic data
bio12 <- raster::raster("data/raster/bioclim/current/grd/bio12") %>%
st_as_stars() %>%
st_crop(usa)
## Warning in CPL_read_gdal(as.character(x), as.character(options),
## as.character(driver), : GDAL Error 1: Unhandled datatype=FLT4S
## trying to read file: /home/okamoto/hsdm/data/raster/bioclim/current/grd/bio12.grd
## although coordinates are longitude/latitude, st_union assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
bio3 <- raster::raster("data/raster/bioclim/current/grd/bio3") %>%
st_as_stars() %>%
st_crop(usa)
## Warning in CPL_read_gdal(as.character(x), as.character(options),
## as.character(driver), : GDAL Error 1: Unhandled datatype=FLT4S
## trying to read file: /home/okamoto/hsdm/data/raster/bioclim/current/grd/bio3.grd
## although coordinates are longitude/latitude, st_union assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# In the original code, each bio climatic data are separated in 9 categories and just concatenated together as 11, 12, ... 98, 99. This method is really uncool, then I apply k-means clustering.
# Because na.omit() is not working with stars objects, I vectorize them temporarily. # I use tidymodels::kmeans()
bio <- c(bio3, bio12) %>%
st_as_sf(as_points = T) %>%
na.omit() %>%
mutate(
cluster = kmeans(
as_tibble(.) %>% select(bio3, bio12),
centers = 10
)$cluster
)
# Rasterize again
bio <- c(
select(bio, bio3) %>%
st_rasterize(template = bio3),
select(bio, bio12) %>%
st_rasterize(template = bio3),
select(bio, cluster) %>%
st_rasterize(template = bio3)
) %>%
mutate(cluster = as.factor(cluster))
# plot results
ggplot() +
geom_stars(data = select(bio,bio3)) +
labs(title = "bio3") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = select(bio,bio12)) +
labs(title = "bio12") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = select(bio, cluster)) +
labs(title = "k-means clusters") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
bio %>%
as_tibble() %>%
filter(is.na(bio3)==F & is.na(bio12)==F) %>%
ggplot(mapping = aes(x = bio3, y = bio12)) +
geom_hex() +
labs(title = "heat map of bio3 and bio12") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
最も簡単な方法は、対象地の領域だけを決めてそこからサンプリングする方法である。 サンプリングの方法には - Random エリア内でランダムにサンプリングする - Regular エリア内に等間隔に点を配置する - Stratified エリアを等面積の同じサイズのパッチに分割し、パッチ内でランダムに点を取る。
の3つがある。
# Some sampling methods for simply sampling points in a region.
# Random sampling
sampled_random <- bio %>%
st_transform(32614) %>%
st_as_sf() %>%
st_sample(100, type = "random")
# Regularized sampling
sampled_regular <- bio %>%
st_transform(32614) %>%
st_as_sf() %>%
st_sample(100, type = "regular")
# Hexagonal sampling
sampled_hex <- bio %>%
st_transform(32614) %>%
st_as_sf() %>%
st_sample(100, type = "hexagonal")
# Stratified sampling, using spatstat::rstrat()
sampled_strat <- bio %>%
st_transform(32614) %>%
st_as_sf() %>%
st_sample(100, type = "strat", nx = 20, ny = 15) %>%
`st_crs<-`(32614)
## Registered S3 method overwritten by 'spatstat':
## method from
## print.boxx cli
ggplot() +
geom_stars(data = select(bio, cluster) %>%
st_transform(st_crs(sampled_random))) +
geom_sf(data = sampled_random,
mapping = aes()) +
labs(title = "Random Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = select(bio, cluster) %>%
st_transform(st_crs(sampled_random))) +
geom_sf(data = sampled_regular, mapping = aes()) +
labs(title = "Regular Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = select(bio, cluster) %>%
st_transform(st_crs(sampled_random))) +
geom_sf(data = sampled_hex, mapping = aes()) +
labs(title = "Hexagonal Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = select(bio, cluster) %>%
st_transform(st_crs(sampled_random))) +
geom_sf(data = sampled_strat, mapping = aes()) +
labs(title = "Stratified Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
次に、環境のクラスターを考慮したサンプリングを行う。 ここでは、各クラスターごとに等数の点をサンプルする方法(Equal sampling)、 クラスターの面積に比例した数をサンプルする方法(Propotional sampling)の2つを紹介する。
# First, sample many points from bio raster. Then sample some points from them with two strategies.
regular100000 <- bio %>%
st_transform(32614) %>%
st_as_sf() %>%
st_sample(100000, type = "regular")
bio_points <- regular100000 %>%
st_transform(st_crs(bio3)) %>%
st_as_sf() %>%
mutate(
bio3 = st_extract(bio, .)$bio3,
bio12 = st_extract(bio, .)$bio12,
cluster = st_extract(bio, .)$cluster
) %>%
na.omit()
# Sample equal numbers of points by each cluster.
equal_sampling <- bio_points %>%
group_by(cluster) %>%
sample_n(10)
# Sample points in proportion to the area of each cluster
propotional_sampling <- bio_points %>%
group_by(cluster) %>%
sample_frac(0.001)
ggplot() +
geom_stars(data = select(bio, cluster)) +
geom_sf(data = select(propotional_sampling, bio3)) +
labs(title = "Propotional Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = select(bio, cluster)) +
geom_sf(data = select(equal_sampling, bio3)) +
labs(title = "Equal Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
equal_sampling %>%
ggplot(mapping = aes(x = cluster)) +
geom_bar() +
labs(title = "Equal Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
propotional_sampling %>%
ggplot(mapping = aes(x = cluster)) +
geom_bar() +
labs(title = "Propotional Sampling") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
最後に、何かしらの線に沿ったサンプリングを行う。これは、道路や河川、人為的に設置したライントランセクトに 沿ってサンプリングを行うような状況を想定している。 このコードに関していえば、rasterとspを使ったほうがきれいに書ける(sp::spsample()) がよくできているのと、ラスターデータのダウンスケーリングはrasterパッケージのほうが楽)。
# Read, crop, and downsample the DEM.
dem <- raster::raster("data/raster/topo/GTOPO30.tif") %>%
raster::`crs<-`(value = raster::crs(as_Spatial(usa))) %>%
raster::crop(as_Spatial(usa)) %>%
raster::mask(as_Spatial(usa)) %>%
raster::aggregate(10, fun=mean) %>%
st_as_stars()
# Contour line per 1000m a.s.l.
cont <- st_contour(dem, breaks=0:4*1000,
contour_lines = T)
ggplot() +
geom_stars(data = dem) +
geom_sf(data = cont, mapping = aes()) +
labs(title = "Contour line") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
# Contour line of 1000m a.s.l.
cont1000 <- cont %>%
filter(GTOPO30 == "[0,1e+03]")
ggplot() +
geom_stars(data = dem) +
geom_sf(data = cont1000, mapping = aes()) +
labs(title = "Contour line 1000m a.s.l.") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
# Sample points randomly along the contour line.
cont_random <- cont1000 %>%
st_combine() %>%
st_transform(32614) %>%
st_sample(size = 100, type = "random")
# Sample points in equal interval along the contour line
cont_regular <- cont1000 %>%
st_combine() %>%
st_transform(32614) %>%
st_sample(100, type = "regular")
# Sample points equal numbers per line segments.
cont_strat <- cont1000 %>%
st_transform(32614) %>%
st_line_sample(1, type = "regular")
ggplot() +
geom_stars(data = dem) +
geom_sf(data = cont1000, mapping = aes()) +
geom_sf(cont_random, mapping = aes(), color = "white") +
labs(title = "Random along the contour line") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = dem) +
geom_sf(data = cont1000, mapping = aes()) +
geom_sf(cont_regular, mapping = aes(), color = "white") +
labs(title = "Equal interval (Regular)") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))
ggplot() +
geom_stars(data = dem) +
geom_sf(data = cont1000, mapping = aes()) +
geom_sf(cont_strat, mapping = aes(), color = "white") +
labs(title = "Equal numbers per line segment (Stratified)") +
theme(plot.title = element_text(hjust = 0.5), text = element_text(size = 18))