This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
1
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
sf::sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
# 读取
sa2 <- st_read("/Users/j.bhuang/Desktop/5145/SA2_2021_AUST_GDA2020.shp", quiet = TRUE)
rail <- st_read("/Users/j.bhuang/Desktop/5145/railway_melb.gpkg", quiet = TRUE)
water <- st_read("/Users/j.bhuang/Desktop/5145/water_poly_melb.gpkg", quiet = TRUE)
# 投影
melb_sa2_proj <- sa2 %>% filter(GCC_NAME21 == "Greater Melbourne") %>% st_transform(3111)
rail_proj <- st_transform(rail, 3111)
water_proj <- st_transform(water, 3111)
# === 关键:AOI = “与铁路相交的分区集合”的合并面(与 DEP 一致)===
sa2_with_rail <- melb_sa2_proj %>%
st_filter(rail_proj, .predicate = st_intersects) %>%
distinct(SA2_CODE21, .keep_all = TRUE)
aoi <- st_union(sa2_with_rail) # 用它做所有图层的裁剪面
bbox_deg <- st_bbox(st_transform(aoi, 4326)) # 统一图窗(经纬度)
clip <- function(g) st_intersection(st_crop(g, st_bbox(aoi)), aoi) # 统一裁剪函数
# 用 AOI 裁剪 rail / water(先 bbox 再精确相交)
rail_within <- clip(rail_proj)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
water_within <- clip(water_proj)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# 质量检查图(现在视野就是 AOI;不会再显示全城)
ggplot() +
geom_sf(data = sa2_with_rail, fill = NA, color = "black", linewidth = 0.3) +
geom_sf(data = water_within, fill = "lightblue", color = "blue", alpha = 0.5) +
geom_sf(data = rail_within, color = "red", linewidth = 0.2) +
coord_sf(crs = 4326,
xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
expand = FALSE) +
theme_minimal(base_size = 14) +
labs(title = "Greater Melbourne — Rail & Water within AOI", x = NULL, y = NULL)
2
sf::sf_use_s2(FALSE)
library(sf)
library(dplyr)
library(ggplot2)
# 直接从根目录读上个块的结果(不依赖内存变量)
need <- function(fname){
if (!file.exists(fname)) stop("缺少缓存:", fname, " —— 请先运行第一个块。")
readRDS(fname)
}
# 读取本块自己的原始图层
veg <- st_read("urban_veg_2018_melb.gpkg", quiet = TRUE)
opens <- st_read("openspace_melb.gpkg", quiet = TRUE)
# 修复 + 投影
veg_proj <- st_transform(st_make_valid(veg), 3111)
opens_proj <- st_transform(st_make_valid(opens), 3111)
# 提速:按 AOI 的 bbox 粗裁,再做精确相交
veg_crop <- st_crop(veg_proj, st_bbox(melb_sa2_proj))
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
opens_crop <- st_crop(opens_proj, st_bbox(melb_sa2_proj))
veg_within <- st_intersection(veg_crop, aoi)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
opens_within <- st_intersection(opens_crop, aoi)
# 画图(原样)
ggplot() +
geom_sf(data = veg_within, aes(fill = "Vegetation"), color = NA, alpha = 0.3) +
geom_sf(data = opens_within, aes(fill = "Open Space"), color = "darkgreen", alpha = 0.4) +
geom_sf(data = water_within, aes(fill = "Water"), color = "blue", alpha = 0.5) +
geom_sf(data = rail_within, aes(color = "Railway"), linewidth = 0.2) +
geom_sf(data = sa2_with_rail, fill = NA, color = "black", linewidth = 0.4) +
scale_color_manual(values = c("Railway" = "red")) +
scale_fill_manual(values = c("Vegetation" = "forestgreen",
"Open Space" = "lightgreen",
"Water" = "lightblue")) +
coord_sf(crs = 4326,
xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
expand = FALSE) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
labs(title = "Greater Melbourne",
fill = "Land / Water", color = "Transport")
# ---- Road / Verge(优先 road_casement,找不到再退 easement;线就缓冲成面)----
road_within <- NULL # 先占位,避免未定义
# 1) 选数据源(按文件存在与否判断)
if (file.exists("road_casement_melb.gpkg")) {
road_src <- "road_casement_melb.gpkg"
} else if (file.exists("road_casement_melb.shp")) {
road_src <- "road_casement_melb.shp"
} else if (file.exists("easement_melb.gpkg")) {
road_src <- "easement_melb.gpkg" # 兜底:用地役权近似道路边带
} else {
road_src <- NA_character_
}
if (!is.na(road_src)) {
road_raw <- sf::st_read(road_src, quiet = TRUE) |>
sf::st_transform(3111) |>
sf::st_make_valid()
# 2) 若是线几何,轻缓冲成面(单位:米)
is_line <- any(sf::st_geometry_type(road_raw) %in% c("LINESTRING","MULTILINESTRING"))
road_poly <- if (is_line) sf::st_buffer(road_raw, 6) else road_raw
# 3) 裁到统一 AOI(用你第1块的 clip())
road_within <- clip(road_poly)
}
## Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, : GDAL
## Message 1: Invalid content for record 1 in column PFI_RETIRE: 0000-00-00
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
3
# 第三个块(仅消费前两块在根目录写出的 RDS)
library(sf)
library(dplyr)
library(ggplot2)
sf::sf_use_s2(FALSE)
# —— 本块仅处理 easement —— #
easements <- st_read("easement_melb.gpkg", quiet = TRUE)
## Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, : GDAL
## Message 1: Invalid content for record 2 in column PFI_CR: 0000-00-00
easements_proj <- st_transform(st_make_valid(easements), 3111)
# 可选提速:先 bbox 粗裁,再相交
easements_crop <- st_crop(easements_proj, st_bbox(melb_sa2_proj))
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
easements_within <- st_intersection(easements_crop, aoi)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# —— 画图(与你原版一致) —— #
ggplot() +
# 多边形用 fill
geom_sf(data = veg_within, aes(fill = "Vegetation"), color = NA, alpha = 0.30) +
geom_sf(data = water_within, aes(fill = "Water"), color = "blue", alpha = 0.50) +
geom_sf(data = opens_within, aes(fill = "Open Space"), color = "darkgreen", alpha = 0.70) +
# 线层用 color(两类 easement)
geom_sf(data = rail_within, aes(color = "Railway"), linewidth = 0.30) +
geom_sf(data = easements_within, aes(color = "Utility easement"), linewidth = 0.40, alpha = 0.40) +
geom_sf(data = sa2_with_rail, fill = NA, color = "black", linewidth = 0.40) +
scale_fill_manual(
name = "Background & Formal",
values = c("Vegetation" = "palegreen3",
"Water" = "lightblue",
"Open Space" = "forestgreen")
) +
scale_color_manual(
name = "Informal (IGS)",
values = c("Railway" = "red",
"Utility easement" = "orange")
) +
guides(color = guide_legend(order = 1),
fill = guide_legend(order = 2)) +
coord_sf(crs = 4326,
xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
expand = FALSE) +
theme_minimal(base_size = 14) +
theme(
legend.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_blank()
) +
labs(title = "Greater Melbourne — Formal vs Informal Green Space")
4
library(readr)
library(sf); sf::sf_use_s2(FALSE) # (可选)与前两块一致
library(dplyr)
# 1) 读入
bird_path <- "newdata.csv"
birds <- readr::read_csv(bird_path, show_col_types = FALSE)
# 2) 自动识别列(注意:cand_vec 必须全小写,因为 pick_col 用了 tolower(names(df)))
pick_col <- function(df, cand_vec) {
nms <- tolower(names(df))
for (c in cand_vec) { i <- match(c, nms, nomatch = 0); if (i > 0) return(names(df)[i]) }
NA_character_
}
# 2.1 经纬度候选:加入 Darwin Core 的 decimallatitude/decimallongitude
lat_col <- pick_col(birds, c("lat_json","latitude","lat","y","decimallatitude","decimal_latitude"))
lon_col <- pick_col(birds, c("lon_json","longitude","lon","lng","x","decimallongitude","decimal_longitude"))
stopifnot(!is.na(lat_col), !is.na(lon_col))
# 确保可数值化(有些来源会给成字符)
birds[[lat_col]] <- suppressWarnings(as.numeric(birds[[lat_col]]))
birds[[lon_col]] <- suppressWarnings(as.numeric(birds[[lon_col]]))
# 3) 物种列候选(保留以备后续用;此处不强制改名)
spec_col <- pick_col(
birds,
c("species","speciescode","scientificname","scientific_name",
"comname","commonname","vernacularname","vernacular_name",
"sci_name","common_name")
)
# 4) 转 sf 点(WGS84→3111)并裁 AOI
# ★ 提速与更稳的空间关系:先 union AOI,再用 intersects(边界点也保留)
aoi_rail <- sf::st_union(sa2_with_rail)
# 正确:先建 sf 点,再裁剪
birds_sf <- birds |>
dplyr::filter(!is.na(.data[[lat_col]]), !is.na(.data[[lon_col]])) |>
sf::st_as_sf(coords = c(lon_col, lat_col), crs = 4326, remove = FALSE) |>
sf::st_transform(3111)
birds_sf <- birds_sf[sf::st_intersects(birds_sf, aoi, sparse = FALSE), ]
cat("导入并落在 AOI 内的观测点数: ", nrow(birds_sf), "\n")
## 导入并落在 AOI 内的观测点数: 351714
5
# === 第五块(仅消费前块写在根目录的 RDS)===
library(sf); library(dplyr); library(ggplot2); library(readr)
sf::sf_use_s2(FALSE)
# —— 叠加检查图(与你原版一致;仅增了 easements 条件叠加)——
p <- ggplot() +
geom_sf(data = sa2_with_rail, fill = NA, color = "grey20", linewidth = 0.4) +
geom_sf(data = water_within, fill = "lightblue", color = NA, alpha = 0.35) +
geom_sf(data = opens_within, fill = "lightgreen", color = "darkgreen", alpha = 0.4) +
geom_sf(data = rail_within, color = "red", linewidth = 0.25) +
geom_sf(data = birds_sf, color = "#E54B4B", alpha = 0.7, size = 0.6) +
coord_sf(crs = 4326,
xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
expand = FALSE) +
theme_minimal(base_size = 13) +
labs(title = "Bird observations over basemap", x = NULL, y = NULL)
if (!is.null(easements_within))
p <- p + geom_sf(data = easements_within, aes(color = "Utility easement"), linewidth = 0.40, alpha = 0.40)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
print(p)
# ========== B. SA2 聚合(观测数 / 物种丰富度) ==========
sa2_join <- st_make_valid(melb_sa2_proj)[, c("SA2_CODE21","SA2_NAME21")]
obs_sa2 <- sf::st_join(
birds_sf,
sa2_join,
left = FALSE # 如担心边界点被丢,可改:join = sf::st_intersects
)
# 自动识别一个物种列名(仅用于 richness;完全不依赖上块的变量)
cols <- names(sf::st_drop_geometry(birds_sf))
spec_col <- intersect(c("sciName","scientific_name","common_name","species",
"taxon_name","comname","vernacular_name","vernacularName"), cols)
spec_col <- if (length(spec_col)) spec_col[1] else NA_character_
if (!is.na(spec_col)) {
sa2_sum <- obs_sa2 |>
sf::st_drop_geometry() |>
dplyr::group_by(SA2_CODE21, SA2_NAME21) |>
dplyr::summarise(
n_obs = dplyr::n(),
richness = dplyr::n_distinct(.data[[spec_col]]),
.groups = "drop"
)
} else {
sa2_sum <- obs_sa2 |>
sf::st_drop_geometry() |>
dplyr::count(SA2_CODE21, SA2_NAME21, name = "n_obs")
}
6
# === 第六块(仅消费根目录 RDS;生成 visit_id 与清单级表)===
suppressPackageStartupMessages({
library(dplyr); library(lubridate); library(stringr); library(readr); library(sf)
})
sf::sf_use_s2(FALSE)
# —— 只读前置块产物(根目录)——
need <- function(fname){
if (!file.exists(fname)) stop("缺少缓存:", fname, " —— 请先运行前置块。")
readRDS(fname)
}
dat0 <- sf::st_drop_geometry(birds_sf) # 本块按数据框处理
# 0) 列名兜底(大小写均可)
pick1 <- function(nm, pool) { x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","decimalLatitude","decimallatitude","y"), dat0)
lon_nm <- pick1(c("lon","longitude","decimalLongitude","decimallongitude","x","lng"), dat0)
spec_nm <- pick1(c("sciName","scientificName","scientific_name","taxon_name",
"species","common_name","comname","vernacularName","vernacular_name"), dat0)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))
## 1) 时间:稳健解析 obsDt → 墨尔本分钟;失败用 date@00:00 兜底
parse_obsdt_local <- function(x) {
s <- as.character(x); s <- gsub("T"," ", s, fixed=TRUE); s <- sub("Z$","", s)
dt <- suppressWarnings(parse_date_time(
s,
orders = c("Y-m-d H:M:S","Y-m-d H:M","Y/m/d H:M:S","Y/m/d H:M",
"d/m/Y H:M:S","d/m/Y H:M","Y-m-d","Y/m/d","d/m/Y"),
tz = "UTC"
))
with_tz(dt, "Australia/Melbourne")
}
time_min <- floor_date(parse_obsdt_local(dat0$obsDt), "minute")
date_fallback <- suppressWarnings(parse_date_time(
as.character(dat0$date),
orders = c("Y-m-d","Y/m/d","d/m/Y","m/d/Y"),
tz = "Australia/Melbourne"
))
time_min[is.na(time_min)] <- floor_date(date_fallback[is.na(time_min)], "minute")
event_date <- as.Date(time_min)
## 2) 地点:3111 下 20 m 网格(用动态列名)
use_grid <- TRUE; grid_cell_m <- 20
if (use_grid) {
pts3111 <- st_as_sf(dat0, coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) |> st_transform(3111)
xy <- st_coordinates(pts3111)
xbin_m <- floor(xy[,1] / grid_cell_m) * grid_cell_m
ybin_m <- floor(xy[,2] / grid_cell_m) * grid_cell_m
space_x <- sprintf("X%.0f", xbin_m)
space_y <- sprintf("Y%.0f", ybin_m)
} else {
space_x <- paste0("lat", round(as.numeric(dat0[[lat_nm]]), 4))
space_y <- paste0("lon", round(as.numeric(dat0[[lon_nm]]), 4))
}
## 3) 人物 + 来源(与你原逻辑一致)
norm_person <- function(x) {
s <- tolower(str_squish(str_replace_all(as.character(x), "&", " and ")))
s <- str_replace_all(s, "[,;|+/]+", " "); s <- str_squish(s); s[s %in% c("", "na")] <- NA_character_; s
}
person_raw <- if ("recorded_by" %in% names(dat0)) norm_person(dat0$recorded_by) else NA_character_
person_fallback <- {
ds <- if ("dataset_key" %in% names(dat0)) paste0("ds:", dat0$dataset_key) else NA_character_
so <- if ("source" %in% names(dat0)) paste0("src:", dat0$source) else NA_character_
res <- str_squish(paste(ds, so)); res[res == ""] <- NA_character_; res
}
person_token <- ifelse(!is.na(person_raw), person_raw, person_fallback) |> str_replace_all("[^a-z0-9]+","-")
src <- if ("source" %in% names(dat0)) as.character(dat0$source) else "NA"
## 4) 合成 visit_id
visit_id <- paste0(format(time_min, "%Y%m%d%H%M"), "_", space_x, "_", space_y, "_", person_token, "_", src)
## 5) 生成清单表 vis(仅新增列,不改原列)
vis <- dat0 |>
mutate(time_min = time_min, event_date = event_date,
space_x = space_x, space_y = space_y,
person_token = person_token, source = src,
visit_id = visit_id)
## 6) 聚合到清单级(DEP 旧口径)
first_non_na <- function(x) { y <- na.omit(x); if (length(y)) y[1] else NA }
dur_col <- pick1(c("durationHrs","duration_hrs","durationhrs","duration_min"), vis)
dist_col <- pick1(c("effortDistanceKm","distance_km","distance_m"), vis)
comp_col <- pick1(c("allObsReported","complete","checklist_complete"), vis)
chk <- vis |>
group_by(visit_id) |>
summarise(
n_rows = n(),
richness = if (!is.na(spec_nm)) n_distinct(.data[[spec_nm]]) else NA_integer_,
durationHrs = if (!is.na(dur_col)) {
v <- first_non_na(.data[[dur_col]]); if (identical(dur_col, "duration_min")) v/60 else v
} else NA_real_,
effortDistanceKm = if (!is.na(dist_col)) {
v <- first_non_na(.data[[dist_col]]); if (identical(dist_col, "distance_m")) v/1000 else v
} else NA_real_,
allObsReported = if (!is.na(comp_col)) as.logical(first_non_na(.data[[comp_col]])) else NA,
lat = first_non_na(as.numeric(.data[[lat_nm]])),
lon = first_non_na(as.numeric(.data[[lon_nm]])),
event_date = first_non_na(as.Date(event_date)),
person_token = first_non_na(person_token),
source = first_non_na(source),
.groups = "drop"
)
## 7) 可比性过滤(DEP 旧口径)
chk_f <- chk |>
filter(
is.na(allObsReported) | allObsReported,
is.na(durationHrs) | between(durationHrs, 5/60, 2), # 5–120 min
is.na(effortDistanceKm) | effortDistanceKm <= 5
)
## 8) 自检 + 缓存(根目录,供后续块 need() 直接读)
cat("NA time_min share:", mean(is.na(vis$time_min)), "\n",
"distinct visit_id:", dplyr::n_distinct(vis$visit_id), "\n",
"visits (raw / filtered):", nrow(chk), "/", nrow(chk_f), "\n")
## NA time_min share: 0
## distinct visit_id: 21512
## visits (raw / filtered): 21512 / 21184
print(table(month(chk$event_date), useNA = "ifany")[c(1:12, NA)])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 <NA>
## 1833 1528 1699 1671 1680 1486 1565 1796 2149 2578 1869 1658
7
# === 第七块(仅消费根目录 RDS;空间分类:Railway > Road > OpenSpace > Other)===
library(sf); library(dplyr); library(ggplot2); library(readr)
sf::sf_use_s2(FALSE)
# --- 清单点转 sf(动态兜底经纬度列名) ---
pick1 <- function(nm, pool) { x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","y","decimalLatitude","decimallatitude"), chk_f)
lon_nm <- pick1(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), chk_f)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))
checklists_filt <- chk_f %>%
sf::st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) %>%
sf::st_transform(3111)
# ---------- 分类判定(沿用你原口径) ----------
# 1) 距铁路 ≤ 25 m(先缓冲再相交,快且稳)
rail_buf25 <- sf::st_buffer(rail_within, 25)
near_rail <- if (inherits(rail_within, "sf") && nrow(rail_within) > 0) {
lengths(sf::st_intersects(checklists_filt, rail_buf25)) > 0
} else rep(FALSE, nrow(checklists_filt))
# 2) 在道路路权内(无则全 FALSE)
in_road <- if (inherits(road_within, "sf") && nrow(road_within) > 0) {
lengths(sf::st_intersects(checklists_filt, road_within)) > 0
} else rep(FALSE, nrow(checklists_filt))
# 2.5 在公用事业地役权内(utility)
util_geom <- if (exists("easements_within") && inherits(easements_within, "sf") && nrow(easements_within)>0) {
if (any(sf::st_geometry_type(easements_within) %in% c("LINESTRING","MULTILINESTRING")))
sf::st_buffer(easements_within, 15) # 线→面,15m 可按需要调
else easements_within
} else NULL
in_utility <- if (!is.null(util_geom)) lengths(sf::st_intersects(checklists_filt, util_geom)) > 0 else rep(FALSE, nrow(checklists_filt))
# 3) 在正规开放空间内
in_open <- if (inherits(opens_within, "sf") && nrow(opens_within) > 0) {
lengths(sf::st_intersects(checklists_filt, opens_within)) > 0
} else rep(FALSE, nrow(checklists_filt))
# 4) 打标签(优先级:Railway > Road > OpenSpace > Other)
igs_type <- dplyr::case_when(
near_rail ~ "Railway easement",
in_utility ~ "Utility easement",
in_road ~ "Road verge",
TRUE ~ NA_character_
)
space_group <- ifelse(!is.na(igs_type), "IGS",
ifelse(in_open, "FormalOpenSpace", "Other"))
# 5) 输出给下游(列名保持一致)
df_chk <- checklists_filt %>%
sf::st_drop_geometry() %>%
dplyr::mutate(space_group = space_group,
igs_type = igs_type)
df_igs <- dplyr::filter(df_chk, space_group == "IGS", !is.na(igs_type))
# (可选)自检
# table(df_chk$space_group); table(df_igs$igs_type, useNA = "ifany")
8
## === 第八块(读前面所有需要的 RDS;用 visit_id 解析日期并回填)===
suppressPackageStartupMessages({ library(dplyr); library(lubridate); library(sf); library(readr) })
sf::sf_use_s2(FALSE)
# 直接用上文内存对象
# df_chk 来自第7块;若 df_igs 为空,从 df_chk 筛
if (!exists("df_igs") || is.null(df_igs)) df_igs <- dplyr::filter(df_chk, space_group == "IGS", !is.na(igs_type))
# checklists_filt 来自第7块(sf);若要无几何表,用 st_drop_geometry(checklists_filt)
# 用第6块的过滤清单重建 checklists_filt(sf),以便保持与后续空间步骤一致
pick1 <- function(nm, pool){ x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","y","decimalLatitude","decimallatitude"), chk_f)
lon_nm <- pick1(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), chk_f)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))
checklists_filt <- chk_f |>
sf::st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) |>
sf::st_transform(3111)
# 1) 键来源(含 visit_id):用 df_chk(第7块产物)
key_df <- df_chk
# 2) 解析 visit_id 前缀得到日期(兼容含秒/时区;首选 YYYYMMDDHHMM)
extract_date_from_visit <- function(v) {
s <- as.character(v)
s <- sub("_.*$", "", s) # 取第一段时间戳
s <- sub(" [A-Z]{3,4}$", "", s) # 去掉可能的时区缩写
d12 <- sub("^([0-9]{12}).*$", "\\1", s) # 抓前12位(YYYYMMDDHHMM)
dt <- suppressWarnings(lubridate::parse_date_time(d12, orders = "YmdHM", tz = "Australia/Melbourne"))
if (all(is.na(dt))) { # 回退:hm / hms
dt1 <- suppressWarnings(lubridate::ymd_hm(s, tz = "Australia/Melbourne"))
dt2 <- suppressWarnings(lubridate::ymd_hms(s, tz = "Australia/Melbourne"))
dt <- ifelse(is.na(dt1), dt2, dt1)
}
as.Date(dt)
}
dates_by_visit <- key_df |>
dplyr::transmute(visit_id, date = extract_date_from_visit(visit_id)) |>
dplyr::group_by(visit_id) |>
dplyr::summarise(date = dplyr::first(na.omit(date), default = as.Date(NA)), .groups = "drop")
# 3) 季节映射
mk_season <- function(m) dplyr::case_when(
m %in% c(12L,1L,2L) ~ "Summer",
m %in% c(3L,4L,5L) ~ "Autumn",
m %in% c(6L,7L,8L) ~ "Winter",
m %in% c(9L,10L,11L)~ "Spring",
TRUE ~ NA_character_
)
# 4) 回填(移除旧 date,避免重名)
fill_time_fields <- function(df) {
if (is.null(df)) return(df)
df |>
dplyr::select(-dplyr::any_of("date")) |>
dplyr::left_join(dates_by_visit, by = "visit_id") |>
dplyr::mutate(
month = factor(lubridate::month(date), levels = 1:12, labels = month.abb),
season = factor(mk_season(lubridate::month(date)),
levels = c("Summer","Autumn","Winter","Spring"))
)
}
checklists_filt <- fill_time_fields(checklists_filt)
df_chk <- fill_time_fields(df_chk)
df_igs <- fill_time_fields(df_igs)
# 5) 自检
cat("按月样本量:\n")
## 按月样本量:
print(table(addNA(df_chk$month), useNA = "ifany"))
##
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec <NA>
## 1833 1528 1699 1671 1680 1486 1565 1796 2072 2476 1798 1580 0
cat("rows(df_chk) =", nrow(df_chk),
"| non-NA date =", sum(!is.na(df_chk$date)), "\n")
## rows(df_chk) = 21184 | non-NA date = 21184
df_chk
## # A tibble: 21,184 × 16
## visit_id n_rows richness durationHrs effortDistanceKm allObsReported lat
## <chr> <int> <int> <dbl> <dbl> <lgl> <dbl>
## 1 2024010111… 15 15 NA NA NA -37.9
## 2 2024010111… 30 30 NA NA NA -37.9
## 3 2024010111… 34 34 NA NA NA -37.7
## 4 2024010111… 1 1 NA NA NA -37.8
## 5 2024010111… 44 44 NA NA NA -37.9
## 6 2024010111… 5 5 NA NA NA -37.8
## 7 2024010111… 19 19 NA NA NA -37.8
## 8 2024010111… 8 8 NA NA NA -37.8
## 9 2024010111… 18 18 NA NA NA -37.8
## 10 2024010111… 26 26 NA NA NA -37.7
## # ℹ 21,174 more rows
## # ℹ 9 more variables: lon <dbl>, event_date <date>, person_token <chr>,
## # source <chr>, space_group <chr>, igs_type <chr>, date <date>, month <fct>,
## # season <fct>
picture1
# 前置:这些对象在你前面块里已有
# sa2_with_rail(分区几何,已裁 AOI)
# chk_f (visit 级、可比性过滤后的清单)
# aoi / bbox_deg(统一裁剪与图窗)
suppressPackageStartupMessages({library(sf); library(dplyr); library(ggplot2)})
sf::sf_use_s2(FALSE)
# 0) 保障:chk_f 真的是“一行=一次观鸟”
stopifnot(nrow(chk_f) == dplyr::n_distinct(chk_f$visit_id))
# 1) visit → sf(动态识别经纬度;与第七块一致)
pick1 <- function(nm, pool){ x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","y","decimalLatitude","decimallatitude"), chk_f)
lon_nm <- pick1(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), chk_f)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))
chk_sf <- chk_f %>%
sf::st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) %>%
sf::st_transform(3111)
# 2) 贴“区域” & 聚合 —— 指标=观鸟次数(=行数,因为一行=一次观鸟)
code_nm <- intersect(c("SA2_CODE21","sa2_code21","SA2_MAINCODE_2016","SA2_MAIN16"), names(sa2_with_rail))[1]
name_nm <- intersect(c("SA2_NAME21","sa2_name21","SA2_NAME_2016","SA2_NAME16"), names(sa2_with_rail))[1]
sa2_join <- sf::st_make_valid(sa2_with_rail)[, c(code_nm, name_nm)]
vis_sa2 <- sf::st_join(chk_sf, sa2_join, left = FALSE) # 边界点要保留可改 join=sf::st_intersects
# —— 主图口径:观鸟次数(n_vis)
sum_all <- vis_sa2 %>%
sf::st_drop_geometry() %>%
dplyr::summarise(
n_vis = dplyr::n(), # 观鸟次数
rich_med = median(richness, na.rm=TRUE), #(可选)每次观鸟的物种数中位数
.by = c(!!sym(code_nm), !!sym(name_nm))
)
# 3) 合并到分区几何,缺失置 0
sa2p <- sa2_with_rail %>%
dplyr::left_join(sum_all, by = setNames(c(code_nm,name_nm), c(code_nm,name_nm))) %>%
dplyr::mutate(n_vis = tidyr::replace_na(n_vis, 0L))
# 4) 作图(统一 AOI 图窗)——“分区填色地图,颜色越深=该区域观鸟次数越多”
ggplot(sa2p) +
geom_sf(aes(fill = n_vis), color = "grey30", linewidth = 0.2) +
scale_fill_viridis_c(option = "C", name = "观鸟次数") +
coord_sf(crs = 4326,
xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
expand = FALSE) +
theme_minimal(base_size = 12) +
labs(title = "Q1 — 分区填色地图(全季)")
# Q1-B 季节对比:分区填色小倍数(颜色越深 = 该区域观鸟次数越多)
suppressPackageStartupMessages({library(sf); library(dplyr); library(ggplot2); library(lubridate)})
sf::sf_use_s2(FALSE)
# 1) 若清单还没有季节列,就由 event_date 映射
if (!"season" %in% names(chk_f)) {
mk_season <- function(m) dplyr::case_when(
m %in% c(12,1,2) ~ "Summer",
m %in% c(3,4,5) ~ "Autumn",
m %in% c(6,7,8) ~ "Winter",
m %in% c(9,10,11)~ "Spring",
TRUE ~ NA_character_
)
chk_f <- chk_f %>%
mutate(season = factor(mk_season(lubridate::month(event_date)),
levels = c("Summer","Autumn","Winter","Spring")))
}
# 2) 清单 → sf(动态识别经纬度列)
lat_nm <- intersect(c("lat","latitude","y","decimalLatitude","decimallatitude"), names(chk_f))[1]
lon_nm <- intersect(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), names(chk_f))[1]
stopifnot(!is.na(lat_nm) && !is.na(lon_nm))
chk_sf <- chk_f %>%
st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) %>%
st_transform(3111)
# 3) 贴“区域” → 按季节聚合观鸟次数(一次清单 = 一次观鸟)
code_nm <- intersect(c("SA2_CODE21","sa2_code21","SA2_MAINCODE_2016","SA2_MAIN16"), names(sa2_with_rail))[1]
name_nm <- intersect(c("SA2_NAME21","sa2_name21","SA2_NAME_2016","SA2_NAME16"), names(sa2_with_rail))[1]
join_poly <- st_make_valid(sa2_with_rail)[, c(code_nm, name_nm)]
vis_season <- st_join(chk_sf, join_poly, left = FALSE)
sum_season <- vis_season %>%
st_drop_geometry() %>%
summarise(n_vis = n(), .by = c(!!sym(code_nm), !!sym(name_nm), season))
# 4) 并回分区几何;同一色标范围
sa2_season <- sa2_with_rail %>%
left_join(sum_season, by = setNames(c(code_nm,name_nm), c(code_nm,name_nm))) %>%
filter(!is.na(season)) %>%
mutate(n_vis = tidyr::replace_na(n_vis, 0L))
lim_max <- max(sa2_season$n_vis, na.rm = TRUE)
# 5) 作图(小倍数)
ggplot(sa2_season) +
geom_sf(aes(fill = n_vis), color = "grey30", linewidth = 0.2) +
scale_fill_viridis_c(option = "C", limits = c(0, lim_max), name = "观鸟次数") +
facet_wrap(~season, ncol = 2) +
coord_sf(crs = 4326,
xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
expand = FALSE) +
theme_minimal(base_size = 12) +
labs(title = "Q1 — 季节对比(分区填色小倍数)")
library(leaflet); library(sf)
# 用你已经做好的全季聚合结果 sa2p(里面有 n_vis)
name_nm <- intersect(c("SA2_NAME21","sa2_name21","SA2_NAME_2016","SA2_NAME16"), names(sa2p))[1]
geo_deg <- sa2p |>
sf::st_make_valid() |>
sf::st_transform(4326) |>
sf::st_zm(drop = TRUE) |>
dplyr::mutate(region_label = .data[[name_nm]])
leaflet(geo_deg) %>% # ← 直接把数据传给 leaflet,自动定位到范围
addProviderTiles("CartoDB.Positron") %>%
addPolygons(fillColor = "#3182bd", fillOpacity = 0.25,
color = "#555", weight = 0.6,
label = ~sprintf("%s:%s 次", region_label, n_vis))
# 先准备季节面几何(你前面已生成 sa2_season)
sea_deg <- sa2_season |>
sf::st_make_valid() |>
sf::st_transform(4326) |>
sf::st_zm(drop = TRUE) |>
dplyr::mutate(region_label = .data[[name_nm]])
rng <- range(c(geo_deg$n_vis, sea_deg$n_vis), na.rm = TRUE)
pal <- colorNumeric("viridis", rng, na.color = "#ffffff")
borders_deg <- geo_deg |>
sf::st_geometry() |>
sf::st_boundary() |>
sf::st_as_sf()
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
# 建两个图层面板:面填充在下,边界线在上(不被填充遮住)
addMapPane("fills", zIndex = 410) %>%
addMapPane("borders", zIndex = 420) %>%
addPolygons(data = geo_deg, options = pathOptions(pane = "fills"),
fillColor = ~pal(n_vis), fillOpacity = 0.85,
color = "#666", weight = 0.6, opacity = 1,
label = ~sprintf("%s:%s 次", region_label, n_vis),
group = "All") %>%
addPolygons(data = subset(sea_deg, season == "Summer"),
fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
label = ~sprintf("%s:%s 次 (Summer)", region_label, n_vis),
group = "Summer") %>%
addPolygons(data = subset(sea_deg, season == "Autumn"),
fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
label = ~sprintf("%s:%s 次 (Autumn)", region_label, n_vis),
group = "Autumn") %>%
addPolygons(data = subset(sea_deg, season == "Winter"),
fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
label = ~sprintf("%s:%s 次 (Winter)", region_label, n_vis),
group = "Winter") %>%
addPolygons(data = subset(sea_deg, season == "Spring"),
fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
label = ~sprintf("%s:%s 次 (Spring)", region_label, n_vis),
group = "Spring") %>%
addPolylines(data = borders_deg, options = pathOptions(pane = "borders"),
color = "#222", weight = 1.2, opacity = 1) %>%
addLegend(position = "topright", pal = pal, values = rng, title = "观鸟次数") %>%
addLayersControl(overlayGroups = c("All","Summer","Autumn","Winter","Spring"),
options = layersControlOptions(collapsed = FALSE)) %>%
hideGroup(c("Summer","Autumn","Winter","Spring"))
library(dplyr); library(ggplot2); library(forcats); library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
# 仅保留三类;如你只想看两类改成 c("IGS","FormalOpenSpace")
cmp3 <- df_chk %>% filter(space_group %in% c("IGS","FormalOpenSpace","Other"))
# 统计:中位数与 IQR(Q1~Q3)
sum_tbl <- cmp3 %>%
summarise(
n = n(),
q1 = quantile(richness, 0.25, na.rm=TRUE),
median = median(richness, na.rm=TRUE),
q3 = quantile(richness, 0.75, na.rm=TRUE),
.by = space_group
) %>%
mutate(space_group = fct_reorder(space_group, median)) # 按中位数排序
# 柱状图:柱=中位数;误差线=IQR(Q1~Q3)
ggplot(sum_tbl, aes(x = space_group, y = median, fill = space_group)) +
geom_col(width = .65) +
geom_errorbar(aes(ymin = q1, ymax = q3), width = .18, linewidth = .7) +
geom_text(aes(label = comma(median)), vjust = -0.5, size = 3.2) +
scale_fill_manual(values = c(FormalOpenSpace="#4CAF50", IGS="#9C27B0", Other="#90A4AE")) +
labs(title = "C1|不同地块类型的每次观鸟物种数(中位数 + IQR)",
x = NULL, y = "物种数(每次观鸟)") +
coord_cartesian(ylim = c(0, max(sum_tbl$q3)*1.15)) +
theme_minimal(base_size = 12) +
theme(legend.position = "none")
# Q1-D|季节折线图:各地块类型的每次观鸟物种数(中位数)
suppressPackageStartupMessages({library(dplyr); library(ggplot2); library(lubridate); library(forcats)})
# 若还没有 season,就由 event_date 映射(与你前面的口径一致)
if (!"season" %in% names(df_chk)) {
mk_season <- function(m) dplyr::case_when(
m %in% c(12,1,2) ~ "Summer",
m %in% c(3,4,5) ~ "Autumn",
m %in% c(6,7,8) ~ "Winter",
m %in% c(9,10,11)~ "Spring",
TRUE ~ NA_character_
)
df_chk <- df_chk %>%
mutate(season = factor(mk_season(lubridate::month(event_date)),
levels = c("Summer","Autumn","Winter","Spring")))
}
# 只用三类:IGS / Formal / Other;汇总到“中位数 + IQR”
sum_season_type <- df_chk %>%
filter(space_group %in% c("IGS","FormalOpenSpace","Other"), !is.na(season), !is.na(richness)) %>%
summarise(
n = n(),
q1 = quantile(richness, 0.25, na.rm = TRUE),
median = median(richness, na.rm = TRUE),
q3 = quantile(richness, 0.75, na.rm = TRUE),
.by = c(space_group, season)
) %>%
mutate(space_group = fct_relevel(space_group, "FormalOpenSpace", "IGS", "Other"))
# 折线 + 点;误差线用 IQR(更稳健)
ggplot(sum_season_type, aes(x = season, y = median, group = space_group, color = space_group)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = q1, ymax = q3), width = .12, alpha = .6) +
scale_color_manual(values = c(FormalOpenSpace = "#4CAF50", IGS = "#9C27B0", Other = "#90A4AE"),
labels = c(FormalOpenSpace = "Formal green space", IGS = "IGS", Other = "Other")) +
labs(title = "Q1|季节影响:不同地块类型的每次观鸟物种数(中位数)",
x = NULL, y = "物种数(每次观鸟,中位数)", color = "地块类型") +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank())
# 可选:在点上标注样本量
# + ggrepel::geom_text_repel(aes(label = n), size = 3, show.legend = FALSE)
# Q2 主图|IGS 子类型 × 每次观鸟物种数(互动散点)
# Q2 主图(改版):只保留“整体中位数”短横;季节可在图例勾选
suppressPackageStartupMessages({
library(dplyr); library(ggplot2); library(forcats); library(lubridate); library(plotly)
})
# 数据:IGS 子类型 + 每次观鸟物种数,确保有 season
dat <- df_chk %>%
filter(space_group == "IGS", !is.na(igs_type), !is.na(richness)) %>%
{ if (!"season" %in% names(.)) {
mutate(., season = factor(case_when(
month(event_date) %in% c(12,1,2) ~ "Summer",
month(event_date) %in% c(3,4,5) ~ "Autumn",
month(event_date) %in% c(6,7,8) ~ "Winter",
month(event_date) %in% c(9,10,11) ~ "Spring",
TRUE ~ NA_character_
), levels = c("Summer","Autumn","Winter","Spring")))
} else . } %>%
mutate(
igs_type = fct_relevel(igs_type,
c("Railway easement","Utility easement","Road verge")[
c("Railway easement","Utility easement","Road verge") %in% unique(igs_type)]),
date_chr = if ("event_date" %in% names(.)) as.character(event_date) else NA_character_,
dur_chr = if ("durationHrs" %in% names(.)) sprintf("%.1f h", ifelse(is.na(durationHrs), NA, durationHrs)) else NA,
dis_chr = if ("effortDistanceKm" %in% names(.)) sprintf("%.1f km", ifelse(is.na(effortDistanceKm), NA, effortDistanceKm)) else NA,
text = paste0(
"IGS类型:", igs_type,
"<br>物种数:", richness,
if (!all(is.na(date_chr))) paste0("<br>日期:", date_chr) else "",
if (!all(is.na(dur_chr))) paste0("<br>时长:", dur_chr) else "",
if (!all(is.na(dis_chr))) paste0("<br>距离:", dis_chr) else ""
)
)
# 只算“整体中位数”(不分季节)
med_df <- dat %>%
summarise(med = median(richness, na.rm = TRUE), .by = igs_type) %>%
mutate(xnum = as.integer(igs_type),
x0 = xnum - 0.35, x1 = xnum + 0.35)
p <- ggplot(dat, aes(x = igs_type, y = richness, color = season, text = text)) +
geom_jitter(width = 0.22, height = 0, alpha = 0.55, size = 1.6) +
# 自绘“整体中位数”短横(每个 IGS 子类 1 条)
geom_segment(data = med_df,
aes(x = x0, xend = x1, y = med, yend = med),
inherit.aes = FALSE, linewidth = 0.9, color = "black") +
scale_color_manual(values = c(Summer="#FFB74D", Autumn="#AED581", Winter="#4FC3F7", Spring="#BA68C8"),
na.translate = FALSE) +
labs(title = "Q2 — IGS 子类型与每次观鸟物种数(散点)",
x = "IGS 子类型", y = "物种数(每次观鸟)", color = "季节") +
theme_minimal(base_size = 12)
# 互动化:图例可勾选季节(单击隐藏/显示,双击仅显示该季节)
plotly::ggplotly(p, tooltip = "text")
# Q2 副图|IGS 子类型的每次观鸟物种数(小提琴 + 箱线)
suppressPackageStartupMessages({
library(dplyr); library(ggplot2); library(forcats); library(scales)
})
# 仅取 IGS,确保有 igs_type / richness
vdat <- df_chk %>%
filter(space_group == "IGS", !is.na(igs_type), !is.na(richness)) %>%
mutate(
# 按中位数从高到低排序,读图更直观
igs_type = fct_reorder(igs_type, richness, .fun = median, .desc = TRUE)
)
# 每类样本量,用于标注
n_tbl <- vdat %>% summarise(n = n(), .by = igs_type)
p_violin_box <- ggplot(vdat, aes(x = igs_type, y = richness, fill = igs_type)) +
geom_violin(trim = FALSE, alpha = .55, color = NA) +
geom_boxplot(width = .18, outlier.alpha = .25, color = "black",
fill = "white", linewidth = .4) +
# 在箱线图上方标注样本量
stat_summary(fun = median, geom = "text", vjust = -2.0, size = 3.3,
aes(label = paste0("n=", ..count..)),
fun.data = function(y){ data.frame(y = median(y), count = length(y)) }) +
scale_fill_manual(values = c(
"Railway easement" = "#7B1FA2",
"Utility easement" = "#AB47BC",
"Road verge" = "#8E24AA"
)) +
labs(title = "Q2 — IGS 子类型的每次观鸟物种数(小提琴 + 箱线)",
x = "IGS 子类型", y = "物种数(每次观鸟)") +
coord_cartesian(ylim = c(0, max(vdat$richness, na.rm = TRUE) * 1.1)) +
theme_minimal(base_size = 12) +
theme(legend.position = "none",
panel.grid.minor = element_blank())
p_violin_box
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Q2|IGS内部:季节×子类型的柱状图(中位数 + IQR + n,带悬停)
suppressPackageStartupMessages({
library(dplyr); library(ggplot2); library(forcats); library(lubridate); library(scales); library(plotly)
})
# 1) 取 IGS 清单,补季节列(若缺)
igs <- df_chk %>%
filter(space_group == "IGS", !is.na(igs_type), !is.na(richness))
if (!"season" %in% names(igs)) {
mk_season <- function(m) dplyr::case_when(
m %in% c(12,1,2) ~ "Summer",
m %in% c(3,4,5) ~ "Autumn",
m %in% c(6,7,8) ~ "Winter",
m %in% c(9,10,11)~ "Spring",
TRUE ~ NA_character_
)
igs <- igs %>% mutate(season = factor(mk_season(month(event_date)),
levels = c("Summer","Autumn","Winter","Spring")))
}
# 2) 统计:每季×子类型的 中位数 + IQR + 样本量
lvl <- c("Railway easement","Utility easement","Road verge")
sum_season <- igs %>%
mutate(igs_type = fct_relevel(igs_type, lvl[lvl %in% unique(igs_type)])) %>%
summarise(
n = n(),
q1 = quantile(richness, .25, na.rm=TRUE),
median = median(richness, na.rm=TRUE),
q3 = quantile(richness, .75, na.rm=TRUE),
.by = c(season, igs_type)
)
sum_season <- sum_season %>%
dplyr::mutate(
tt = paste0(
"季节:", season,
"<br>IGS:", igs_type,
"<br>中位数:", median,
"<br>IQR:", q1, "–", q3,
"<br>样本量 n:", scales::comma(n)
)
)
# 3) 柱状 + 误差线 + n 标注
p_bar <- ggplot(sum_season, aes(x = season, y = median, fill = igs_type, text = tt)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) +
geom_errorbar(aes(ymin = q1, ymax = q3),
position = position_dodge(width = 0.8), width = 0.18, linewidth = .7, alpha = .9) +
geom_text(aes(label = scales::comma(n)),
position = position_dodge(width = 0.8), vjust = -0.6, size = 3, color = "grey30") +
scale_fill_manual(values = c("Railway easement"="#7B1FA2",
"Utility easement"="#AB47BC",
"Road verge" ="#8E24AA"),
name = "IGS 子类型") +
labs(title = "Q2|季节 × IGS 子类型:每次观鸟物种数(中位数 + IQR)",
x = NULL, y = "物种数(每次观鸟,中位数)") +
coord_cartesian(ylim = c(0, max(sum_season$q3, na.rm=TRUE) * 1.15)) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank())
# 互动
plotly::ggplotly(p_bar, tooltip = "text")
# 4) 悬停信息:转 plotly(tooltip 显示 季节/子类型/中位数/IQR/n)
sum_season <- sum_season %>%
mutate(tt = paste0("季节:", season,
"<br>IGS:", igs_type,
"<br>中位数:", median,
"<br>IQR:", q1, "–", q3,
"<br>样本量 n:", comma(n)))
p_bar_plt <- ggplotly(
p_bar +
geom_point(aes(text = tt, y = median, color = igs_type),
position = position_dodge(width = 0.8), alpha = 0), # 透明点,仅用于tooltip
tooltip = "text"
)
## Warning in geom_point(aes(text = tt, y = median, color = igs_type), position =
## position_dodge(width = 0.8), : Ignoring unknown aesthetics: text
p_bar_plt
Q3
# Q3 主图|植被覆盖 × 距水体 的二维热力图(Z=每次观鸟物种数中位数;分面=IGS子类型)
suppressPackageStartupMessages({
library(sf); library(dplyr); library(ggplot2); library(forcats); library(plotly)
})
sf::sf_use_s2(FALSE)
# ---- 0) 前置断言(内存对象就绪) ----
stopifnot(inherits(checklists_filt, "sf"),
"visit_id" %in% names(checklists_filt),
"visit_id" %in% names(df_chk))
stopifnot(inherits(veg_within, "sf"),
inherits(water_within, "sf"))
# ---- 1) 计算距最近水体的距离(米),若已存在 dist_water_m 就跳过 ----
if (!"dist_water_m" %in% names(df_chk) || all(is.na(df_chk$dist_water_m))) {
pts3111 <- sf::st_transform(checklists_filt, 3111)
water3111 <- sf::st_transform(sf::st_make_valid(water_within), 3111)
idx_near <- sf::st_nearest_feature(pts3111, water3111)
dist_m <- as.numeric(sf::st_distance(pts3111, water3111[idx_near, ], by_element = TRUE))
df_chk <- df_chk %>%
select(-dplyr::any_of("dist_water_m")) %>%
left_join(pts3111 %>% st_drop_geometry() %>% transmute(visit_id, dist_water_m = dist_m),
by = "visit_id")
}
# ---- 2) 计算 50 m 植被覆盖率(%)----
# 对每个清单点做50m缓冲,与植被面求交面积;cover = 交面积 / (π·50^2) × 100
buf50 <- st_buffer(st_transform(checklists_filt, 3111), 250)
buf50$visit_id <- checklists_filt$visit_id
# 交集(注意:可能较慢,但最稳;如太慢可适当抽稀或先 st_crop() 限定范围)
ov <- suppressWarnings(st_intersection(st_make_valid(buf50), st_make_valid(st_transform(veg_within, 3111))))
cover_tbl <- ov %>%
transmute(visit_id, a_int = as.numeric(st_area(.))) %>%
group_by(visit_id) %>%
summarise(a_int = sum(a_int, na.rm = TRUE), .groups = "drop") %>%
mutate(veg_cover_pct = pmin(100, 100 * a_int / (pi * 50^2))) %>%
select(visit_id, veg_cover_pct)
# —— 并回 df_chk(没有交到植被的点记 0%;确保列存在且为数值)——
df_chk <- df_chk %>%
dplyr::left_join(cover_tbl, by = "visit_id")
if (!"veg_cover_pct" %in% names(df_chk)) {
df_chk$veg_cover_pct <- NA_real_ # 万一 join 没有带来该列,先创建
}
df_chk <- df_chk %>%
dplyr::mutate(
veg_cover_pct = tidyr::replace_na(.data$veg_cover_pct, 0),
veg_cover_pct = as.numeric(veg_cover_pct)
)
# ---- 3) 分箱(Y:对数水距;X:线性绿量)----
water_breaks <- c(0, 25, 50, 100, 200, 500, 1000, 2000, Inf)
water_labs <- c("0–25m","25–50m","50–100m","100–200m","200–500m","0.5–1km","1–2km",">2km")
veg_breaks <- seq(0, 100, by = 10)
veg_labs <- paste0(veg_breaks[-length(veg_breaks)], "–", veg_breaks[-1], "%")
# ---- 3) 分箱(Y:对数水距;X:线性绿量)+ 设分面类别(IGS 三子类 + Non-IGS)----
binned <- df_chk %>%
filter(!is.na(richness), !is.na(dist_water_m)) %>%
mutate(
# 分面标签:IGS 子类保留原名;其余合并为 Non-IGS
facet_type = if_else(space_group == "IGS" & !is.na(igs_type),
as.character(igs_type),
"Non-IGS"),
facet_type = factor(facet_type,
levels = c("Railway easement","Utility easement","Road verge","Non-IGS")),
water_bin = cut(dist_water_m, breaks = water_breaks, labels = water_labs,
right = FALSE, include.lowest = TRUE),
veg_bin = cut(veg_cover_pct, breaks = veg_breaks, labels = veg_labs,
right = FALSE, include.lowest = TRUE)
)
# ---- 4) 聚合(每格:中位数 + IQR + n;小样本标记)----
agg <- binned %>%
group_by(facet_type, veg_bin, water_bin, .drop = FALSE) %>%
summarise(
n = n(),
q1 = quantile(richness, .25, na.rm = TRUE),
median = median(richness, na.rm = TRUE),
q3 = quantile(richness, .75, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(alpha_flag = ifelse(n < 15, "low", "ok"))
# ---- 5) 静态热力图(facet = IGS 三子类 + Non-IGS)----
p_q3_heat <- ggplot(
agg,
aes(x = veg_bin, y = water_bin, fill = median,
text = paste0("组别:", facet_type,
"<br>绿量:", veg_bin,
"<br>距水:", water_bin,
"<br>中位数:", median,
"<br>IQR:", q1, "–", q3,
"<br>n:", n))
) +
geom_tile(aes(alpha = alpha_flag), colour = "grey85", linewidth = .1) +
scale_alpha_manual(values = c(low = 0.35, ok = 1), guide = "none") +
scale_fill_viridis_c(name = "物种数(中位数)", na.value = "grey95", option = "C") +
facet_wrap(~ facet_type, nrow = 1) + # 觉得太挤可改 nrow = 2 变成 2×2
labs(title = "Q3|植被覆盖 × 距水体(每次观鸟物种数中位数)",
x = "植被覆盖(50 m 半径内,%)", y = "到最近水体的距离") +
theme_minimal(base_size = 12) +
theme(panel.grid = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
strip.background = element_rect(fill = "grey95", colour = NA))
# 互动
plotly::ggplotly(p_q3_heat, tooltip = "text")
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in colorscale_json(trace$colorscale): A colorscale list must of
## elements of the same (non-zero) length