Melbourne Bird Analysis

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(sf)

## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
sf::sf_use_s2(FALSE)

## Spherical geometry (s2) switched off

# 读取
sa2   <- st_read("/Users/j.bhuang/Desktop/5145/SA2_2021_AUST_GDA2020.shp", quiet = TRUE)
rail  <- st_read("/Users/j.bhuang/Desktop/5145/railway_melb.gpkg",         quiet = TRUE)
water <- st_read("/Users/j.bhuang/Desktop/5145/water_poly_melb.gpkg",      quiet = TRUE)

# 投影
melb_sa2_proj <- sa2 %>% filter(GCC_NAME21 == "Greater Melbourne") %>% st_transform(3111)
rail_proj     <- st_transform(rail,  3111)
water_proj    <- st_transform(water, 3111)

# === 关键：AOI = “与铁路相交的分区集合”的合并面（与 DEP 一致）===
sa2_with_rail <- melb_sa2_proj %>%
  st_filter(rail_proj, .predicate = st_intersects) %>%
  distinct(SA2_CODE21, .keep_all = TRUE)

aoi      <- st_union(sa2_with_rail)                   # 用它做所有图层的裁剪面
bbox_deg <- st_bbox(st_transform(aoi, 4326))          # 统一图窗（经纬度）
clip     <- function(g) st_intersection(st_crop(g, st_bbox(aoi)), aoi)  # 统一裁剪函数

# 用 AOI 裁剪 rail / water（先 bbox 再精确相交）
rail_within  <- clip(rail_proj)

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

water_within <- clip(water_proj)

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

# 质量检查图（现在视野就是 AOI；不会再显示全城）
ggplot() +
  geom_sf(data = sa2_with_rail, fill = NA, color = "black", linewidth = 0.3) +
  geom_sf(data = water_within,  fill = "lightblue", color = "blue", alpha = 0.5) +
  geom_sf(data = rail_within,   color = "red", linewidth = 0.2) +
  coord_sf(crs = 4326,
           xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
           ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
           expand = FALSE) +
  theme_minimal(base_size = 14) +
  labs(title = "Greater Melbourne — Rail & Water within AOI", x = NULL, y = NULL)

sf::sf_use_s2(FALSE)
library(sf)
library(dplyr)
library(ggplot2)

# 直接从根目录读上个块的结果（不依赖内存变量）
need <- function(fname){
  if (!file.exists(fname)) stop("缺少缓存：", fname, " —— 请先运行第一个块。")
  readRDS(fname)
}


# 读取本块自己的原始图层
veg   <- st_read("urban_veg_2018_melb.gpkg", quiet = TRUE)
opens <- st_read("openspace_melb.gpkg",      quiet = TRUE)

# 修复 + 投影
veg_proj   <- st_transform(st_make_valid(veg),   3111)
opens_proj <- st_transform(st_make_valid(opens), 3111)

# 提速：按 AOI 的 bbox 粗裁，再做精确相交
veg_crop   <- st_crop(veg_proj,   st_bbox(melb_sa2_proj))

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

opens_crop <- st_crop(opens_proj, st_bbox(melb_sa2_proj))
veg_within   <- st_intersection(veg_crop,   aoi)

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

opens_within <- st_intersection(opens_crop, aoi)



# 画图（原样）
ggplot() +
  geom_sf(data = veg_within,   aes(fill = "Vegetation"), color = NA, alpha = 0.3) +
  geom_sf(data = opens_within, aes(fill = "Open Space"), color = "darkgreen", alpha = 0.4) +
  geom_sf(data = water_within, aes(fill = "Water"),      color = "blue", alpha = 0.5) +
  geom_sf(data = rail_within,  aes(color = "Railway"), linewidth = 0.2) +
  geom_sf(data = sa2_with_rail, fill = NA, color = "black", linewidth = 0.4) +
  scale_color_manual(values = c("Railway" = "red")) +
  scale_fill_manual(values = c("Vegetation" = "forestgreen",
                               "Open Space" = "lightgreen",
                               "Water"      = "lightblue")) +
  coord_sf(crs = 4326,
         xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
         ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
         expand = FALSE) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x  = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(title = "Greater Melbourne",
       fill = "Land / Water", color = "Transport")

# ---- Road / Verge（优先 road_casement，找不到再退 easement；线就缓冲成面）----
road_within <- NULL  # 先占位，避免未定义
# 1) 选数据源（按文件存在与否判断）
if (file.exists("road_casement_melb.gpkg")) {
  road_src <- "road_casement_melb.gpkg"
} else if (file.exists("road_casement_melb.shp")) {
  road_src <- "road_casement_melb.shp"
} else if (file.exists("easement_melb.gpkg")) {
  road_src <- "easement_melb.gpkg"  # 兜底：用地役权近似道路边带
} else {
  road_src <- NA_character_
}

if (!is.na(road_src)) {
  road_raw <- sf::st_read(road_src, quiet = TRUE) |>
              sf::st_transform(3111) |>
              sf::st_make_valid()
  # 2) 若是线几何，轻缓冲成面（单位：米）
  is_line <- any(sf::st_geometry_type(road_raw) %in% c("LINESTRING","MULTILINESTRING"))
  road_poly <- if (is_line) sf::st_buffer(road_raw, 6) else road_raw
  # 3) 裁到统一 AOI（用你第1块的 clip()）
  road_within <- clip(road_poly)
}

## Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, : GDAL
## Message 1: Invalid content for record 1 in column PFI_RETIRE: 0000-00-00

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

# 第三个块（仅消费前两块在根目录写出的 RDS）
library(sf)
library(dplyr)
library(ggplot2)
sf::sf_use_s2(FALSE)
# —— 本块仅处理 easement —— #
easements      <- st_read("easement_melb.gpkg", quiet = TRUE)

## Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, : GDAL
## Message 1: Invalid content for record 2 in column PFI_CR: 0000-00-00

easements_proj <- st_transform(st_make_valid(easements), 3111)

# 可选提速：先 bbox 粗裁，再相交
easements_crop    <- st_crop(easements_proj, st_bbox(melb_sa2_proj))

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

easements_within  <- st_intersection(easements_crop, aoi)

## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

# —— 画图（与你原版一致） —— #
ggplot() +
  # 多边形用 fill
  geom_sf(data = veg_within,    aes(fill = "Vegetation"),  color = NA,          alpha = 0.30) +
  geom_sf(data = water_within,  aes(fill = "Water"),       color = "blue",      alpha = 0.50) +
  geom_sf(data = opens_within,  aes(fill = "Open Space"),  color = "darkgreen", alpha = 0.70) +
  # 线层用 color（两类 easement）
  geom_sf(data = rail_within,       aes(color = "Railway"),          linewidth = 0.30) +
  geom_sf(data = easements_within,  aes(color = "Utility easement"), linewidth = 0.40, alpha = 0.40) +
  geom_sf(data = sa2_with_rail, fill = NA, color = "black", linewidth = 0.40) +
  scale_fill_manual(
    name   = "Background & Formal",
    values = c("Vegetation" = "palegreen3",
               "Water"      = "lightblue",
               "Open Space" = "forestgreen")
  ) +
  scale_color_manual(
    name   = "Informal (IGS)",
    values = c("Railway" = "red",
               "Utility easement" = "orange")
  ) +
  guides(color = guide_legend(order = 1),
         fill  = guide_legend(order = 2)) +
  coord_sf(crs = 4326,
         xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
         ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
         expand = FALSE) +
  theme_minimal(base_size = 14) +
  theme(
    legend.title = element_text(face = "bold"),
    axis.text.x  = element_text(angle = 45, hjust = 1),
    axis.title   = element_blank()
  ) +
  labs(title = "Greater Melbourne — Formal vs Informal Green Space")

library(readr)
library(sf); sf::sf_use_s2(FALSE)  # （可选）与前两块一致
library(dplyr)

# 1) 读入
bird_path <- "newdata.csv"
birds <- readr::read_csv(bird_path, show_col_types = FALSE)

# 2) 自动识别列（注意：cand_vec 必须全小写，因为 pick_col 用了 tolower(names(df))）
pick_col <- function(df, cand_vec) {
  nms <- tolower(names(df))
  for (c in cand_vec) { i <- match(c, nms, nomatch = 0); if (i > 0) return(names(df)[i]) }
  NA_character_
}

# 2.1 经纬度候选：加入 Darwin Core 的 decimallatitude/decimallongitude
lat_col <- pick_col(birds, c("lat_json","latitude","lat","y","decimallatitude","decimal_latitude"))
lon_col <- pick_col(birds, c("lon_json","longitude","lon","lng","x","decimallongitude","decimal_longitude"))
stopifnot(!is.na(lat_col), !is.na(lon_col))

# 确保可数值化（有些来源会给成字符）
birds[[lat_col]] <- suppressWarnings(as.numeric(birds[[lat_col]]))
birds[[lon_col]] <- suppressWarnings(as.numeric(birds[[lon_col]]))

# 3) 物种列候选（保留以备后续用；此处不强制改名）
spec_col <- pick_col(
  birds,
  c("species","speciescode","scientificname","scientific_name",
    "comname","commonname","vernacularname","vernacular_name",
    "sci_name","common_name")
)

# 4) 转 sf 点（WGS84→3111）并裁 AOI


# ★ 提速与更稳的空间关系：先 union AOI，再用 intersects（边界点也保留）
aoi_rail <- sf::st_union(sa2_with_rail)
# 正确：先建 sf 点，再裁剪
birds_sf <- birds |>
  dplyr::filter(!is.na(.data[[lat_col]]), !is.na(.data[[lon_col]])) |>
  sf::st_as_sf(coords = c(lon_col, lat_col), crs = 4326, remove = FALSE) |>
  sf::st_transform(3111)

birds_sf <- birds_sf[sf::st_intersects(birds_sf, aoi, sparse = FALSE), ]

cat("导入并落在 AOI 内的观测点数: ", nrow(birds_sf), "\n")

## 导入并落在 AOI 内的观测点数:  351714

# === 第五块（仅消费前块写在根目录的 RDS）===
library(sf); library(dplyr); library(ggplot2); library(readr)
sf::sf_use_s2(FALSE)


# —— 叠加检查图（与你原版一致；仅增了 easements 条件叠加）——
p <- ggplot() +
  geom_sf(data = sa2_with_rail, fill = NA, color = "grey20", linewidth = 0.4) +
  geom_sf(data = water_within,  fill = "lightblue",  color = NA,         alpha = 0.35) +
  geom_sf(data = opens_within,  fill = "lightgreen", color = "darkgreen", alpha = 0.4) +
  geom_sf(data = rail_within,   color = "red",        linewidth = 0.25) +
  geom_sf(data = birds_sf,      color = "#E54B4B",    alpha = 0.7, size = 0.6) +
  coord_sf(crs = 4326,
           xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
           ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
           expand = FALSE) +
  theme_minimal(base_size = 13) +
  labs(title = "Bird observations over basemap", x = NULL, y = NULL)
if (!is.null(easements_within))
  p <- p + geom_sf(data = easements_within, aes(color = "Utility easement"), linewidth = 0.40, alpha = 0.40)

## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

print(p)

# ========== B. SA2 聚合（观测数 / 物种丰富度） ==========
sa2_join <- st_make_valid(melb_sa2_proj)[, c("SA2_CODE21","SA2_NAME21")]

obs_sa2 <- sf::st_join(
  birds_sf,
  sa2_join,
  left = FALSE              # 如担心边界点被丢，可改：join = sf::st_intersects
)


# 自动识别一个物种列名（仅用于 richness；完全不依赖上块的变量）
cols <- names(sf::st_drop_geometry(birds_sf))
spec_col <- intersect(c("sciName","scientific_name","common_name","species",
                        "taxon_name","comname","vernacular_name","vernacularName"), cols)
spec_col <- if (length(spec_col)) spec_col[1] else NA_character_

if (!is.na(spec_col)) {
  sa2_sum <- obs_sa2 |>
    sf::st_drop_geometry() |>
    dplyr::group_by(SA2_CODE21, SA2_NAME21) |>
    dplyr::summarise(
      n_obs    = dplyr::n(),
      richness = dplyr::n_distinct(.data[[spec_col]]),
      .groups  = "drop"
    )
} else {
  sa2_sum <- obs_sa2 |>
    sf::st_drop_geometry() |>
    dplyr::count(SA2_CODE21, SA2_NAME21, name = "n_obs")
}

# === 第六块（仅消费根目录 RDS；生成 visit_id 与清单级表）===
suppressPackageStartupMessages({
  library(dplyr); library(lubridate); library(stringr); library(readr); library(sf)
})
sf::sf_use_s2(FALSE)

# —— 只读前置块产物（根目录）——
need <- function(fname){
  if (!file.exists(fname)) stop("缺少缓存：", fname, " —— 请先运行前置块。")
  readRDS(fname)
}

dat0     <- sf::st_drop_geometry(birds_sf)         # 本块按数据框处理

# 0) 列名兜底（大小写均可）
pick1 <- function(nm, pool) { x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm  <- pick1(c("lat","latitude","decimalLatitude","decimallatitude","y"), dat0)
lon_nm  <- pick1(c("lon","longitude","decimalLongitude","decimallongitude","x","lng"), dat0)
spec_nm <- pick1(c("sciName","scientificName","scientific_name","taxon_name",
                   "species","common_name","comname","vernacularName","vernacular_name"), dat0)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))

## 1) 时间：稳健解析 obsDt → 墨尔本分钟；失败用 date@00:00 兜底
parse_obsdt_local <- function(x) {
  s <- as.character(x); s <- gsub("T"," ", s, fixed=TRUE); s <- sub("Z$","", s)
  dt <- suppressWarnings(parse_date_time(
    s,
    orders = c("Y-m-d H:M:S","Y-m-d H:M","Y/m/d H:M:S","Y/m/d H:M",
               "d/m/Y H:M:S","d/m/Y H:M","Y-m-d","Y/m/d","d/m/Y"),
    tz = "UTC"
  ))
  with_tz(dt, "Australia/Melbourne")
}
time_min <- floor_date(parse_obsdt_local(dat0$obsDt), "minute")
date_fallback <- suppressWarnings(parse_date_time(
  as.character(dat0$date),
  orders = c("Y-m-d","Y/m/d","d/m/Y","m/d/Y"),
  tz = "Australia/Melbourne"
))
time_min[is.na(time_min)] <- floor_date(date_fallback[is.na(time_min)], "minute")
event_date <- as.Date(time_min)

## 2) 地点：3111 下 20 m 网格（用动态列名）
use_grid <- TRUE; grid_cell_m <- 20
if (use_grid) {
  pts3111 <- st_as_sf(dat0, coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) |> st_transform(3111)
  xy <- st_coordinates(pts3111)
  xbin_m <- floor(xy[,1] / grid_cell_m) * grid_cell_m
  ybin_m <- floor(xy[,2] / grid_cell_m) * grid_cell_m
  space_x <- sprintf("X%.0f", xbin_m)
  space_y <- sprintf("Y%.0f", ybin_m)
} else {
  space_x <- paste0("lat", round(as.numeric(dat0[[lat_nm]]), 4))
  space_y <- paste0("lon", round(as.numeric(dat0[[lon_nm]]), 4))
}

## 3) 人物 + 来源（与你原逻辑一致）
norm_person <- function(x) {
  s <- tolower(str_squish(str_replace_all(as.character(x), "&", " and ")))
  s <- str_replace_all(s, "[,;|+/]+", " "); s <- str_squish(s); s[s %in% c("", "na")] <- NA_character_; s
}
person_raw <- if ("recorded_by" %in% names(dat0)) norm_person(dat0$recorded_by) else NA_character_
person_fallback <- {
  ds <- if ("dataset_key" %in% names(dat0)) paste0("ds:", dat0$dataset_key) else NA_character_
  so <- if ("source" %in% names(dat0))      paste0("src:", dat0$source)      else NA_character_
  res <- str_squish(paste(ds, so)); res[res == ""] <- NA_character_; res
}
person_token <- ifelse(!is.na(person_raw), person_raw, person_fallback) |> str_replace_all("[^a-z0-9]+","-")
src <- if ("source" %in% names(dat0)) as.character(dat0$source) else "NA"

## 4) 合成 visit_id
visit_id <- paste0(format(time_min, "%Y%m%d%H%M"), "_", space_x, "_", space_y, "_", person_token, "_", src)

## 5) 生成清单表 vis（仅新增列，不改原列）
vis <- dat0 |>
  mutate(time_min = time_min, event_date = event_date,
         space_x = space_x, space_y = space_y,
         person_token = person_token, source = src,
         visit_id = visit_id)

## 6) 聚合到清单级（DEP 旧口径）
first_non_na <- function(x) { y <- na.omit(x); if (length(y)) y[1] else NA }
dur_col  <- pick1(c("durationHrs","duration_hrs","durationhrs","duration_min"), vis)
dist_col <- pick1(c("effortDistanceKm","distance_km","distance_m"), vis)
comp_col <- pick1(c("allObsReported","complete","checklist_complete"), vis)

chk <- vis |>
  group_by(visit_id) |>
  summarise(
    n_rows   = n(),
    richness = if (!is.na(spec_nm)) n_distinct(.data[[spec_nm]]) else NA_integer_,
    durationHrs = if (!is.na(dur_col)) {
      v <- first_non_na(.data[[dur_col]]); if (identical(dur_col, "duration_min")) v/60 else v
    } else NA_real_,
    effortDistanceKm = if (!is.na(dist_col)) {
      v <- first_non_na(.data[[dist_col]]); if (identical(dist_col, "distance_m")) v/1000 else v
    } else NA_real_,
    allObsReported = if (!is.na(comp_col)) as.logical(first_non_na(.data[[comp_col]])) else NA,
    lat = first_non_na(as.numeric(.data[[lat_nm]])),
    lon = first_non_na(as.numeric(.data[[lon_nm]])),
    event_date   = first_non_na(as.Date(event_date)),
    person_token = first_non_na(person_token),
    source       = first_non_na(source),
    .groups = "drop"
  )

## 7) 可比性过滤（DEP 旧口径）
chk_f <- chk |>
  filter(
    is.na(allObsReported) | allObsReported,
    is.na(durationHrs)    | between(durationHrs, 5/60, 2),  # 5–120 min
    is.na(effortDistanceKm) | effortDistanceKm <= 5
  )

## 8) 自检 + 缓存（根目录，供后续块 need() 直接读）
cat("NA time_min share:", mean(is.na(vis$time_min)), "\n",
    "distinct visit_id:", dplyr::n_distinct(vis$visit_id), "\n",
    "visits (raw / filtered):", nrow(chk), "/", nrow(chk_f), "\n")

## NA time_min share: 0 
##  distinct visit_id: 21512 
##  visits (raw / filtered): 21512 / 21184

print(table(month(chk$event_date), useNA = "ifany")[c(1:12, NA)])

## 
##    1    2    3    4    5    6    7    8    9   10   11   12 <NA> 
## 1833 1528 1699 1671 1680 1486 1565 1796 2149 2578 1869 1658

# === 第七块（仅消费根目录 RDS；空间分类：Railway > Road > OpenSpace > Other）===
library(sf); library(dplyr); library(ggplot2); library(readr)
sf::sf_use_s2(FALSE)

# --- 清单点转 sf（动态兜底经纬度列名） ---
pick1 <- function(nm, pool) { x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","y","decimalLatitude","decimallatitude"), chk_f)
lon_nm <- pick1(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), chk_f)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))

checklists_filt <- chk_f %>%
  sf::st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) %>%
  sf::st_transform(3111)

# ---------- 分类判定（沿用你原口径） ----------
# 1) 距铁路 ≤ 25 m（先缓冲再相交，快且稳）
rail_buf25 <- sf::st_buffer(rail_within, 25)
near_rail <- if (inherits(rail_within, "sf") && nrow(rail_within) > 0) {
  lengths(sf::st_intersects(checklists_filt, rail_buf25)) > 0
} else rep(FALSE, nrow(checklists_filt))

# 2) 在道路路权内（无则全 FALSE）
in_road <- if (inherits(road_within, "sf") && nrow(road_within) > 0) {
  lengths(sf::st_intersects(checklists_filt, road_within)) > 0
} else rep(FALSE, nrow(checklists_filt))

# 2.5 在公用事业地役权内（utility）
util_geom <- if (exists("easements_within") && inherits(easements_within, "sf") && nrow(easements_within)>0) {
  if (any(sf::st_geometry_type(easements_within) %in% c("LINESTRING","MULTILINESTRING")))
    sf::st_buffer(easements_within, 15)  # 线→面，15m 可按需要调
  else easements_within
} else NULL
in_utility <- if (!is.null(util_geom)) lengths(sf::st_intersects(checklists_filt, util_geom)) > 0 else rep(FALSE, nrow(checklists_filt))
# 3) 在正规开放空间内
in_open <- if (inherits(opens_within, "sf") && nrow(opens_within) > 0) {
  lengths(sf::st_intersects(checklists_filt, opens_within)) > 0
} else rep(FALSE, nrow(checklists_filt))

# 4) 打标签（优先级：Railway > Road > OpenSpace > Other）
igs_type <- dplyr::case_when(
  near_rail ~ "Railway easement",
  in_utility ~ "Utility easement",
  in_road ~ "Road verge",
  TRUE ~ NA_character_
)
space_group <- ifelse(!is.na(igs_type), "IGS",
                      ifelse(in_open, "FormalOpenSpace", "Other"))

# 5) 输出给下游（列名保持一致）
df_chk <- checklists_filt %>%
  sf::st_drop_geometry() %>%
  dplyr::mutate(space_group = space_group,
                igs_type    = igs_type)

df_igs <- dplyr::filter(df_chk, space_group == "IGS", !is.na(igs_type))

# （可选）自检
# table(df_chk$space_group); table(df_igs$igs_type, useNA = "ifany")

## === 第八块（读前面所有需要的 RDS；用 visit_id 解析日期并回填）===
suppressPackageStartupMessages({ library(dplyr); library(lubridate); library(sf); library(readr) })
sf::sf_use_s2(FALSE)


# 直接用上文内存对象
# df_chk 来自第7块；若 df_igs 为空，从 df_chk 筛
if (!exists("df_igs") || is.null(df_igs)) df_igs <- dplyr::filter(df_chk, space_group == "IGS", !is.na(igs_type))
# checklists_filt 来自第7块（sf）；若要无几何表，用 st_drop_geometry(checklists_filt)

# 用第6块的过滤清单重建 checklists_filt（sf），以便保持与后续空间步骤一致
pick1 <- function(nm, pool){ x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","y","decimalLatitude","decimallatitude"), chk_f)
lon_nm <- pick1(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), chk_f)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))

checklists_filt <- chk_f |>
  sf::st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) |>
  sf::st_transform(3111)

# 1) 键来源（含 visit_id）：用 df_chk（第7块产物）
key_df <- df_chk

# 2) 解析 visit_id 前缀得到日期（兼容含秒/时区；首选 YYYYMMDDHHMM）
extract_date_from_visit <- function(v) {
  s <- as.character(v)
  s <- sub("_.*$", "", s)                 # 取第一段时间戳
  s <- sub(" [A-Z]{3,4}$", "", s)         # 去掉可能的时区缩写
  d12 <- sub("^([0-9]{12}).*$", "\\1", s) # 抓前12位（YYYYMMDDHHMM）
  dt  <- suppressWarnings(lubridate::parse_date_time(d12, orders = "YmdHM", tz = "Australia/Melbourne"))
  if (all(is.na(dt))) {                    # 回退：hm / hms
    dt1 <- suppressWarnings(lubridate::ymd_hm(s,  tz = "Australia/Melbourne"))
    dt2 <- suppressWarnings(lubridate::ymd_hms(s, tz = "Australia/Melbourne"))
    dt  <- ifelse(is.na(dt1), dt2, dt1)
  }
  as.Date(dt)
}

dates_by_visit <- key_df |>
  dplyr::transmute(visit_id, date = extract_date_from_visit(visit_id)) |>
  dplyr::group_by(visit_id) |>
  dplyr::summarise(date = dplyr::first(na.omit(date), default = as.Date(NA)), .groups = "drop")

# 3) 季节映射
mk_season <- function(m) dplyr::case_when(
  m %in% c(12L,1L,2L) ~ "Summer",
  m %in% c(3L,4L,5L)  ~ "Autumn",
  m %in% c(6L,7L,8L)  ~ "Winter",
  m %in% c(9L,10L,11L)~ "Spring",
  TRUE ~ NA_character_
)

# 4) 回填（移除旧 date，避免重名）
fill_time_fields <- function(df) {
  if (is.null(df)) return(df)
  df |>
    dplyr::select(-dplyr::any_of("date")) |>
    dplyr::left_join(dates_by_visit, by = "visit_id") |>
    dplyr::mutate(
      month  = factor(lubridate::month(date), levels = 1:12, labels = month.abb),
      season = factor(mk_season(lubridate::month(date)),
                      levels = c("Summer","Autumn","Winter","Spring"))
    )
}

checklists_filt <- fill_time_fields(checklists_filt)
df_chk          <- fill_time_fields(df_chk)
df_igs          <- fill_time_fields(df_igs)

# 5) 自检
cat("按月样本量：\n")

## 按月样本量：

print(table(addNA(df_chk$month), useNA = "ifany"))

## 
##  Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec <NA> 
## 1833 1528 1699 1671 1680 1486 1565 1796 2072 2476 1798 1580    0

cat("rows(df_chk) =", nrow(df_chk),
    "| non-NA date =", sum(!is.na(df_chk$date)), "\n")

## rows(df_chk) = 21184 | non-NA date = 21184

df_chk

## # A tibble: 21,184 × 16
##    visit_id    n_rows richness durationHrs effortDistanceKm allObsReported   lat
##    <chr>        <int>    <int>       <dbl>            <dbl> <lgl>          <dbl>
##  1 2024010111…     15       15          NA               NA NA             -37.9
##  2 2024010111…     30       30          NA               NA NA             -37.9
##  3 2024010111…     34       34          NA               NA NA             -37.7
##  4 2024010111…      1        1          NA               NA NA             -37.8
##  5 2024010111…     44       44          NA               NA NA             -37.9
##  6 2024010111…      5        5          NA               NA NA             -37.8
##  7 2024010111…     19       19          NA               NA NA             -37.8
##  8 2024010111…      8        8          NA               NA NA             -37.8
##  9 2024010111…     18       18          NA               NA NA             -37.8
## 10 2024010111…     26       26          NA               NA NA             -37.7
## # ℹ 21,174 more rows
## # ℹ 9 more variables: lon <dbl>, event_date <date>, person_token <chr>,
## #   source <chr>, space_group <chr>, igs_type <chr>, date <date>, month <fct>,
## #   season <fct>

picture1

# 前置：这些对象在你前面块里已有
# sa2_with_rail（分区几何，已裁 AOI）
# chk_f         （visit 级、可比性过滤后的清单）
# aoi / bbox_deg（统一裁剪与图窗）

suppressPackageStartupMessages({library(sf); library(dplyr); library(ggplot2)})
sf::sf_use_s2(FALSE)

# 0) 保障：chk_f 真的是“一行=一次观鸟”
stopifnot(nrow(chk_f) == dplyr::n_distinct(chk_f$visit_id))

# 1) visit → sf（动态识别经纬度；与第七块一致）
pick1 <- function(nm, pool){ x <- intersect(nm, names(pool)); if (length(x)) x[1] else NA_character_ }
lat_nm <- pick1(c("lat","latitude","y","decimalLatitude","decimallatitude"), chk_f)
lon_nm <- pick1(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), chk_f)
stopifnot(!is.na(lat_nm), !is.na(lon_nm))

chk_sf <- chk_f %>%
  sf::st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) %>%
  sf::st_transform(3111)

# 2) 贴“区域” & 聚合 —— 指标=观鸟次数（=行数，因为一行=一次观鸟）
code_nm <- intersect(c("SA2_CODE21","sa2_code21","SA2_MAINCODE_2016","SA2_MAIN16"), names(sa2_with_rail))[1]
name_nm <- intersect(c("SA2_NAME21","sa2_name21","SA2_NAME_2016","SA2_NAME16"),   names(sa2_with_rail))[1]
sa2_join <- sf::st_make_valid(sa2_with_rail)[, c(code_nm, name_nm)]

vis_sa2 <- sf::st_join(chk_sf, sa2_join, left = FALSE)   # 边界点要保留可改 join=sf::st_intersects

# —— 主图口径：观鸟次数（n_vis）
sum_all <- vis_sa2 %>%
  sf::st_drop_geometry() %>%
  dplyr::summarise(
    n_vis        = dplyr::n(),               # 观鸟次数
    rich_med     = median(richness, na.rm=TRUE),  #（可选）每次观鸟的物种数中位数
    .by = c(!!sym(code_nm), !!sym(name_nm))
  )

# 3) 合并到分区几何，缺失置 0
sa2p <- sa2_with_rail %>%
  dplyr::left_join(sum_all, by = setNames(c(code_nm,name_nm), c(code_nm,name_nm))) %>%
  dplyr::mutate(n_vis = tidyr::replace_na(n_vis, 0L))

# 4) 作图（统一 AOI 图窗）——“分区填色地图，颜色越深=该区域观鸟次数越多”
ggplot(sa2p) +
  geom_sf(aes(fill = n_vis), color = "grey30", linewidth = 0.2) +
  scale_fill_viridis_c(option = "C", name = "观鸟次数") +
  coord_sf(crs = 4326,
           xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
           ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
           expand = FALSE) +
  theme_minimal(base_size = 12) +
  labs(title = "Q1 — 分区填色地图（全季）")

# Q1-B 季节对比：分区填色小倍数（颜色越深 = 该区域观鸟次数越多）
suppressPackageStartupMessages({library(sf); library(dplyr); library(ggplot2); library(lubridate)})
sf::sf_use_s2(FALSE)

# 1) 若清单还没有季节列，就由 event_date 映射
if (!"season" %in% names(chk_f)) {
  mk_season <- function(m) dplyr::case_when(
    m %in% c(12,1,2) ~ "Summer",
    m %in% c(3,4,5)  ~ "Autumn",
    m %in% c(6,7,8)  ~ "Winter",
    m %in% c(9,10,11)~ "Spring",
    TRUE ~ NA_character_
  )
  chk_f <- chk_f %>%
    mutate(season = factor(mk_season(lubridate::month(event_date)),
                           levels = c("Summer","Autumn","Winter","Spring")))
}

# 2) 清单 → sf（动态识别经纬度列）
lat_nm <- intersect(c("lat","latitude","y","decimalLatitude","decimallatitude"), names(chk_f))[1]
lon_nm <- intersect(c("lon","longitude","x","decimalLongitude","decimallongitude","lng"), names(chk_f))[1]
stopifnot(!is.na(lat_nm) && !is.na(lon_nm))

chk_sf <- chk_f %>%
  st_as_sf(coords = c(lon_nm, lat_nm), crs = 4326, remove = FALSE) %>%
  st_transform(3111)

# 3) 贴“区域” → 按季节聚合观鸟次数（一次清单 = 一次观鸟）
code_nm <- intersect(c("SA2_CODE21","sa2_code21","SA2_MAINCODE_2016","SA2_MAIN16"), names(sa2_with_rail))[1]
name_nm <- intersect(c("SA2_NAME21","sa2_name21","SA2_NAME_2016","SA2_NAME16"),   names(sa2_with_rail))[1]
join_poly <- st_make_valid(sa2_with_rail)[, c(code_nm, name_nm)]

vis_season <- st_join(chk_sf, join_poly, left = FALSE)
sum_season <- vis_season %>%
  st_drop_geometry() %>%
  summarise(n_vis = n(), .by = c(!!sym(code_nm), !!sym(name_nm), season))

# 4) 并回分区几何；同一色标范围
sa2_season <- sa2_with_rail %>%
  left_join(sum_season, by = setNames(c(code_nm,name_nm), c(code_nm,name_nm))) %>%
  filter(!is.na(season)) %>%
  mutate(n_vis = tidyr::replace_na(n_vis, 0L))

lim_max <- max(sa2_season$n_vis, na.rm = TRUE)

# 5) 作图（小倍数）
ggplot(sa2_season) +
  geom_sf(aes(fill = n_vis), color = "grey30", linewidth = 0.2) +
  scale_fill_viridis_c(option = "C", limits = c(0, lim_max), name = "观鸟次数") +
  facet_wrap(~season, ncol = 2) +
  coord_sf(crs = 4326,
           xlim = c(bbox_deg["xmin"], bbox_deg["xmax"]),
           ylim = c(bbox_deg["ymin"], bbox_deg["ymax"]),
           expand = FALSE) +
  theme_minimal(base_size = 12) +
  labs(title = "Q1 — 季节对比（分区填色小倍数）")

library(leaflet); library(sf)

# 用你已经做好的全季聚合结果 sa2p（里面有 n_vis）
name_nm <- intersect(c("SA2_NAME21","sa2_name21","SA2_NAME_2016","SA2_NAME16"), names(sa2p))[1]

geo_deg <- sa2p |>
  sf::st_make_valid() |>
  sf::st_transform(4326) |>
  sf::st_zm(drop = TRUE) |>
  dplyr::mutate(region_label = .data[[name_nm]])

leaflet(geo_deg) %>%                          # ← 直接把数据传给 leaflet，自动定位到范围
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(fillColor = "#3182bd", fillOpacity = 0.25,
              color = "#555", weight = 0.6,
              label = ~sprintf("%s：%s 次", region_label, n_vis))

# 先准备季节面几何（你前面已生成 sa2_season）
sea_deg <- sa2_season |>
  sf::st_make_valid() |>
  sf::st_transform(4326) |>
  sf::st_zm(drop = TRUE) |>
  dplyr::mutate(region_label = .data[[name_nm]])

rng <- range(c(geo_deg$n_vis, sea_deg$n_vis), na.rm = TRUE)
pal <- colorNumeric("viridis", rng, na.color = "#ffffff")
borders_deg <- geo_deg |>
  sf::st_geometry() |>
  sf::st_boundary() |>
  sf::st_as_sf()



leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  # 建两个图层面板：面填充在下，边界线在上（不被填充遮住）
  addMapPane("fills",   zIndex = 410) %>%
  addMapPane("borders", zIndex = 420) %>%

  addPolygons(data = geo_deg, options = pathOptions(pane = "fills"),
              fillColor = ~pal(n_vis), fillOpacity = 0.85,
              color = "#666", weight = 0.6, opacity = 1,
              label = ~sprintf("%s：%s 次", region_label, n_vis),
              group = "All") %>%

  addPolygons(data = subset(sea_deg, season == "Summer"),
              fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
              label = ~sprintf("%s：%s 次 (Summer)", region_label, n_vis),
              group = "Summer") %>%
  addPolygons(data = subset(sea_deg, season == "Autumn"),
              fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
              label = ~sprintf("%s：%s 次 (Autumn)", region_label, n_vis),
              group = "Autumn") %>%
  addPolygons(data = subset(sea_deg, season == "Winter"),
              fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
              label = ~sprintf("%s：%s 次 (Winter)", region_label, n_vis),
              group = "Winter") %>%
  addPolygons(data = subset(sea_deg, season == "Spring"),
              fillColor = ~pal(n_vis), fillOpacity = 0.85, color = "#555", weight = 0.5,
              label = ~sprintf("%s：%s 次 (Spring)", region_label, n_vis),
              group = "Spring") %>%
  addPolylines(data = borders_deg, options = pathOptions(pane = "borders"),
               color = "#222", weight = 1.2, opacity = 1) %>%
  addLegend(position = "topright", pal = pal, values = rng, title = "观鸟次数") %>%
  addLayersControl(overlayGroups = c("All","Summer","Autumn","Winter","Spring"),
                   options = layersControlOptions(collapsed = FALSE)) %>%
  hideGroup(c("Summer","Autumn","Winter","Spring"))

library(dplyr); library(ggplot2); library(forcats); library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:readr':
## 
##     col_factor

# 仅保留三类；如你只想看两类改成 c("IGS","FormalOpenSpace")
cmp3 <- df_chk %>% filter(space_group %in% c("IGS","FormalOpenSpace","Other"))

# 统计：中位数与 IQR（Q1~Q3）
sum_tbl <- cmp3 %>%
  summarise(
    n      = n(),
    q1     = quantile(richness, 0.25, na.rm=TRUE),
    median = median(richness,     na.rm=TRUE),
    q3     = quantile(richness, 0.75, na.rm=TRUE),
    .by = space_group
  ) %>%
  mutate(space_group = fct_reorder(space_group, median))  # 按中位数排序

# 柱状图：柱=中位数；误差线=IQR（Q1~Q3）
ggplot(sum_tbl, aes(x = space_group, y = median, fill = space_group)) +
  geom_col(width = .65) +
  geom_errorbar(aes(ymin = q1, ymax = q3), width = .18, linewidth = .7) +
  geom_text(aes(label = comma(median)), vjust = -0.5, size = 3.2) +
  scale_fill_manual(values = c(FormalOpenSpace="#4CAF50", IGS="#9C27B0", Other="#90A4AE")) +
  labs(title = "C1｜不同地块类型的每次观鸟物种数（中位数 + IQR）",
       x = NULL, y = "物种数（每次观鸟）") +
  coord_cartesian(ylim = c(0, max(sum_tbl$q3)*1.15)) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")

# Q1-D｜季节折线图：各地块类型的每次观鸟物种数（中位数）
suppressPackageStartupMessages({library(dplyr); library(ggplot2); library(lubridate); library(forcats)})

# 若还没有 season，就由 event_date 映射（与你前面的口径一致）
if (!"season" %in% names(df_chk)) {
  mk_season <- function(m) dplyr::case_when(
    m %in% c(12,1,2) ~ "Summer",
    m %in% c(3,4,5)  ~ "Autumn",
    m %in% c(6,7,8)  ~ "Winter",
    m %in% c(9,10,11)~ "Spring",
    TRUE ~ NA_character_
  )
  df_chk <- df_chk %>%
    mutate(season = factor(mk_season(lubridate::month(event_date)),
                           levels = c("Summer","Autumn","Winter","Spring")))
}

# 只用三类：IGS / Formal / Other；汇总到“中位数 + IQR”
sum_season_type <- df_chk %>%
  filter(space_group %in% c("IGS","FormalOpenSpace","Other"), !is.na(season), !is.na(richness)) %>%
  summarise(
    n      = n(),
    q1     = quantile(richness, 0.25, na.rm = TRUE),
    median = median(richness,     na.rm = TRUE),
    q3     = quantile(richness, 0.75, na.rm = TRUE),
    .by = c(space_group, season)
  ) %>%
  mutate(space_group = fct_relevel(space_group, "FormalOpenSpace", "IGS", "Other"))

# 折线 + 点；误差线用 IQR（更稳健）
ggplot(sum_season_type, aes(x = season, y = median, group = space_group, color = space_group)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = q1, ymax = q3), width = .12, alpha = .6) +
  scale_color_manual(values = c(FormalOpenSpace = "#4CAF50", IGS = "#9C27B0", Other = "#90A4AE"),
                     labels = c(FormalOpenSpace = "Formal green space", IGS = "IGS", Other = "Other")) +
  labs(title = "Q1｜季节影响：不同地块类型的每次观鸟物种数（中位数）",
       x = NULL, y = "物种数（每次观鸟，中位数）", color = "地块类型") +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor = element_blank())

# 可选：在点上标注样本量
# + ggrepel::geom_text_repel(aes(label = n), size = 3, show.legend = FALSE)

# Q2 主图｜IGS 子类型 × 每次观鸟物种数（互动散点）
# Q2 主图（改版）：只保留“整体中位数”短横；季节可在图例勾选
suppressPackageStartupMessages({
  library(dplyr); library(ggplot2); library(forcats); library(lubridate); library(plotly)
})

# 数据：IGS 子类型 + 每次观鸟物种数，确保有 season
dat <- df_chk %>%
  filter(space_group == "IGS", !is.na(igs_type), !is.na(richness)) %>%
  { if (!"season" %in% names(.)) {
      mutate(., season = factor(case_when(
        month(event_date) %in% c(12,1,2) ~ "Summer",
        month(event_date) %in% c(3,4,5) ~ "Autumn",
        month(event_date) %in% c(6,7,8) ~ "Winter",
        month(event_date) %in% c(9,10,11) ~ "Spring",
        TRUE ~ NA_character_
      ), levels = c("Summer","Autumn","Winter","Spring")))
    } else . } %>%
  mutate(
    igs_type = fct_relevel(igs_type,
                c("Railway easement","Utility easement","Road verge")[
                  c("Railway easement","Utility easement","Road verge") %in% unique(igs_type)]),
    date_chr = if ("event_date" %in% names(.)) as.character(event_date) else NA_character_,
    dur_chr  = if ("durationHrs" %in% names(.)) sprintf("%.1f h", ifelse(is.na(durationHrs), NA, durationHrs)) else NA,
    dis_chr  = if ("effortDistanceKm" %in% names(.)) sprintf("%.1f km", ifelse(is.na(effortDistanceKm), NA, effortDistanceKm)) else NA,
    text = paste0(
      "IGS类型：", igs_type,
      "<br>物种数：", richness,
      if (!all(is.na(date_chr))) paste0("<br>日期：", date_chr) else "",
      if (!all(is.na(dur_chr)))  paste0("<br>时长：", dur_chr) else "",
      if (!all(is.na(dis_chr)))  paste0("<br>距离：", dis_chr) else ""
    )
  )

# 只算“整体中位数”（不分季节）
med_df <- dat %>%
  summarise(med = median(richness, na.rm = TRUE), .by = igs_type) %>%
  mutate(xnum = as.integer(igs_type),
         x0 = xnum - 0.35, x1 = xnum + 0.35)

p <- ggplot(dat, aes(x = igs_type, y = richness, color = season, text = text)) +
  geom_jitter(width = 0.22, height = 0, alpha = 0.55, size = 1.6) +
  # 自绘“整体中位数”短横（每个 IGS 子类 1 条）
  geom_segment(data = med_df,
               aes(x = x0, xend = x1, y = med, yend = med),
               inherit.aes = FALSE, linewidth = 0.9, color = "black") +
  scale_color_manual(values = c(Summer="#FFB74D", Autumn="#AED581", Winter="#4FC3F7", Spring="#BA68C8"),
                     na.translate = FALSE) +
  labs(title = "Q2 — IGS 子类型与每次观鸟物种数（散点）",
       x = "IGS 子类型", y = "物种数（每次观鸟）", color = "季节") +
  theme_minimal(base_size = 12)

# 互动化：图例可勾选季节（单击隐藏/显示，双击仅显示该季节）
plotly::ggplotly(p, tooltip = "text")

# Q2 副图｜IGS 子类型的每次观鸟物种数（小提琴 + 箱线）
suppressPackageStartupMessages({
  library(dplyr); library(ggplot2); library(forcats); library(scales)
})

# 仅取 IGS，确保有 igs_type / richness
vdat <- df_chk %>%
  filter(space_group == "IGS", !is.na(igs_type), !is.na(richness)) %>%
  mutate(
    # 按中位数从高到低排序，读图更直观
    igs_type = fct_reorder(igs_type, richness, .fun = median, .desc = TRUE)
  )

# 每类样本量，用于标注
n_tbl <- vdat %>% summarise(n = n(), .by = igs_type)

p_violin_box <- ggplot(vdat, aes(x = igs_type, y = richness, fill = igs_type)) +
  geom_violin(trim = FALSE, alpha = .55, color = NA) +
  geom_boxplot(width = .18, outlier.alpha = .25, color = "black",
               fill = "white", linewidth = .4) +
  # 在箱线图上方标注样本量
  stat_summary(fun = median, geom = "text", vjust = -2.0, size = 3.3,
               aes(label = paste0("n=", ..count..)), 
               fun.data = function(y){ data.frame(y = median(y), count = length(y)) }) +
  scale_fill_manual(values = c(
    "Railway easement" = "#7B1FA2",
    "Utility easement" = "#AB47BC",
    "Road verge"       = "#8E24AA"
  )) +
  labs(title = "Q2 — IGS 子类型的每次观鸟物种数（小提琴 + 箱线）",
       x = "IGS 子类型", y = "物种数（每次观鸟）") +
  coord_cartesian(ylim = c(0, max(vdat$richness, na.rm = TRUE) * 1.1)) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none",
        panel.grid.minor = element_blank())
p_violin_box

## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Q2｜IGS内部：季节×子类型的柱状图（中位数 + IQR + n，带悬停）
suppressPackageStartupMessages({
  library(dplyr); library(ggplot2); library(forcats); library(lubridate); library(scales); library(plotly)
})

# 1) 取 IGS 清单，补季节列（若缺）
igs <- df_chk %>%
  filter(space_group == "IGS", !is.na(igs_type), !is.na(richness))

if (!"season" %in% names(igs)) {
  mk_season <- function(m) dplyr::case_when(
    m %in% c(12,1,2) ~ "Summer",
    m %in% c(3,4,5)  ~ "Autumn",
    m %in% c(6,7,8)  ~ "Winter",
    m %in% c(9,10,11)~ "Spring",
    TRUE ~ NA_character_
  )
  igs <- igs %>% mutate(season = factor(mk_season(month(event_date)),
                      levels = c("Summer","Autumn","Winter","Spring")))
}

# 2) 统计：每季×子类型的 中位数 + IQR + 样本量
lvl <- c("Railway easement","Utility easement","Road verge")
sum_season <- igs %>%
  mutate(igs_type = fct_relevel(igs_type, lvl[lvl %in% unique(igs_type)])) %>%
  summarise(
    n      = n(),
    q1     = quantile(richness, .25, na.rm=TRUE),
    median = median(richness,     na.rm=TRUE),
    q3     = quantile(richness, .75, na.rm=TRUE),
    .by = c(season, igs_type)
  )
sum_season <- sum_season %>%
  dplyr::mutate(
    tt = paste0(
      "季节：", season,
      "<br>IGS：", igs_type,
      "<br>中位数：", median,
      "<br>IQR：", q1, "–", q3,
      "<br>样本量 n：", scales::comma(n)
    )
  )
# 3) 柱状 + 误差线 + n 标注
p_bar <- ggplot(sum_season, aes(x = season, y = median, fill = igs_type, text = tt)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_errorbar(aes(ymin = q1, ymax = q3),
                position = position_dodge(width = 0.8), width = 0.18, linewidth = .7, alpha = .9) +
  geom_text(aes(label = scales::comma(n)),
            position = position_dodge(width = 0.8), vjust = -0.6, size = 3, color = "grey30") +
  scale_fill_manual(values = c("Railway easement"="#7B1FA2",
                               "Utility easement"="#AB47BC",
                               "Road verge"       ="#8E24AA"),
                    name = "IGS 子类型") +
  labs(title = "Q2｜季节 × IGS 子类型：每次观鸟物种数（中位数 + IQR）",
       x = NULL, y = "物种数（每次观鸟，中位数）") +
  coord_cartesian(ylim = c(0, max(sum_season$q3, na.rm=TRUE) * 1.15)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor = element_blank())

# 互动
plotly::ggplotly(p_bar, tooltip = "text")

# 4) 悬停信息：转 plotly（tooltip 显示 季节/子类型/中位数/IQR/n）
sum_season <- sum_season %>%
  mutate(tt = paste0("季节：", season,
                     "<br>IGS：", igs_type,
                     "<br>中位数：", median,
                     "<br>IQR：", q1, "–", q3,
                     "<br>样本量 n：", comma(n)))

p_bar_plt <- ggplotly(
  p_bar +
    geom_point(aes(text = tt, y = median, color = igs_type),
               position = position_dodge(width = 0.8), alpha = 0),  # 透明点，仅用于tooltip
  tooltip = "text"
)

## Warning in geom_point(aes(text = tt, y = median, color = igs_type), position =
## position_dodge(width = 0.8), : Ignoring unknown aesthetics: text

p_bar_plt

# Q3 主图｜植被覆盖 × 距水体 的二维热力图（Z=每次观鸟物种数中位数；分面=IGS子类型）
suppressPackageStartupMessages({
  library(sf); library(dplyr); library(ggplot2); library(forcats); library(plotly)
})
sf::sf_use_s2(FALSE)

# ---- 0) 前置断言（内存对象就绪） ----
stopifnot(inherits(checklists_filt, "sf"),
          "visit_id" %in% names(checklists_filt),
          "visit_id" %in% names(df_chk))
stopifnot(inherits(veg_within,   "sf"),
          inherits(water_within, "sf"))

# ---- 1) 计算距最近水体的距离（米），若已存在 dist_water_m 就跳过 ----
if (!"dist_water_m" %in% names(df_chk) || all(is.na(df_chk$dist_water_m))) {
  pts3111   <- sf::st_transform(checklists_filt, 3111)
water3111 <- sf::st_transform(sf::st_make_valid(water_within), 3111)

idx_near  <- sf::st_nearest_feature(pts3111, water3111)
dist_m    <- as.numeric(sf::st_distance(pts3111, water3111[idx_near, ], by_element = TRUE))
  df_chk <- df_chk %>%
    select(-dplyr::any_of("dist_water_m")) %>%
    left_join(pts3111 %>% st_drop_geometry() %>% transmute(visit_id, dist_water_m = dist_m),
              by = "visit_id")
}

# ---- 2) 计算 50 m 植被覆盖率（%）----
#   对每个清单点做50m缓冲，与植被面求交面积；cover = 交面积 / (π·50^2) × 100
buf50 <- st_buffer(st_transform(checklists_filt, 3111), 250)
buf50$visit_id <- checklists_filt$visit_id

# 交集（注意：可能较慢，但最稳；如太慢可适当抽稀或先 st_crop() 限定范围）
ov <- suppressWarnings(st_intersection(st_make_valid(buf50), st_make_valid(st_transform(veg_within, 3111))))

cover_tbl <- ov %>%
  transmute(visit_id, a_int = as.numeric(st_area(.))) %>%
  group_by(visit_id) %>%
  summarise(a_int = sum(a_int, na.rm = TRUE), .groups = "drop") %>%
  mutate(veg_cover_pct = pmin(100, 100 * a_int / (pi * 50^2))) %>%
  select(visit_id, veg_cover_pct)

# —— 并回 df_chk（没有交到植被的点记 0%；确保列存在且为数值）——
df_chk <- df_chk %>%
  dplyr::left_join(cover_tbl, by = "visit_id")

if (!"veg_cover_pct" %in% names(df_chk)) {
  df_chk$veg_cover_pct <- NA_real_           # 万一 join 没有带来该列，先创建
}

df_chk <- df_chk %>%
  dplyr::mutate(
    veg_cover_pct = tidyr::replace_na(.data$veg_cover_pct, 0),
    veg_cover_pct = as.numeric(veg_cover_pct)
  )

# ---- 3) 分箱（Y：对数水距；X：线性绿量）----
water_breaks <- c(0, 25, 50, 100, 200, 500, 1000, 2000, Inf)
water_labs   <- c("0–25m","25–50m","50–100m","100–200m","200–500m","0.5–1km","1–2km",">2km")

veg_breaks <- seq(0, 100, by = 10)
veg_labs   <- paste0(veg_breaks[-length(veg_breaks)], "–", veg_breaks[-1], "%")

# ---- 3) 分箱（Y：对数水距；X：线性绿量）+ 设分面类别（IGS 三子类 + Non-IGS）----
binned <- df_chk %>%
  filter(!is.na(richness), !is.na(dist_water_m)) %>%
  mutate(
    # 分面标签：IGS 子类保留原名；其余合并为 Non-IGS
    facet_type = if_else(space_group == "IGS" & !is.na(igs_type),
                         as.character(igs_type),
                         "Non-IGS"),
    facet_type = factor(facet_type,
                        levels = c("Railway easement","Utility easement","Road verge","Non-IGS")),
    water_bin = cut(dist_water_m, breaks = water_breaks, labels = water_labs,
                    right = FALSE, include.lowest = TRUE),
    veg_bin   = cut(veg_cover_pct, breaks = veg_breaks, labels = veg_labs,
                    right = FALSE, include.lowest = TRUE)
  )
# ---- 4) 聚合（每格：中位数 + IQR + n；小样本标记）----
agg <- binned %>%
  group_by(facet_type, veg_bin, water_bin, .drop = FALSE) %>%
  summarise(
    n      = n(),
    q1     = quantile(richness, .25, na.rm = TRUE),
    median = median(richness,     na.rm = TRUE),
    q3     = quantile(richness, .75, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(alpha_flag = ifelse(n < 15, "low", "ok"))

# ---- 5) 静态热力图（facet = IGS 三子类 + Non-IGS）----
p_q3_heat <- ggplot(
  agg,
  aes(x = veg_bin, y = water_bin, fill = median,
      text = paste0("组别：", facet_type,
                    "<br>绿量：", veg_bin,
                    "<br>距水：", water_bin,
                    "<br>中位数：", median,
                    "<br>IQR：", q1, "–", q3,
                    "<br>n：", n))
) +
  geom_tile(aes(alpha = alpha_flag), colour = "grey85", linewidth = .1) +
  scale_alpha_manual(values = c(low = 0.35, ok = 1), guide = "none") +
  scale_fill_viridis_c(name = "物种数（中位数）", na.value = "grey95", option = "C") +
  facet_wrap(~ facet_type, nrow = 1) +   # 觉得太挤可改 nrow = 2 变成 2×2
  labs(title = "Q3｜植被覆盖 × 距水体（每次观鸟物种数中位数）",
       x = "植被覆盖（50 m 半径内，%）", y = "到最近水体的距离") +
  theme_minimal(base_size = 12) +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        strip.background = element_rect(fill = "grey95", colour = NA))

# 互动
plotly::ggplotly(p_q3_heat, tooltip = "text")

## Warning in min(x): no non-missing arguments to min; returning Inf

## Warning in max(x): no non-missing arguments to max; returning -Inf

## Warning in colorscale_json(trace$colorscale): A colorscale list must of
## elements of the same (non-zero) length

Melbourne Bird Analysis

jb huang

2025-11-04

R Markdown

Including Plots