load packages & fonts

load data

基礎統計資料

統計至202502資料:

多少份館藏標本?
How many specimens in TAIF herbarium?

## 492577 份

多少採集者貢獻標本?
How many collectors contribute specimens?

## 4058 人

平均每人貢獻標本份數
The average number of specimens from each collector?

## 121 份/人

繪製時間分布

time-series calender heatmaps

source("https://raw.githubusercontent.com/iascchen/VisHealth/master/R/calendarHeat.R")
# http://www.columbia.edu/~sg3637/blog/Time_Series_Heatmaps.html

dat_calend <- dat %>%
  select(EarliestDateCollected) %>% 
  mutate(Year = EarliestDateCollected %>% year(),
         Month = EarliestDateCollected %>% month()) %>%
  group_by(Year, Month) %>% 
  summarise(count = n()) %>% 
  drop_na() %>% 
  arrange(Year, Month)
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# mutate(EarliestDateCollected = as.Date(EarliestDateCollected)) %>% 
# drop_na() %>% 
# arrange(EarliestDateCollected)
# filter(lubridate::year(EarliestDateCollected) == lubridate::year(lubridate::ymd(20000101)))

# calendarHeat(dates = dat_calend$EarliestDateCollected, values = dat_calend$count)

p_calend <- dat_calend %>%
  # filter(Year <1945) %>% 
  # filter(Year >= 1945 & Year < 2000) %>%
  # filter(Year >= 2000) %>%
  ggplot(data = .) +
  geom_tile(mapping = aes(x = Year, y = Month, fill = count), color = "black") +
  scale_fill_gradient2(low = "white", high = "#004616") +
  scale_x_continuous(breaks = c(1904, 1924, 1945, 2000, 2024),
                     minor_breaks = seq(1850, 1945, 10)
                     # labels = c(1904, 1924, 1945, 2000, 2024)
  ) +
  scale_y_continuous(breaks = c(1, 3, 6, 9, 12), labels = c("Jan", "Mar", "Jun", "Sep", "Dec")) +
  coord_equal() +
  labs(x = "yr", y = "", fill = "#") +
  theme_void() +
  # theme(axis.text.y = element_text(size = 10))
  theme(
    axis.ticks.length.x = unit(0.1, units = "cm"),
    axis.minor.ticks.length.x = unit(0.5, units = "cm"),
    axis.ticks = element_line(linewidth = 1.5, color = "black"),
    axis.minor.ticks.x.bottom = element_line(linewidth = 1, color = "black"),
    axis.text = element_markdown(
      # size = unit(24, units = "cm"),
      color = "black"),
    axis.title.y = element_text(angle = 0, color = "black", hjust = 1),
    axis.title.x = element_text(color = "black", hjust = 1),
    plot.background = element_rect(fill = "#ffffff00", color = "#ffffff00"),
    panel.background = element_rect(fill = "#FFD700")
    # plot.margin = unit(c(1,1,1,1), units = "cm")
    # legend.position="none"
  )

p_calend

# ggsave(filename = "timeGap_calend.png", device = "png", width = 32, height = 10,units = "cm", dpi = 600)
# ggsave(filename = "timeGap_calend_2.png", device = "png", width = 12, height = 10,units = "cm", dpi = 600)
# ggsave(filename = "timeGap_calend_3.png", device = "png", width = 11, height = 10,units = "cm", dpi = 600)

文字雲

library(wordcloud2)
library(webshot)
# webshot::install_phantomjs()

test <- dat_timeS_dropNA %>%
  # filter(EarliestDateCollected < ymd(19040101)) %>%
  # filter(between(EarliestDateCollected, ymd(19040101),ymd(19230101))) %>%
  # filter(between(EarliestDateCollected, ymd(19240101),ymd(19440101))) %>%
  # filter(between(EarliestDateCollected, ymd(19450101),ymd(19990101))) %>%
  # filter(between(EarliestDateCollected, ymd(20000101),ymd(20250101))) %>%
  select(word = Collector, freq = count) %>%
  group_by(word) %>%
  summarise(freq = sum(freq)) %>%
  arrange(desc(freq)) %>% 
  wordcloud2(data = .,
             # color = "forestgreen",
             # color = "darkorange",
             fontWeight = 500, minRotation = 0.1, maxRotation = 0.9, shape = "circle")
# htmlwidgets::saveWidget(widget = test, file = "test_1904.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_1924.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_1945.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_2000.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_2024.html", selfcontained = FALSE)
test

標本數-屬排名(前30名)

rank_n <- 1:30
p_rankGenus <- dat %>%
  group_by(Genus) %>%
  summarise(count = n()) %>%
  left_join(.,
            by = "Genus",
            multiple = "first",
            dat %>% dplyr::distinct(Genus, GenusInChinese, ClassInChinese)
  ) %>%
  arrange(desc(count)) %>%
  mutate(Genus = glue::glue("{GenusInChinese}/<i>{Genus}</i>", .na = NULL)) %>%
  mutate(Genus = fct_reorder(Genus, count)) %>% 
  mutate(ClassInChinese = fct(ClassInChinese, levels = c("眼蟲綱","石松綱","木賊綱","松葉蕨綱","真蕨綱","蘇鐵綱","銀杏綱","買麻藤綱","松綱","木蘭綱","百合綱"))) %>%
  drop_na(Genus) %>%
  slice(rank_n) %>%
  ggplot(mapping = aes(x = Genus, y = count, fill = ClassInChinese)) +
  geom_col() +
  scale_fill_viridis(direction = 1, discrete = TRUE, option = "G") +
  labs(y = "#") +
  coord_flip(expand = FALSE, clip = "on") +
  theme_solarized(base_family = "NotoSansCJKtcBlack") +
  theme(
    axis.text.y = element_markdown(
      # size = 16,
      colour = "black"),
    axis.title.y = element_blank(),
    axis.title.x = element_text(colour = "black", hjust = 1),
    legend.text = element_text(colour = "black"),
    legend.title = element_blank()
  )

# p_rankGenus %>% ggsave(filename = "rankGenus.png", device = "png",units = "cm", width = 30, height = 18, dpi = 600)
p_rankGenus

cat(
  dat %>% group_by(Genus) %>% summarise(count = n()) %>% arrange(desc(count)) %>% mutate(Genus = fct_reorder(Genus, count)) %>% filter(Genus %>% is.na()) %>% pull(count),
  "份(未鑑定)\n"
)
## 8577 份(未鑑定)

標本數-物種排名(前30名) 大家最愛採

p_rankSci <- dat %>%
  group_by(ScientificName) %>%
  summarise(count = n()) %>%
  left_join(.,
            by = "ScientificName",
            multiple = "first",
            dat %>% dplyr::distinct(ScientificName, ClassInChinese, SpeciesNameInChinese, GenusInChinese, Genus, SpecificEpithet, InfraspecificRank, InfraspecificEpithet, AuthorYearOfScientificName)
  ) %>%
  arrange(desc(count)) %>%
  # slice_head(n = 10) %>%
  rowwise(SpeciesNameInChinese, GenusInChinese) %>% 
  mutate(ScientificName_plot = replace_na(SpeciesNameInChinese, GenusInChinese)) %>%
  ungroup() %>% 
  mutate(ScientificName_plot2 = glue("<i>{Genus} {SpecificEpithet}</i>{InfraspecificRank}<i> {InfraspecificEpithet}</i>{AuthorYearOfScientificName}",.sep = " ", .na = "")) %>% 
  mutate(ScientificName_plot = glue("{ScientificName_plot}/{ScientificName_plot2}")) %>%
  mutate(ScientificName_plot = fct_reorder(ScientificName_plot, count)) %>%
  mutate(ClassInChinese = fct(ClassInChinese, levels = c("眼蟲綱","石松綱","木賊綱","松葉蕨綱","真蕨綱","蘇鐵綱","銀杏綱","買麻藤綱","松綱","木蘭綱","百合綱"))) %>%
  drop_na(ScientificName) %>%
  slice(rank_n) %>%
  ggplot(mapping = aes(x = ScientificName_plot, y = count, fill = ClassInChinese)) +
  geom_col() +
  scale_fill_viridis(direction = 1, discrete = TRUE, option = "G") +
  coord_flip(expand = FALSE, clip = "on") +
  labs(y = "#") +
  theme_solarized(
    # base_size = 12,
    base_family = "NotoSansCJKtcBlack") +
  theme(
    axis.text.y = element_markdown(
      # size = 16,
      colour = "black"),
    axis.title.y = element_blank(),
    axis.title.x = element_text(colour = "black", hjust = 1),
    legend.text = element_text(colour = "black"),
    legend.title = element_blank()
  )

# p_rankSci %>% ggsave(filename = "rankSpecies.png", device = "png",units = "cm", width = 30, height = 18, dpi = 600)
p_rankSci

cat(
  dat %>% group_by(ScientificName) %>% summarise(count = n()) %>% arrange(desc(count)) %>% mutate(ScientificName = fct_reorder(ScientificName, count)) %>% filter(ScientificName %>% is.na()) %>% pull(count),
  "份(未鑑定)\n"
)
## 8577 份(未鑑定)

資料空缺年

website: https://chestnut123tw.github.io/theme-year_datagap

分類群空缺

## [1]   0   1  10  50 100 200 400 800