統計至202502資料:
## 492577 份
## 4058 人
## 121 份/人
source("https://raw.githubusercontent.com/iascchen/VisHealth/master/R/calendarHeat.R")
# http://www.columbia.edu/~sg3637/blog/Time_Series_Heatmaps.html
dat_calend <- dat %>%
select(EarliestDateCollected) %>%
mutate(Year = EarliestDateCollected %>% year(),
Month = EarliestDateCollected %>% month()) %>%
group_by(Year, Month) %>%
summarise(count = n()) %>%
drop_na() %>%
arrange(Year, Month)
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# mutate(EarliestDateCollected = as.Date(EarliestDateCollected)) %>%
# drop_na() %>%
# arrange(EarliestDateCollected)
# filter(lubridate::year(EarliestDateCollected) == lubridate::year(lubridate::ymd(20000101)))
# calendarHeat(dates = dat_calend$EarliestDateCollected, values = dat_calend$count)
p_calend <- dat_calend %>%
# filter(Year <1945) %>%
# filter(Year >= 1945 & Year < 2000) %>%
# filter(Year >= 2000) %>%
ggplot(data = .) +
geom_tile(mapping = aes(x = Year, y = Month, fill = count), color = "black") +
scale_fill_gradient2(low = "white", high = "#004616") +
scale_x_continuous(breaks = c(1904, 1924, 1945, 2000, 2024),
minor_breaks = seq(1850, 1945, 10)
# labels = c(1904, 1924, 1945, 2000, 2024)
) +
scale_y_continuous(breaks = c(1, 3, 6, 9, 12), labels = c("Jan", "Mar", "Jun", "Sep", "Dec")) +
coord_equal() +
labs(x = "yr", y = "", fill = "#") +
theme_void() +
# theme(axis.text.y = element_text(size = 10))
theme(
axis.ticks.length.x = unit(0.1, units = "cm"),
axis.minor.ticks.length.x = unit(0.5, units = "cm"),
axis.ticks = element_line(linewidth = 1.5, color = "black"),
axis.minor.ticks.x.bottom = element_line(linewidth = 1, color = "black"),
axis.text = element_markdown(
# size = unit(24, units = "cm"),
color = "black"),
axis.title.y = element_text(angle = 0, color = "black", hjust = 1),
axis.title.x = element_text(color = "black", hjust = 1),
plot.background = element_rect(fill = "#ffffff00", color = "#ffffff00"),
panel.background = element_rect(fill = "#FFD700")
# plot.margin = unit(c(1,1,1,1), units = "cm")
# legend.position="none"
)
p_calend
# ggsave(filename = "timeGap_calend.png", device = "png", width = 32, height = 10,units = "cm", dpi = 600)
# ggsave(filename = "timeGap_calend_2.png", device = "png", width = 12, height = 10,units = "cm", dpi = 600)
# ggsave(filename = "timeGap_calend_3.png", device = "png", width = 11, height = 10,units = "cm", dpi = 600)
library(wordcloud2)
library(webshot)
# webshot::install_phantomjs()
test <- dat_timeS_dropNA %>%
# filter(EarliestDateCollected < ymd(19040101)) %>%
# filter(between(EarliestDateCollected, ymd(19040101),ymd(19230101))) %>%
# filter(between(EarliestDateCollected, ymd(19240101),ymd(19440101))) %>%
# filter(between(EarliestDateCollected, ymd(19450101),ymd(19990101))) %>%
# filter(between(EarliestDateCollected, ymd(20000101),ymd(20250101))) %>%
select(word = Collector, freq = count) %>%
group_by(word) %>%
summarise(freq = sum(freq)) %>%
arrange(desc(freq)) %>%
wordcloud2(data = .,
# color = "forestgreen",
# color = "darkorange",
fontWeight = 500, minRotation = 0.1, maxRotation = 0.9, shape = "circle")
# htmlwidgets::saveWidget(widget = test, file = "test_1904.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_1924.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_1945.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_2000.html", selfcontained = FALSE)
# htmlwidgets::saveWidget(widget = test, file = "test_2024.html", selfcontained = FALSE)
test
rank_n <- 1:30
p_rankGenus <- dat %>%
group_by(Genus) %>%
summarise(count = n()) %>%
left_join(.,
by = "Genus",
multiple = "first",
dat %>% dplyr::distinct(Genus, GenusInChinese, ClassInChinese)
) %>%
arrange(desc(count)) %>%
mutate(Genus = glue::glue("{GenusInChinese}/<i>{Genus}</i>", .na = NULL)) %>%
mutate(Genus = fct_reorder(Genus, count)) %>%
mutate(ClassInChinese = fct(ClassInChinese, levels = c("眼蟲綱","石松綱","木賊綱","松葉蕨綱","真蕨綱","蘇鐵綱","銀杏綱","買麻藤綱","松綱","木蘭綱","百合綱"))) %>%
drop_na(Genus) %>%
slice(rank_n) %>%
ggplot(mapping = aes(x = Genus, y = count, fill = ClassInChinese)) +
geom_col() +
scale_fill_viridis(direction = 1, discrete = TRUE, option = "G") +
labs(y = "#") +
coord_flip(expand = FALSE, clip = "on") +
theme_solarized(base_family = "NotoSansCJKtcBlack") +
theme(
axis.text.y = element_markdown(
# size = 16,
colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_text(colour = "black", hjust = 1),
legend.text = element_text(colour = "black"),
legend.title = element_blank()
)
# p_rankGenus %>% ggsave(filename = "rankGenus.png", device = "png",units = "cm", width = 30, height = 18, dpi = 600)
p_rankGenus
cat(
dat %>% group_by(Genus) %>% summarise(count = n()) %>% arrange(desc(count)) %>% mutate(Genus = fct_reorder(Genus, count)) %>% filter(Genus %>% is.na()) %>% pull(count),
"份(未鑑定)\n"
)
## 8577 份(未鑑定)
p_rankSci <- dat %>%
group_by(ScientificName) %>%
summarise(count = n()) %>%
left_join(.,
by = "ScientificName",
multiple = "first",
dat %>% dplyr::distinct(ScientificName, ClassInChinese, SpeciesNameInChinese, GenusInChinese, Genus, SpecificEpithet, InfraspecificRank, InfraspecificEpithet, AuthorYearOfScientificName)
) %>%
arrange(desc(count)) %>%
# slice_head(n = 10) %>%
rowwise(SpeciesNameInChinese, GenusInChinese) %>%
mutate(ScientificName_plot = replace_na(SpeciesNameInChinese, GenusInChinese)) %>%
ungroup() %>%
mutate(ScientificName_plot2 = glue("<i>{Genus} {SpecificEpithet}</i>{InfraspecificRank}<i> {InfraspecificEpithet}</i>{AuthorYearOfScientificName}",.sep = " ", .na = "")) %>%
mutate(ScientificName_plot = glue("{ScientificName_plot}/{ScientificName_plot2}")) %>%
mutate(ScientificName_plot = fct_reorder(ScientificName_plot, count)) %>%
mutate(ClassInChinese = fct(ClassInChinese, levels = c("眼蟲綱","石松綱","木賊綱","松葉蕨綱","真蕨綱","蘇鐵綱","銀杏綱","買麻藤綱","松綱","木蘭綱","百合綱"))) %>%
drop_na(ScientificName) %>%
slice(rank_n) %>%
ggplot(mapping = aes(x = ScientificName_plot, y = count, fill = ClassInChinese)) +
geom_col() +
scale_fill_viridis(direction = 1, discrete = TRUE, option = "G") +
coord_flip(expand = FALSE, clip = "on") +
labs(y = "#") +
theme_solarized(
# base_size = 12,
base_family = "NotoSansCJKtcBlack") +
theme(
axis.text.y = element_markdown(
# size = 16,
colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_text(colour = "black", hjust = 1),
legend.text = element_text(colour = "black"),
legend.title = element_blank()
)
# p_rankSci %>% ggsave(filename = "rankSpecies.png", device = "png",units = "cm", width = 30, height = 18, dpi = 600)
p_rankSci
cat(
dat %>% group_by(ScientificName) %>% summarise(count = n()) %>% arrange(desc(count)) %>% mutate(ScientificName = fct_reorder(ScientificName, count)) %>% filter(ScientificName %>% is.na()) %>% pull(count),
"份(未鑑定)\n"
)
## 8577 份(未鑑定)
website: https://chestnut123tw.github.io/theme-year_datagap
## [1] 0 1 10 50 100 200 400 800