Author

Eetu Mäkelä

Published

January 10, 2023

Code
source(here::here("src/common_basis.R"))
library(ggplot2)
library(gghsci)
library(ggiraph)
library(gt)
Code
# Build a per-work table of how many other works appear in text-reuse clusters
# associated with that work, and persist it in the database.
#
# The SQL walks clustered_textreuses_c -> clustered_pieces_c (on piece_id) ->
# earliest_works_by_cluster_c (on cluster_id), then joins the distinct
# (w_id, work_id_short) pairs of idmap on w_id and idmap itself on t_id.
# The result has one row per i2.w_id with
#   reusing_works = COUNT(DISTINCT i.w_id) - 1
# NOTE(review): the w_id used in the join presumably comes from
# earliest_works_by_cluster_c and t_id from the text-reuse/piece tables;
# the "- 1" presumably discounts the originating work itself -- confirm
# against the schema.
# NOTE(review): both idmap joins are LEFT JOINs, so rows whose w_id has no
# match in idmap would be grouped under a NULL w_id -- confirm this is intended.
#
# compute_a() is a project helper not defined in this chunk; judging by its
# arguments it is asked for a non-temporary table named "reuses_per_work_id",
# overwriting any existing one, with a unique index on w_id.
reuses_per_work_id <- tbl(con,sql("
WITH work_id_mapping AS (
  SELECT DISTINCT w_id, work_id_short FROM idmap
)
SELECT i2.w_id, COUNT(DISTINCT i.w_id)-1 AS reusing_works
FROM (
  SELECT * FROM clustered_textreuses_c ct  
) ct
INNER JOIN clustered_pieces_c cp USING (piece_id)
INNER JOIN earliest_works_by_cluster_c ewbc USING (cluster_id)
LEFT JOIN work_id_mapping i2 USING (w_id)
LEFT JOIN idmap i USING (t_id)
GROUP BY i2.w_id
")) %>% 
  compute_a(name="reuses_per_work_id",temporary=FALSE,overwrite=TRUE,unique_indexes=list(c("w_id")))

Number of works reusing each work, by the original work's year of publication

Code
# Assemble the data frame used for plotting: one row per distinct combination
# of publication_year, w_id, work_id_short, projected_ecco_module and
# reusing_works. All steps are chained on the input tables and the result is
# pulled into R by the final collect().
d <- local({
  # Module labels that are dropped for a work whenever that work also carries
  # some other label; they are kept only if they are all the work has.
  fallback_labels <- c("Keyworded but not projected", "Not keyworded")

  # Module labels obtained from ecco_core_a, keyed by work through idmap_a.
  ecco_core_modules <- idmap_a %>%
    distinct(w_id, ecco_id) %>%
    inner_join(ecco_core_a, by = "ecco_id") %>%
    distinct(w_id, projected_ecco_module = ecco_module)

  # Both sources of module labels stacked together (duplicates retained).
  modules_by_work <- estc_projected_ecco_modules_a %>%
    union_all(ecco_core_modules)

  reuses_per_work_id %>%
    inner_join(earliest_years_of_publication_by_work_a, by = "w_id") %>%
    inner_join(modules_by_work, by = "w_id") %>%
    group_by(w_id) %>%
    filter(
      !(projected_ecco_module %in% fallback_labels) |
        all(projected_ecco_module %in% fallback_labels)
    ) %>%
    ungroup() %>%
    inner_join(idmap_a, by = "w_id") %>%
    distinct(
      publication_year,
      w_id,
      work_id_short,
      projected_ecco_module,
      reusing_works
    ) %>%
    collect()
})
Code
# Interactive scatter plot of the works in `d` that have at least 100 reusing
# works: publication year on x, reusing_works on a log10 y axis, coloured and
# faceted (three columns) by projected_ecco_module. Each point carries w_id as
# its data_id and shows work_id_short as its tooltip. Rendered through girafe()
# with an 8 x 11 SVG size.
girafe(
  ggobj = ggplot(
    filter(d, reusing_works >= 100),
    aes(
      x = publication_year,
      y = reusing_works,
      color = projected_ecco_module,
      data_id = w_id,
      tooltip = work_id_short
    )
  ) +
    theme_hsci_discrete() +
    geom_point_interactive() +
    facet_wrap(~projected_ecco_module, ncol = 3) +
    scale_y_log10() +
    # Applied after theme_hsci_discrete() so the legend position is not reset.
    theme(legend.position = "bottom"),
  width_svg = 8,
  height_svg = 11
)