# Build and persist a per-work reuse count.
#
# The SQL groups by the w_id obtained through `work_id_mapping` (i2) and, for
# each group, counts the distinct w_id values reached through `idmap` (i)
# via t_id, minus one.
# NOTE(review): the "- 1" presumably removes the original work itself from its
# own count -- confirm against the definition of earliest_works_by_cluster_c.
#
# The query is written one clause per line: SQL keywords must be separated
# from neighbouring identifiers by whitespace (e.g. `reusing_works` / `FROM`,
# `ct` / `INNER JOIN`), otherwise the statement does not parse as intended.
#
# `compute_a` is a project helper defined elsewhere; it is called here with a
# non-temporary, overwriting target named "reuses_per_work_id" and a unique
# index on w_id.
reuses_per_work_id <- tbl(con, sql("
  WITH work_id_mapping AS (
    SELECT DISTINCT w_id, work_id_short
    FROM idmap
  )
  SELECT i2.w_id, COUNT(DISTINCT i.w_id)-1 AS reusing_works
  FROM (
    SELECT * FROM clustered_textreuses_c ct
  ) ct
  INNER JOIN clustered_pieces_c cp USING (piece_id)
  INNER JOIN earliest_works_by_cluster_c ewbc USING (cluster_id)
  LEFT JOIN work_id_mapping i2 USING (w_id)
  LEFT JOIN idmap i USING (t_id)
  GROUP BY i2.w_id
")) %>%
  compute_a(
    name = "reuses_per_work_id",
    temporary = FALSE,
    overwrite = TRUE,
    unique_indexes = list(c("w_id"))
  )
Number of works reusing a work by the year of the original
Code
# Collect, per work, the publication year, short work id, ECCO module label
# and number of reusing works into a local data frame `d`.
d <- reuses_per_work_id %>%
  inner_join(earliest_years_of_publication_by_work_a, by = "w_id") %>%
  inner_join(
    # Module labels per work: rows of estc_projected_ecco_modules_a stacked
    # with the (w_id, ecco_module) pairs reached via idmap_a -> ecco_core_a.
    union_all(
      estc_projected_ecco_modules_a,
      idmap_a %>%
        distinct(w_id, ecco_id) %>%
        inner_join(ecco_core_a, by = "ecco_id") %>%
        distinct(w_id, projected_ecco_module = ecco_module)
    ),
    by = "w_id"
  ) %>%
  group_by(w_id) %>%
  # Within each work, drop rows labelled "Keyworded but not projected" or
  # "Not keyworded" -- unless every row of that work carries one of those
  # labels, in which case all of its rows are kept.
  filter(
    !(projected_ecco_module %in%
      c("Keyworded but not projected", "Not keyworded")) |
      all(projected_ecco_module %in%
        c("Keyworded but not projected", "Not keyworded"))
  ) %>%
  ungroup() %>%
  # Attach work_id_short from idmap_a, then de-duplicate on the output columns.
  inner_join(idmap_a, by = "w_id") %>%
  distinct(
    publication_year,
    w_id,
    work_id_short,
    projected_ecco_module,
    reusing_works
  ) %>%
  collect()