# Catalogue of the works under analysis, one row per work. A book may be split
# into several works (e.g. the sections of "Kalevala (1849)").
#   book              - label of the publication the work belongs to
#   work              - label of the individual work / section (used as the
#                       unique key when the table is stored)
#   pattern           - regular expression matched against `nro` of `poems`
#                       to select the poems that make up the work
#   year              - year associated with the work
#   n_verses_prev     - NOTE(review): presumably a previously reported verse
#                       count, NA where none is available - confirm
#   publication_order - chronological rank of the publication; works of the
#                       same book share a rank
#   display_order     - row position in this table, added by mutate() below
works_of_interest_l <- tribble(
~book, ~work, ~pattern, ~year, ~n_verses_prev, ~publication_order,
"Väitöskirjan aineksia (1827)", "Väitöskirjan aineksia (1827)", "^kr00031", 1827, 95, 1,
"Kantele I (1829)", "Kantele I (1829)", "^kr00038", 1829, NA, 2,
"Kantele II (1829)", "Kantele II (1829)", "^kr00039", 1829, NA, 3,
"Kantele III (1830)", "Kantele III (1830)", "^kr00040", 1830, NA, 4,
"Kantele IV (1831)", "Kantele IV (1831)", "^kr00041", 1831, NA, 5,
"Sikermä-Kalevala (1833)", "Sikermä-Kalevala (1833) - Lemminkäinen", "^kr00032", 1833, 825, 6,
"Sikermä-Kalevala (1833)", "Sikermä-Kalevala (1833) - Väinämöinen", "^kr00033", 1833, 1867, 6,
"Sikermä-Kalevala (1833)", "Sikermä-Kalevala (1833) - Naimakansan virsiä Vuokkiniemeltä", "^kr00034", 1833, 499, 6,
"Runokokous Väinämöisestä (1833)","Runokokous Väinämöisestä (1833)", "^kr00035", 1833, 5052, 7,
"Lisiä runokokoukseen Väinämöisestä (1834)", "Lisiä runokokoukseen Väinämöisestä (1834)", "^kr00036", 1834, NA, 8,
"Vanha Kalevala (1835)", "Vanha Kalevala (1835)", "^vkalevala", 1835, 12078, 9,
"Lisiä Vanhaan Kalevalaan (1836-1837)", "Lisiä Vanhaan Kalevalaan (1836-1837)", "^kr00037", 1836, NA, 10,
"Kanteletar (1840)", "Kanteletar (1840)", "^kt", 1840, NA, 11,
"Kalevala (1849)", "Kalevala (1849) - Luominen ja Väinämöisen syntymä", "^kalevala0[1-2]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Aino-runo", "^kalevala0[3-5]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Väinämöisen ja Ilmarisen matkat Pohjolaan", "^kalevala(0[6-9]|10)", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Lemminkäinen I", "^kalevala1[1-5]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Väinämöisen veneenveisto ja Pohjolasta kosinta I", "^kalevala1[6-9]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Häärunot", "^kalevala2[0-5]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Lemminkäinen II", "^kalevala(2[6-9]|30)", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Kullervo", "^kalevala3[1-6]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Kultaneito ja Pohjolasta kosinta II", "^kalevala3[7-8]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Matkalla sammon ryöstöön", "^kalevala(39|4[0-1])", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Taistelu sammosta", "^kalevala4[2-3]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Kanteleen soitto", "^kalevala44", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Kalevalan ja Pohjolan välinen taistelu", "^kalevala4[5-9]", 1849, 22795, 12,
"Kalevala (1849)", "Kalevala (1849) - Väinämöisen lähtö", "^kalevala50", 1849, 22795, 12,
) %>% mutate(display_order=row_number())
# Hand the local catalogue to copy_to_a() together with the connection `con`,
# asking for a non-temporary, overwritable table named "works_of_interest"
# with a unique index on `work`.
# NOTE(review): copy_to_a() is defined elsewhere; presumably a wrapper around
# dplyr::copy_to() - confirm.
works_of_interest <- copy_to_a(
  works_of_interest_l,
  con,
  temporary = FALSE,
  overwrite = TRUE,
  name = "works_of_interest",
  unique_indexes = list(c("work"))
)
# Expand every catalogue row into the poems it consists of: for each work the
# `p_id`s of all `poems` whose `nro` matches the work's regular expression are
# looked up, giving one row per (work, p_id). The pattern column is dropped
# before the result is passed to copy_to_a() as "work_parts_of_interest"
# (index on `work`, unique index on `p_id`).
work_parts_of_interest <- works_of_interest_l %>%
  mutate(
    p_id = map(
      pattern,
      function(nro_pattern) {
        poems %>%
          filter(str_detect(nro, nro_pattern)) %>%
          pull(p_id)
      }
    )
  ) %>%
  unnest(p_id) %>%
  select(-pattern) %>%
  copy_to_a(
    con,
    temporary = FALSE,
    overwrite = TRUE,
    name = "work_parts_of_interest",
    indexes = list(c("work")),
    unique_indexes = list(c("p_id"))
  )
# Verse occurrences of the works of interest: every (work, p_id) pair is
# expanded to its (pos, v_id) rows from `verse_poem`, keeping only verses whose
# `type` in `verses` is "V". The result is passed to compute_a() as
# "work_verses".
work_verses <- work_parts_of_interest %>%
  select(work, p_id) %>%
  inner_join(select(verse_poem, p_id, pos, v_id)) %>%
  inner_join(select(filter(verses, type == "V"), v_id)) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "work_verses",
    indexes = list(c("v_id"), c("work")),
    unique_indexes = list(c("p_id", "pos", "v_id"))
  )
# Comparison corpus: poems from the "skvr" and "jr" collections whose year in
# `p_year` lies in [1700, 1849). Passed to compute_a() as "poems_of_interest"
# with a unique index on `p_id`.
poems_of_interest <- poems %>%
  filter(collection %in% c("skvr", "jr")) %>%
  inner_join(filter(p_year, year >= 1700, year < 1849)) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "poems_of_interest",
    unique_indexes = list(c("p_id"))
  )
# Verse occurrences of the comparison corpus, built the same way as for the
# works: (p_id, pos, v_id) rows from `verse_poem`, restricted to verses of
# type "V".
# NOTE(review): the stored table is named "poems_verses" while the R variable
# is `poem_verses` - confirm the plural is intentional before renaming.
poem_verses <- poems_of_interest %>%
  select(p_id) %>%
  inner_join(select(verse_poem, p_id, pos, v_id)) %>%
  inner_join(select(filter(verses, type == "V"), v_id)) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "poems_verses",
    indexes = list(c("v_id")),
    unique_indexes = list(c("p_id", "pos", "v_id"))
  )
# Distinct (poem, verse cluster) pairs for the comparison corpus, using only
# the clustering whose name in `v_clusterings` is "loose".
poem_verse_clusts <- poem_verses %>%
  inner_join(
    inner_join(v_clust, filter(v_clusterings, name == "loose"))
  ) %>%
  distinct(p_id, clust_id) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "poem_verse_clusts",
    indexes = list(c("clust_id")),
    unique_indexes = list(c("p_id", "clust_id"))
  )
# Verse-level similarity links from the works (side 1) to the comparison
# corpus (side 2): work verses are renamed to the *1 columns, joined to
# `v_sim`, and the result is joined to the corpus verses renamed to the *2
# columns.
work_poem_sims <- work_verses %>%
  rename(p1_id = p_id, pos1 = pos, v1_id = v_id) %>%
  inner_join(v_sim) %>%
  inner_join(
    rename(poem_verses, p2_id = p_id, pos2 = pos, v2_id = v_id)
  ) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "work_poem_sims",
    indexes = list(c("work"), c("p1_id", "pos1"), c("p2_id", "pos2"))
  )
# Verse-level similarity links between two different works: `work_verses` is
# used on both sides of `v_sim` (columns suffixed 1 and 2), and pairs within
# the same work are discarded.
work_work_sims <- work_verses %>%
  rename(work1 = work, p1_id = p_id, pos1 = pos, v1_id = v_id) %>%
  inner_join(v_sim) %>%
  inner_join(
    rename(work_verses, work2 = work, p2_id = p_id, pos2 = pos, v2_id = v_id)
  ) %>%
  filter(work1 != work2) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "work_work_sims",
    indexes = list(
      c("sim_cos"),
      c("work1"),
      c("work2"),
      c("p1_id", "pos1"),
      c("p2_id", "pos2")
    )
  )
# Distinct (work, verse cluster) pairs - the "verse types" present in each
# work - using only the clustering named "loose".
work_verse_clusts <- work_verses %>%
  inner_join(
    inner_join(v_clust, filter(v_clusterings, name == "loose"))
  ) %>%
  distinct(work, clust_id) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "work_verse_clusts",
    indexes = list(c("clust_id")),
    unique_indexes = list(c("work", "clust_id"))
  )
# For every verse cluster, the work(s) with the lowest publication_order among
# the works containing it, stored as `origin_work`.
# NOTE(review): works of the same book share a publication_order, so a cluster
# can get several origin rows when it occurs in more than one of them - confirm
# that downstream joins on clust_id expect this.
work_verse_clust_origins <- work_verse_clusts %>%
  inner_join(works_of_interest) %>%
  group_by(clust_id) %>%
  filter(publication_order == min(publication_order)) %>%
  ungroup() %>%
  select(origin_work = work, clust_id) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "work_verse_clust_origins",
    indexes = list(c("origin_work"), c("clust_id"))
  )
# Collector and region for every poem of the comparison corpus.
# A poem's place is resolved to its parent place (`par_id`), which has to be of
# type "county"; county names are then merged into coarser regions. County
# names not listed in case_match() get no replacement value and end up missing.
poems_to_cols <- poems_of_interest %>%
  inner_join(p_col) %>%
  inner_join(rename(collectors, col_name = name)) %>%
  inner_join(p_pl) %>%
  inner_join(select(places, pl_id, par_id)) %>%
  inner_join(
    rename(filter(places, type == "county"), pl_name = name),
    join_by(par_id == pl_id)
  ) %>%
  mutate(
    pl_name = case_match(
      pl_name,
      c("Uusimaa", "Varsinais-Suomi", "Etelä-Pohjanmaa") ~ "Uusimaa/Varsinais-Suomi/Etelä-Pohjanmaa",
      c("Keski-Suomi", "Pohjois-Savo", "Etelä-Savo") ~ "Keski-Suomi/Savo",
      c("Satakunta", "Häme") ~ "Satakunta/Häme",
      c("Peräpohjola", "Pohjois-Pohjanmaa", "Kainuu") ~ "Peräpohjola/Pohjois-Pohjanmaa/Kainuu",
      c("Etelä-Karjala") ~ "Etelä-Karjala",
      c("Aunus") ~ "Aunus",
      c("Länsi-Inkeri", "Keski-Inkeri", "Itä- ja Pohjois-Inkeri") ~ "Inkeri",
      c("Laatokan Karjala (Raja-Karjala)") ~ "Laatokan Karjala",
      c("Viena") ~ "Viena",
      c("Pohjois-Karjala") ~ "Pohjois-Karjala"
    )
  ) %>%
  distinct(p_id, year, col_name, pl_name) %>%
  compute_a(
    temporary = FALSE,
    overwrite = TRUE,
    name = "poems_to_cols",
    indexes = list(c("p_id"), c("year"))
  )
# Per-work overview: number of poems, summed `nverses` and summed
# `n_all_lines` from `poem_stats`, with n_verses_prev placed next to the
# verse total for comparison. Rows are ordered by publication order, then work.
work_parts_of_interest %>%
  inner_join(poem_stats) %>%
  group_by(publication_order, work, n_verses_prev) %>%
  summarise(
    poems = n(),
    verses = sum(nverses),
    all_lines = sum(n_all_lines)
  ) %>%
  arrange(publication_order, work) %>%
  relocate(n_verses_prev, .before = verses)
Works under analysis
# Per-book overview table (poems, verses, verse types), ordered by the first
# display_order of the book and rendered with gt.
# NOTE(review): verse_types counts (work, clust_id) rows per book, so a cluster
# occurring in several works of the same book is counted once per work - confirm
# whether distinct clusters per book were intended.
work_parts_of_interest %>%
  inner_join(poem_stats) %>%
  group_by(book) %>%
  summarise(
    poems = n(),
    verses = sum(nverses),
    all_lines = sum(n_all_lines),
    .groups = "drop"
  ) %>%
  inner_join(
    work_verse_clusts %>%
      inner_join(works_of_interest) %>%
      count(book, name = "verse_types")
  ) %>%
  left_join(
    works_of_interest %>%
      group_by(book) %>%
      summarise(display_order = min(display_order))
  ) %>%
  arrange(display_order) %>%
  select(book, poems, verses, verse_types) %>%
  gt(rowname_col = "book") %>%
  fmt_integer(c(poems, verses, verse_types), sep_mark = " ") %>%
  cols_label(
    verses = "Verses",
    poems = "Poems",
    verse_types = "Verse types"
  )
| | Poems | Verses | Verse types |
|---|---:|---:|---:|
| Väitöskirjan aineksia (1827) | 1 | 98 | 80 |
| Kantele I (1829) | 25 | 1 490 | 1 207 |
| Kantele II (1829) | 54 | 1 814 | 1 499 |
| Kantele III (1830) | 16 | 1 493 | 1 223 |
| Kantele IV (1831) | 27 | 1 609 | 1 384 |
| Sikermä-Kalevala (1833) | 7 | 4 754 | 3 113 |
| Runokokous Väinämöisestä (1833) | 17 | 8 006 | 4 681 |
| Lisiä runokokoukseen Väinämöisestä (1834) | 1 | 4 040 | 2 700 |
| Vanha Kalevala (1835) | 64 | 15 249 | 8 121 |
| Lisiä Vanhaan Kalevalaan (1836-1837) | 34 | 21 399 | 12 319 |
| Kanteletar (1840) | 676 | 23 133 | 14 071 |
| Kalevala (1849) | 50 | 22 796 | 16 788 |
Work development
# Sankey diagram of how verse types flow from work to work, with links
# coloured by the work in which the verse type first appeared.
#
# Node table: one node per catalogue row, ids 0..(n_works - 1). sankeyNetwork()
# addresses nodes by zero-based row position, so the ids mirror row order.
work_nodes_l <- works_of_interest_l %>%
  mutate(id = row_number() - 1)
# Offset for the duplicate node set appended below. It must equal the number
# of nodes; a hard-coded value (previously 14) makes the offset ids collide
# with real work ids as soon as the catalogue has more rows than that.
n_works <- nrow(work_nodes_l)
work_verse_network_l <- work_verse_clusts %>%
  inner_join(works_of_interest %>% select(work, publication_order)) %>%
  # x = earlier occurrence, y = later work; the right join keeps verse types
  # of y that have no earlier occurrence (x columns are then missing)
  right_join(
    work_verse_clusts %>%
      inner_join(works_of_interest %>% select(work, publication_order)),
    join_by(clust_id, publication_order < publication_order)
  ) %>%
  # verse types without a predecessor become links from the work to itself
  mutate(
    work.x = coalesce(work.x, work.y),
    publication_order.x = coalesce(publication_order.x, publication_order.y)
  ) %>%
  # keep only the most recently published predecessor(s) per verse type
  group_by(clust_id, work.y) %>%
  filter(publication_order.x == max(publication_order.x)) %>%
  ungroup() %>%
  inner_join(work_verse_clust_origins) %>%
  count(origin_work, work.x, work.y) %>%
  collect() %>%
  inner_join(work_nodes_l %>% select(work.x = work, id.x = id)) %>%
  inner_join(work_nodes_l %>% select(work.y = work, id.y = id)) %>%
  # self-links start from the duplicate node of the same work so that source
  # and target are different nodes
  mutate(id.x = if_else(id.y == id.x, n_works + id.x, id.x))
# Append the duplicate node set (ids n_works..(2 * n_works - 1)).
work_nodes_l <- work_nodes_l %>%
  union_all(work_nodes_l %>% mutate(id = id + n_works))
library(networkD3)
sankeyNetwork(
  Nodes = work_nodes_l,
  Links = work_verse_network_l,
  NodeID = 'work',
  Source = 'id.x',
  Target = 'id.y',
  Value = 'n',
  fontSize = 12,
  width = 2048,
  height = 2048,
  nodeWidth = 30,
  LinkGroup = 'origin_work',
  units = "verses",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory20c);")
)
# Node and link tables for the Sankey diagram coloured by source work.
# Nodes get zero-based ids in catalogue row order; n_works is the node count
# and doubles as the id offset for the duplicate node set added afterwards.
work_nodes_l <- mutate(works_of_interest_l, id = row_number() - 1)
n_works <- nrow(work_nodes_l)
work_verse_network_l <- work_verse_clusts %>%
  inner_join(select(works_of_interest, work, publication_order)) %>%
  # x = earlier occurrence, y = later work; verse types of y without an
  # earlier occurrence survive the right join with missing x columns
  right_join(
    work_verse_clusts %>%
      inner_join(select(works_of_interest, work, publication_order)),
    join_by(clust_id, publication_order < publication_order)
  ) %>%
  # such verse types are turned into links from the work to itself
  mutate(
    work.x = coalesce(work.x, work.y),
    publication_order.x = coalesce(publication_order.x, publication_order.y)
  ) %>%
  # keep the most recently published predecessor(s) per verse type
  group_by(clust_id, work.y) %>%
  filter(publication_order.x == max(publication_order.x)) %>%
  ungroup() %>%
  inner_join(work_verse_clust_origins) %>%
  count(work.x, work.y) %>%
  collect() %>%
  inner_join(select(work_nodes_l, work.x = work, id.x = id)) %>%
  inner_join(select(work_nodes_l, work.y = work, id.y = id)) %>%
  # self-links are re-sourced to the duplicate node of the same work
  mutate(id.x = if_else(id.y == id.x, n_works + id.x, id.x))
Joining with `by = join_by(work)`Joining with `by = join_by(work)`Joining with `by = join_by(clust_id)`Joining with `by = join_by(work.x)`Joining with `by = join_by(work.y)`
# Append a second copy of every node with ids shifted by n_works (the sources
# of self-links refer to these), then draw the diagram with links grouped by
# their source work.
work_nodes_l <- union_all(
  work_nodes_l,
  mutate(work_nodes_l, id = id + n_works)
)
library(networkD3)
sankeyNetwork(
  Nodes = work_nodes_l,
  Links = work_verse_network_l,
  NodeID = 'work',
  Source = 'id.x',
  Target = 'id.y',
  Value = 'n',
  fontSize = 12,
  width = 2048,
  height = 2048,
  nodeWidth = 30,
  LinkGroup = 'work.x',
  units = "verses",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory20c);")
)
Links is a tbl_df. Converting to a plain data frame.
Nodes is a tbl_df. Converting to a plain data frame.
# Rebuild the node table with zero-based ids in catalogue row order.
# NOTE(review): unlike the preceding chunk, no duplicate node set is appended
# here, while links whose source id was shifted by the node count would refer
# to it - confirm before reusing those links with this table.
work_nodes_l <- mutate(works_of_interest_l, id = row_number() - 1)
Error: object 'works_of_interest_l' not found
# Draw the Sankey diagram from the current node and link tables, with links
# grouped by their source work.
library(networkD3)
sankeyNetwork(
  Nodes = work_nodes_l,
  Links = work_verse_network_l,
  NodeID = 'work',
  Source = 'id.x',
  Target = 'id.y',
  Value = 'n',
  fontSize = 12,
  width = 2048,
  height = 2048,
  nodeWidth = 30,
  LinkGroup = 'work.x',
  units = "verses",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory20c);")
)
# Heat map: for every work, the share of its verse types by origin work.
# After the two joins, publication_order.x belongs to the origin work and
# publication_order.y to the work itself. Only origins published earlier than
# the work, or the work itself, are kept; `prop` is the share within the work.
work_verse_clusts %>%
  inner_join(work_verse_clust_origins) %>%
  inner_join(select(works_of_interest, origin_work = work, publication_order)) %>%
  inner_join(select(works_of_interest, work, publication_order), join_by(work)) %>%
  filter(publication_order.x < publication_order.y | work == origin_work) %>%
  count(origin_work, work, publication_order.x, publication_order.y) %>%
  group_by(work) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  collect() %>%
  # order both axes chronologically
  mutate(
    origin_work = fct_reorder(origin_work, publication_order.x),
    work = fct_reorder(work, publication_order.y)
  ) %>%
  ggplot(aes(x = origin_work, y = work, fill = prop)) +
  geom_tile() +
  theme_hsci_continuous() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Joining with `by = join_by(clust_id)`Joining with `by = join_by(origin_work)`

Origins of verse types in parts of the Kalevala in terms of earlier
works
# Stacked bars: for each work with publication_order 12 (the Kalevala (1849)
# sections), the share of its verse types that originate in each earlier work.
# Shares are computed over all of the section's verse types, including those
# original to it, before rows where the section is its own origin are dropped.
work_verse_clusts %>%
  inner_join(work_verse_clust_origins) %>%
  inner_join(select(works_of_interest, origin_work = work, publication_order)) %>%
  inner_join(select(works_of_interest, work, publication_order), join_by(work)) %>%
  filter(publication_order.x < publication_order.y | work == origin_work) %>%
  count(origin_work, work, publication_order.x, publication_order.y) %>%
  group_by(work) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  filter(publication_order.y == 12, origin_work != work) %>%
  collect() %>%
  # catalogue row order decides the order of the sections on the axis
  inner_join(
    select(mutate(works_of_interest_l, order = row_number()), work, order)
  ) %>%
  mutate(
    origin_work = fct_reorder(origin_work, publication_order.x),
    work = fct_reorder(work, order, .desc = TRUE)
  ) %>%
  ggplot(aes(x = work, y = prop, fill = origin_work)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_hsci_discrete() +
  labs(fill = "") +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(ncol = 1))
Joining with `by = join_by(clust_id)`Joining with `by = join_by(origin_work)`Joining with `by = join_by(work)`

# Dodged bars: for every earlier work (work.x) and every work with
# publication_order 12 (work.y), the share of the earlier work's verse types
# that also occur in that later work. `on` is the earlier work's total number
# of verse types.
work_verse_clusts %>%
  inner_join(select(works_of_interest, work, publication_order)) %>%
  inner_join(
    work_verse_clusts %>%
      inner_join(
        works_of_interest %>%
          filter(publication_order == 12) %>%
          select(work, publication_order)
      ),
    join_by(clust_id, publication_order < publication_order)
  ) %>%
  count(work.x, work.y) %>%
  inner_join(count(work_verse_clusts, work.x = work, name = "on")) %>%
  mutate(prop = n / on) %>%
  ggplot(aes(x = work.y, y = prop, fill = work.x)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_hsci_discrete() +
  labs(fill = "") +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(ncol = 1))

# Bars: for every earlier work, the share of its verse types that occur
# anywhere in the book with publication_order 12. Clusters are counted once
# per (earlier work, book) even if several works of the book contain them;
# `on` is the earlier work's total number of verse types.
work_verse_clusts %>%
  inner_join(select(works_of_interest, work, book, publication_order)) %>%
  inner_join(
    work_verse_clusts %>%
      inner_join(filter(works_of_interest, publication_order == 12)) %>%
      select(clust_id, work, book, publication_order),
    join_by(clust_id, publication_order < publication_order)
  ) %>%
  group_by(work.x, book.y) %>%
  summarise(n = n_distinct(clust_id), .groups = "drop") %>%
  inner_join(count(work_verse_clusts, work.x = work, name = "on")) %>%
  mutate(prop = n / on) %>%
  ggplot(aes(x = work.x, y = prop)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_hsci_discrete() +
  labs(fill = "") +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(ncol = 1))

