Error : Table 'vd17.vd17_normalized_years_c' doesn't exist [1146]
Joining, by = "record_number"Joining, by = "record_number"
Joining vd17_id and vd17_titles tables with
fbs_links_of_interest
fbs_links_of_interest_local <- fbs_links_of_interest_a %>%
inner_join(fbs_metadata_a %>% select(GND,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD) %>% mutate(GND = str_c("gnd/", GND))) %>%
inner_join(vd17_id_a) %>%
inner_join(vd17_titles_a %>% select(record_number,title)) %>%
collect() %>%
select(where(~!all(is.na(.x))))
Joining, by = "GND"Joining, by = "record_number"Joining, by = "record_number"
fbs_links_of_interest_local <- select (fbs_links_of_interest_local, c('record_number','vd17_id','title','Member_number_new','First_name_new', 'Last_name_new','Society_name','GND','Estimated_admission_year','Estimated_DOD','role2','field_code'))
Joining vd17_id and vd17_titles tables with whole FBS (not
links_of_interest)
fbs_all_links_local <- fbs_links_a %>%
inner_join(fbs_metadata_a %>% select(GND,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD) %>% mutate(GND = str_c("gnd/", GND))) %>%
inner_join(vd17_id_a) %>%
inner_join(vd17_titles_a %>% select(record_number,title)) %>%
collect() %>%
select(where(~!all(is.na(.x))))
Joining, by = "GND"Joining, by = "record_number"Joining, by = "record_number"
fbs_all_links_local <- select (fbs_all_links_local, c('record_number','vd17_id','title','Member_number_new','First_name_new', 'Last_name_new','Society_name','GND','Estimated_admission_year','Estimated_DOD','role2','field_code'))
Creating local tables of vd17 normalized years, genres, languages
and id.
vd17_normalized_years_local <- vd17_normalized_years_a %>%
collect()
vd17_genres_local <- vd17_genres_a %>%
collect()
vd17_normalized_langs_local <- vd17_normalized_langs_a %>%
collect()
vd17_id_local <- vd17_id_a %>%
collect()
FBS publications by year for members (links of interest).
The below plot depicts the number of publications (plus cumulative
distribution) by FBS members by year for links of interest.
publications_fbs <- fbs_links_of_interest_local %>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code)
pub_fbs <-publications_fbs %>%
group_by(normalized_year) %>%
summarize(records = n_distinct(record_number), .groups = "drop") %>%
mutate(phase = case_when(
normalized_year < 1617 ~ "< 1617",
normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
normalized_year > 1682 ~ "> 1682"
))
ggplot(pub_fbs,aes(x=normalized_year, y=records, color = phase)) +
geom_step()+
xlab("Year") + ylab("FBS-Publications(links of interest)")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

cumulative_frequencies_pub <- pub_fbs %>%
arrange(normalized_year) %>%
mutate(cum_frequency=cumsum(records))%>%
mutate(phase = case_when(
normalized_year < 1617 ~ "< 1617",
normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
normalized_year > 1682 ~ "> 1682"
))
ggplot(cumulative_frequencies_pub, aes(x=normalized_year, y=cum_frequency)) +
geom_step()+
xlab("Year") + ylab("FBS-Publications-CD (links of interest)")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

FBS publications by year for all members.
The below plot depicts the number of publications (plus cumulative
distribution) by whole FBS members by year.
publications_fbs_all <- fbs_all_links_local %>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code)
pub_fbs_all <-publications_fbs_all %>%
group_by(normalized_year) %>%
summarize(records = n_distinct(record_number), .groups = "drop") %>%
mutate(phase = case_when(
normalized_year < 1617 ~ "< 1617",
normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
normalized_year > 1682 ~ "> 1682"
))
ggplot(pub_fbs_all,aes(x=normalized_year, y=records, color = phase)) +
geom_step()+
xlab("Year") + ylab("FBS-Publications (all)")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

cumulative_frequencies_pub_all <- pub_fbs_all %>%
arrange(normalized_year) %>%
mutate(cum_frequency=cumsum(records))%>%
mutate(phase = case_when(
normalized_year < 1617 ~ "< 1617",
normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
normalized_year > 1682 ~ "> 1682"
))
ggplot(cumulative_frequencies_pub_all, aes(x=normalized_year, y=cum_frequency)) +
geom_step()+
xlab("Year") + ylab("FBS-Publications-CD (all)")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

FBS genres by year (links of interest).
The below plot depicts the top genre of FBS publications by year for
links of interest.
genres_fbs <- fbs_links_of_interest_local %>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
left_join(vd17_genres_local, by = c("record_number"))%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,genre)
genre_fbs_links_of_interest <- gs4_create(
"sheets-genre_fbs_links_of_interest",
sheets = genres_fbs)
✔ Creating new Sheet: sheets-genre_fbs_links_of_interest.
The googlesheets4 package is requesting access to your Google account.
Select a pre-authorised account or enter '0' to obtain a new token.
Press Esc/Ctrl + C to cancel.
1: narges.azizifard@gmail.com
1
Auto-refreshing stale OAuth token.
genre_fbs_links_of_interest
Spreadsheet name: sheets-genre_fbs_links_of_interest
ID: 1UM1quvDGOkxRRLtDnf1eA-k6SsaeP8BRIr_wtzoq0kw
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
genres_fbs: 20398 x 14
phase_genres_fbs <- genres_fbs%>%
select(record_number,normalized_year,genre)%>%
distinct(record_number,normalized_year,genre)%>%
na.omit(genres_fbs)%>%
group_by(normalized_year) %>%
count(genre)%>%
arrange(desc(n))%>%
mutate(phase = case_when(
normalized_year < 1617 ~ "< 1617",
normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
normalized_year > 1682 ~ "> 1682"
))
phase_genres_fbs%>%
slice(1:1)%>%
ggplot(aes(x=normalized_year,y=n,fill=genre))+
geom_col()+
xlab("Year") + ylab("FBS-Top Genre_links_of_interest")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+
theme(legend.position="right",
legend.key.width=unit(0.15, "cm"))+
geom_text(aes(label = n), color = "black", size = 1, position = position_stack(vjust = 0.9))

FBS genres by year (all).
The below plot depicts the top genre of whole FBS publications by
year.
genres_fbs_all <- fbs_all_links_local %>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
left_join(vd17_genres_local, by = c("record_number"))%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,genre)
genre_fbs_all <- gs4_create(
"sheets-genre_fbs_all",
sheets = genres_fbs_all)
✔ Creating new Sheet: sheets-genre_fbs_all.
genre_fbs_all
Spreadsheet name: sheets-genre_fbs_all
ID: 11W7X4MRfDIyAu2UPOFiHgK4xgr--8nUaQ4AL3Bm5TNI
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
genres_fbs_all: 23599 x 14
phase_genres_fbs_all <- genres_fbs_all%>%
select(record_number,normalized_year,genre)%>%
distinct(record_number,normalized_year,genre)%>%
na.omit(genres_fbs_all)%>%
group_by(normalized_year) %>%
count(genre)%>%
arrange(desc(n))%>%
mutate(phase = case_when(
normalized_year < 1617 ~ "< 1617",
normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
normalized_year > 1682 ~ "> 1682"
))
phase_genres_fbs_all%>%
slice(1:1)%>%
ggplot(aes(x=normalized_year,y=n,fill=genre))+
geom_col()+
xlab("Year") + ylab("FBS-Top Genre_all")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+
theme(legend.position="right",
legend.key.width=unit(0.15, "cm"))+
geom_text(aes(label = n), color = "black", size = 1, position = position_stack(vjust = 0.9))

The below code, shows the keywords related to translations available
in notes, titles and subtitles of publications (in a case that their
original language is NA)
list1 <- c("Übertrag","Übersetz","Geteutsch","Gedolmetsch","Dolmetsch","Verdeütsch","Verdeutsch","Verteutsch","Übergesetz","Versetzt","Gegenübersetz","Ubergesatz","Uberbracht","Überbracht","Teutsch","Deutsch","Gebracht","übertrag","übersetz","geteutsch","gedolmetsch","dolmetsch","verdeütsch","verdeutsch","verteutsch","übergesetz","versetzt","gegenübersetz","ubergesatz","uberbracht","überbracht","teutsch","deutsch","gebracht")
list2 <- c("Übers","übers")
vd17_a_translations_link1 <- vd17_a %>%
mutate(value = str_replace_all(value, sql("CHR(0)"), "_")) %>%
collect()%>%
filter(field_code %in% c("021A","021G","021M","021N"),grepl(paste(list1, collapse="|"), value))%>%
mutate(translations_value1=value)%>%
select(record_number,translations_value1)%>%
distinct()
vd17_a_translations_link2 <- vd17_a %>%
mutate(value = str_replace_all(value, sql("CHR(0)"), "_")) %>%
collect()%>%
filter(field_code %in% c("037A","046L"),grepl(paste(list2, collapse="|"), value))%>%
mutate(translations_value2=value)%>%
select(record_number,translations_value2)%>%
distinct()
vd17_a_translations_link <- merge(vd17_a_translations_link1, vd17_a_translations_link2,by=c("record_number"),all=TRUE)
vd17_a_translations_link <- vd17_a_translations_link %>%
distinct()
vd17_id_translations_link <- vd17_a_translations_link %>%
inner_join(vd17_id_local,by=c("record_number"))
vd17_specific_link_translations <- gs4_create(
"sheets-vd17_specific_link_translations_fbs",
sheets = vd17_id_translations_link)
✔ Creating new Sheet: sheets-vd17_specific_link_translations_fbs.
vd17_specific_link_translations
Spreadsheet name: sheets-vd17_specific_link_translations_fbs
ID: 1I3hnXDo_wxUtEbzWN8v9uScPNWhIORYO9qLezaSqFr0
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
vd17_id_translations_link: 21799 x 4
vd17_id_translations_link_2 = subset(vd17_id_translations_link, select = -c(translations_value1,translations_value2,vd17_id))
# FBS translations (links of interest) (It includes all translations not only German one which their original language is not NA).
fbs_translation <- fbs_links_of_interest_local %>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
left_join(vd17_normalized_langs_local, by = c("record_number"))%>%
filter(!is.na(original_language))%>%
left_join(vd17_genres_local, by = c("record_number"))%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,original_language,intermediary_language,publication_language,genre)%>%
distinct()
# Joining the vd17 translations (which extracted by the keywords in their notes, titles, subtitles) with fbs publications, to extract more translations (in a case that their original language is NA).
specified_translation_fbs <- fbs_links_of_interest_local %>%
inner_join(vd17_id_translations_link_2, by = c("record_number"))%>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
left_join(vd17_normalized_langs_local, by = c("record_number"))%>%
left_join(vd17_genres_local, by = c("record_number"))%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,original_language,intermediary_language,publication_language,genre)%>%
distinct()
# Finding the records of of translations with keywords, which are not available in FBS translations (by original language).
needed_translation_fbs <- specified_translation_fbs[!specified_translation_fbs$record_number %in% fbs_translation$record_number, , drop = FALSE]
needed_translations_of_fbs <- gs4_create(
"sheets-translations_of_fbs",
sheets = needed_translation_fbs)
✔ Creating new Sheet: sheets-translations_of_fbs.
needed_translations_of_fbs
Spreadsheet name: sheets-translations_of_fbs
ID: 1aJzEHRMQwhMxC7IcqN5uJXfusPDbXp4xhqfspBMLwbw
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
needed_translation_fbs: 1327 x 17
Genre of FBS (links of interest) (for German translations).
The below wordcloud depicts the frequency of genres of FBS
translations for links of interest.
library(wordcloud)
Loading required package: RColorBrewer
genre_translation_fbs <- fbs_translation%>%
filter(publication_language=="ger")%>%
select(record_number,normalized_year,genre)%>%
distinct(record_number,normalized_year,genre)%>%
na.omit(translation_fbs)%>%
group_by(genre) %>%
count(genre)%>%
arrange(desc(n))
wordcloud(words = genre_translation_fbs$genre, freq = genre_translation_fbs$n, min.freq = 1,
colors = brewer.pal(20,"Paired"), random.order=FALSE, scale = c(1, 0.4))
Warning: n too large, allowed maximum for palette Paired is 12
Returning the palette you asked for with that many colors

Original language of FBS translations (links of interest)(It
includes all translations not only German one).
The below wordcloud depicts the frequency of original_language of
FBS translations for links of interest.
o_language_translation_fbs <- fbs_translation%>%
select(record_number,normalized_year,original_language)%>%
distinct(record_number,normalized_year,original_language)%>%
na.omit(ger_translation_fbs_all)%>%
group_by(original_language) %>%
count(original_language)%>%
arrange(desc(n))
wordcloud(words = o_language_translation_fbs$original_language, freq = o_language_translation_fbs$n, min.freq = 1,
colors = brewer.pal(20,"Paired"), random.order=FALSE, scale = c(1, 0.4))
Warning: n too large, allowed maximum for palette Paired is 12
Returning the palette you asked for with that many colors

Primary authors and Translators of FBS translations (links of
interest)(It includes all translations not only German one).
primary_authors_links_of_interests <- fbs_translation%>%
filter(field_code=="028A")
primary_authors <- gs4_create(
"sheets-primary_authors_translations_fbs_links_of_interest",
sheets = primary_authors_links_of_interests)
✔ Creating new Sheet: sheets-primary_authors_translations_fbs_links_of_interest.
primary_authors
Spreadsheet name: sheets-primary_authors_translations_fbs_links_of_interest
ID: 1xF3PQ22i52-v-0BoTaPKB4Nmb0WeW37fv_Rn8CDBw6c
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
primary_authors_links_of_interests: 120 x 17
translators_translation_links_of_interest <- fbs_translation %>%
filter(field_code=="028C",(grepl(paste(c("Übers"), collapse="|"), role2)|role2 %in%(NA)))
translators_translation <- gs4_create(
"sheets-translator_translations_fbs",
sheets = translators_translation_links_of_interest)
✔ Creating new Sheet: sheets-translator_translations_fbs.
translators_translation
Spreadsheet name: sheets-translator_translations_fbs
ID: 1MQZbvrXSwimDCe15cUwIKkCgvyhEwy_305Oh8HfXzW0
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
translators_translation_links_of_interest: 391 x 17
Translated vs Non-Translated member publications_fbs (all).
The below plot depicts the Translated vs Non-Translated member
publications of FBS.
trans_vs_non_trans <- fbs_all_links_local %>%
left_join(vd17_normalized_years_local, by = c("record_number")) %>%
filter(normalized_year >= 1600, normalized_year <= 1700) %>%
filter(nchar(normalized_year)==4)%>%
left_join(vd17_normalized_langs_local, by = c("record_number"))%>%
select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,original_language,publication_language)
trans_vs_non_trans$type <- is.na(trans_vs_non_trans$original_language)
trans_vs_non_trans$type[trans_vs_non_trans$type == "FALSE"] <- "Translated"
trans_vs_non_trans$type[trans_vs_non_trans$type == "TRUE"] <- "Non-Translated"
translated_vs_non_translated <- gs4_create(
"sheets-translated_vs_non_translated_fbs",
sheets = trans_vs_non_trans)
✔ Creating new Sheet: sheets-translated_vs_non_translated_fbs.
translated_vs_non_translated
Spreadsheet name: sheets-translated_vs_non_translated_fbs
ID: 1sFSYxEPi8p9szQo5QfLHOLtqqmFLeVoeKu5h1H7RwdM
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
trans_vs_non_trans: 14678 x 16
trans_vs_non_trans%>%
distinct(record_number,normalized_year,type)%>%
select(normalized_year,type)%>%
group_by(normalized_year)%>%
count(type)%>%
arrange(desc(n))%>%
ggplot(aes(x=normalized_year,y=n, fill=type)) +
geom_col()+
labs(y = "Translated vs Non-Translated FBS", x= "Years")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+
theme(legend.position="bottom",
legend.key.width=unit(0.15, "cm"))+
geom_text(aes(label = n), color = "green", size = 1.8, position = position_stack(vjust = 0.9))

Translated vs Non-Translated member publications_fbs (all)
(Percentage by year).
trans_non_trans_percentage <- trans_vs_non_trans%>%
distinct(record_number,normalized_year,type)%>%
select(normalized_year,type)%>%
mutate(type = factor(type)) %>%
count(normalized_year, type, .drop = FALSE, name = 'Percentage') %>%
group_by(normalized_year) %>%
mutate(Percentage = round(Percentage/sum(Percentage) * 100),2)
translated_non_translated_percentage <- gs4_create(
"sheets-translated_non_translated_percentage_fbs",
sheets = trans_non_trans_percentage)
✔ Creating new Sheet: sheets-translated_non_translated_percentage_fbs.
translated_non_translated_percentage
Spreadsheet name: sheets-translated_non_translated_percentage_fbs
ID: 1lngxNQmTxVjsKYyXtPeHlfhY4T_FVFlVcQeTUffBSUo
Locale: en_US
Time zone: Etc/GMT
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
trans_non_trans_percentage: 201 x 4
Translated vs Non-Translated member publications_fbs (all)
(Percentage by whole).
print(c("Translated",round(sum(trans_vs_non_trans$type=="Translated")/length(trans_vs_non_trans$type)*100,2)))
[1] "Translated" "2.95"
print(c("Non-Translated",round(sum(trans_vs_non_trans$type=="Non-Translated")/length(trans_vs_non_trans$type)*100,2)))
[1] "Non-Translated" "97.05"
---
title: "FBS analysis"
output:
  html_notebook:
    code_folding: hide
    toc: yes
---

```{r setup,echo=F}
knitr::opts_knit$set(root.dir = here::here())
library(here)
source(here("code/common_basis.R"), local = knitr::knit_global())
library(tidyverse)
library(gghsci)
library(gt)
p <- function(number) {
  return(format(number, scientific = FALSE, big.mark = ","))
}
pp <- function(percentage, accuracy = 0.01) {
  return(scales::percent(percentage, accuracy = accuracy))
}
```

# Joining vd17_id and vd17_titles tables with fbs_links_of_interest 
```{r}
fbs_links_of_interest_local <- fbs_links_of_interest_a %>%
  inner_join(fbs_metadata_a %>% select(GND,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD) %>% mutate(GND = str_c("gnd/", GND))) %>%
  inner_join(vd17_id_a) %>%
  inner_join(vd17_titles_a %>% select(record_number,title)) %>%
  collect() %>%
  select(where(~!all(is.na(.x))))

fbs_links_of_interest_local <- select (fbs_links_of_interest_local, c('record_number','vd17_id','title','Member_number_new','First_name_new', 'Last_name_new','Society_name','GND','Estimated_admission_year','Estimated_DOD','role2','field_code'))
```
# Joining vd17_id and vd17_titles tables with whole FBS (not links_of_interest)
```{r}
fbs_all_links_local <- fbs_links_a %>%
  inner_join(fbs_metadata_a %>% select(GND,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD) %>% mutate(GND = str_c("gnd/", GND))) %>%
  inner_join(vd17_id_a) %>%
  inner_join(vd17_titles_a %>% select(record_number,title)) %>%
  collect() %>%
  select(where(~!all(is.na(.x))))

fbs_all_links_local <- select (fbs_all_links_local, c('record_number','vd17_id','title','Member_number_new','First_name_new', 'Last_name_new','Society_name','GND','Estimated_admission_year','Estimated_DOD','role2','field_code'))
```
# Creating local tables of vd17 normalized years, genres, languages and id.
```{r}
vd17_normalized_years_local <- vd17_normalized_years_a %>%
  collect()
vd17_genres_local <- vd17_genres_a %>%
  collect()
vd17_normalized_langs_local <- vd17_normalized_langs_a %>%
  collect()
vd17_id_local <- vd17_id_a %>%
  collect()
```

# FBS publications by year for members (links of interest).
# The below plot depicts the number of publications (plus cumulative distribution) by FBS members by year for links of interest. 
```{r}
publications_fbs <- fbs_links_of_interest_local %>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code)

pub_fbs <-publications_fbs %>%
  group_by(normalized_year) %>%
  summarize(records = n_distinct(record_number), .groups = "drop") %>%
  mutate(phase = case_when(
    normalized_year < 1617 ~ "< 1617",
    normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
    normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
    normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
    normalized_year > 1682 ~ "> 1682"
  ))

  ggplot(pub_fbs,aes(x=normalized_year, y=records, color = phase)) + 
  geom_step()+
  xlab("Year") + ylab("FBS-Publications(links of interest)")+
  scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
  theme_hsci_discrete()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
  
  cumulative_frequencies_pub <- pub_fbs %>% 
  arrange(normalized_year) %>% 
  mutate(cum_frequency=cumsum(records))%>%
  mutate(phase = case_when(
    normalized_year < 1617 ~ "< 1617",
    normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
    normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
    normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
    normalized_year > 1682 ~ "> 1682"
  ))


ggplot(cumulative_frequencies_pub, aes(x=normalized_year, y=cum_frequency)) +
  geom_step()+
  xlab("Year") + ylab("FBS-Publications-CD (links of interest)")+
  scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
  theme_hsci_discrete()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
```
# FBS publications by year for all members. 
# The below plot depicts the number of publications (plus cumulative distribution) by whole FBS members by year. 
```{r}
publications_fbs_all <- fbs_all_links_local %>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code)

pub_fbs_all <-publications_fbs_all %>%
  group_by(normalized_year) %>%
  summarize(records = n_distinct(record_number), .groups = "drop") %>%
  mutate(phase = case_when(
    normalized_year < 1617 ~ "< 1617",
    normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
    normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
    normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
    normalized_year > 1682 ~ "> 1682"
  ))

  ggplot(pub_fbs_all,aes(x=normalized_year, y=records, color = phase)) + 
  geom_step()+
  xlab("Year") + ylab("FBS-Publications (all)")+
  scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
  theme_hsci_discrete()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
  
  
  cumulative_frequencies_pub_all <- pub_fbs_all %>% 
  arrange(normalized_year) %>% 
  mutate(cum_frequency=cumsum(records))%>%
  mutate(phase = case_when(
    normalized_year < 1617 ~ "< 1617",
    normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
    normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
    normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
    normalized_year > 1682 ~ "> 1682"
  ))


ggplot(cumulative_frequencies_pub_all, aes(x=normalized_year, y=cum_frequency)) +
  geom_step()+
  xlab("Year") + ylab("FBS-Publications-CD (all)")+
  scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
  theme_hsci_discrete()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
```
# FBS genres by year (links of interest).
# The below plot depicts the top genre of FBS publications by year for links of interest.
```{r}
genres_fbs <- fbs_links_of_interest_local %>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  left_join(vd17_genres_local, by = c("record_number"))%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,genre)

genre_fbs_links_of_interest <- gs4_create(
  "sheets-genre_fbs_links_of_interest",
  sheets = genres_fbs)

genre_fbs_links_of_interest

phase_genres_fbs <- genres_fbs%>%
  select(record_number,normalized_year,genre)%>%
  distinct(record_number,normalized_year,genre)%>%
  na.omit(genres_fbs)%>%
  group_by(normalized_year) %>%
  count(genre)%>%
  arrange(desc(n))%>%
  mutate(phase = case_when(
    normalized_year < 1617 ~ "< 1617",
    normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
    normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
    normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
    normalized_year > 1682 ~ "> 1682"
  ))


phase_genres_fbs%>%
  slice(1:1)%>%
  ggplot(aes(x=normalized_year,y=n,fill=genre))+
  geom_col()+
  xlab("Year") + ylab("FBS-Top Genre_links_of_interest")+
  scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
  theme_hsci_discrete()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+
  theme(legend.position="right",
        legend.key.width=unit(0.15, "cm"))+
  geom_text(aes(label = n), color = "black", size = 1, position = position_stack(vjust = 0.9)) 
```

# FBS genres by year (all).
# The below plot depicts the top genre of whole FBS publications by year.
```{r}
genres_fbs_all <- fbs_all_links_local %>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  left_join(vd17_genres_local, by = c("record_number"))%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,genre)

genre_fbs_all <- gs4_create(
  "sheets-genre_fbs_all",
  sheets = genres_fbs_all)

genre_fbs_all

phase_genres_fbs_all <- genres_fbs_all%>%
  select(record_number,normalized_year,genre)%>%
  distinct(record_number,normalized_year,genre)%>%
  na.omit(genres_fbs_all)%>%
  group_by(normalized_year) %>%
  count(genre)%>%
  arrange(desc(n))%>%
  mutate(phase = case_when(
    normalized_year < 1617 ~ "< 1617",
    normalized_year >= 1617 & normalized_year <= 1650 ~ "phase 1",
    normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2",
    normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3",
    normalized_year > 1682 ~ "> 1682"
  ))

phase_genres_fbs_all%>%
  slice(1:1)%>%
  ggplot(aes(x=normalized_year,y=n,fill=genre))+
  geom_col()+
  xlab("Year") + ylab("FBS-Top Genre_all")+
  scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
  theme_hsci_discrete()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+
  theme(legend.position="right",
        legend.key.width=unit(0.15, "cm"))+
  geom_text(aes(label = n), color = "black", size = 1, position = position_stack(vjust = 0.9)) 
```
# The below code, shows the keywords related to translations available in notes, titles and subtitles of publications (in a case that their original language is NA)

```{r}
list1 <- c("Übertrag","Übersetz","Geteutsch","Gedolmetsch","Dolmetsch","Verdeütsch","Verdeutsch","Verteutsch","Übergesetz","Versetzt","Gegenübersetz","Ubergesatz","Uberbracht","Überbracht","Teutsch","Deutsch","Gebracht","übertrag","übersetz","geteutsch","gedolmetsch","dolmetsch","verdeütsch","verdeutsch","verteutsch","übergesetz","versetzt","gegenübersetz","ubergesatz","uberbracht","überbracht","teutsch","deutsch","gebracht")
list2 <- c("Übers","übers")

vd17_a_translations_link1 <- vd17_a %>% 
  mutate(value = str_replace_all(value, sql("CHR(0)"), "_")) %>%
  collect()%>%
  filter(field_code %in% c("021A","021G","021M","021N"),grepl(paste(list1, collapse="|"), value))%>%
  mutate(translations_value1=value)%>%
  select(record_number,translations_value1)%>%
  distinct()

vd17_a_translations_link2 <- vd17_a %>% 
  mutate(value = str_replace_all(value, sql("CHR(0)"), "_")) %>%
  collect()%>%
  filter(field_code %in% c("037A","046L"),grepl(paste(list2, collapse="|"), value))%>%
  mutate(translations_value2=value)%>%
  select(record_number,translations_value2)%>%
  distinct()

vd17_a_translations_link <- merge(vd17_a_translations_link1, vd17_a_translations_link2,by=c("record_number"),all=TRUE)
vd17_a_translations_link <- vd17_a_translations_link %>%
  distinct()

vd17_id_translations_link <- vd17_a_translations_link %>%
  inner_join(vd17_id_local,by=c("record_number"))

vd17_specific_link_translations <- gs4_create(
  "sheets-vd17_specific_link_translations_fbs",
  sheets = vd17_id_translations_link)
vd17_specific_link_translations
```
```{r}
vd17_id_translations_link_2 = subset(vd17_id_translations_link, select = -c(translations_value1,translations_value2,vd17_id))
```

```{r}
# FBS translations (links of interest) (It includes all translations not only German one which their original language is not NA).

fbs_translation <- fbs_links_of_interest_local %>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  left_join(vd17_normalized_langs_local, by = c("record_number"))%>%
  filter(!is.na(original_language))%>%
  left_join(vd17_genres_local, by = c("record_number"))%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,original_language,intermediary_language,publication_language,genre)%>%
  distinct()

# Joining the vd17 translations (which extracted by the keywords in their notes, titles, subtitles) with fbs publications, to extract more translations (in a case that their original language is NA).
specified_translation_fbs <- fbs_links_of_interest_local %>%
  inner_join(vd17_id_translations_link_2, by = c("record_number"))%>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  left_join(vd17_normalized_langs_local, by = c("record_number"))%>%
  left_join(vd17_genres_local, by = c("record_number"))%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,original_language,intermediary_language,publication_language,genre)%>%
  distinct()

# Finding the records of of translations with keywords, which are not available in FBS translations (by original language).
needed_translation_fbs <- specified_translation_fbs[!specified_translation_fbs$record_number %in% fbs_translation$record_number, , drop = FALSE]


needed_translations_of_fbs <- gs4_create(
  "sheets-translations_of_fbs",
  sheets = needed_translation_fbs)
needed_translations_of_fbs
```

# Genre of FBS (links of interest) (for German translations).
# The below wordcloud depicts the frequency of genres of FBS translations for links of interest.
```{r}
library(wordcloud)
genre_translation_fbs <- fbs_translation%>%
  filter(publication_language=="ger")%>%
  select(record_number,normalized_year,genre)%>%
  distinct(record_number,normalized_year,genre)%>%
  na.omit(translation_fbs)%>%
  group_by(genre) %>%
  count(genre)%>%
  arrange(desc(n))

wordcloud(words = genre_translation_fbs$genre, freq = genre_translation_fbs$n, min.freq = 1,
          colors = brewer.pal(20,"Paired"), random.order=FALSE, scale = c(1, 0.4))
```

# Original language of FBS translations (links of interest)(It includes all translations not only German one). 
# The below wordcloud depicts the frequency of original_language of FBS translations for links of interest.
```{r}
o_language_translation_fbs <- fbs_translation%>%
  select(record_number,normalized_year,original_language)%>%
  distinct(record_number,normalized_year,original_language)%>%
  na.omit(ger_translation_fbs_all)%>%
  group_by(original_language) %>%
  count(original_language)%>%
  arrange(desc(n))

wordcloud(words = o_language_translation_fbs$original_language, freq = o_language_translation_fbs$n, min.freq = 1,
          colors = brewer.pal(20,"Paired"), random.order=FALSE, scale = c(1, 0.4))
```
# Primary authors and Translators of FBS translations (links of interest)(It includes all translations not only German one).
```{r}
primary_authors_links_of_interests <- fbs_translation%>% 
  filter(field_code=="028A")

primary_authors <- gs4_create(
  "sheets-primary_authors_translations_fbs_links_of_interest",
  sheets = primary_authors_links_of_interests)
primary_authors
    
translators_translation_links_of_interest <- fbs_translation %>% 
  filter(field_code=="028C",(grepl(paste(c("Übers"), collapse="|"), role2)|role2 %in%(NA)))  

translators_translation <- gs4_create(
  "sheets-translator_translations_fbs",
  sheets = translators_translation_links_of_interest)
translators_translation
```

# Translated vs Non-Translated member publications_fbs (all).
# The below plot depicts the Translated vs Non-Translated member publications of FBS.
```{r}
trans_vs_non_trans <- fbs_all_links_local %>%
  left_join(vd17_normalized_years_local, by = c("record_number")) %>%
  filter(normalized_year >= 1600, normalized_year <= 1700) %>%
  filter(nchar(normalized_year)==4)%>%
  left_join(vd17_normalized_langs_local, by = c("record_number"))%>%
  select (record_number,normalized_year,vd17_id,title,Member_number_new,First_name_new,Last_name_new,Society_name,Estimated_admission_year,Estimated_DOD,GND,role2,field_code,original_language,publication_language)


trans_vs_non_trans$type <- is.na(trans_vs_non_trans$original_language)

trans_vs_non_trans$type[trans_vs_non_trans$type == "FALSE"] <- "Translated"
trans_vs_non_trans$type[trans_vs_non_trans$type == "TRUE"] <- "Non-Translated"

translated_vs_non_translated <- gs4_create(
  "sheets-translated_vs_non_translated_fbs",
  sheets = trans_vs_non_trans)
translated_vs_non_translated

trans_vs_non_trans%>%
distinct(record_number,normalized_year,type)%>%
select(normalized_year,type)%>%
group_by(normalized_year)%>%
count(type)%>%
arrange(desc(n))%>%
ggplot(aes(x=normalized_year,y=n, fill=type)) + 
geom_col()+
labs(y = "Translated vs Non-Translated FBS", x= "Years")+
scale_x_continuous(breaks = seq(1000, 2000, by = 5))+
theme_hsci_discrete()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+
theme(legend.position="bottom",
        legend.key.width=unit(0.15, "cm"))+
geom_text(aes(label = n), color = "green", size = 1.8, position = position_stack(vjust = 0.9)) 
```
# Translated vs Non-Translated member publications_fbs (all) (Percentage by year).
```{r}
trans_non_trans_percentage <- trans_vs_non_trans%>%
  distinct(record_number,normalized_year,type)%>%
  select(normalized_year,type)%>%
  mutate(type = factor(type)) %>%
  count(normalized_year, type, .drop = FALSE, name = 'Percentage') %>%
  group_by(normalized_year) %>%
  mutate(Percentage = round(Percentage/sum(Percentage) * 100),2)

translated_non_translated_percentage <- gs4_create(
  "sheets-translated_non_translated_percentage_fbs",
  sheets = trans_non_trans_percentage)
translated_non_translated_percentage
```

# Translated vs Non-Translated member publications_fbs (all) (Percentage by whole).
```{r}
print(c("Translated",round(sum(trans_vs_non_trans$type=="Translated")/length(trans_vs_non_trans$type)*100,2)))
print(c("Non-Translated",round(sum(trans_vs_non_trans$type=="Non-Translated")/length(trans_vs_non_trans$type)*100,2)))
```