library(tidyverse)
# Read 18th-data.csv and keep the rows whose `Ok for festschrift?` value
# is not the string "yess".
# NOTE(review): `!=` evaluates to NA for missing values and filter() drops
# such rows; confirm that this, and the literal "yess", are intended.
d18th <- read_csv("18th-data.csv") %>%
  filter(`Ok for festschrift?` != "yess")
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_character(),
`Earliest letter` = col_double(),
Frequency = col_double(),
`Total DF after` = col_double(),
`Total DF before` = col_double(),
word_daterange_start = col_double(),
sense_daterange_start = col_double(),
MultiSenders = col_logical(),
MultiRec = col_logical(),
Year = col_double(),
YearUncertain = col_logical(),
WordCount = col_double(),
AddressFormula = col_logical(),
ClosingFormula = col_logical(),
CorrespondentNotes = col_logical(),
LetterNumber = col_logical(),
Copyright = col_logical(),
Complete = col_logical(),
Updated = col_datetime(format = ""),
`non-normalized` = col_double(),
words = col_double()
# ... with 16 more columns
)
ℹ Use `spec()` for the full column specifications.
# Read 18th-sampleinfo.csv; this table is passed to g() as `d2` below.
s18th <- read_csv(file = "18th-sampleinfo.csv")
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_character(),
MultiSenders = col_logical(),
MultiRec = col_logical(),
Year = col_double(),
YearUncertain = col_logical(),
WordCount = col_double(),
AddressFormula = col_logical(),
ClosingFormula = col_logical(),
LetterNumber = col_logical(),
Copyright = col_logical(),
Complete = col_logical(),
Updated = col_datetime(format = ""),
`non-normalized` = col_double(),
words = col_double(),
SenderYBirth = col_double(),
SenderYDeath = col_double(),
SenderReligion = col_logical(),
SenderSentLettcont = col_logical(),
SenderRecLettcont = col_logical(),
SenderUpdated = col_datetime(format = ""),
RecipientYBirth = col_double()
# ... with 9 more columns
)
ℹ Use `spec()` for the full column specifications.
# Read 17th-data.csv and keep the rows whose `Ok for festschrift?` value
# is not the string "yess".
# NOTE(review): `!=` evaluates to NA for missing values and filter() drops
# such rows; confirm that this, and the literal "yess", are intended.
d17th <- read_csv("17th-data.csv") %>%
  filter(`Ok for festschrift?` != "yess")
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_character(),
`Earliest letter` = col_double(),
Frequency = col_double(),
`Total DF after` = col_double(),
`Total DF before` = col_double(),
word_daterange_start = col_double(),
sense_daterange_start = col_double(),
MultiSenders = col_logical(),
MultiRec = col_logical(),
Year = col_double(),
YearUncertain = col_logical(),
WordCount = col_double(),
AddressFormula = col_logical(),
ClosingFormula = col_logical(),
CorrespondentNotes = col_logical(),
LetterNumber = col_logical(),
Copyright = col_logical(),
Complete = col_logical(),
Updated = col_datetime(format = ""),
`non-normalized` = col_double(),
words = col_double()
# ... with 10 more columns
)
ℹ Use `spec()` for the full column specifications.
# Read 17th-sampleinfo.csv; this table is passed to g() as `d2` below.
s17th <- read_csv(file = "17th-sampleinfo.csv")
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_character(),
MultiSenders = col_logical(),
MultiRec = col_logical(),
Year = col_double(),
YearUncertain = col_logical(),
WordCount = col_double(),
AddressFormula = col_logical(),
ClosingFormula = col_logical(),
LetterNumber = col_logical(),
Copyright = col_logical(),
Complete = col_logical(),
Updated = col_datetime(format = ""),
`non-normalized` = col_double(),
words = col_double(),
SenderYBirth = col_double(),
SenderYDeath = col_double(),
SenderUpdated = col_datetime(format = ""),
RecipientYBirth = col_double(),
RecipientYDeath = col_double(),
RecipientUpdated = col_datetime(format = ""),
SenderAge = col_double()
# ... with 3 more columns
)
ℹ Use `spec()` for the full column specifications.
# Summarise the rows of `d1` per value of `grouping` and normalise the
# counts by per-group totals computed from `d2`.
#
# Arguments:
#   d1        Data frame containing the grouping column plus `Lemma`,
#             `Sender` and `Letters`. Each row counts as one token.
#   d2        Data frame containing the grouping column plus `Sender` and
#             `WordCount`. Each row counts towards `nletters`.
#   grouping  Bare (unquoted) name of the column to group by; it must be
#             present in both `d1` and `d2`.
#
# Returns an ungrouped data frame with one row per group found in `d2`,
# sorted by descending `tokens_per_10k_words`, with columns ordered as:
#   <grouping>, tokens_per_10k_words, lemmas_per_10k_words,
#   letter_proportion, author_proportion, lemmas, letters, authors,
#   nwords, nletters, nauthors, followed by any remaining column (tokens).
g <- function(d1, d2, grouping) {
  # Column name as a string, used as the join key.
  key <- rlang::as_name(rlang::enquo(grouping))

  # Per-group counts taken from d1.
  hits <- d1 %>%
    group_by({{ grouping }}) %>%
    summarize(
      tokens = n(),
      lemmas = n_distinct(Lemma),
      authors = n_distinct(Sender),
      letters = n_distinct(Letters),
      .groups = "drop"
    )

  # Per-group denominators taken from d2.
  # nwords becomes NA when any WordCount in the group is NA (no na.rm).
  totals <- d2 %>%
    group_by({{ grouping }}) %>%
    summarize(
      nauthors = n_distinct(Sender),
      nwords = sum(WordCount),
      nletters = n(),
      .groups = "drop"
    )

  hits %>%
    # right_join keeps every group of d2, including those absent from d1.
    right_join(totals, by = key) %>%
    mutate(
      # Groups absent from d1 have no rows there: report 0 rather than NA so
      # that the derived rates below are 0 as well.
      across(c(tokens, lemmas, authors, letters), ~ coalesce(.x, 0L)),
      tokens_per_10k_words = tokens / nwords * 10000,
      letter_proportion = letters / nletters,
      author_proportion = authors / nauthors,
      lemmas_per_10k_words = lemmas / nwords * 10000
    ) %>%
    arrange(desc(tokens_per_10k_words)) %>%
    relocate(
      {{ grouping }},
      tokens_per_10k_words,
      lemmas_per_10k_words,
      letter_proportion,
      author_proportion,
      lemmas,
      letters,
      authors,
      nwords,
      nletters,
      nauthors
    )
}
Distinct values in each column
17th
# Number of distinct values (NA counted as a value) in every column of d17th.
# across() replaces the superseded summarize_all(); the output is unchanged.
d17th %>%
  summarize(across(everything(), ~ length(unique(.x))))
18th
# Number of distinct values (NA counted as a value) in every column of d18th.
# across() replaces the superseded summarize_all(); the output is unchanged.
d18th %>%
  summarize(across(everything(), ~ length(unique(.x))))
Sex
17th
# Per-group summary of d17th against s17th, grouped by SenderSex.
g(d1 = d17th, d2 = s17th, grouping = SenderSex)
18th
# Per-group summary of d18th against s18th, grouped by SenderSex.
g(d1 = d18th, d2 = s18th, grouping = SenderSex)
Rank
17th
# Reduce SenderRank to its first character in both tables, then group by it.
g(
  d17th %>% mutate(SenderRank = str_sub(SenderRank, 1, 1)),
  s17th %>% mutate(SenderRank = str_sub(SenderRank, 1, 1)),
  SenderRank
)
18th
# Reduce SenderRank to its first character in both tables, then group by it.
g(
  d18th %>% mutate(SenderRank = str_sub(SenderRank, 1, 1)),
  s18th %>% mutate(SenderRank = str_sub(SenderRank, 1, 1)),
  SenderRank
)
Relationship
17th
# Per-group summary of d17th against s17th, grouped by RelCode.
g(d1 = d17th, d2 = s17th, grouping = RelCode)
18th
# Per-group summary of d18th against s18th, grouped by RelCode.
g(d1 = d18th, d2 = s18th, grouping = RelCode)
Education
17th
# Reduce SenderEduCode to its first character in both tables, then group by it.
g(
  d17th %>% mutate(SenderEduCode = str_sub(SenderEduCode, 1, 1)),
  s17th %>% mutate(SenderEduCode = str_sub(SenderEduCode, 1, 1)),
  SenderEduCode
)
18th
# Reduce SenderEduCode to its first character in both tables, then group by it.
g(
  d18th %>% mutate(SenderEduCode = str_sub(SenderEduCode, 1, 1)),
  s18th %>% mutate(SenderEduCode = str_sub(SenderEduCode, 1, 1)),
  SenderEduCode
)
Region
17th
# Per-group summary of d17th against s17th, grouped by SenderRegion.
g(d1 = d17th, d2 = s17th, grouping = SenderRegion)
18th
# Per-group summary of d18th against s18th, grouped by SenderRegion.
g(d1 = d18th, d2 = s18th, grouping = SenderRegion)
Sender
17th
# Per-group summary of d17th against s17th, grouped by Sender.
g(d1 = d17th, d2 = s17th, grouping = Sender)
18th
# Per-group summary of d18th against s18th, grouped by Sender.
g(d1 = d18th, d2 = s18th, grouping = Sender)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCgpgYGB7cn0KZDE4dGggPC0gcmVhZF9jc3YoIjE4dGgtZGF0YS5jc3YiKSAlPiUgZmlsdGVyKGBPayBmb3IgZmVzdHNjaHJpZnQ/YCE9Inllc3MiKQpzMTh0aCA8LSByZWFkX2NzdigiMTh0aC1zYW1wbGVpbmZvLmNzdiIpCmQxN3RoIDwtIHJlYWRfY3N2KCIxN3RoLWRhdGEuY3N2IikgJT4lIGZpbHRlcihgT2sgZm9yIGZlc3RzY2hyaWZ0P2AhPSJ5ZXNzIikKczE3dGggPC0gcmVhZF9jc3YoIjE3dGgtc2FtcGxlaW5mby5jc3YiKQpgYGAKCmBgYHtyfQpnIDwtIGZ1bmN0aW9uKGQxLGQyLGdyb3VwaW5nKSB7CiAgZ3JvdXBpbmcgPSBlbnF1byhncm91cGluZykKICByZXR1cm4oZDEgJT4lIGdyb3VwX2J5KCEhZ3JvdXBpbmcpICU+JSBzdW1tYXJpemUodG9rZW5zPW4oKSxsZW1tYXM9bl9kaXN0aW5jdChMZW1tYSksYXV0aG9ycz1uX2Rpc3RpbmN0KFNlbmRlciksbGV0dGVycz1uX2Rpc3RpbmN0KExldHRlcnMpKSAlPiUgcmlnaHRfam9pbihkMiAlPiUgZ3JvdXBfYnkoISFncm91cGluZykgJT4lIHN1bW1hcml6ZShuYXV0aG9ycz1uX2Rpc3RpbmN0KFNlbmRlciksbndvcmRzPXN1bShXb3JkQ291bnQpLG5sZXR0ZXJzPW4oKSksYnk9YyhxdW9fbmFtZShncm91cGluZykpKSAlPiUgbXV0YXRlKHRva2Vuc19wZXJfMTBrX3dvcmRzPXRva2Vucy9ud29yZHMqMTAwMDAsbGV0dGVyX3Byb3BvcnRpb249bGV0dGVycy9ubGV0dGVycyxhdXRob3JfcHJvcG9ydGlvbj1hdXRob3JzL25hdXRob3JzLGxlbW1hc19wZXJfMTBrX3dvcmRzPWxlbW1hcy9ud29yZHMqMTAwMDApICU+JSBhcnJhbmdlKGRlc2ModG9rZW5zX3Blcl8xMGtfd29yZHMpKSAlPiUgcmVsb2NhdGUoISFncm91cGluZyx0b2tlbnNfcGVyXzEwa193b3JkcyxsZW1tYXNfcGVyXzEwa193b3JkcyxsZXR0ZXJfcHJvcG9ydGlvbixhdXRob3JfcHJvcG9ydGlvbixsZW1tYXMsbGV0dGVycyxhdXRob3JzLG53b3JkcyxubGV0dGVycyxuYXV0aG9ycykpCn0KYGBgCgojIERpc3RpbmN0IHZhbHVlcyBpbiBlYWNoIGNvbHVtbgoKIyMgMTd0aApgYGB7cn0KZDE3dGggJT4lIHN1bW1hcml6ZV9hbGwofmxlbmd0aCh1bmlxdWUoLikpKQpgYGAKCiMjIDE4dGgKYGBge3J9CmQxOHRoICU+JSBzdW1tYXJpemVfYWxsKH5sZW5ndGgodW5pcXVlKC4pKSkKYGBgCgojIFNleAoKIyMgMTd0aApgYGB7cn0KZyhkMTd0aCxzMTd0aCxTZW5kZXJTZXgpCmBgYAoKIyMgMTh0aApgYGB7cn0KZyhkMTh0aCxzMTh0aCxTZW5kZXJTZXgpCmBgYAojIFJhbmsKCiMjIDE3dGgKYGBge3J9CmcoZDE3dGggJT4lIG11dGF0ZShTZW5kZXJSYW5rPXN0cl9zdWIoU2VuZGVyUmFuaywxLDEpKSxzMTd0aCAlPiUgbXV0YXRlKFNlbmRlclJhbms9c3RyX3N1YihTZW5kZXJSYW5rLDEsMSkpLFNlbmRlclJhbmspCmBgYAoKIyMgMTh0aApgYGB7cn0KZyhkMTh0aCAlPiUgbXV0YXRlKFNlbmRlclJh
bms9c3RyX3N1YihTZW5kZXJSYW5rLDEsMSkpLHMxOHRoICU+JSBtdXRhdGUoU2VuZGVyUmFuaz1zdHJfc3ViKFNlbmRlclJhbmssMSwxKSksU2VuZGVyUmFuaykKYGBgCgojIFJlbGF0aW9uc2hpcAoKIyMgMTd0aApgYGB7cn0KZyhkMTd0aCxzMTd0aCxSZWxDb2RlKQpgYGAKCiMjIDE4dGgKYGBge3J9CmcoZDE4dGgsczE4dGgsUmVsQ29kZSkKYGBgCgoKIyBFZHVjYXRpb24KCiMjIDE3dGgKYGBge3J9CmcoZDE3dGggJT4lIG11dGF0ZShTZW5kZXJFZHVDb2RlPXN0cl9zdWIoU2VuZGVyRWR1Q29kZSwxLDEpKSxzMTd0aCAlPiUgbXV0YXRlKFNlbmRlckVkdUNvZGU9c3RyX3N1YihTZW5kZXJFZHVDb2RlLDEsMSkpLFNlbmRlckVkdUNvZGUpCmBgYAoKIyMgMTh0aApgYGB7cn0KZyhkMTh0aCAlPiUgbXV0YXRlKFNlbmRlckVkdUNvZGU9c3RyX3N1YihTZW5kZXJFZHVDb2RlLDEsMSkpLHMxOHRoICU+JSBtdXRhdGUoU2VuZGVyRWR1Q29kZT1zdHJfc3ViKFNlbmRlckVkdUNvZGUsMSwxKSksU2VuZGVyRWR1Q29kZSkKYGBgCgojIFJlZ2lvbgoKIyMgMTd0aApgYGB7cn0KZyhkMTd0aCxzMTd0aCxTZW5kZXJSZWdpb24pCmBgYAoKIyMgMTh0aApgYGB7cn0KZyhkMTh0aCxzMTh0aCxTZW5kZXJSZWdpb24pCmBgYAoKCiMgU2VuZGVyCgojIyAxN3RoCmBgYHtyfQpnKGQxN3RoLHMxN3RoLFNlbmRlcikKYGBgCgojIyAxOHRoCmBgYHtyfQpnKGQxOHRoLHMxOHRoLFNlbmRlcikKYGBgCg==