This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
This document includes both content as well as the output of any embedded R code chunks within the document.
# Abbreviations of the four gospels.
gospels <- c("Mt", "Mk", "Lk", "Jn")

# Read the Luke workbook into `evm`.
library(readxl)
evm <- read_xlsx(path = "Lk_short.xlsx")
# evm <- evm$Lemma

# Build a tm corpus from the Lemma column of the workbook.
library(tm)
word.corpus <- Corpus(x = VectorSource(x = evm$Lemma))
# Uncomment to check that the corpus has loaded properly:
# inspect(word.corpus)
## # A tibble: 20 × 2
## Lemma n
## <chr> <int>
## 1 ὁ 2646
## 2 καί 1469
## 3 αὐτός 1086
## 4 δέ 542
## 5 λέγω 533
## 6 σύ 446
## 7 ἐν 361
## 8 εἰμί 360
## 9 ἐγώ 282
## 10 οὗτος 229
## 11 εἰς 226
## 12 ὅς 190
## 13 ὅτι 174
## 14 οὐ 172
## 15 πρός 166
## 16 ἐπί 161
## 17 πᾶς 158
## 18 μή 140
## 19 γίνομαι 131
## 20 ἀπό 125
# Reorder the Lemma factor levels by descending count so that the bars are
# drawn from the most frequent lemma to the least frequent one.
top20$Lemma <- with(
  top20,
  factor(Lemma, levels = Lemma[order(n, decreasing = TRUE)])
)

# Bar chart of the lemma counts
library(plotly)
fig <- plot_ly(
  top20,
  type = "bar",
  x = ~Lemma,
  y = ~n,
  marker = list(color = "sandybrown")
)

# Title and axis labels
fig <- layout(
  fig,
  title = "Bar Chart Sorted by Value",
  xaxis = list(title = "Lemma"),
  yaxis = list(title = "Előfordulás")
)

# Display the figure
fig
Ez az ábra interaktív; ha ráhúzzuk az egeret, az aktuális lemmát (szótőt) és annak gyakoriságát mutatja.
# word.counts <- as.matrix(TermDocumentMatrix(word.corpus))
# word.freq <- sort(rowSums(word.counts), decreasing = TRUE)

# Packages used for the word cloud
library(SnowballC)
library(tm)
library(wordcloud2)
library(RColorBrewer)

# Frequency table of the first 188 word forms in `evm`
greek_words <- evm$FullWord[1:188]
word_freqs <- as.data.frame(table(greek_words))

# Drop the stop words of the "grc" list from the "perseus" source
perzsa <- stopwords::stopwords(language = "grc", source = "perseus")
word_freqs_filtered <- filter(word_freqs, !(greek_words %in% perzsa))

# Draw the word cloud; the seed is fixed so the same layout can be reproduced
set.seed(32)
wc <- wordcloud2(
  word_freqs_filtered,
  size = 1,
  gridSize = 8,
  color = "random-dark",
  backgroundColor = "white"
)
wc
(greek_network_Jn_prologue.png)
#Install the zipfR package
#install.packages("zipfR")
#Load the package
library(zipfR)
#Load necessary libraries
library(ggplot2)
# Parameters of the synthetic Zipf distribution
N <- 100 # Total number of ranked elements
s <- 1.5 # Shape parameter (exponent applied to the rank)

# Weight of rank r is 1 / r^s; dividing by the sum of all weights turns the
# weights into probabilities.
zipf_probs <- 1 / (1:N)^s
zipf_probs <- zipf_probs / sum(zipf_probs)
zipf_data <- data.frame(Rank = seq_len(N), Probability = zipf_probs)

# Show the 20 highest-ranked rows
head(zipf_data, n = 20)
## Rank Probability
## 1 1 0.414443506
## 2 2 0.146527907
## 3 3 0.079759690
## 4 4 0.051805438
## 5 5 0.037068954
## 6 6 0.028199309
## 7 7 0.022377846
## 8 8 0.018315988
## 9 9 0.015349759
## 10 10 0.013105854
## 11 11 0.011359947
## 12 12 0.009969961
## 13 13 0.008841996
## 14 14 0.007911763
## 15 15 0.007133924
## 16 16 0.006475680
## 17 17 0.005912783
## 18 18 0.005426960
## 19 19 0.005004203
## 20 20 0.004633619
# Zipf distribution on linear axes.
# `linewidth` replaces `size` for line geoms: since ggplot2 3.4.0 using
# `size` for lines is deprecated and emits a warning.
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  labs(
    title = "Basic Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()

# The same data with both axes on a log10 scale.
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Log/log Scale Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()
# Packages: readxl reads the workbooks; the tidyverse supplies count()
library(readxl)
library(tidyverse)

# For each gospel: read the workbook, count every distinct FullWord (most
# frequent first), keep the first 50 rows and the total number of words.

# Matthew
evm <- read_xlsx(path = "Mt_short.xlsx")
freqtab1 <- count(evm, FullWord, sort = TRUE)
top50Mt <- freqtab1[seq_len(50), ]
Mt_total <- sum(freqtab1[["n"]])

# Mark
evm <- read_xlsx(path = "Mk_short.xlsx")
freqtab2 <- count(evm, FullWord, sort = TRUE)
top50Mk <- freqtab2[seq_len(50), ]
Mk_total <- sum(freqtab2[["n"]])

# Luke
evm <- read_xlsx(path = "Lk_short.xlsx")
freqtab3 <- count(evm, FullWord, sort = TRUE)
top50Lk <- freqtab3[seq_len(50), ]
Lk_total <- sum(freqtab3[["n"]])

# John
evm <- read_xlsx(path = "Jn_short.xlsx")
freqtab4 <- count(evm, FullWord, sort = TRUE)
top50Jn <- freqtab4[seq_len(50), ]
Jn_total <- sum(freqtab4[["n"]])

# Put the four top-50 tables side by side and label the column pairs
evmtab50 <- cbind(top50Mt, top50Mk, top50Lk, top50Jn)
names(evmtab50) <- c(
  "Szó(Mt)", "n",
  "Szó(Mk)", "n",
  "Szó(Lk)", "n",
  "Szó(Jn)", "n"
)
evmtab50
## Szó(Mt) n Szó(Mk) n Szó(Lk) n Szó(Jn) n
## 1 καὶ 1175 καὶ 1085 καὶ 1466 καὶ 827
## 2 ὁ 493 ὁ 237 δὲ 513 ὁ 565
## 3 δὲ 471 αὐτοῦ 173 ὁ 399 ὅτι 271
## 4 τοῦ 294 εἰς 168 τοῦ 380 τοῦ 243
## 5 ἐν 293 δὲ 155 ἐν 360 τὸν 240
## 6 αὐτοῦ 266 τὸν 150 αὐτοῦ 255 ἐν 226
## 7 τὸ 227 ἐν 135 εἶπεν 229 δὲ 203
## 8 οἱ 224 τοῦ 132 εἰς 225 οὖν 200
## 9 τὸν 221 τὸ 131 τὸ 222 Ἰησοῦς 198
## 10 εἰς 218 τὴν 126 τὸν 216 εἰς 187
## 11 τῶν 206 οἱ 123 οἱ 185 αὐτοῦ 173
## 12 τὴν 203 αὐτῷ 121 τῷ 177 αὐτῷ 173
## 13 αὐτῷ 170 αὐτοῖς 120 ὅτι 174 οὐκ 151
## 14 τῷ 149 αὐτὸν 117 τὴν 171 τὸ 150
## 15 ὅτι 140 τῶν 108 πρὸς 161 ἵνα 145
## 16 μὴ 123 ὅτι 102 αὐτῷ 153 οἱ 144
## 17 τῆς 121 τῆς 80 αὐτὸν 145 τὴν 142
## 18 ἡ 121 τῷ 77 τῇ 136 ἐκ 139
## 19 εἶπεν 119 μὴ 72 μὴ 132 λέγει 123
## 20 Ἰησοῦς 111 οὐκ 66 τῶν 131 ἡ 122
## 21 τὰ 110 ἵνα 64 τῆς 119 τῷ 114
## 22 γὰρ 108 πρὸς 63 τοὺς 118 εἶπεν 112
## 23 τοὺς 108 τοὺς 63 ἐπὶ 116 τῶν 109
## 24 τοῖς 108 λέγει 62 σου 104 οὐ 108
## 25 ὑμῖν 107 αὐτόν 61 τὰ 104 ἐστιν 107
## 26 αὐτοῖς 103 γὰρ 60 ἡ 102 μὴ 106
## 27 τῇ 103 ἡ 60 οὐκ 99 ἐγὼ 103
## 28 αὐτῶν 100 εἶπεν 59 αὐτῶν 98 ὑμῖν 103
## 29 ἐπὶ 99 τῇ 59 ὑμῖν 96 αὐτοῖς 100
## 30 οὐκ 98 Ἰησοῦς 58 αὐτοῖς 91 αὐτὸν 100
## 31 σου 98 τοῖς 56 γὰρ 87 με 99
## 32 αὐτὸν 94 τί 54 μου 87 μου 98
## 33 οὐ 92 τὰ 52 ἀπὸ 83 πρὸς 97
## 34 ἀπὸ 92 ἐπὶ 52 ἦν 75 ἦν 96
## 35 τότε 90 ἐστιν 52 τοῖς 74 τῆς 82
## 36 μου 83 οὐ 45 θεοῦ 72 τὰ 80
## 37 ἐστιν 83 αὐτῶν 42 ἐγένετο 69 αὐτόν 76
## 38 ὑμῶν 76 ἐκ 42 ἐστιν 69 τῇ 72
## 39 ἢ 65 εὐθὺς 41 ὑμῶν 67 ὑμεῖς 68
## 40 τί 64 μου 40 αὐτόν 66 περὶ 67
## 41 λέγω 61 σου 39 οὐ 66 ταῦτα 61
## 42 ἰδοὺ 60 ἦν 38 τί 62 γὰρ 60
## 43 ἐὰν 58 ὑμῖν 37 ἰδοὺ 57 ἀπεκρίθη 57
## 44 οὖν 56 μετὰ 36 Ἰησοῦς 55 τοὺς 55
## 45 λέγει 54 ἀπὸ 36 ὡς 51 ἀλλὰ 52
## 46 διὰ 53 ἢ 33 εἰ 50 τοῦτο 51
## 47 εἰ 53 θεοῦ 31 λέγω 50 ἀλλ’ 50
## 48 ἐκ 52 τὰς 31 ἐκ 50 ἀμὴν 50
## 49 λέγων 49 ἔλεγεν 31 αὐτούς 47 εἰ 49
## 50 ἕως 49 διὰ 30 λέγων 47 τί 48
#install.packages("plotly")
library(plotly)

# One row per rank 1..50; y1..y4 hold the count of the word form at that rank
# in Mt, Mk, Lk and Jn respectively.
datus <- data.frame(
  Roll_number = 1:50,
  y1 = top50Mt$n,
  y2 = top50Mk$n,
  y3 = top50Lk$n,
  y4 = top50Jn$n
)

# One line per gospel. Traces are added in the order Mt, Mk, Lk, Jn so that
# the legend matches the `gospels` vector and the column order of `evmtab50`.
fig <- plotly::plot_ly(
  data = datus,
  x = ~Roll_number,
  y = ~y1,
  name = "Mt",
  type = "scatter",
  mode = "lines"
) %>%
  add_trace(y = ~y2, name = "Mk") %>%
  add_trace(y = ~y3, name = "Lk") %>%
  add_trace(y = ~y4, name = "Jn") %>%
  layout(
    title = "Zipf's law and the gospels",
    xaxis = list(title = "Helyezés"),
    yaxis = list(title = "Előfordulás"),
    # A real legend title instead of the template placeholder "Legend Title"
    legend = list(title = list(text = "Evangélium"))
  )
fig
library(DT)
library(readxl)

# Read the city table from the workbook.
# NOTE(review): absolute, machine-specific path; the document only renders
# where this file exists.
bigs <- read_xlsx(
  path = "C:/users/weltl/OneDrive/Dokumentumok/bigcities5col.xlsx"
)
# dtbigs <- datatable(bigs)

# Print the first 100 rows
print(x = bigs, n = 100)
## # A tibble: 822 × 5
## population growthRate city country rank
## <dbl> <chr> <chr> <chr> <dbl>
## 1 37036200 -0.00212 Tokyo Japan 1
## 2 34665600 0.02538 Delhi India 2
## 3 30482100 0.02056 Shanghai China 3
## 4 24652900 0.02996 Dhaka Bangladesh 4
## 5 23074200 0.0199 Cairo Egypt 5
## 6 22990000 0.00804 Sao Paulo Brazil 6
## 7 22752400 0.01098 Mexico City Mexico 7
## 8 22596500 0.01836 Beijing China 8
## 9 22089000 0.01919 Mumbai India 9
## 10 18921600 -0.00242 Osaka Japan 10
## 11 18171200 0.02235 Chongqing China 11
## 12 18076800 0.02426 Karachi Pakistan 12
## 13 17778500 0.04381 Kinshasa DR Congo 13
## 14 17156400 0.03752 Lagos Nigeria 14
## 15 16236700 0.0118 Istanbul Turkey 15
## 16 15845200 0.01762 Kolkata India 16
## 17 15752300 0.00858 Buenos Aires Argentina 17
## 18 15230600 0.01931 Manila Philippines 18
## 19 14878700 0.01978 Guangzhou China 19
## 20 14825800 0.02906 Lahore Pakistan 20
## 21 14704100 0.01612 Tianjin China 21
## 22 14395400 0.02763 Bangalore India 22
## 23 13923200 0.00715 Rio de Janeiro Brazil 23
## 24 13545400 0.01754 Shenzhen China 24
## 25 12737400 0.00197 Moscow Russia 25
## 26 12336000 0.02342 Chennai India 26
## 27 11795800 0.0118 Bogota Colombia 27
## 28 11634100 0.01732 Jakarta Indonesia 28
## 29 11517300 0.01368 Lima Peru 29
## 30 11391700 0.01405 Bangkok Thailand 30
## 31 11346800 0.00622 Paris France 31
## 32 11337900 0.0243 Hyderabad India 32
## 33 10174900 0.02285 Nanjing China 33
## 34 10027900 0.03905 Luanda Angola 34
## 35 10025800 0.0021 Seoul South Korea 35
## 36 9998870 0.01737 Chengdu China 36
## 37 9840740 0.00951 London United Kingdom 37
## 38 9816320 0.02599 Ho Chi Minh City Vietnam 38
## 39 9729740 0.01183 Tehran Iran 39
## 40 9534790 -0.00231 Nagoya Japan 40
## 41 9222080 0.0231 Xi-an China 41
## 42 9061820 0.02342 Ahmedabad India 42
## 43 9000280 0.02095 Kuala Lumpur Malaysia 43
## 44 8986480 0.01532 Wuhan China 44
## 45 8592820 0.029 Suzhou China 45
## 46 8591040 0.02033 Hangzhou China 46
## 47 8581730 0.03015 Surat India 47
## 48 8561520 0.04905 Dar es Salaam Tanzania 48
## 49 8141120 0.02777 Baghdad Iraq 49
## 50 7974270 0.01838 Shenyang China 50
## 51 7952860 0.01692 Riyadh Saudi Arabia 51
## 52 7936530 -0.0198 New York City United States 52
## 53 7817160 0.01456 Foshan China 53
## 54 7772860 0.01273 Dongguan China 54
## 55 7768510 0.00552 Hong Kong Hong Kong 55
## 56 7525720 0.02449 Pune India 56
## 57 7066860 0.01857 Haerbin China 57
## 58 6999460 0.00698 Santiago Chile 58
## 59 6810530 0.00402 Madrid Spain 59
## 60 6754180 0.03242 Khartoum Sudan 60
## 61 6491290 0.00931 Toronto Canada 61
## 62 6444580 0.01901 Johannesburg South Africa 62
## 63 6351680 0.00814 Belo Horizonte Brazil 63
## 64 6347380 0.02089 Dalian China 64
## 65 6217970 0.01857 Qingdao China 65
## 66 6157270 0.00622 Singapore Singapore 66
## 67 6156140 0.02348 Zhengzhou China 67
## 68 6065850 0.02107 Ji nan Shandong China 68
## 69 6056880 0.03242 Abidjan Ivory Coast 69
## 70 5956680 0.04437 Addis Ababa Ethiopia 70
## 71 5813190 0.01813 Yangon Myanmar 71
## 72 5807050 0.01947 Alexandria Egypt 72
## 73 5766990 0.04075 Nairobi Kenya 73
## 74 5733250 0.00373 Barcelona Spain 74
## 75 5653490 0.02537 Chittagong Bangladesh 75
## 76 5602200 0.03137 Hanoi Vietnam 76
## 77 5597340 0.0028 Saint Petersburg Russia 77
## 78 5578580 0.01435 Guadalajara Mexico 78
## 79 5550490 0.0134 Ankara Turkey 79
## 80 5465920 -0.00222 Fukuoka Japan 80
## 81 5391890 0.01435 Melbourne Australia 81
## 82 5272360 0.01482 Monterrey Mexico 82
## 83 5248790 0.01232 Sydney Australia 83
## 84 5132170 0.02521 Urumqi China 84
## 85 5128270 0.01995 Changsha China 85
## 86 5063580 0.01723 Cape Town South Africa 86
## 87 5021600 0.01586 Jiddah Saudi Arabia 87
## 88 4990930 0.01128 Brasilia Brazil 88
## 89 4955680 0.01946 Kunming China 89
## 90 4891020 0.01844 Changchun China 90
## 91 4877020 0.03144 Kabul Afghanistan 91
## 92 4854260 0.03684 Yaounde Cameroon 92
## 93 4830170 0.02176 Hefei China 93
## 94 4770300 0.02371 Ningbo China 94
## 95 4737590 0.01741 Shantou China 95
## 96 4645320 0.03442 Kano Nigeria 96
## 97 4568530 0.01619 Tel Aviv Israel 97
## 98 4563850 0.00639 New Taipei Taiwan 98
## 99 4534990 0.01815 Shijiazhuang China 99
## 100 4411110 0.02381 Jaipur India 100
## # ℹ 722 more rows
library(plotly)
library(dplyr)

# Defined here but not used by the plot below
# (presumably meant for a reference line; TODO confirm).
intercept <- 1000
slope <- -1

# Line plot of population against rank; the city name is attached as text
fig <- plot_ly(
  bigs,
  type = "scatter",
  mode = "lines",
  x = ~rank,
  y = ~population,
  text = ~city,
  name = "Biggest cities of the world"
)
fig
# Hamlet's "To be, or not to be" soliloquy stored as a single multi-line
# string; the literal begins with a line break right after the opening quote.
Hamlet <- "
To be, or not to be: that is the question:
Whether ’tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing end them? To die: to sleep;
No more; and, by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to, ’tis a consummation
Devoutly to be wish’d. To die, to sleep;
To sleep: perchance to dream: ay, there’s the rub;
For in that sleep of death what dreams may come
When we have shuffled off this mortal coil,
Must give us pause. There’s the respect
That makes calamity of so long life;
For who would bear the whips and scorns of time,
The oppressor’s wrong, the proud man’s contumely,
The pangs of dispriz’d love, the law’s delay,
The insolence of office, and the spurns
That patient merit of the unworthy takes,
When he himself might his quietus make
With a bare bodkin? who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscover’d country from whose bourn
No traveller returns, puzzles the will,
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience does make cowards of us all;
And thus the native hue of resolution
Is sicklied o’er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry,
And lose the name of action. Soft you now!
The fair Ophelia! Nymph, in thy orisons
Be all my sins remember’d."
# Print the string; the embedded line breaks are displayed as \n escapes.
Hamlet
## [1] "\nTo be, or not to be: that is the question:\nWhether ’tis nobler in the mind to suffer\nThe slings and arrows of outrageous fortune,\nOr to take arms against a sea of troubles,\nAnd by opposing end them? To die: to sleep;\nNo more; and, by a sleep to say we end\nThe heart-ache and the thousand natural shocks\nThat flesh is heir to, ’tis a consummation\nDevoutly to be wish’d. To die, to sleep;\nTo sleep: perchance to dream: ay, there’s the rub;\nFor in that sleep of death what dreams may come\nWhen we have shuffled off this mortal coil,\nMust give us pause. There’s the respect\nThat makes calamity of so long life;\nFor who would bear the whips and scorns of time,\nThe oppressor’s wrong, the proud man’s contumely,\nThe pangs of dispriz’d love, the law’s delay,\nThe insolence of office, and the spurns\nThat patient merit of the unworthy takes,\nWhen he himself might his quietus make\nWith a bare bodkin? who would fardels bear,\nTo grunt and sweat under a weary life,\nBut that the dread of something after death,\nThe undiscover’d country from whose bourn\nNo traveller returns, puzzles the will,\nAnd makes us rather bear those ills we have\nThan fly to others that we know not of?\nThus conscience does make cowards of us all;\nAnd thus the native hue of resolution\nIs sicklied o’er with the pale cast of thought,\nAnd enterprises of great pith and moment\nWith this regard their currents turn awry,\nAnd lose the name of action. Soft you now!\nThe fair Ophelia! Nymph, in thy orisons\nBe all my sins remember’d."
# Hungarian text of the same soliloquy stored as a single multi-line string
# (presumably János Arany's translation, judging by the variable name).
Arany <- "Lenni vagy nem lenni: az itt a kérdés.
Akkor nemesb-e a lélek, ha tűri
Balsorsa minden nyűgét s nyilait;
Vagy ha kiszáll tenger fájdalma ellen,
S fegyvert ragadva véget vet neki?
Meghalni – elszunnyadni – semmi több;
S egy álom által elvégezni mind
A szív keservét, a test eredendő,
Természetes rázkódtatásait
Oly cél, minőt óhajthat a kegyes.
Meghalni – elszunnyadni – és alunni!
Talán álmodni: ez a bökkenő;
Mert hogy mi álmok jőnek a halálban,
Ha majd leráztuk mind e földi bajt,
Ez visszadöbbent. E meggondolás az,
Mi a nyomort oly hosszan élteti
Mert ki viselné a kor gúny-csapásit,
Zsarnok bosszúját, gőgös ember dölyfét,
Útált szerelme kínját, pör-halasztást,
A hivatalnak packázásait,
S mind a rugást, mellyel méltatlanok
Bántalmazzák a tűrő érdemet
Ha nyúgalomba küldhetné magát
Egy puszta tőrrel? – Ki hordaná e terheket,
Izzadva, nyögve élte fáradalmin,
Ha rettegésünk egy halál utáni
Valamitől – a nem ismert tartomány,
Melyből nem tér meg utazó – le nem
Lohasztja kedvünk, inkább tűrni a
Jelen gonoszt, mint ismeretlenek
Felé sietni? – Ekképp az öntudat
Belőlünk mind gyávát csinál,
S az elszántság természetes szinét
A gondolat halványra betegíti;
Ily kétkedés által sok nagyszerű,
Fontos merény kifordul medriből
S elveszti »tett« nevét. – De csöndesen!
A szép Ophelia jő. – Szép hölgy, imádba
Legyenek foglalva minden bűneim."
# Print the string; the embedded line breaks are displayed as \n escapes.
Arany
## [1] "Lenni vagy nem lenni: az itt a kérdés.\nAkkor nemesb-e a lélek, ha tűri\nBalsorsa minden nyűgét s nyilait;\nVagy ha kiszáll tenger fájdalma ellen,\nS fegyvert ragadva véget vet neki?\nMeghalni – elszunnyadni – semmi több;\nS egy álom által elvégezni mind\nA szív keservét, a test eredendő,\nTermészetes rázkódtatásait\nOly cél, minőt óhajthat a kegyes.\nMeghalni – elszunnyadni – és alunni!\nTalán álmodni: ez a bökkenő;\nMert hogy mi álmok jőnek a halálban,\nHa majd leráztuk mind e földi bajt,\nEz visszadöbbent. E meggondolás az,\nMi a nyomort oly hosszan élteti\nMert ki viselné a kor gúny-csapásit,\nZsarnok bosszúját, gőgös ember dölyfét,\nÚtált szerelme kínját, pör-halasztást,\nA hivatalnak packázásait,\nS mind a rugást, mellyel méltatlanok\nBántalmazzák a tűrő érdemet\nHa nyúgalomba küldhetné magát\nEgy puszta tőrrel? – Ki hordaná e terheket,\nIzzadva, nyögve élte fáradalmin,\nHa rettegésünk egy halál utáni\nValamitől – a nem ismert tartomány,\nMelyből nem tér meg utazó – le nem\nLohasztja kedvünk, inkább tűrni a\nJelen gonoszt, mint ismeretlenek\nFelé sietni? – Ekképp az öntudat\nBelőlünk mind gyávát csinál,\nS az elszántság természetes szinét\nA gondolat halványra betegíti;\nIly kétkedés által sok nagyszerű,\nFontos merény kifordul medriből\nS elveszti »tett« nevét. – De csöndesen!\nA szép Ophelia jő. – Szép hölgy, imádba\nLegyenek foglalva minden bűneim."
## Corpus consisting of 60 documents, showing 60 documents:
##
## Text Types Tokens Sentences Year President FirstName Party
## 1789-Washington 625 1537 23 1789 Washington George none
## 1793-Washington 96 147 4 1793 Washington George none
## 1797-Adams 826 2577 37 1797 Adams John Federalist
## 1801-Jefferson 717 1923 41 1801 Jefferson Thomas Democratic-Republican
## 1805-Jefferson 804 2380 45 1805 Jefferson Thomas Democratic-Republican
## 1809-Madison 535 1261 21 1809 Madison James Democratic-Republican
## 1813-Madison 541 1302 33 1813 Madison James Democratic-Republican
## 1817-Monroe 1040 3677 121 1817 Monroe James Democratic-Republican
## 1821-Monroe 1259 4886 131 1821 Monroe James Democratic-Republican
## 1825-Adams 1003 3147 74 1825 Adams John Quincy Democratic-Republican
## 1829-Jackson 517 1208 25 1829 Jackson Andrew Democratic
## 1833-Jackson 499 1267 29 1833 Jackson Andrew Democratic
## 1837-VanBuren 1315 4158 95 1837 Van Buren Martin Democratic
## 1841-Harrison 1896 9125 210 1841 Harrison William Henry Whig
## 1845-Polk 1334 5186 153 1845 Polk James Knox Whig
## 1849-Taylor 496 1178 22 1849 Taylor Zachary Whig
## 1853-Pierce 1165 3636 104 1853 Pierce Franklin Democratic
## 1857-Buchanan 945 3083 89 1857 Buchanan James Democratic
## 1861-Lincoln 1075 3999 135 1861 Lincoln Abraham Republican
## 1865-Lincoln 360 775 26 1865 Lincoln Abraham Republican
## 1869-Grant 485 1229 40 1869 Grant Ulysses S. Republican
## 1873-Grant 552 1472 43 1873 Grant Ulysses S. Republican
## 1877-Hayes 831 2707 59 1877 Hayes Rutherford B. Republican
## 1881-Garfield 1021 3209 111 1881 Garfield James A. Republican
## 1885-Cleveland 676 1816 44 1885 Cleveland Grover Democratic
## 1889-Harrison 1352 4721 157 1889 Harrison Benjamin Republican
## 1893-Cleveland 821 2125 58 1893 Cleveland Grover Democratic
## 1897-McKinley 1232 4353 130 1897 McKinley William Republican
## 1901-McKinley 854 2437 100 1901 McKinley William Republican
## 1905-Roosevelt 404 1079 33 1905 Roosevelt Theodore Republican
## 1909-Taft 1437 5821 158 1909 Taft William Howard Republican
## 1913-Wilson 658 1882 68 1913 Wilson Woodrow Democratic
## 1917-Wilson 549 1652 59 1917 Wilson Woodrow Democratic
## 1921-Harding 1169 3719 148 1921 Harding Warren G. Republican
## 1925-Coolidge 1220 4440 196 1925 Coolidge Calvin Republican
## 1929-Hoover 1090 3860 158 1929 Hoover Herbert Republican
## 1933-Roosevelt 743 2057 85 1933 Roosevelt Franklin D. Democratic
## 1937-Roosevelt 725 1989 96 1937 Roosevelt Franklin D. Democratic
## 1941-Roosevelt 526 1519 68 1941 Roosevelt Franklin D. Democratic
## 1945-Roosevelt 275 633 27 1945 Roosevelt Franklin D. Democratic
## 1949-Truman 781 2504 116 1949 Truman Harry S. Democratic
## 1953-Eisenhower 900 2743 119 1953 Eisenhower Dwight D. Republican
## 1957-Eisenhower 621 1907 92 1957 Eisenhower Dwight D. Republican
## 1961-Kennedy 566 1541 52 1961 Kennedy John F. Democratic
## 1965-Johnson 568 1710 93 1965 Johnson Lyndon Baines Democratic
## 1969-Nixon 743 2416 103 1969 Nixon Richard Milhous Republican
## 1973-Nixon 544 1995 68 1973 Nixon Richard Milhous Republican
## 1977-Carter 527 1370 52 1977 Carter Jimmy Democratic
## 1981-Reagan 902 2781 129 1981 Reagan Ronald Republican
## 1985-Reagan 925 2909 123 1985 Reagan Ronald Republican
## 1989-Bush 795 2674 141 1989 Bush George Republican
## 1993-Clinton 642 1833 81 1993 Clinton Bill Democratic
## 1997-Clinton 773 2436 111 1997 Clinton Bill Democratic
## 2001-Bush 621 1806 97 2001 Bush George W. Republican
## 2005-Bush 772 2312 99 2005 Bush George W. Republican
## 2009-Obama 938 2689 110 2009 Obama Barack Democratic
## 2013-Obama 814 2317 88 2013 Obama Barack Democratic
## 2017-Trump 582 1660 88 2017 Trump Donald J. Republican
## 2021-Biden 812 2766 216 2021 Biden Joseph R. Democratic
## 2025-Trump 1000 3347 177 2025 Trump Donald J. Republican
# Number of tokens per inaugural speech over time, one label per speech
# showing the president and filled by party.
# library() is used instead of require() so that a missing package stops the
# script immediately instead of returning FALSE and failing later.
library(ggplot2)
# NOTE(review): `korpa` must already exist when this chunk runs; within the
# visible part of the file it is only assigned further down - confirm.
korpa %>%
  summary() %>%
  ggplot(aes(x = Year, y = Tokens, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(
    aes(label = President, fill = Party),
    nudge_x = 0.1,
    nudge_y = 0.1
  ) +
  ggtitle("Inauguration Speeches of Presidents - Number of Words") +
  theme_bw() +
  # theme() is closed here. Previously the colour scale sat inside theme()
  # and was added to element_text() rather than to the plot.
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold")) +
  # The palette is passed as a "package::palette" string and the function is
  # namespace-qualified, because paletteer is not attached in this file.
  # NOTE(review): no layer maps the colour aesthetic (only `fill`), so this
  # scale currently has no visible effect - confirm the intended aesthetic.
  paletteer::scale_color_paletteer_d("nord::frost")
library(quanteda)
library(dplyr)
#summary(korpa)
# Now the heavy part: turn the 'korpa' corpus into the 'mysummary' data frame
# and add the type/token ratio (TTR).
library(quanteda)
library(dplyr)
data("data_corpus_inaugural")
korpa <- corpus(data_corpus_inaugural) # save the `corpus` to a short obj name
docvars_df <- docvars(data_corpus_inaugural)
#docvars_df
#str(docvars_df)
speech_texts <- as.character(data_corpus_inaugural)
#str(speech_texts)

# Per-speech summary (Text, Types, Tokens, Sentences plus the document
# variables) as a plain data frame
mysummary <- as.data.frame(summary(korpa, verbose = FALSE))
#mysummary

# TTR = number of distinct types divided by number of tokens
ms <- mysummary %>% mutate(TTR = Types / Tokens)

# library() instead of require(): a missing package should stop the script
# here rather than return FALSE and fail at the first ggplot() call.
library(ggplot2)

# Drop Washington's second speech (1793). It is removed by document name
# rather than by row position, so the filter does not depend on row order.
ms1 <- ms[ms$Text != "1793-Washington", ]

# options(repr.plot.width = 12, repr.plot.height = 12)
ms1 %>%
  ggplot(aes(x = Year, y = TTR, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(aes(label = President, fill = Party)) +
  ggtitle("Inauguration Speeches of Presidents - T(ypes)/(T)okens (R)atio") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold"))
library(plotly)
library(quanteda)

# Scatter plot of mean sentence length (words per sentence) against mean
# word length (characters per word), one point per inaugural speech.
korpa <- corpus(data_corpus_inaugural) # save the `corpus` to a short obj name
df <- docvars(data_corpus_inaugural)
df$text <- as.character(data_corpus_inaugural)

library(tokenizers)
# One vector of lower-cased word tokens per speech, punctuation stripped
words <- tokenize_words(
  df$text,
  lowercase = TRUE,
  stopwords = NULL,
  strip_punct = TRUE,
  strip_numeric = FALSE,
  simplify = FALSE
)

wc <- count_words(df$text)     # words per speech
st <- count_sentences(df$text) # sentences per speech

# Characters per speech counted inside the word tokens only.
# count_characters() on the raw text also counts spaces and punctuation,
# which inflates the "mean word length".
wl <- unname(vapply(words, function(w) sum(nchar(w)), numeric(1)))
word_n <- lengths(words, use.names = FALSE) # tokens per speech

# `df` is the plot data, so President/Party come from the same rows as the
# computed x/y values instead of from a data frame built in another chunk.
fig <- plot_ly(
  data = df,
  type = "scatter",
  mode = "markers",
  x = round(wc / st, 2),
  y = round(wl / word_n, 2),
  text = ~President,
  color = ~Party,
  colors = c("red", "green", "blue", "tomato", "magenta", "seagreen", "salmon")
) %>%
  layout(
    title = "Sentence length vs. word length",
    xaxis = list(title = "Sentence Length in Words"),
    yaxis = list(title = "Word Length in Characters")
  )
fig
library(quanteda)
library(quanteda.textplots)
library(quanteda.textstats)
library(ggplot2)

# Keyword-in-context search for the phrase "our country" in the inaugural
# speeches given after the year 2000.
corp_us <- corpus(data_corpus_inaugural)

# Keep only the documents whose Year is greater than 2000
my_corpus <- corpus_subset(corp_us, subset = Year > 2000)

# Tokenize, then look the two-word phrase up together with its context
toki <- tokens(my_corpus)
kwic_results <- kwic(toki, pattern = phrase("our country"))
kwic_results
## Keyword-in-context with 37 matches.
## [2001-Bush, 24:25] history, yet common in | our country | . With a simple oath
## [2001-Bush, 311:312] more than the creed of | our country | , it is the inborn
## [2001-Bush, 569:570] embracing these ideals, makes | our country | more, not less,
## [2001-Bush, 673:674] are never small. If | our country | does not lead the cause
## [2001-Bush, 947:948] The enemies of liberty and | our country | should make no mistake:
## [2001-Bush, 1225:1226] our laws. Many in | our country | do not know the pain
## [2001-Bush, 1757:1758] purpose today, to make | our country | more just and generous,
## [2005-Bush, 63:64] the deep commitments that unite | our country | . I am grateful for
## [2005-Bush, 1169:1170] granted in good measure. | Our country | has accepted obligations that are
## [2005-Bush, 1320:1321] have shown their devotion to | our country | in deaths that honored their
## [2005-Bush, 1426:1427] just to the wealth of | our country | but to its character.
## [2005-Bush, 1554:1555] the promise and future of | our country | , we will bring the
## [2005-Bush, 1842:1843] unwanted have worth. And | our country | must abandon all the habits
## [2005-Bush, 1889:1890] the issues and questions before | our country | are many. From the
## [2013-Obama, 666:667] the people, understand that | our country | cannot succeed when a shrinking
## [2013-Obama, 1830:1831] workforce rather than expelled from | our country | . Our journey is not
## [2017-Trump, 46:47] great national effort to rebuild | our country | and restore its promise for
## [2017-Trump, 254:255] but not the citizens of | our country | . Their victories have not
## [2017-Trump, 404:405] forgotten men and women of | our country | will be forgotten no longer
## [2017-Trump, 572:573] too many lives and robbed | our country | of so much unrealized potential
## [2017-Trump, 723:724] strength, and confidence of | our country | has dissipated over the horizon
## [2017-Trump, 1015:1016] back to work - rebuilding | our country | with American hands and American
## [2017-Trump, 1153:1154] and through our loyalty to | our country | , we will rediscover our
## [2017-Trump, 1368:1369] We will not fail. | Our country | will thrive and prosper again
## [2021-Biden, 1869:1870] we are this way, | our country | will be stronger, more
## [2021-Biden, 2174:2175] left behind, and for | our country | . Amen. This is
## [2025-Trump, 79:80] From this day forward, | our country | will flourish and be respected
## [2025-Trump, 404:405] , that have illegally entered | our country | from all over the world
## [2025-Trump, 442:443] , its own people. | Our country | can no longer deliver basic
## [2025-Trump, 538:539] and most powerful individuals in | our country | — some of whom are
## [2025-Trump, 634:635] many cases, to hate | our country | despite the love that we
## [2025-Trump, 976:977] election in the history of | our country | . As our victory showed
## [2025-Trump, 1215:1216] . We will not forget | our country | , we will not forget
## [2025-Trump, 1352:1353] repel the disastrous invasion of | our country | . Under the orders I
## [2025-Trump, 1441:1442] higher responsibility than to defend | our country | from threats and invasions,
## [2025-Trump, 2292:2293] belongs. President McKinley made | our country | very rich through tariffs and
## [2025-Trump, 2897:2898] Washington, D.C., | our country | was forged and built by