This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
This document includes both the content and the output of any embedded R code chunks within the document.
# Abbreviations of the four gospels; the workbooks read in this document are
# named "<abbreviation>_short.xlsx".
gospels <- c("Mt", "Mk", "Lk", "Jn")

# Read the Luke workbook from the working directory.
library(readxl)
evm <- readxl::read_xlsx(path = "Lk_short.xlsx")
# evm <- evm$Lemma

# Wrap the Lemma column in a tm corpus (each vector element is one document).
library(tm)
word.corpus <- Corpus(VectorSource(evm[["Lemma"]]))

# To check that the corpus loaded properly, uncomment the next line.
# inspect(word.corpus)
## # A tibble: 20 × 2
## Lemma n
## <chr> <int>
## 1 ὁ 2646
## 2 καί 1469
## 3 αὐτός 1086
## 4 δέ 542
## 5 λέγω 533
## 6 σύ 446
## 7 ἐν 361
## 8 εἰμί 360
## 9 ἐγώ 282
## 10 οὗτος 229
## 11 εἰς 226
## 12 ὅς 190
## 13 ὅτι 174
## 14 οὐ 172
## 15 πρός 166
## 16 ἐπί 161
## 17 πᾶς 158
## 18 μή 140
## 19 γίνομαι 131
## 20 ἀπό 125
# Reorder the factor levels of Lemma by descending count, so that the bars are
# drawn from the most frequent lemma to the least frequent one.
# NOTE(review): `top20` is not created in the code shown here; presumably it is
# the 20-row Lemma/n frequency table printed just above - confirm.
top20$Lemma <- with(
  top20,
  factor(Lemma, levels = Lemma[order(n, decreasing = TRUE)])
)

# Build the bar chart and set the titles in a single pipeline.
library(plotly)
fig <- top20 %>%
  plot_ly(
    x = ~Lemma,
    y = ~n,
    type = "bar",
    marker = list(color = "sandybrown")
  ) %>%
  layout(
    title = "Bar Chart Sorted by Value",
    xaxis = list(title = "Lemma"),
    yaxis = list(title = "Előfordulás")
  )

# Show the plot
fig
Ez az ábra interaktív; ha ráhúzzuk az egeret, az aktuális lemmát (szótőt) és annak gyakoriságát mutatja.
#word.counts<-as.matrix(TermDocumentMatrix(word.corpus))
#word.freq<-sort(rowSums(word.counts), decreasing=TRUE)
# Packages loaded for the word cloud
library(SnowballC)
library(tm)
library(wordcloud2)
library(RColorBrewer)

# Frequency table of the first 188 word forms currently held in `evm`.
# The column name `greek_words` comes from the variable name passed to table().
greek_words <- evm$FullWord[seq_len(188)]
word_freqs <- as.data.frame(table(greek_words))

# Drop the Ancient Greek ("grc") stopwords of the "perseus" source.
perzsa <- stopwords::stopwords(language = "grc", source = "perseus")
word_freqs_filtered <- filter(word_freqs, !(greek_words %in% perzsa))

# Build and show the word cloud; the seed is set with the intention of making
# the result reproducible.
set.seed(32)
wc <- wordcloud2(
  word_freqs_filtered,
  size = 1,
  gridSize = 8,
  color = "random-dark",
  backgroundColor = "white"
)
wc
![](greek_network_Jn_prologue.png)
#Install the zipfR package
#install.packages("zipfR")
#Load the package
library(zipfR)
#Load necessary libraries
library(ggplot2)
# Parameters of the illustrative Zipf distribution
N <- 100  # number of ranks
s <- 1.5  # exponent (shape parameter)

# Probability of rank k is proportional to 1 / k^s; dividing by the sum of the
# weights makes the probabilities add up to 1.
zipf_probs <- local({
  weights <- 1 / seq_len(N)^s
  weights / sum(weights)
})
zipf_data <- data.frame(Rank = seq_len(N), Probability = zipf_probs)

# Show the 20 highest-ranked entries
head(zipf_data, n = 20)
## Rank Probability
## 1 1 0.414443506
## 2 2 0.146527907
## 3 3 0.079759690
## 4 4 0.051805438
## 5 5 0.037068954
## 6 6 0.028199309
## 7 7 0.022377846
## 8 8 0.018315988
## 9 9 0.015349759
## 10 10 0.013105854
## 11 11 0.011359947
## 12 12 0.009969961
## 13 13 0.008841996
## 14 14 0.007911763
## 15 15 0.007133924
## 16 16 0.006475680
## 17 17 0.005912783
## 18 18 0.005426960
## 19 19 0.005004203
## 20 20 0.004633619
# Zipf distribution on linear axes.
# `linewidth` is used for the line thickness: using `size` for lines is
# deprecated since ggplot2 3.4.0 and triggers a deprecation warning.
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  labs(
    title = "Basic Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()

# The same data with both axes on a log10 scale.
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Log/log Scale Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()
# Reader for the workbooks and the tidyverse verbs used below
library(readxl)
library(tidyverse)

# Frequency table of the word forms (FullWord column) of one gospel, sorted
# from the most frequent form to the least frequent one.
count_full_words <- function(gospel_data) {
  count(gospel_data, FullWord, sort = TRUE)
}

# For each gospel: read the workbook, count the word forms, keep the first 50
# rows, and store the total number of counted tokens.

# Matthew
evm <- read_xlsx("Mt_short.xlsx")
freqtab1 <- count_full_words(evm)
top50Mt <- freqtab1[seq_len(50), ]
Mt_total <- sum(freqtab1[["n"]])

# Mark
evm <- read_xlsx("Mk_short.xlsx")
freqtab2 <- count_full_words(evm)
top50Mk <- freqtab2[seq_len(50), ]
Mk_total <- sum(freqtab2[["n"]])

# Luke
evm <- read_xlsx("Lk_short.xlsx")
freqtab3 <- count_full_words(evm)
top50Lk <- freqtab3[seq_len(50), ]
Lk_total <- sum(freqtab3[["n"]])

# John (after this, `evm` holds the John workbook)
evm <- read_xlsx("Jn_short.xlsx")
freqtab4 <- count_full_words(evm)
top50Jn <- freqtab4[seq_len(50), ]
Jn_total <- sum(freqtab4[["n"]])

# Put the four top-50 tables side by side and label the column pairs.
evmtab50 <- cbind(top50Mt, top50Mk, top50Lk, top50Jn)
names(evmtab50) <- c(
  "Szó(Mt)", "n", "Szó(Mk)", "n", "Szó(Lk)", "n", "Szó(Jn)", "n"
)
evmtab50
## Szó(Mt) n Szó(Mk) n Szó(Lk) n Szó(Jn) n
## 1 καὶ 1175 καὶ 1085 καὶ 1466 καὶ 827
## 2 ὁ 493 ὁ 237 δὲ 513 ὁ 565
## 3 δὲ 471 αὐτοῦ 173 ὁ 399 ὅτι 271
## 4 τοῦ 294 εἰς 168 τοῦ 380 τοῦ 243
## 5 ἐν 293 δὲ 155 ἐν 360 τὸν 240
## 6 αὐτοῦ 266 τὸν 150 αὐτοῦ 255 ἐν 226
## 7 τὸ 227 ἐν 135 εἶπεν 229 δὲ 203
## 8 οἱ 224 τοῦ 132 εἰς 225 οὖν 200
## 9 τὸν 221 τὸ 131 τὸ 222 Ἰησοῦς 198
## 10 εἰς 218 τὴν 126 τὸν 216 εἰς 187
## 11 τῶν 206 οἱ 123 οἱ 185 αὐτοῦ 173
## 12 τὴν 203 αὐτῷ 121 τῷ 177 αὐτῷ 173
## 13 αὐτῷ 170 αὐτοῖς 120 ὅτι 174 οὐκ 151
## 14 τῷ 149 αὐτὸν 117 τὴν 171 τὸ 150
## 15 ὅτι 140 τῶν 108 πρὸς 161 ἵνα 145
## 16 μὴ 123 ὅτι 102 αὐτῷ 153 οἱ 144
## 17 τῆς 121 τῆς 80 αὐτὸν 145 τὴν 142
## 18 ἡ 121 τῷ 77 τῇ 136 ἐκ 139
## 19 εἶπεν 119 μὴ 72 μὴ 132 λέγει 123
## 20 Ἰησοῦς 111 οὐκ 66 τῶν 131 ἡ 122
## 21 τὰ 110 ἵνα 64 τῆς 119 τῷ 114
## 22 γὰρ 108 πρὸς 63 τοὺς 118 εἶπεν 112
## 23 τοὺς 108 τοὺς 63 ἐπὶ 116 τῶν 109
## 24 τοῖς 108 λέγει 62 σου 104 οὐ 108
## 25 ὑμῖν 107 αὐτόν 61 τὰ 104 ἐστιν 107
## 26 αὐτοῖς 103 γὰρ 60 ἡ 102 μὴ 106
## 27 τῇ 103 ἡ 60 οὐκ 99 ἐγὼ 103
## 28 αὐτῶν 100 εἶπεν 59 αὐτῶν 98 ὑμῖν 103
## 29 ἐπὶ 99 τῇ 59 ὑμῖν 96 αὐτοῖς 100
## 30 οὐκ 98 Ἰησοῦς 58 αὐτοῖς 91 αὐτὸν 100
## 31 σου 98 τοῖς 56 γὰρ 87 με 99
## 32 αὐτὸν 94 τί 54 μου 87 μου 98
## 33 οὐ 92 τὰ 52 ἀπὸ 83 πρὸς 97
## 34 ἀπὸ 92 ἐπὶ 52 ἦν 75 ἦν 96
## 35 τότε 90 ἐστιν 52 τοῖς 74 τῆς 82
## 36 μου 83 οὐ 45 θεοῦ 72 τὰ 80
## 37 ἐστιν 83 αὐτῶν 42 ἐγένετο 69 αὐτόν 76
## 38 ὑμῶν 76 ἐκ 42 ἐστιν 69 τῇ 72
## 39 ἢ 65 εὐθὺς 41 ὑμῶν 67 ὑμεῖς 68
## 40 τί 64 μου 40 αὐτόν 66 περὶ 67
## 41 λέγω 61 σου 39 οὐ 66 ταῦτα 61
## 42 ἰδοὺ 60 ἦν 38 τί 62 γὰρ 60
## 43 ἐὰν 58 ὑμῖν 37 ἰδοὺ 57 ἀπεκρίθη 57
## 44 οὖν 56 μετὰ 36 Ἰησοῦς 55 τοὺς 55
## 45 λέγει 54 ἀπὸ 36 ὡς 51 ἀλλὰ 52
## 46 διὰ 53 ἢ 33 εἰ 50 τοῦτο 51
## 47 εἰ 53 θεοῦ 31 λέγω 50 ἀλλ’ 50
## 48 ἐκ 52 τὰς 31 ἐκ 50 ἀμὴν 50
## 49 λέγων 49 ἔλεγεν 31 αὐτούς 47 εἰ 49
## 50 ἕως 49 διὰ 30 λέγων 47 τί 48
#install.packages("plotly")
library(plotly)

# One row per rank (1-50); y1..y4 hold the count of the word form at that rank
# in Matthew, Mark, Luke and John respectively.
datus <- data.frame(
  Roll_number = 1:50,
  y1 = top50Mt$n,
  y2 = top50Mk$n,
  y3 = top50Lk$n,
  y4 = top50Jn$n
)

# Rank/frequency curves of the four gospels.
# Changes against the earlier version: traces are added in the same
# Mt, Mk, Lk, Jn order as the table columns, the placeholder legend title
# 'Legend Title' is replaced by a real one, and the apostrophe in "Zipf's"
# is restored.
fig <- plotly::plot_ly(
  data = datus,
  x = ~Roll_number,
  y = ~y1,
  name = "Mt",
  type = "scatter",
  mode = "lines"
) %>%
  add_trace(y = ~y2, name = "Mk") %>%
  add_trace(y = ~y3, name = "Lk") %>%
  add_trace(y = ~y4, name = "Jn") %>%
  layout(
    title = "Zipf's law and the gospels",
    xaxis = list(title = "Helyezés"),
    yaxis = list(title = "Előfordulás"),
    legend = list(title = list(text = "Evangélium"))
  )
fig
library(DT)
library(readxl)

# Read the city table; as printed below it has the columns population, city,
# country, cca2 and rank.
# NOTE(review): this is an absolute, machine-specific path, unlike the relative
# paths used for the gospel workbooks - consider moving the file next to the
# document.
bigs <- readxl::read_xlsx(
  path = "C:/users/weltl/Dokumentumok/bigcities5col.xlsx"
)
#dtbigs <- datatable(bigs)

# Print the first 100 rows of the tibble
print(x = bigs, n = 100)
## # A tibble: 822 × 5
## population city country cca2 rank
## <dbl> <chr> <chr> <chr> <dbl>
## 1 37036200 Tokyo Japan JP 1
## 2 34665600 Delhi India IN 2
## 3 30482100 Shanghai China CN 3
## 4 24652900 Dhaka Bangladesh BD 4
## 5 23074200 Cairo Egypt EG 5
## 6 22990000 Sao Paulo Brazil BR 6
## 7 22752400 Mexico City Mexico MX 7
## 8 22596500 Beijing China CN 8
## 9 22089000 Mumbai India IN 9
## 10 18921600 Osaka Japan JP 10
## 11 18171200 Chongqing China CN 11
## 12 18076800 Karachi Pakistan PK 12
## 13 17778500 Kinshasa DR Congo CD 13
## 14 17156400 Lagos Nigeria NG 14
## 15 16236700 Istanbul Turkey TR 15
## 16 15845200 Kolkata India IN 16
## 17 15752300 Buenos Aires Argentina AR 17
## 18 15230600 Manila Philippines PH 18
## 19 14878700 Guangzhou China CN 19
## 20 14825800 Lahore Pakistan PK 20
## 21 14704100 Tianjin China CN 21
## 22 14395400 Bangalore India IN 22
## 23 13923200 Rio de Janeiro Brazil BR 23
## 24 13545400 Shenzhen China CN 24
## 25 12737400 Moscow Russia RU 25
## 26 12336000 Chennai India IN 26
## 27 11795800 Bogota Colombia CO 27
## 28 11634100 Jakarta Indonesia ID 28
## 29 11517300 Lima Peru PE 29
## 30 11391700 Bangkok Thailand TH 30
## 31 11346800 Paris France FR 31
## 32 11337900 Hyderabad India IN 32
## 33 10174900 Nanjing China CN 33
## 34 10027900 Luanda Angola AO 34
## 35 10025800 Seoul South Korea KR 35
## 36 9998870 Chengdu China CN 36
## 37 9840740 London United Kingdom GB 37
## 38 9816320 Ho Chi Minh City Vietnam VN 38
## 39 9729740 Tehran Iran IR 39
## 40 9534790 Nagoya Japan JP 40
## 41 9222080 Xi-an China CN 41
## 42 9061820 Ahmedabad India IN 42
## 43 9000280 Kuala Lumpur Malaysia MY 43
## 44 8986480 Wuhan China CN 44
## 45 8592820 Suzhou China CN 45
## 46 8591040 Hangzhou China CN 46
## 47 8581730 Surat India IN 47
## 48 8561520 Dar es Salaam Tanzania TZ 48
## 49 8141120 Baghdad Iraq IQ 49
## 50 7974270 Shenyang China CN 50
## 51 7952860 Riyadh Saudi Arabia SA 51
## 52 7936530 New York City United States US 52
## 53 7817160 Foshan China CN 53
## 54 7772860 Dongguan China CN 54
## 55 7768510 Hong Kong Hong Kong HK 55
## 56 7525720 Pune India IN 56
## 57 7066860 Haerbin China CN 57
## 58 6999460 Santiago Chile CL 58
## 59 6810530 Madrid Spain ES 59
## 60 6754180 Khartoum Sudan SD 60
## 61 6491290 Toronto Canada CA 61
## 62 6444580 Johannesburg South Africa ZA 62
## 63 6351680 Belo Horizonte Brazil BR 63
## 64 6347380 Dalian China CN 64
## 65 6217970 Qingdao China CN 65
## 66 6157270 Singapore Singapore SG 66
## 67 6156140 Zhengzhou China CN 67
## 68 6065850 Ji nan Shandong China CN 68
## 69 6056880 Abidjan Ivory Coast CI 69
## 70 5956680 Addis Ababa Ethiopia ET 70
## 71 5813190 Yangon Myanmar MM 71
## 72 5807050 Alexandria Egypt EG 72
## 73 5766990 Nairobi Kenya KE 73
## 74 5733250 Barcelona Spain ES 74
## 75 5653490 Chittagong Bangladesh BD 75
## 76 5602200 Hanoi Vietnam VN 76
## 77 5597340 Saint Petersburg Russia RU 77
## 78 5578580 Guadalajara Mexico MX 78
## 79 5550490 Ankara Turkey TR 79
## 80 5465920 Fukuoka Japan JP 80
## 81 5391890 Melbourne Australia AU 81
## 82 5272360 Monterrey Mexico MX 82
## 83 5248790 Sydney Australia AU 83
## 84 5132170 Urumqi China CN 84
## 85 5128270 Changsha China CN 85
## 86 5063580 Cape Town South Africa ZA 86
## 87 5021600 Jiddah Saudi Arabia SA 87
## 88 4990930 Brasilia Brazil BR 88
## 89 4955680 Kunming China CN 89
## 90 4891020 Changchun China CN 90
## 91 4877020 Kabul Afghanistan AF 91
## 92 4854260 Yaounde Cameroon CM 92
## 93 4830170 Hefei China CN 93
## 94 4770300 Ningbo China CN 94
## 95 4737590 Shantou China CN 95
## 96 4645320 Kano Nigeria NG 96
## 97 4568530 Tel Aviv Israel IL 97
## 98 4563850 New Taipei Taiwan TW 98
## 99 4534990 Shijiazhuang China CN 99
## 100 4411110 Jaipur India IN 100
## # ℹ 722 more rows
library(plotly)
library(dplyr)

# NOTE(review): slope and intercept are assigned but not used in this chunk.
slope <- -1
intercept <- 1000

# Population against rank for the cities in `bigs`, drawn as a line; the city
# name is attached to each point as its text.
fig <- bigs %>%
  plot_ly(
    x = ~rank,
    y = ~population,
    text = ~city,
    name = "Biggest cities of the world",
    type = "scatter",
    mode = "lines"
  )
fig
# Hamlet's "To be, or not to be" soliloquy stored as a single multi-line
# string. The literal starts with a line break, so the value begins with "\n".
# The bare name on the last line prints the string.
Hamlet <- "
To be, or not to be: that is the question:
Whether ’tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing end them? To die: to sleep;
No more; and, by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to, ’tis a consummation
Devoutly to be wish’d. To die, to sleep;
To sleep: perchance to dream: ay, there’s the rub;
For in that sleep of death what dreams may come
When we have shuffled off this mortal coil,
Must give us pause. There’s the respect
That makes calamity of so long life;
For who would bear the whips and scorns of time,
The oppressor’s wrong, the proud man’s contumely,
The pangs of dispriz’d love, the law’s delay,
The insolence of office, and the spurns
That patient merit of the unworthy takes,
When he himself might his quietus make
With a bare bodkin? who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscover’d country from whose bourn
No traveller returns, puzzles the will,
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience does make cowards of us all;
And thus the native hue of resolution
Is sicklied o’er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry,
And lose the name of action. Soft you now!
The fair Ophelia! Nymph, in thy orisons
Be all my sins remember’d."
Hamlet
## [1] "\nTo be, or not to be: that is the question:\nWhether ’tis nobler in the mind to suffer\nThe slings and arrows of outrageous fortune,\nOr to take arms against a sea of troubles,\nAnd by opposing end them? To die: to sleep;\nNo more; and, by a sleep to say we end\nThe heart-ache and the thousand natural shocks\nThat flesh is heir to, ’tis a consummation\nDevoutly to be wish’d. To die, to sleep;\nTo sleep: perchance to dream: ay, there’s the rub;\nFor in that sleep of death what dreams may come\nWhen we have shuffled off this mortal coil,\nMust give us pause. There’s the respect\nThat makes calamity of so long life;\nFor who would bear the whips and scorns of time,\nThe oppressor’s wrong, the proud man’s contumely,\nThe pangs of dispriz’d love, the law’s delay,\nThe insolence of office, and the spurns\nThat patient merit of the unworthy takes,\nWhen he himself might his quietus make\nWith a bare bodkin? who would fardels bear,\nTo grunt and sweat under a weary life,\nBut that the dread of something after death,\nThe undiscover’d country from whose bourn\nNo traveller returns, puzzles the will,\nAnd makes us rather bear those ills we have\nThan fly to others that we know not of?\nThus conscience does make cowards of us all;\nAnd thus the native hue of resolution\nIs sicklied o’er with the pale cast of thought,\nAnd enterprises of great pith and moment\nWith this regard their currents turn awry,\nAnd lose the name of action. Soft you now!\nThe fair Ophelia! Nymph, in thy orisons\nBe all my sins remember’d."
# Hungarian text of the same soliloquy stored as a single multi-line string
# (the variable name presumably refers to the translator, János Arany -
# confirm). The bare name on the last line prints the string.
Arany <- "Lenni vagy nem lenni: az itt a kérdés.
Akkor nemesb-e a lélek, ha tűri
Balsorsa minden nyűgét s nyilait;
Vagy ha kiszáll tenger fájdalma ellen,
S fegyvert ragadva véget vet neki?
Meghalni – elszunnyadni – semmi több;
S egy álom által elvégezni mind
A szív keservét, a test eredendő,
Természetes rázkódtatásait
Oly cél, minőt óhajthat a kegyes.
Meghalni – elszunnyadni – és alunni!
Talán álmodni: ez a bökkenő;
Mert hogy mi álmok jőnek a halálban,
Ha majd leráztuk mind e földi bajt,
Ez visszadöbbent. E meggondolás az,
Mi a nyomort oly hosszan élteti
Mert ki viselné a kor gúny-csapásit,
Zsarnok bosszúját, gőgös ember dölyfét,
Útált szerelme kínját, pör-halasztást,
A hivatalnak packázásait,
S mind a rugást, mellyel méltatlanok
Bántalmazzák a tűrő érdemet
Ha nyúgalomba küldhetné magát
Egy puszta tőrrel? – Ki hordaná e terheket,
Izzadva, nyögve élte fáradalmin,
Ha rettegésünk egy halál utáni
Valamitől – a nem ismert tartomány,
Melyből nem tér meg utazó – le nem
Lohasztja kedvünk, inkább tűrni a
Jelen gonoszt, mint ismeretlenek
Felé sietni? – Ekképp az öntudat
Belőlünk mind gyávát csinál,
S az elszántság természetes szinét
A gondolat halványra betegíti;
Ily kétkedés által sok nagyszerű,
Fontos merény kifordul medriből
S elveszti »tett« nevét. – De csöndesen!
A szép Ophelia jő. – Szép hölgy, imádba
Legyenek foglalva minden bűneim."
Arany
## [1] "Lenni vagy nem lenni: az itt a kérdés.\nAkkor nemesb-e a lélek, ha tűri\nBalsorsa minden nyűgét s nyilait;\nVagy ha kiszáll tenger fájdalma ellen,\nS fegyvert ragadva véget vet neki?\nMeghalni – elszunnyadni – semmi több;\nS egy álom által elvégezni mind\nA szív keservét, a test eredendő,\nTermészetes rázkódtatásait\nOly cél, minőt óhajthat a kegyes.\nMeghalni – elszunnyadni – és alunni!\nTalán álmodni: ez a bökkenő;\nMert hogy mi álmok jőnek a halálban,\nHa majd leráztuk mind e földi bajt,\nEz visszadöbbent. E meggondolás az,\nMi a nyomort oly hosszan élteti\nMert ki viselné a kor gúny-csapásit,\nZsarnok bosszúját, gőgös ember dölyfét,\nÚtált szerelme kínját, pör-halasztást,\nA hivatalnak packázásait,\nS mind a rugást, mellyel méltatlanok\nBántalmazzák a tűrő érdemet\nHa nyúgalomba küldhetné magát\nEgy puszta tőrrel? – Ki hordaná e terheket,\nIzzadva, nyögve élte fáradalmin,\nHa rettegésünk egy halál utáni\nValamitől – a nem ismert tartomány,\nMelyből nem tér meg utazó – le nem\nLohasztja kedvünk, inkább tűrni a\nJelen gonoszt, mint ismeretlenek\nFelé sietni? – Ekképp az öntudat\nBelőlünk mind gyávát csinál,\nS az elszántság természetes szinét\nA gondolat halványra betegíti;\nIly kétkedés által sok nagyszerű,\nFontos merény kifordul medriből\nS elveszti »tett« nevét. – De csöndesen!\nA szép Ophelia jő. – Szép hölgy, imádba\nLegyenek foglalva minden bűneim."
## Corpus consisting of 60 documents, showing 60 documents:
##
## Text Types Tokens Sentences Year President FirstName Party
## 1789-Washington 625 1537 23 1789 Washington George none
## 1793-Washington 96 147 4 1793 Washington George none
## 1797-Adams 826 2577 37 1797 Adams John Federalist
## 1801-Jefferson 717 1923 41 1801 Jefferson Thomas Democratic-Republican
## 1805-Jefferson 804 2380 45 1805 Jefferson Thomas Democratic-Republican
## 1809-Madison 535 1261 21 1809 Madison James Democratic-Republican
## 1813-Madison 541 1302 33 1813 Madison James Democratic-Republican
## 1817-Monroe 1040 3677 121 1817 Monroe James Democratic-Republican
## 1821-Monroe 1259 4886 131 1821 Monroe James Democratic-Republican
## 1825-Adams 1003 3147 74 1825 Adams John Quincy Democratic-Republican
## 1829-Jackson 517 1208 25 1829 Jackson Andrew Democratic
## 1833-Jackson 499 1267 29 1833 Jackson Andrew Democratic
## 1837-VanBuren 1315 4158 95 1837 Van Buren Martin Democratic
## 1841-Harrison 1896 9125 210 1841 Harrison William Henry Whig
## 1845-Polk 1334 5186 153 1845 Polk James Knox Whig
## 1849-Taylor 496 1178 22 1849 Taylor Zachary Whig
## 1853-Pierce 1165 3636 104 1853 Pierce Franklin Democratic
## 1857-Buchanan 945 3083 89 1857 Buchanan James Democratic
## 1861-Lincoln 1075 3999 135 1861 Lincoln Abraham Republican
## 1865-Lincoln 360 775 26 1865 Lincoln Abraham Republican
## 1869-Grant 485 1229 40 1869 Grant Ulysses S. Republican
## 1873-Grant 552 1472 43 1873 Grant Ulysses S. Republican
## 1877-Hayes 831 2707 59 1877 Hayes Rutherford B. Republican
## 1881-Garfield 1021 3209 111 1881 Garfield James A. Republican
## 1885-Cleveland 676 1816 44 1885 Cleveland Grover Democratic
## 1889-Harrison 1352 4721 157 1889 Harrison Benjamin Republican
## 1893-Cleveland 821 2125 58 1893 Cleveland Grover Democratic
## 1897-McKinley 1232 4353 130 1897 McKinley William Republican
## 1901-McKinley 854 2437 100 1901 McKinley William Republican
## 1905-Roosevelt 404 1079 33 1905 Roosevelt Theodore Republican
## 1909-Taft 1437 5821 158 1909 Taft William Howard Republican
## 1913-Wilson 658 1882 68 1913 Wilson Woodrow Democratic
## 1917-Wilson 549 1652 59 1917 Wilson Woodrow Democratic
## 1921-Harding 1169 3719 148 1921 Harding Warren G. Republican
## 1925-Coolidge 1220 4440 196 1925 Coolidge Calvin Republican
## 1929-Hoover 1090 3860 158 1929 Hoover Herbert Republican
## 1933-Roosevelt 743 2057 85 1933 Roosevelt Franklin D. Democratic
## 1937-Roosevelt 725 1989 96 1937 Roosevelt Franklin D. Democratic
## 1941-Roosevelt 526 1519 68 1941 Roosevelt Franklin D. Democratic
## 1945-Roosevelt 275 633 27 1945 Roosevelt Franklin D. Democratic
## 1949-Truman 781 2504 116 1949 Truman Harry S. Democratic
## 1953-Eisenhower 900 2743 119 1953 Eisenhower Dwight D. Republican
## 1957-Eisenhower 621 1907 92 1957 Eisenhower Dwight D. Republican
## 1961-Kennedy 566 1541 52 1961 Kennedy John F. Democratic
## 1965-Johnson 568 1710 93 1965 Johnson Lyndon Baines Democratic
## 1969-Nixon 743 2416 103 1969 Nixon Richard Milhous Republican
## 1973-Nixon 544 1995 68 1973 Nixon Richard Milhous Republican
## 1977-Carter 527 1370 52 1977 Carter Jimmy Democratic
## 1981-Reagan 902 2781 129 1981 Reagan Ronald Republican
## 1985-Reagan 925 2909 123 1985 Reagan Ronald Republican
## 1989-Bush 795 2674 141 1989 Bush George Republican
## 1993-Clinton 642 1833 81 1993 Clinton Bill Democratic
## 1997-Clinton 773 2436 111 1997 Clinton Bill Democratic
## 2001-Bush 621 1806 97 2001 Bush George W. Republican
## 2005-Bush 772 2312 99 2005 Bush George W. Republican
## 2009-Obama 938 2689 110 2009 Obama Barack Democratic
## 2013-Obama 814 2317 88 2013 Obama Barack Democratic
## 2017-Trump 582 1660 88 2017 Trump Donald J. Republican
## 2021-Biden 812 2766 216 2021 Biden Joseph R. Democratic
## 2025-Trump 1000 3347 177 2025 Trump Donald J. Republican
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the chunk fail later.
library(ggplot2)

# Number of tokens per inaugural speech over time, labelled by president and
# with the label filled by party.
# NOTE(review): `korpa` must already exist when this chunk runs; in the code
# shown here it is only assigned further down - confirm the chunk order.
#
# Fixes against the earlier version:
# * theme() is now closed right after element_text(); previously a scale was
#   added to element_text() inside the theme() call.
# * scale_color_paletteer_d(nord::frost) is dropped: paletteer is never
#   loaded in the code shown here, and no colour aesthetic is mapped (the
#   labels use `fill`), so the scale had nothing to act on.
korpa %>%
  summary() %>%
  ggplot(aes(x = Year, y = Tokens, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(
    aes(label = President, fill = Party),
    nudge_x = 0.1,
    nudge_y = 0.1
  ) +
  ggtitle("Inauguration Speeches of Presidents - Number of Words") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold"))
library(quanteda)
library(dplyr)
# library() instead of require(): fail immediately if ggplot2 is missing.
library(ggplot2)
#summary(korpa)

# Now the heavy part: build the 'mysummary' data frame from the 'korpa'
# corpus and add the type/token ratio (TTR) to it.
data("data_corpus_inaugural")
korpa <- corpus(data_corpus_inaugural) # save the `corpus` to a short obj name
docvars_df <- docvars(data_corpus_inaugural)
#docvars_df
#str(docvars_df)
speech_texts <- as.character(data_corpus_inaugural)
#str(speech_texts)
mysummary <- as.data.frame(summary(korpa, verbose = FALSE))
#mysummary

# TTR = number of distinct types divided by number of tokens, per speech.
ms <- mysummary %>% mutate(TTR = Types / Tokens)

# Drop the second row of ms, i.e. Washington's 2nd speech. In the summary
# printed above it has only 147 tokens, presumably an outlier for TTR.
ms1 <- ms[-2, ]

# options(repr.plot.width = 12, repr.plot.height = 12)
ms1 %>%
  ggplot(aes(x = Year, y = TTR, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(aes(label = President, fill = Party)) +
  ggtitle("Inauguration Speeches of Presidents - T(ypes)/(T)okens (R)atio") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold"))
library(plotly)
library(quanteda)
library(tokenizers)

# Document variables of the inaugural corpus plus the raw speech texts.
korpa <- corpus(data_corpus_inaugural) # save the `corpus` to a short obj name
df <- docvars(data_corpus_inaugural)
df$text <- as.character(data_corpus_inaugural)

# Lower-cased word tokens of every speech, punctuation stripped, numbers kept.
words <- tokenize_words(
  df$text,
  lowercase = TRUE,
  stopwords = NULL,
  strip_punct = TRUE,
  strip_numeric = FALSE,
  simplify = FALSE
)

# Per-speech counts of words, sentences and characters.
wc <- count_words(df$text)
st <- count_sentences(df$text)
wl <- count_characters(df$text)

# Mean word length per speech, computed from the word tokens themselves.
# The earlier version plotted wl / wc, i.e. all characters of the text
# (spaces and punctuation included) divided by the word count, which
# overstates the word length; `words` was computed but never used.
mean_wl <- vapply(words, function(w) mean(nchar(w)), numeric(1))

# x: mean sentence length in words; y: mean word length in characters.
# NOTE(review): `ms` (one row per speech, with President and Party) is taken
# from the preceding TTR chunk.
fig <- plot_ly(
  data = ms, type = "scatter", mode = "markers",
  x = round(wc / st, 2), y = round(mean_wl, 2),
  text = ~President,
  color = ~Party,
  colors = c(
    "red", "green", "blue", "tomato", "magenta", "seagreen", "salmon"
  )
) %>%
  layout(
    title = "Sentence length vs. word length",
    xaxis = list(title = "Sentence Length in Words"),
    yaxis = list(title = "Word Length in Characters")
  )
fig
library(quanteda)
library(quanteda.textplots)
library(quanteda.textstats)
library(ggplot2)

# Example corpus (replace with your actual corpus): the inaugural speeches,
# restricted to those given after the year 2000.
corp_us <- corpus(data_corpus_inaugural)
my_corpus <- corpus_subset(corp_us, subset = Year > 2000)

# Tokenise the speeches and list every occurrence of "country" with three
# tokens of context on each side.
toki <- tokens(my_corpus)
kwic_results <- kwic(x = toki, pattern = "country", window = 3)
kwic_results
## Keyword-in-context with 53 matches.
## [2001-Bush, 25] common in our | country | . With a
## [2001-Bush, 312] creed of our | country | , it is
## [2001-Bush, 375] of our own | country | . The ambitions
## [2001-Bush, 415] but not a | country | . We do
## [2001-Bush, 570] , makes our | country | more, not
## [2001-Bush, 674] . If our | country | does not lead
## [2001-Bush, 948] liberty and our | country | should make no
## [2001-Bush, 1226] Many in our | country | do not know
## [2001-Bush, 1758] to make our | country | more just and
## [2005-Bush, 64] that unite our | country | . I am
## [2005-Bush, 988] of your free | country | . The rulers
## [2005-Bush, 1170] measure. Our | country | has accepted obligations
## [2005-Bush, 1321] devotion to our | country | in deaths that
## [2005-Bush, 1427] wealth of our | country | but to its
## [2005-Bush, 1555] future of our | country | , we will
## [2005-Bush, 1843] . And our | country | must abandon all
## [2005-Bush, 1890] questions before our | country | are many.
## [2009-Obama, 1003] forgotten what this | country | has already done
## [2009-Obama, 2550] city and the | country | , alarmed at
## [2013-Obama, 667] understand that our | country | cannot succeed when
## [2013-Obama, 942] that built this | country | and investing in
## [2013-Obama, 990] that in this | country | freedom is reserved
## [2013-Obama, 1089] that make this | country | great. We
## [2013-Obama, 1831] expelled from our | country | . Our journey
## [2013-Obama, 2098] to God and | country | , not party
## [2017-Trump, 47] to rebuild our | country | and restore its
## [2017-Trump, 255] citizens of our | country | . Their victories
## [2017-Trump, 354] , is your | country | . What truly
## [2017-Trump, 405] women of our | country | will be forgotten
## [2017-Trump, 573] and robbed our | country | of so much
## [2017-Trump, 724] confidence of our | country | has dissipated over
## [2017-Trump, 1016] - rebuilding our | country | with American hands
## [2017-Trump, 1154] loyalty to our | country | , we will
## [2017-Trump, 1369] fail. Our | country | will thrive and
## [2021-Biden, 448] silently stalks the | country | . It's taken
## [2021-Biden, 1870] way, our | country | will be stronger
## [2021-Biden, 2175] and for our | country | . Amen.
## [2021-Biden, 2743] and to this | country | we love with
## [2025-Trump, 80] forward, our | country | will flourish and
## [2025-Trump, 237] is sweeping the | country | , sunlight is
## [2025-Trump, 405] illegally entered our | country | from all over
## [2025-Trump, 443] people. Our | country | can no longer
## [2025-Trump, 539] individuals in our | country | — some of
## [2025-Trump, 606] it than any | country | anywhere in the
## [2025-Trump, 635] to hate our | country | despite the love
## [2025-Trump, 977] history of our | country | . As our
## [2025-Trump, 1216] not forget our | country | , we will
## [2025-Trump, 1353] invasion of our | country | . Under the
## [2025-Trump, 1442] to defend our | country | from threats and
## [2025-Trump, 1571] gas of any | country | on earth —
## [2025-Trump, 2293] McKinley made our | country | very rich through
## [2025-Trump, 2335] given to the | country | of Panama after
## [2025-Trump, 2898] ., our | country | was forged and