## 
## Kapcsolódás csomaghoz: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Kapcsolódás csomaghoz: 'igraph'
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

This document includes both content as well as the output of any embedded R code chunks within the document.

1 Gyakoriságok

# Packages: readxl reads the workbook, tm builds the corpus.
library(readxl)
library(tm)

# Abbreviations of the four gospels.
gospels <- c("Mt", "Mk", "Lk", "Jn")

# Load the Lk_short.xlsx workbook; only its Lemma column is used below.
evm <- read_xlsx("Lk_short.xlsx")
# (alternative, disabled) reduce evm to the lemma vector: evm <- evm$Lemma

# Build a tm corpus from the lemma column.
word.corpus <- Corpus(VectorSource(evm[["Lemma"]]))
# To check that the corpus loaded properly, run: inspect(word.corpus)

1.1 Gyakoriságok - táblázatok

## # A tibble: 20 × 2
##    Lemma       n
##    <chr>   <int>
##  1 ὁ        2646
##  2 καί      1469
##  3 αὐτός    1086
##  4 δέ        542
##  5 λέγω      533
##  6 σύ        446
##  7 ἐν        361
##  8 εἰμί      360
##  9 ἐγώ       282
## 10 οὗτος     229
## 11 εἰς       226
## 12 ὅς        190
## 13 ὅτι       174
## 14 οὐ        172
## 15 πρός      166
## 16 ἐπί       161
## 17 πᾶς       158
## 18 μή        140
## 19 γίνομαι   131
## 20 ἀπό       125

1.2 Gyakoriságok - diagramok

library(plotly)

# Turn Lemma into a factor whose levels follow descending count, so the
# bars are drawn in that order instead of alphabetically.
top20$Lemma <- local({
  by_count <- order(top20$n, decreasing = TRUE)
  factor(top20$Lemma, levels = top20$Lemma[by_count])
})

# Bar chart of the counts with axis titles, built in one pipeline.
fig <- plot_ly(
  data = top20,
  x = ~Lemma,
  y = ~n,
  type = "bar",
  marker = list(color = "sandybrown")
) %>%
  layout(
    title = "Bar Chart Sorted by Value",
    xaxis = list(title = "Lemma"),
    yaxis = list(title = "Előfordulás")
  )

# Render the widget.
fig

Ez az ábra interaktív; ha ráhúzzuk az egeret, az aktuális lemmát (szótőt) és annak gyakoriságát mutatja.

1.3 Gyakoriságok - szófelhők

# Earlier tm-based frequency count, kept for reference only:
#   word.counts <- as.matrix(TermDocumentMatrix(word.corpus))
#   word.freq <- sort(rowSums(word.counts), decreasing = TRUE)

# Packages attached for the word cloud.
library(SnowballC)
library(tm)
library(wordcloud2)
library(RColorBrewer)

# Tabulate the first 188 surface forms (FullWord) into a frequency table.
greek_words <- evm$FullWord[seq_len(188)]
word_freqs <- as.data.frame(table(greek_words))

# Drop every form that appears in the "perseus" stop word list for "grc".
perzsa <- stopwords::stopwords(language = "grc", source = "perseus")
word_freqs_filtered <- filter(word_freqs, !(greek_words %in% perzsa))

# Fix the RNG seed so the cloud can be reproduced.
# NOTE(review): confirm that the seed actually influences wordcloud2's layout.
set.seed(32)
wc <- wordcloud2(
  data = word_freqs_filtered,
  size = 1,
  gridSize = 8,
  color = "random-dark",
  backgroundColor = "white"
)
wc

1.4 Gyakoriságok - bigram hálózatok

Image file (greek_network_Jn_prologue.png)

2 Zipf-szabály

2.1 A Zipf-szabály táblázatban

# One-time setup if zipfR is missing: install.packages("zipfR")
library(zipfR)
library(ggplot2)

# Parameters of the theoretical Zipf distribution.
N <- 100 # total number of ranks
s <- 1.5 # shape parameter (exponent)

# The weight of rank k is 1 / k^s; dividing by the sum of all weights
# turns the weights into probabilities that add up to 1.
zipf_probs <- local({
  weights <- 1 / seq_len(N)^s
  weights / sum(weights)
})
zipf_data <- data.frame(Rank = seq_len(N), Probability = zipf_probs)

# Print the 20 top-ranked rows.
head(zipf_data, n = 20)
##    Rank Probability
## 1     1 0.414443506
## 2     2 0.146527907
## 3     3 0.079759690
## 4     4 0.051805438
## 5     5 0.037068954
## 6     6 0.028199309
## 7     7 0.022377846
## 8     8 0.018315988
## 9     9 0.015349759
## 10   10 0.013105854
## 11   11 0.011359947
## 12   12 0.009969961
## 13   13 0.008841996
## 14   14 0.007911763
## 15   15 0.007133924
## 16   16 0.006475680
## 17   17 0.005912783
## 18   18 0.005426960
## 19   19 0.005004203
## 20   20 0.004633619

2.2 A Zipf-szabály mint hatványfüggvény

# Plot the theoretical Zipf probabilities against rank on linear axes.
# `linewidth` is used for the line thickness: ggplot2 3.4.0 deprecated
# `size` for line geoms and warns when it is still used.
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  labs(
    title = "Basic Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()

2.3 A Zipf-szabály log-log skálán

# Plot the same Zipf probabilities with both axes on a log10 scale.
# `linewidth` is used for the line thickness: ggplot2 3.4.0 deprecated
# `size` for line geoms and warns when it is still used.
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Log/log Scale Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()

2.4 A Zipf-szabály és Harry Potter

Harry Potter books
Harry Potter books

2.5 A Zipf-szabály és az evangéliumok

2.5.1 Táblázatban

# Packages: readxl reads the workbooks, tidyverse supplies count().
library(readxl)
library(tidyverse)

# The same four steps are repeated for each gospel workbook:
#   1. read the workbook into evm,
#   2. count every surface form (FullWord), most frequent first,
#   3. keep the first 50 rows of that table,
#   4. store the sum of all counts.

# Mt
evm <- read_xlsx("Mt_short.xlsx")
freqtab1 <- count(evm, FullWord, sort = TRUE)
top50Mt <- freqtab1[seq_len(50), ]
Mt_total <- sum(freqtab1[["n"]])

# Mk
evm <- read_xlsx("Mk_short.xlsx")
freqtab2 <- count(evm, FullWord, sort = TRUE)
top50Mk <- freqtab2[seq_len(50), ]
Mk_total <- sum(freqtab2[["n"]])

# Lk
evm <- read_xlsx("Lk_short.xlsx")
freqtab3 <- count(evm, FullWord, sort = TRUE)
top50Lk <- freqtab3[seq_len(50), ]
Lk_total <- sum(freqtab3[["n"]])

# Jn
evm <- read_xlsx("Jn_short.xlsx")
freqtab4 <- count(evm, FullWord, sort = TRUE)
top50Jn <- freqtab4[seq_len(50), ]
Jn_total <- sum(freqtab4[["n"]])

# Put the four top-50 tables side by side and give each word/count column
# pair a header naming its gospel.
evmtab50 <- setNames(
  cbind(top50Mt, top50Mk, top50Lk, top50Jn),
  c("Szó(Mt)", "n", "Szó(Mk)", "n", "Szó(Lk)", "n", "Szó(Jn)", "n")
)
evmtab50
##    Szó(Mt)    n Szó(Mk)    n Szó(Lk)    n  Szó(Jn)   n
## 1      καὶ 1175     καὶ 1085     καὶ 1466      καὶ 827
## 2        ὁ  493       ὁ  237      δὲ  513        ὁ 565
## 3       δὲ  471   αὐτοῦ  173       ὁ  399      ὅτι 271
## 4      τοῦ  294     εἰς  168     τοῦ  380      τοῦ 243
## 5       ἐν  293      δὲ  155      ἐν  360      τὸν 240
## 6    αὐτοῦ  266     τὸν  150   αὐτοῦ  255       ἐν 226
## 7       τὸ  227      ἐν  135   εἶπεν  229       δὲ 203
## 8       οἱ  224     τοῦ  132     εἰς  225      οὖν 200
## 9      τὸν  221      τὸ  131      τὸ  222   Ἰησοῦς 198
## 10     εἰς  218     τὴν  126     τὸν  216      εἰς 187
## 11     τῶν  206      οἱ  123      οἱ  185    αὐτοῦ 173
## 12     τὴν  203    αὐτῷ  121      τῷ  177     αὐτῷ 173
## 13    αὐτῷ  170  αὐτοῖς  120     ὅτι  174      οὐκ 151
## 14      τῷ  149   αὐτὸν  117     τὴν  171       τὸ 150
## 15     ὅτι  140     τῶν  108    πρὸς  161      ἵνα 145
## 16      μὴ  123     ὅτι  102    αὐτῷ  153       οἱ 144
## 17     τῆς  121     τῆς   80   αὐτὸν  145      τὴν 142
## 18       ἡ  121      τῷ   77      τῇ  136       ἐκ 139
## 19   εἶπεν  119      μὴ   72      μὴ  132    λέγει 123
## 20  Ἰησοῦς  111     οὐκ   66     τῶν  131        ἡ 122
## 21      τὰ  110     ἵνα   64     τῆς  119       τῷ 114
## 22     γὰρ  108    πρὸς   63    τοὺς  118    εἶπεν 112
## 23    τοὺς  108    τοὺς   63     ἐπὶ  116      τῶν 109
## 24    τοῖς  108   λέγει   62     σου  104       οὐ 108
## 25    ὑμῖν  107   αὐτόν   61      τὰ  104    ἐστιν 107
## 26  αὐτοῖς  103     γὰρ   60       ἡ  102       μὴ 106
## 27      τῇ  103       ἡ   60     οὐκ   99      ἐγὼ 103
## 28   αὐτῶν  100   εἶπεν   59   αὐτῶν   98     ὑμῖν 103
## 29     ἐπὶ   99      τῇ   59    ὑμῖν   96   αὐτοῖς 100
## 30     οὐκ   98  Ἰησοῦς   58  αὐτοῖς   91    αὐτὸν 100
## 31     σου   98    τοῖς   56     γὰρ   87       με  99
## 32   αὐτὸν   94      τί   54     μου   87      μου  98
## 33      οὐ   92      τὰ   52     ἀπὸ   83     πρὸς  97
## 34     ἀπὸ   92     ἐπὶ   52      ἦν   75       ἦν  96
## 35    τότε   90   ἐστιν   52    τοῖς   74      τῆς  82
## 36     μου   83      οὐ   45    θεοῦ   72       τὰ  80
## 37   ἐστιν   83   αὐτῶν   42 ἐγένετο   69    αὐτόν  76
## 38    ὑμῶν   76      ἐκ   42   ἐστιν   69       τῇ  72
## 39       ἢ   65   εὐθὺς   41    ὑμῶν   67    ὑμεῖς  68
## 40      τί   64     μου   40   αὐτόν   66     περὶ  67
## 41    λέγω   61     σου   39      οὐ   66    ταῦτα  61
## 42    ἰδοὺ   60      ἦν   38      τί   62      γὰρ  60
## 43     ἐὰν   58    ὑμῖν   37    ἰδοὺ   57 ἀπεκρίθη  57
## 44     οὖν   56    μετὰ   36  Ἰησοῦς   55     τοὺς  55
## 45   λέγει   54     ἀπὸ   36      ὡς   51     ἀλλὰ  52
## 46     διὰ   53       ἢ   33      εἰ   50    τοῦτο  51
## 47      εἰ   53    θεοῦ   31    λέγω   50     ἀλλ’  50
## 48      ἐκ   52     τὰς   31      ἐκ   50     ἀμὴν  50
## 49   λέγων   49  ἔλεγεν   31  αὐτούς   47       εἰ  49
## 50     ἕως   49     διὰ   30   λέγων   47       τί  48

2.5.2 Grafikonon

# One-time setup if plotly is missing: install.packages("plotly")
library(plotly)

# Top-50 counts of the four gospels side by side; Roll_number is the rank
# (1 = most frequent word form) and y1..y4 hold the counts for Mt, Mk, Lk, Jn.
datus <- data.frame(
  Roll_number = 1:50,
  y1 = top50Mt$n,
  y2 = top50Mk$n,
  y3 = top50Lk$n,
  y4 = top50Jn$n
)

# One line per gospel, count against rank. The legend title used to be the
# template placeholder "Legend Title" and the chart title lacked the
# apostrophe in "Zipf's"; both strings are corrected here.
fig <- plotly::plot_ly(
  data = datus, x = ~Roll_number,
  y = ~y1, name = "Mt",
  type = "scatter", mode = "lines"
) %>%
  add_trace(y = ~y2, name = "Mk") %>%
  add_trace(y = ~y4, name = "Jn") %>%
  add_trace(y = ~y3, name = "Lk") %>%
  layout(
    title = "Zipf's law and the gospels",
    xaxis = list(title = "Helyezés"),
    yaxis = list(title = "Előfordulás"),
    legend = list(title = list(text = "Evangélium"))
  )

fig

2.6 The largest cities in the world

2.6.1 In datatable

population city country cca2 rank
37036200 Tokyo Japan JP 1
34665600 Delhi India IN 2
30482100 Shanghai China CN 3
24652900 Dhaka Bangladesh BD 4
23074200 Cairo Egypt EG 5
22990000 Sao Paulo Brazil BR 6
22752400 Mexico City Mexico MX 7
22596500 Beijing China CN 8
22089000 Mumbai India IN 9
18921600 Osaka Japan JP 10
18171200 Chongqing China CN 11
18076800 Karachi Pakistan PK 12
17778500 Kinshasa DR Congo CD 13
17156400 Lagos Nigeria NG 14
16236700 Istanbul Turkey TR 15
15845200 Kolkata India IN 16
15752300 Buenos Aires Argentina AR 17
15230600 Manila Philippines PH 18
14878700 Guangzhou China CN 19
14825800 Lahore Pakistan PK 20
14704100 Tianjin China CN 21
14395400 Bangalore India IN 22
13923200 Rio de Janeiro Brazil BR 23
13545400 Shenzhen China CN 24
12737400 Moscow Russia RU 25
12336000 Chennai India IN 26
11795800 Bogota Colombia CO 27
11634100 Jakarta Indonesia ID 28
11517300 Lima Peru PE 29
11391700 Bangkok Thailand TH 30
11346800 Paris France FR 31
11337900 Hyderabad India IN 32
10174900 Nanjing China CN 33
10027900 Luanda Angola AO 34
10025800 Seoul South Korea KR 35
9998870 Chengdu China CN 36
9840740 London United Kingdom GB 37
9816320 Ho Chi Minh City Vietnam VN 38
9729740 Tehran Iran IR 39
9534790 Nagoya Japan JP 40
9222080 Xi-an China CN 41
9061820 Ahmedabad India IN 42
9000280 Kuala Lumpur Malaysia MY 43
8986480 Wuhan China CN 44
8592820 Suzhou China CN 45
8591040 Hangzhou China CN 46
8581730 Surat India IN 47
8561520 Dar es Salaam Tanzania TZ 48
8141120 Baghdad Iraq IQ 49
7974270 Shenyang China CN 50
7952860 Riyadh Saudi Arabia SA 51
7936530 New York City United States US 52
7817160 Foshan China CN 53
7772860 Dongguan China CN 54
7768510 Hong Kong Hong Kong HK 55
7525720 Pune India IN 56
7066860 Haerbin China CN 57
6999460 Santiago Chile CL 58
6810530 Madrid Spain ES 59
6754180 Khartoum Sudan SD 60
6491290 Toronto Canada CA 61
6444580 Johannesburg South Africa ZA 62
6351680 Belo Horizonte Brazil BR 63
6347380 Dalian China CN 64
6217970 Qingdao China CN 65
6157270 Singapore Singapore SG 66
6156140 Zhengzhou China CN 67
6065850 Ji nan Shandong China CN 68
6056880 Abidjan Ivory Coast CI 69
5956680 Addis Ababa Ethiopia ET 70
5813190 Yangon Myanmar MM 71
5807050 Alexandria Egypt EG 72
5766990 Nairobi Kenya KE 73
5733250 Barcelona Spain ES 74
5653490 Chittagong Bangladesh BD 75
5602200 Hanoi Vietnam VN 76
5597340 Saint Petersburg Russia RU 77
5578580 Guadalajara Mexico MX 78
5550490 Ankara Turkey TR 79
5465920 Fukuoka Japan JP 80
5391890 Melbourne Australia AU 81
5272360 Monterrey Mexico MX 82
5248790 Sydney Australia AU 83
5132170 Urumqi China CN 84
5128270 Changsha China CN 85
5063580 Cape Town South Africa ZA 86
5021600 Jiddah Saudi Arabia SA 87
4990930 Brasilia Brazil BR 88
4955680 Kunming China CN 89
4891020 Changchun China CN 90
4877020 Kabul Afghanistan AF 91
4854260 Yaounde Cameroon CM 92
4830170 Hefei China CN 93
4770300 Ningbo China CN 94
4737590 Shantou China CN 95
4645320 Kano Nigeria NG 96
4568530 Tel Aviv Israel IL 97
4563850 New Taipei Taiwan TW 98
4534990 Shijiazhuang China CN 99
4411110 Jaipur India IN 100
4387410 Kozhikode India IN 101
4383600 Nanning China CN 102
4377310 Montreal Canada CA 103
4347100 Rome Italy IT 104
4346420 Douala Cameroon CM 105
4344050 Recife Brazil BR 106
4343910 Malappuram India IN 107
4305290 Taiyuan Shanxi China CN 108
4284450 Fortaleza Brazil BR 109
4268960 Porto Alegre Brazil BR 110
4265160 Kampala Uganda UG 111
4262130 Ekurhuleni South Africa ZA 112
4228980 Antananarivo Madagascar MG 113
4209940 Abuja Nigeria NG 114
4180040 Changzhou China CN 115
4172810 Medellin Colombia CO 116
4144130 Ibadan Nigeria NG 117
4132670 Lucknow India IN 118
4104120 Nanchang China CN 119
4093180 Wenzhou China CN 120
4077760 Xiamen China CN 121
4072430 Fuzhou Fujian China CN 122
4036230 Kumasi Ghana GH 123
4029910 Salvador Brazil BR 124
4024170 Tangshan Hebei China CN 125
4012310 Casablanca Morocco MA 126
3926050 Bekasi Indonesia ID 127
3892830 Faisalabad Pakistan PK 128
3889140 Curitiba Brazil BR 129
3793780 Port Harcourt Nigeria NG 130
3770958 Los Angeles United States US 131
3736730 Guiyang China CN 132
3719980 Thrissur India IN 133
3658640 Dakar Senegal SN 134
3648110 Santo Domingo Dominican Republic DO 135
3627220 Asuncion Paraguay PY 136
3604550 Kochi India IN 137
3580190 Berlin Germany DE 138
3559030 Wuxi China CN 139
3527430 Sanaa Yemen YE 140
3520820 Ouagadougou Burkina Faso BF 141
3491580 Campinas Brazil BR 142
3484430 Busan South Korea KR 143
3482830 Indore India IN 144
3470870 Lusaka Zambia ZM 145
3460660 Mashhad Iran IR 146
3443290 Puebla Mexico MX 147
3430880 Lanzhou China CN 148
3405000 Kuwait City Kuwait KW 149
3345370 Kanpur India IN 150
3301090 Durban South Africa ZA 151
3244750 Guayaquil Ecuador EC 152
3229740 Guatemala City Guatemala GT 153
3218290 Depok Indonesia ID 154
3209300 Pyongyang North Korea KP 155
3180340 Bamako Mali ML 156
3170180 Nagpur India IN 157
3167450 Milan Italy IT 158
3159030 Handan China CN 159
3158720 Coimbatore India IN 160
3158340 Mbuji-Mayi DR Congo CD 161
3155320 Athens Greece GR 162
3152090 Izmir Turkey TR 163
3152050 Huaian China CN 164
3137620 Surabaya Indonesia ID 165
3133080 Port-au-Prince Haiti HT 166
3094640 Dubai United Arab Emirates AE 167
3093870 Zhongshan China CN 168
3072530 Thiruvananthapuram India IN 169
3064290 Weifang China CN 170
3061340 Lubumbashi DR Congo CD 171
3028270 Lisbon Portugal PT 172
3018160 Kiev Ukraine UA 173
3015110 Caracas Venezuela VE 174
3004130 Algiers Algeria DZ 175
2955410 Pretoria South Africa ZA 176
2951330 Shaoxing China CN 177
2930170 Shizuoka Japan JP 178
2927080 Goiania Brazil BR 179
2916790 Cali Colombia CO 180
2898490 Yantai China CN 181
2890880 Huizhou China CN 182
2876180 Zibo China CN 183
2873700 Incheon South Korea KR 184
2846420 Mogadishu Somalia SO 185
2832580 Manchester United Kingdom GB 186
2813480 Brazzaville Republic of the Congo CG 187
2799960 Damascus Syria SY 188
2788380 Accra Ghana GH 189
2779200 Taipei Taiwan TW 190
2758100 Bandung Indonesia ID 191
2725560 Luoyang China CN 192
2719520 Toluca de Lerdo Mexico MX 193
2707920 Vancouver Canada CA 194
2704620 Birmingham United Kingdom GB 195
2689540 Patna India IN 196
2686290 Bhopal India IN 197
2665080 Tashkent Uzbekistan UZ 198
2653580 Sapporo Japan JP 199
2626610 Tangerang Indonesia ID 200
2613750 Nantong China CN 201
2611867 Chicago United States US 202
2568170 Brisbane Australia AU 203
2548930 Peshawar Pakistan PK 204
2545030 Tunis Tunisia TN 205
2543540 Gujranwala Pakistan PK 206
2521590 Medan Indonesia ID 207
2500940 Hohhot China CN 208
2496500 Baku Azerbaijan AZ 209
2486560 Rawalpindi Pakistan PK 210
2478340 Agra India IN 211
2465140 Kannur India IN 212
2453800 Belem Brazil BR 213
2447620 Liuzhou China CN 214
2440420 Visakhapatnam India IN 215
2438480 Aleppo Syria SY 216
2434640 Manaus Brazil BR 217
2433680 San Juan Puerto Rico PR 218
2432440 Maracaibo Venezuela VE 219
2425800 Phnom Penh Cambodia KH 220
2425700 Baotou China CN 221
2424920 Vadodara India IN 222
2396400 Barranquilla Colombia CO 223
2379330 Beirut Lebanon LB 224
2376040 Xuzhou China CN 225
2356610 Taoyuan Taiwan TW 226
2351140 Nashik India IN 227
2350700 Vijayawada India IN 228
2337590 Sendai Japan JP 229
2333220 Tijuana Mexico MX 230
2327990 Esfahan Iran IR 231
2324082 Houston United States US 232
2315020 Putian China CN 233
2273240 Amman Jordan JO 234
2258380 Multan Pakistan PK 235
2257210 Wuhu Anhui China CN 236
2252170 Kollam India IN 237
2251590 Conakry Guinea GN 238
2220560 Grande Vitoria Brazil BR 239
2218580 Mecca Saudi Arabia SA 240
2216800 Yangzhou China CN 241
2198540 Taizhou Zhejiang China CN 242
2190060 Baoding China CN 243
2182170 Naples Italy IT 244
2181080 Daegu South Korea KR 245
2169190 Perth Australia AU 246
2161430 Linyi Shandong China CN 247
2156350 Havana Cuba CU 248
2150330 Rajkot India IN 249
2142600 Bursa Turkey TR 250
2141520 Brussels Belgium BE 251
2108740 Lome Togo TG 252
2095420 Haikou China CN 253
2085470 Daqing China CN 254
2080810 Lianyungang China CN 255
2073240 Yancheng Jiangsu China CN 256
2070930 Minsk Belarus BY 257
2058730 Hyderabad Pakistan PK 258
2058190 Hiroshima Japan JP 259
2054540 Panama City Panama PA 260
2052410 Semarang Indonesia ID 261
2044650 Benin City Nigeria NG 262
2042040 Almaty Kazakhstan KZ 263
2033990 Davao City Philippines PH 264
2030790 Valencia Venezuela VE 265
2028680 Ludhiana India IN 266
2020970 Rabat Morocco MA 267
2017260 Quito Ecuador EC 268
2007500 Can Tho Vietnam VN 269
2005500 Vienna Austria AT 270
1997370 La Paz Bolivia BO 271
1984660 Matola Mozambique MZ 272
1981790 Baixada Santista Brazil BR 273
1968620 Zhuhai China CN 274
1966630 Quanzhou China CN 275
1956470 West Yorkshire United Kingdom GB 276
1951440 Datong China CN 277
1950390 Leon de los Aldamas Mexico MX 278
1923440 Raipur India IN 279
1911650 Madurai India IN 280
1909930 Sharjah United Arab Emirates AE 281
1904140 Mosul Iraq IQ 282
1891230 Santa Cruz Bolivia BO 283
1887700 Palembang Indonesia ID 284
1885950 Cixi China CN 285
1876820 Adana Turkey TR 286
1874890 Meerut India IN 287
1866580 La Laguna Mexico MX 288
1858910 Batam Indonesia ID 289
1858710 Gaziantep Turkey TR 290
1826520 Jiangmen China CN 291
1826010 Varanasi India IN 292
1815750 Kananga DR Congo CD 293
1809850 Turin Italy IT 294
1801750 Xiangyang China CN 295
1800420 Yichang China CN 296
1800230 Warsaw Poland PL 297
1795680 Yinchuan China CN 298
1794650 Monrovia Liberia LR 299
1788170 Montevideo Uruguay UY 300
1787710 Hamburg Germany DE 301
1787230 Lyon France FR 302
1783690 Tiruppur India IN 303
1782240 Budapest Hungary HU 304
1779600 Suqian China CN 305
1777610 Srinagar India IN 306
1767760 Jamshedpur India IN 307
1767330 Aurangabad India IN 308
1767110 Onitsha Nigeria NG 309
1763900 Shiraz Iran IR 310
1762820 Qinhuangdao China CN 311
1758700 Bucharest Romania RO 312
1740860 Xining China CN 313
1739050 Hengyang China CN 314
1738620 Anyang China CN 315
1738430 Anshan China CN 316
1737760 Stockholm Sweden SE 317
1737390 Makassar Indonesia ID 318
1724890 Ulaanbaatar Mongolia MN 319
1722780 N-Djamena Chad TD 320
1719310 Jilin China CN 321
1718940 Glasgow United Kingdom GB 322
1711130 Auckland New Zealand NZ 323
1706660 Novosibirsk Russia RU 324
1702510 Muscat Oman OM 325
1695670 Tabriz Iran IR 326
1689400 Qiqihaer China CN 327
1687900 Calgary Canada CA 328
1675144 Phoenix United States US 329
1672900 Kathmandu Nepal NP 330
1664420 Jodhpur India IN 331
1650280 Tegucigalpa Honduras HN 332
1644610 Marseille France FR 333
1640600 Cordoba Argentina AR 334
1633020 Harare Zimbabwe ZW 335
1631090 Rosario Argentina AR 336
1625980 Ciudad Juarez Mexico MX 337
1624660 Medina Saudi Arabia SA 338
1622270 Jining Shandong China CN 339
1621720 Ranchi India IN 340
1618740 Abu Dhabi United Arab Emirates AE 341
1614660 Karaj Iran IR 342
1612940 Nouakchott Mauritania MR 343
1599350 Kota India IN 344
1598460 Zhangjiakou China CN 345
1594300 Mandalay Myanmar MM 346
1590890 Munich Germany DE 347
1588600 Edmonton Canada CA 348
1586180 Daejon South Korea KR 349
1582200 Jabalpur India IN 350
1575050 Natal Brazil BR 351
1565860 Gaoxiong Taiwan TW 352
1565300 Asansol India IN 353
1564320 Huainan China CN 354
1563820 Yiwu China CN 355
1563600 Homs Syria SY 356
1561490 Niamey Niger NE 357
1553460 Mombasa Kenya KE 358
1550560 Ganzhou China CN 359
1548210 Grande Sao Luis Brazil BR 360
1546690 Kisangani DR Congo CD 361
1545080 Chaozhou China CN 362
1543500 Gwalior India IN 363
1537000 Yekaterinburg Russia RU 364
1536730 Gwangju South Korea KR 365
1532653 San Antonio United States US 366
1524650 Basra Iraq IQ 367
1523670 Allahabad India IN 368
1517114 Philadelphia United States US 369
1511450 Jiaxing China CN 370
1511110 Amritsar India IN 371
1506700 Taizhou Jiangsu China CN 372
1503910 Hai Phong Vietnam VN 373
1502640 San Jose Costa Rica CR 374
1499360 Weihai China CN 375
1491180 Chon Buri Thailand TH 376
1476370 Liuyang China CN 377
1471240 Liuan China CN 378
1467460 Kaifeng China CN 379
1467160 Taian Shandong China CN 380
1465780 Ottawa Canada CA 381
1460280 Cochabamba Bolivia BO 382
1459290 Queretaro Mexico MX 383
1458890 Rizhao China CN 384
1457020 Uyo Nigeria NG 385
1455100 Zurich Switzerland CH 386
1450150 Konya Turkey TR 387
1447780 Joao Pessoa Brazil BR 388
1447340 Mwanza Tanzania TZ 389
1444820 Nanchong China CN 390
1441800 Dhanbad India IN 391
1440090 Dongying China CN 392
1434030 Zunyi China CN 393
1425480 Zhanjiang China CN 394
1423630 Pointe-Noire Republic of the Congo CG 395
1418040 Shiyan China CN 396
1415820 Kharkiv Ukraine UA 397
1413250 Bareilly India IN 398
1412950 Belgrade Serbia RS 399
1411010 Bucaramanga Colombia CO 400
1400850 Mianyang Sichuan China CN 401
1400740 Copenhagen Denmark DK 402
1394750 Tengzhou China CN 403
1394590 Antalya Turkey TR 404
1393410 Samut Prakan Thailand TH 405
1393190 Taizhong Taiwan TW 406
1393010 Lilongwe Malawi MW 407
1392940 Adelaide Australia AU 408
1392280 Qom Iran IR 409
1389672 San Diego United States US 410
1388800 Suweon South Korea KR 411
1387920 Maceio Brazil BR 412
1387360 Freetown Sierra Leone SL 413
1385370 Yingkou China CN 414
1385210 Suzhou China CN 415
1381230 Tanger Morocco MA 416
1379560 Aligarh India IN 417
1375600 Ad-Dammam Saudi Arabia SA 418
1375220 Abomey-Calavi Benin BJ 419
1374630 Joinville Brazil BR 420
1369880 Moradabad India IN 421
1369430 Bukavu DR Congo CD 422
1364640 Pekan Baru Indonesia ID 423
1361880 Maoming China CN 424
1361840 Nnewi Nigeria NG 425
1354930 Jieyang China CN 426
1354890 Helsinki Finland FI 427
1352560 Astana Kazakhstan KZ 428
1350150 Bujumbura Burundi BI 429
1345720 Mysore India IN 430
1344890 Ruian China CN 431
1336960 Fes Morocco MA 432
1334200 Porto Portugal PT 433
1332270 Fushun Liaoning China CN 434
1331940 Prague Czech Republic CZ 435
1330500 Port Elizabeth South Africa ZA 436
1329990 Jinhua China CN 437
1329830 Kigali Rwanda RW 438
1324800 Ahvaz Iran IR 439
1323850 Florianopolis Brazil BR 440
1320910 Bhubaneswar India IN 441
1317340 Baoji China CN 442
1315330 Durg-Bhilainagar India IN 443
1312850 Pingdingshan Henan China CN 444
1310720 San Luis Potosi Mexico MX 445
1309840 Liupanshui China CN 446
1309420 Puning China CN 447
1305090 Chifeng China CN 448
1302638 Dallas United States US 449
1301130 Islamabad Pakistan PK 450
1299580 Kazan Russia RU 451
1299480 Zhuzhou China CN 452
1299130 Zhenjiang Jiangsu China CN 453
1299110 Dublin Ireland IE 454
1297900 Tasikmalaya Indonesia ID 455
1291440 Huaibei China CN 456
1288560 Xiongan China CN 457
1286460 Sofia Bulgaria BG 458
1286000 Da Nang Vietnam VN 459
1282270 Pizhou China CN 460
1281730 Barquisimeto Venezuela VE 461
1281010 Bogor Indonesia ID 462
1278770 Luohe China CN 463
1274780 Aba Nigeria NG 464
1274630 Nanyang Henan China CN 465
1273560 Xiangtan Hunan China CN 466
1270320 Maracay Venezuela VE 467
1269900 Tiruchirappalli India IN 468
1268080 Bazhong China CN 469
1266010 Chandigarh India IN 470
1263650 Binzhou China CN 471
1260660 Jinzhou China CN 472
1259940 Kaduna Nigeria NG 473
1258230 Merida Mexico MX 474
1258120 Benxi China CN 475
1257180 Mendoza Argentina AR 476
1253110 Quetta Pakistan PK 477
1249760 Nizhniy Novgorod Russia RU 478
1246210 Chelyabinsk Russia RU 479
1244190 Chiang Mai Thailand TH 480
1243530 Bobo-Dioulasso Burkina Faso BF 481
1242490 Saharanpur India IN 482
1238950 Guilin China CN 483
1231020 Hubli-Dharwad India IN 484
1230070 Maputo Mozambique MZ 485
1229960 Yueqing China CN 486
1227620 Hargeysa Somalia SO 487
1224170 Guwahati India IN 488
1220710 Salem India IN 489
1215000 Mexicali Mexico MX 490
1210200 Bandar Lampung Indonesia ID 491
1203890 Shimkent Kazakhstan KZ 492
1203390 Tripoli Libya LY 493
1199980 Haifa Israel IL 494
1196820 Ikorodu Nigeria NG 495
1196770 Aguascalientes Mexico MX 496
1191400 Siliguri India IN 497
1189200 Amsterdam Netherlands NL 498
1185920 Tshikapa DR Congo CD 499
1184900 Wenling China CN 500
1180820 Omsk Russia RU 501
1180750 Xinxiang China CN 502
1177840 Krasnoyarsk Russia RU 503
1174750 Bien Hoa Vietnam VN 504
1170240 Fuyang China CN 505
1168490 Zaozhuang China CN 506
1167600 Jalandhar India IN 507
1165590 Ma’anshan China CN 508
1163590 Panjin China CN 509
1159940 Fuzhou Jiangxi China CN 510
1159780 Sekondi Takoradi Ghana GH 511
1159390 Yichun Jiangxi China CN 512
1157920 Yongin South Korea KR 513
1156360 Cuernavaca Mexico MX 514
1154760 Samarinda Indonesia ID 515
1154410 Aden Yemen YE 516
1153710 Samara Russia RU 517
1153600 Shangrao China CN 518
1153540 Chihuahua Mexico MX 519
1153230 Cologne Germany DE 520
1152180 Asmara Eritrea ER 521
1151220 Bishkek Kyrgyzstan KG 522
1150530 Chenzhou China CN 523
1150480 Zhaoqing China CN 524
1148400 Ufa Russia RU 525
1145590 Nyala Sudan SD 526
1142870 Leshan China CN 527
1140390 Rostov-on-Don Russia RU 528
1133630 Dezhou China CN 529
1132420 San Salvador El Salvador SV 530
1128360 Diyarbakir Turkey TR 531
1128190 Kirkuk Iraq IQ 532
1127520 Johor Bahru Malaysia MY 533
1126220 Jingzhou Hubei China CN 534
1123540 Changshu China CN 535
1121440 Goyang South Korea KR 536
1120900 Managua Nicaragua NI 537
1117360 Kermanshah Iran IR 538
1117100 Xuchang China CN 539
1115580 Oslo Norway NO 540
1114370 Huzhou China CN 541
1114090 Blantyre-Limbe Malawi MW 542
1108180 Solapur India IN 543
1105540 Cartagena Colombia CO 544
1100240 Yerevan Armenia AM 545
1099960 Ilorin Nigeria NG 546
1099040 Mersin Turkey TR 547
1097310 Denpasar Indonesia ID 548
1092200 Qujing China CN 549
1091590 Lille France FR 550
1087020 Tbilisi Georgia GE 551
1086960 Guiping China CN 552
1086000 Voronezh Russia RU 553
1085990 Perm Russia RU 554
1085330 Marrakech Morocco MA 555
1081930 Aracaju Brazil BR 556
1080490 Warangal India IN 557
1080180 Toulouse France FR 558
1079980 Yueyang China CN 559
1078650 Hamah Syria SY 560
1077740 Tampico Mexico MX 561
1076120 Warri Nigeria NG 562
1073060 Xintai China CN 563
1072560 Padang Indonesia ID 564
1068550 Teresina Brazil BR 565
1066100 Saltillo Mexico MX 566
1065400 Cancun Mexico MX 567
1064870 Antwerp Belgium BE 568
1063680 Owerri Nigeria NG 569
1061620 Cebu City Philippines PH 570
1057290 Nampula Mozambique MZ 571
1056980 Changwon South Korea KR 572
1052320 Chengde China CN 573
1051040 San Miguel de Tucuman Argentina AR 574
1047810 Lubango Angola AO 575
1046710 Acapulco de Juarez Mexico MX 576
1045070 Zhucheng China CN 577
1043580 Leiyang China CN 578
1042120 Pingxiang Jiangxi China CN 579
1041060 Dehradun India IN 580
1039900 Dushanbe Tajikistan TJ 581
1037250 Kayseri Turkey TR 582
1035090 Jos Nigeria NG 583
1034680 Misratah Libya LY 584
1034200 San Pedro Sula Honduras HN 585
1032980 Sylhet Bangladesh BD 586
1031440 Laiwu China CN 587
1030550 Songkhla Thailand TH 588
1026390 Nonthaburi Thailand TH 589
1026250 Rotterdam Netherlands NL 590
1024970 Jixi Heilongjiang China CN 591
1024430 Valparaiso Chile CL 592
1024430 Jiujiang China CN 593
1018070 Bordeaux France FR 594
1017660 Najaf Iraq IQ 595
1017530 Krasnodar Russia RU 596
1017030 Agadir Morocco MA 597
1016230 Morelia Mexico MX 598
1016150 Bangui Central African Republic CF 599
1015045 Fort Worth United States US 600
1011500 Guigang China CN 601
1009450 Taiz Yemen YE 602
1009260 Mudanjiang China CN 603
1008750 Hengshui China CN 604
1008485 Jacksonville United States US 605
1007020 Rajshahi Bangladesh BD 606
1006700 Odesa Ukraine UA 607
1003580 Xinyu China CN 608
1002710 Linfen China CN 609
1002450 Zhangzhou China CN 610
1000720 Tianmen China CN 611
1000410 Liling China CN 612
996732 Jerusalem Israel IL 613
995970 Yangjiang China CN 614
995027 Zamboanga City Philippines PH 615
992268 Volgograd Russia RU 616
991388 Ciudad Guayana Venezuela VE 617
990695 Cabinda Angola AO 618
990439 Umuahia Nigeria NG 619
989859 Antipolo Philippines PH 620
989252 Austin United States US 621
985078 Deyang China CN 622
983715 Arequipa Peru PE 623
983121 Reynosa Mexico MX 624
983008 Baishan China CN 625
982563 Jiangyin China CN 626
981688 Cucuta Colombia CO 627
981223 Bogra Bangladesh BD 628
981069 Veracruz Mexico MX 629
980200 Khulna Bangladesh BD 630
975189 Bengbu China CN 631
974195 Pathum Thani Thailand TH 632
972373 Villahermosa Mexico MX 633
970074 Bahawalpur Pakistan PK 634
966741 Southampton United Kingdom GB 635
966494 Oran Algeria DZ 636
962403 Guntur India IN 637
961625 West Rand South Africa ZA 638
956195 Nice France FR 639
949309 Changzhi China CN 640
948572 Malang Indonesia ID 641
947488 Dandong China CN 642
944348 Hermosillo Mexico MX 643
943921 Bhiwandi India IN 644
943458 Seongnam South Korea KR 645
943313 Campo Grande Brazil BR 646
943212 San Jose United States US 647
942459 Londrina Brazil BR 648
941758 Firozabad India IN 649
941130 Ashgabat Turkmenistan TM 650
940911 Puducherry India IN 651
940560 Erbil Iraq IQ 652
938279 Changde China CN 653
935079 Shangqiu China CN 654
935017 Charlotte United States US 655
934011 Kuerle China CN 656
933193 La Plata Argentina AR 657
931630 Liaoyang China CN 658
931255 Dnipro Ukraine UA 659
931092 Lokoja Nigeria NG 660
931015 Cherthala India IN 661
929772 Quzhou China CN 662
929357 Concepcion Chile CL 663
929188 Trujillo Peru PE 664
928997 Liverpool United Kingdom GB 665
927758 Tuxtla Gutierrez Mexico MX 666
926519 Tyumen Russia RU 667
923221 Bergamo Italy IT 668
921011 Xingtai China CN 669
919748 Soshanguve South Africa ZA 670
918494 Culiacan Mexico MX 671
917679 Columbus United States US 672
916236 Ulsan South Korea KR 673
908873 Huangshi China CN 674
906658 Enugu Nigeria NG 675
904031 Yongzhou China CN 676
903628 Fuxin China CN 677
902334 Xinghua China CN 678
900351 Bunia DR Congo CD 679
899225 Libreville Gabon GA 680
898535 Maiduguri Nigeria NG 681
897493 Yibin China CN 682
897262 Hufuf-Mubarraz Saudi Arabia SA 683
892519 Huaihua China CN 684
890375 Xinyang China CN 685
887939 Xiaogan China CN 686
887711 Ipoh Malaysia MY 687
886584 Yangquan China CN 688
885396 Kottayam India IN 689
884046 Luzhou China CN 690
881897 Banghazi Libya LY 691
880865 Tainan Taiwan TW 692
878063 Tianshui China CN 693
878004 Benguela Angola AO 694
877072 Bozhou China CN 695
876063 Donetsk Ukraine UA 696
874037 Indianapolis United States US 697
873982 Zanzibar Tanzania TZ 698
873638 Kunshan China CN 699
869441 Zhuji China CN 700
863282 Jincheng China CN 701
861380 Malanje Angola AO 702
860518 Eskisehir Turkey TR 703
860169 Orumiyeh Iran IR 704
858853 Nellore India IN 705
858207 Quebec City Canada CA 706
857367 Winnipeg Canada CA 707
853869 Bikaner India IN 708
851474 Heze China CN 709
851392 Zhumadian China CN 710
851282 Palermo Italy IT 711
851156 Huludao China CN 712
848379 Taixing China CN 713
848129 Haicheng China CN 714
847608 Anqiu China CN 715
847350 Ibb Yemen YE 716
846841 Gebze Turkey TR 717
846517 Sulaimaniya Iraq IQ 718
846438 Langfang China CN 719
845854 Liaocheng China CN 720
845827 Barcelona Puerto La Cruz Venezuela VE 721
841911 Gaomi China CN 722
841354 Valencia Spain ES 723
839309 Hanchuan China CN 724
839131 Dasmarinas Philippines PH 725
838870 Kayamkulam India IN 726
837650 Cagayan de Oro City Philippines PH 727
837335 Meishan China CN 728
837323 Saratov Russia RU 729
836997 Muzaffarnagar India IN 730
834581 Xalapa Mexico MX 731
834242 Newcastle upon Tyne United Kingdom GB 732
831689 Ar-Rayyan Qatar QA 733
829276 Danyang China CN 734
829049 Sorocaba Brazil BR 735
828008 Merca Somalia SO 736
827603 Bucheon South Korea KR 737
824965 Amravati India IN 738
823625 Kitwe Zambia ZM 739
823407 Gaza Palestine PS 740
822497 Oshogbo Nigeria NG 741
821187 Nakhon Ratchasima Thailand TH 742
820064 Jiaozuo China CN 743
819854 Goma DR Congo CD 744
819334 Nottingham United Kingdom GB 745
819297 Bologna Italy IT 746
816586 Gorakhpur India IN 747
815472 Thessaloniki Greece GR 748
813794 Linhai China CN 749
813599 Yan’an China CN 750
812913 Vereeniging South Africa ZA 751
812244 Erduosi-Ordoss China CN 752
811560 Anqing China CN 753
809652 Zaria Nigeria NG 754
808111 Bur Sa’id Egypt EG 755
805079 Shaoguan China CN 756
804914 Cuttack India IN 757
804189 Dengzhou China CN 758
803758 Frankfurt Germany DE 759
803062 Akure Nigeria NG 760
799729 Belgaum India IN 761
799708 Malegaon India IN 762
799706 Banjarmasin Indonesia ID 763
797302 Tirupati India IN 764
797246 Yuncheng China CN 765
795748 Yuxi China CN 766
793974 Niigata Japan JP 767
793083 Hamilton Canada CA 768
792664 Yanji China CN 769
791579 Zigong China CN 770
790986 Shaoyang China CN 771
790694 Qingyuan China CN 772
789918 Maturin Venezuela VE 773
789386 Sialkot Pakistan PK 774
788345 Tongliao China CN 775
788238 Nay Pyi Taw Myanmar MM 776
786000 Tongling China CN 777
785116 Dazhou China CN 778
784856 Al-Hudaydah Yemen YE 779
784420 Tamale Ghana GH 780
784261 Wuzhou China CN 781
780842 Suining Sichuan China CN 782
778685 Mangalore India IN 783
778052 Amara Iraq IQ 784
777336 Huambo Angola AO 785
775312 Zhangjiagang China CN 786
775091 Sargodha Pakistan PK 787
774229 Bacoor Philippines PH 788
774118 Jiamusi China CN 789
773030 Dongtai China CN 790
772996 Nanded Waghala India IN 791
771029 Ansan South Korea KR 792
769375 Krakow Poland PL 793
767968 San Francisco United States US 794
767534 Lattakia Syria SY 795
766887 Bhavnagar India IN 796
766403 Ankang China CN 797
765748 Xianyang Shaanxi China CN 798
765324 Kurnool India IN 799
764938 Taicang China CN 800
764753 Seattle United States US 801
764500 Sao Jose dos Campos Brazil BR 802
761755 Panzhihua China CN 803
761331 Sokoto Nigeria NG 804
760857 Uvira DR Congo CD 805
758893 Zarqa Jordan JO 806
758680 Yuyao China CN 807
757989 Cotonou Benin BJ 808
757765 Rasht Iran IR 809
757604 Ribeirao Preto Brazil BR 810
757292 Oaxaca de Juarez Mexico MX 811
756783 Sheffield United Kingdom GB 812
756507 Buffalo City South Africa ZA 813
755941 Vientiane Laos LA 814
754296 Yulin Shaanxi China CN 815
753877 Bali Indonesia ID 816
752910 Herat Afghanistan AF 817
752672 Longyan China CN 818
752570 Gold Coast Australia AU 819
751802 Gulbarga India IN 820
751521 Jammu India IN 821
751457 Miluo China CN 822

2.6.2 On a graph

library(plotly)
library(dplyr)

# Constants defined here but not referenced by the plot below
# (presumably meant for a reference line -- TODO confirm).
intercept <- 1000
slope <- -1

# Line chart of population against rank for the cities in `bigs`,
# with the city name attached to each point as text.
# Source: https://en.wikipedia.org/wiki/List_of_largest_cities
fig <- bigs %>%
  plot_ly(
    x = ~rank,
    y = ~population,
    text = ~city,
    name = "Biggest cities of the world",
    type = "scatter",
    mode = "lines"
  )

fig

2.7 Hamlet’s monologue

2.7.1 Shakespeare’s text

# English text of Hamlet's monologue, stored as a single character string.
# The literal opens with a line break, so the value starts with "\n";
# the verse lines are separated by "\n".
Hamlet <- "
To be, or not to be: that is the question:
Whether ’tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing end them? To die: to sleep;
No more; and, by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to, ’tis a consummation
Devoutly to be wish’d. To die, to sleep;
To sleep: perchance to dream: ay, there’s the rub;
For in that sleep of death what dreams may come
When we have shuffled off this mortal coil,
Must give us pause. There’s the respect
That makes calamity of so long life;
For who would bear the whips and scorns of time,
The oppressor’s wrong, the proud man’s contumely,
The pangs of dispriz’d love, the law’s delay,
The insolence of office, and the spurns
That patient merit of the unworthy takes,
When he himself might his quietus make
With a bare bodkin? who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscover’d country from whose bourn
No traveller returns, puzzles the will,
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience does make cowards of us all;
And thus the native hue of resolution
Is sicklied o’er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry,
And lose the name of action. Soft you now!
The fair Ophelia! Nymph, in thy orisons
Be all my sins remember’d."
# Print the stored string.
Hamlet
## [1] "\nTo be, or not to be: that is the question:\nWhether ’tis nobler in the mind to suffer\nThe slings and arrows of outrageous fortune,\nOr to take arms against a sea of troubles,\nAnd by opposing end them? To die: to sleep;\nNo more; and, by a sleep to say we end\nThe heart-ache and the thousand natural shocks\nThat flesh is heir to, ’tis a consummation\nDevoutly to be wish’d. To die, to sleep;\nTo sleep: perchance to dream: ay, there’s the rub;\nFor in that sleep of death what dreams may come\nWhen we have shuffled off this mortal coil,\nMust give us pause. There’s the respect\nThat makes calamity of so long life;\nFor who would bear the whips and scorns of time,\nThe oppressor’s wrong, the proud man’s contumely,\nThe pangs of dispriz’d love, the law’s delay,\nThe insolence of office, and the spurns\nThat patient merit of the unworthy takes,\nWhen he himself might his quietus make\nWith a bare bodkin? who would fardels bear,\nTo grunt and sweat under a weary life,\nBut that the dread of something after death,\nThe undiscover’d country from whose bourn\nNo traveller returns, puzzles the will,\nAnd makes us rather bear those ills we have\nThan fly to others that we know not of?\nThus conscience does make cowards of us all;\nAnd thus the native hue of resolution\nIs sicklied o’er with the pale cast of thought,\nAnd enterprises of great pith and moment\nWith this regard their currents turn awry,\nAnd lose the name of action. Soft you now!\nThe fair Ophelia! Nymph, in thy orisons\nBe all my sins remember’d."

2.7.2 Arany János fordítása

# Hungarian text of the same monologue (the section heading names it as
# János Arany's translation), stored as a single character string with the
# verse lines separated by "\n".
Arany <- "Lenni vagy nem lenni: az itt a kérdés.
Akkor nemesb-e a lélek, ha tűri
Balsorsa minden nyűgét s nyilait;
Vagy ha kiszáll tenger fájdalma ellen,
S fegyvert ragadva véget vet neki?
Meghalni – elszunnyadni – semmi több;
S egy álom által elvégezni mind
A szív keservét, a test eredendő,
Természetes rázkódtatásait
Oly cél, minőt óhajthat a kegyes.
Meghalni – elszunnyadni – és alunni!
Talán álmodni: ez a bökkenő;
Mert hogy mi álmok jőnek a halálban,
Ha majd leráztuk mind e földi bajt,
Ez visszadöbbent. E meggondolás az,
Mi a nyomort oly hosszan élteti
Mert ki viselné a kor gúny-csapásit,
Zsarnok bosszúját, gőgös ember dölyfét,
Útált szerelme kínját, pör-halasztást,
A hivatalnak packázásait,
S mind a rugást, mellyel méltatlanok
Bántalmazzák a tűrő érdemet
Ha nyúgalomba küldhetné magát
Egy puszta tőrrel? – Ki hordaná e terheket,
Izzadva, nyögve élte fáradalmin,
Ha rettegésünk egy halál utáni
Valamitől – a nem ismert tartomány,
Melyből nem tér meg utazó – le nem
Lohasztja kedvünk, inkább tűrni a
Jelen gonoszt, mint ismeretlenek
Felé sietni? – Ekképp az öntudat
Belőlünk mind gyávát csinál,
S az elszántság természetes szinét
A gondolat halványra betegíti;
Ily kétkedés által sok nagyszerű,
Fontos merény kifordul medriből
S elveszti »tett« nevét. – De csöndesen!
A szép Ophelia jő. – Szép hölgy, imádba
Legyenek foglalva minden bűneim."
# Print the stored string.
Arany
## [1] "Lenni vagy nem lenni: az itt a kérdés.\nAkkor nemesb-e a lélek, ha tűri\nBalsorsa minden nyűgét s nyilait;\nVagy ha kiszáll tenger fájdalma ellen,\nS fegyvert ragadva véget vet neki?\nMeghalni – elszunnyadni – semmi több;\nS egy álom által elvégezni mind\nA szív keservét, a test eredendő,\nTermészetes rázkódtatásait\nOly cél, minőt óhajthat a kegyes.\nMeghalni – elszunnyadni – és alunni!\nTalán álmodni: ez a bökkenő;\nMert hogy mi álmok jőnek a halálban,\nHa majd leráztuk mind e földi bajt,\nEz visszadöbbent. E meggondolás az,\nMi a nyomort oly hosszan élteti\nMert ki viselné a kor gúny-csapásit,\nZsarnok bosszúját, gőgös ember dölyfét,\nÚtált szerelme kínját, pör-halasztást,\nA hivatalnak packázásait,\nS mind a rugást, mellyel méltatlanok\nBántalmazzák a tűrő érdemet\nHa nyúgalomba küldhetné magát\nEgy puszta tőrrel? – Ki hordaná e terheket,\nIzzadva, nyögve élte fáradalmin,\nHa rettegésünk egy halál utáni\nValamitől – a nem ismert tartomány,\nMelyből nem tér meg utazó – le nem\nLohasztja kedvünk, inkább tűrni a\nJelen gonoszt, mint ismeretlenek\nFelé sietni? – Ekképp az öntudat\nBelőlünk mind gyávát csinál,\nS az elszántság természetes szinét\nA gondolat halványra betegíti;\nIly kétkedés által sok nagyszerű,\nFontos merény kifordul medriből\nS elveszti »tett« nevét. – De csöndesen!\nA szép Ophelia jő. – Szép hölgy, imádba\nLegyenek foglalva minden bűneim."

2.7.3 Key lexicographic facts to know about Hamlet (under construction)

3 Korpuszok (szótestek)

## Corpus consisting of 60 documents, showing 60 documents:
## 
##             Text Types Tokens Sentences Year  President       FirstName
##  1789-Washington   625   1537        23 1789 Washington          George
##  1793-Washington    96    147         4 1793 Washington          George
##       1797-Adams   826   2577        37 1797      Adams            John
##   1801-Jefferson   717   1923        41 1801  Jefferson          Thomas
##   1805-Jefferson   804   2380        45 1805  Jefferson          Thomas
##     1809-Madison   535   1261        21 1809    Madison           James
##     1813-Madison   541   1302        33 1813    Madison           James
##      1817-Monroe  1040   3677       121 1817     Monroe           James
##      1821-Monroe  1259   4886       131 1821     Monroe           James
##       1825-Adams  1003   3147        74 1825      Adams     John Quincy
##     1829-Jackson   517   1208        25 1829    Jackson          Andrew
##     1833-Jackson   499   1267        29 1833    Jackson          Andrew
##    1837-VanBuren  1315   4158        95 1837  Van Buren          Martin
##    1841-Harrison  1896   9125       210 1841   Harrison   William Henry
##        1845-Polk  1334   5186       153 1845       Polk      James Knox
##      1849-Taylor   496   1178        22 1849     Taylor         Zachary
##      1853-Pierce  1165   3636       104 1853     Pierce        Franklin
##    1857-Buchanan   945   3083        89 1857   Buchanan           James
##     1861-Lincoln  1075   3999       135 1861    Lincoln         Abraham
##     1865-Lincoln   360    775        26 1865    Lincoln         Abraham
##       1869-Grant   485   1229        40 1869      Grant      Ulysses S.
##       1873-Grant   552   1472        43 1873      Grant      Ulysses S.
##       1877-Hayes   831   2707        59 1877      Hayes   Rutherford B.
##    1881-Garfield  1021   3209       111 1881   Garfield        James A.
##   1885-Cleveland   676   1816        44 1885  Cleveland          Grover
##    1889-Harrison  1352   4721       157 1889   Harrison        Benjamin
##   1893-Cleveland   821   2125        58 1893  Cleveland          Grover
##    1897-McKinley  1232   4353       130 1897   McKinley         William
##    1901-McKinley   854   2437       100 1901   McKinley         William
##   1905-Roosevelt   404   1079        33 1905  Roosevelt        Theodore
##        1909-Taft  1437   5821       158 1909       Taft  William Howard
##      1913-Wilson   658   1882        68 1913     Wilson         Woodrow
##      1917-Wilson   549   1652        59 1917     Wilson         Woodrow
##     1921-Harding  1169   3719       148 1921    Harding       Warren G.
##    1925-Coolidge  1220   4440       196 1925   Coolidge          Calvin
##      1929-Hoover  1090   3860       158 1929     Hoover         Herbert
##   1933-Roosevelt   743   2057        85 1933  Roosevelt     Franklin D.
##   1937-Roosevelt   725   1989        96 1937  Roosevelt     Franklin D.
##   1941-Roosevelt   526   1519        68 1941  Roosevelt     Franklin D.
##   1945-Roosevelt   275    633        27 1945  Roosevelt     Franklin D.
##      1949-Truman   781   2504       116 1949     Truman        Harry S.
##  1953-Eisenhower   900   2743       119 1953 Eisenhower       Dwight D.
##  1957-Eisenhower   621   1907        92 1957 Eisenhower       Dwight D.
##     1961-Kennedy   566   1541        52 1961    Kennedy         John F.
##     1965-Johnson   568   1710        93 1965    Johnson   Lyndon Baines
##       1969-Nixon   743   2416       103 1969      Nixon Richard Milhous
##       1973-Nixon   544   1995        68 1973      Nixon Richard Milhous
##      1977-Carter   527   1370        52 1977     Carter           Jimmy
##      1981-Reagan   902   2781       129 1981     Reagan          Ronald
##      1985-Reagan   925   2909       123 1985     Reagan          Ronald
##        1989-Bush   795   2674       141 1989       Bush          George
##     1993-Clinton   642   1833        81 1993    Clinton            Bill
##     1997-Clinton   773   2436       111 1997    Clinton            Bill
##        2001-Bush   621   1806        97 2001       Bush       George W.
##        2005-Bush   772   2312        99 2005       Bush       George W.
##       2009-Obama   938   2689       110 2009      Obama          Barack
##       2013-Obama   814   2317        88 2013      Obama          Barack
##       2017-Trump   582   1660        88 2017      Trump       Donald J.
##       2021-Biden   812   2766       216 2021      Biden       Joseph R.
##       2025-Trump  1000   3347       177 2025      Trump       Donald J.
##                  Party
##                   none
##                   none
##             Federalist
##  Democratic-Republican
##  Democratic-Republican
##  Democratic-Republican
##  Democratic-Republican
##  Democratic-Republican
##  Democratic-Republican
##  Democratic-Republican
##             Democratic
##             Democratic
##             Democratic
##                   Whig
##                   Whig
##                   Whig
##             Democratic
##             Democratic
##             Republican
##             Republican
##             Republican
##             Republican
##             Republican
##             Republican
##             Democratic
##             Republican
##             Democratic
##             Republican
##             Republican
##             Republican
##             Republican
##             Democratic
##             Democratic
##             Republican
##             Republican
##             Republican
##             Democratic
##             Democratic
##             Democratic
##             Democratic
##             Democratic
##             Republican
##             Republican
##             Democratic
##             Democratic
##             Republican
##             Republican
##             Democratic
##             Republican
##             Republican
##             Republican
##             Democratic
##             Democratic
##             Republican
##             Republican
##             Democratic
##             Democratic
##             Republican
##             Democratic
##             Republican

3.1 Inauguration speeches - Number of Words

library(ggplot2)

# Number of tokens per inaugural address over time: a line with one point
# and one label (president's name, filled by party) per speech.
korpa %>%
  summary() %>%
  ggplot(aes(x = Year, y = Tokens, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(
    aes(label = President, fill = Party),
    nudge_x = 0.1,
    nudge_y = 0.1
  ) +
  ggtitle("Inauguration Speeches of Presidents - Number of Words") +
  theme_bw() +
  # theme() is closed here. Previously the scale call sat inside theme(), so
  # it was added to element_text() instead of to the plot.
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold")) +
  # NOTE(review): only `fill` is mapped above, so this colour scale appears to
  # have no visible effect -- confirm whether a fill scale was intended.
  paletteer::scale_color_paletteer_d("nord::frost")

3.2 Inauguration speeches - T(ypes)/(T)okens (R)atio

library(quanteda)
library(dplyr)
library(ggplot2)
# summary(korpa)

# Now the heavy part: turn the `korpa` corpus into the `mysummary` data frame
# and add the type/token ratio (TTR) to it.
data("data_corpus_inaugural")
korpa <- corpus(data_corpus_inaugural) # save the corpus under a short name
docvars_df <- docvars(data_corpus_inaugural)
# docvars_df
# str(docvars_df)
speech_texts <- as.character(data_corpus_inaugural)
# str(speech_texts)
mysummary <- as.data.frame(summary(korpa, verbose = FALSE))
# mysummary
ms <- mysummary %>% mutate(TTR = Types / Tokens)

# Leave out Washington's second address. It is selected by document name
# rather than by row position, so the filter still hits the right speech if
# the row order ever changes.
ms1 <- ms[ms$Text != "1793-Washington", ]

# options(repr.plot.width = 12, repr.plot.height = 12)
# TTR per address over time, labelled by president and filled by party.
ms1 %>%
  ggplot(aes(x = Year, y = TTR, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(aes(label = President, fill = Party)) +
  ggtitle("Inauguration Speeches of Presidents - T(ypes)/(T)okens (R)atio") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold"))

3.3 A basic aspect: sentence length vs. word length

library(plotly)
library(quanteda)
library(tokenizers)

# Texts and document variables of the inaugural corpus, one row per speech.
korpa <- corpus(data_corpus_inaugural) # save the corpus under a short name
df <- docvars(data_corpus_inaugural)
df$text <- as.character(data_corpus_inaugural)

# Lower-cased word tokens per speech, punctuation stripped, numbers kept.
words <- tokenize_words(
  df$text,
  lowercase = TRUE,
  stopwords = NULL,
  strip_punct = TRUE,
  strip_numeric = FALSE,
  simplify = FALSE
)
wc <- count_words(df$text)
st <- count_sentences(df$text)
# Total characters per speech, including whitespace and punctuation. Kept in
# case other chunks read `wl`; it is no longer used for the plot because
# wl / wc overstates the mean word length.
wl <- count_characters(df$text)
# Mean word length computed from the word tokens themselves.
mean_wl <- vapply(
  words,
  function(w) mean(nchar(w)),
  numeric(1),
  USE.NAMES = FALSE
)

# All plotted values come from this one data frame, so the labels and colours
# cannot get out of step with the x/y values.
plot_df <- data.frame(
  President = df$President,
  Party = df$Party,
  sentence_length = round(wc / st, 2),
  word_length = round(mean_wl, 2)
)

fig <- plot_ly(
  data = plot_df,
  type = "scatter",
  mode = "markers",
  x = ~sentence_length,
  y = ~word_length,
  text = ~President,
  color = ~Party,
  colors = c(
    "red", "green", "blue", "tomato", "magenta", "seagreen", "salmon"
  )
) %>%
  layout(
    title = "Sentence length vs. word length",
    xaxis = list(title = "Sentence Length in Words"),
    yaxis = list(title = "Word Length in Characters")
  )

fig

3.4 Phrase dispersion in the subcorpus 2001-2025

library(quanteda)
library(quanteda.textplots)
library(quanteda.textstats)
library(ggplot2)

# Inaugural corpus under a short name (swap in your own corpus here).
corp_us <- corpus(data_corpus_inaugural)

# Restrict to the addresses given after 2000, then tokenise them.
my_corpus <- corp_us %>% corpus_subset(Year > 2000)
toki <- my_corpus %>% tokens()

# Keyword-in-context hits for "country", three tokens of context per side.
kwic_results <- toki %>% kwic(pattern = "country", window = 3)
kwic_results
## Keyword-in-context with 53 matches.                                                                              
##     [2001-Bush, 25]         common in our | country | . With a                
##    [2001-Bush, 312]          creed of our | country | , it is                 
##    [2001-Bush, 375]            of our own | country | . The ambitions         
##    [2001-Bush, 415]             but not a | country | . We do                 
##    [2001-Bush, 570]           , makes our | country | more, not               
##    [2001-Bush, 674]              . If our | country | does not lead           
##    [2001-Bush, 948]       liberty and our | country | should make no          
##   [2001-Bush, 1226]           Many in our | country | do not know             
##   [2001-Bush, 1758]           to make our | country | more just and           
##     [2005-Bush, 64]        that unite our | country | . I am                  
##    [2005-Bush, 988]          of your free | country | . The rulers            
##   [2005-Bush, 1170]          measure. Our | country | has accepted obligations
##   [2005-Bush, 1321]       devotion to our | country | in deaths that          
##   [2005-Bush, 1427]         wealth of our | country | but to its              
##   [2005-Bush, 1555]         future of our | country | , we will               
##   [2005-Bush, 1843]             . And our | country | must abandon all        
##   [2005-Bush, 1890]  questions before our | country | are many.               
##  [2009-Obama, 1003]   forgotten what this | country | has already done        
##  [2009-Obama, 2550]          city and the | country | , alarmed at            
##   [2013-Obama, 667]   understand that our | country | cannot succeed when     
##   [2013-Obama, 942]       that built this | country | and investing in        
##   [2013-Obama, 990]          that in this | country | freedom is reserved     
##  [2013-Obama, 1089]        that make this | country | great. We               
##  [2013-Obama, 1831]     expelled from our | country | . Our journey           
##  [2013-Obama, 2098]            to God and | country | , not party             
##    [2017-Trump, 47]        to rebuild our | country | and restore its         
##   [2017-Trump, 255]       citizens of our | country | . Their victories       
##   [2017-Trump, 354]             , is your | country | . What truly            
##   [2017-Trump, 405]          women of our | country | will be forgotten       
##   [2017-Trump, 573]        and robbed our | country | of so much              
##   [2017-Trump, 724]     confidence of our | country | has dissipated over     
##  [2017-Trump, 1016]      - rebuilding our | country | with American hands     
##  [2017-Trump, 1154]        loyalty to our | country | , we will               
##  [2017-Trump, 1369]             fail. Our | country | will thrive and         
##   [2021-Biden, 448]   silently stalks the | country | . It's taken            
##  [2021-Biden, 1870]              way, our | country | will be stronger        
##  [2021-Biden, 2175]           and for our | country | . Amen.                 
##  [2021-Biden, 2743]           and to this | country | we love with            
##    [2025-Trump, 80]          forward, our | country | will flourish and       
##   [2025-Trump, 237]       is sweeping the | country | , sunlight is           
##   [2025-Trump, 405] illegally entered our | country | from all over           
##   [2025-Trump, 443]           people. Our | country | can no longer           
##   [2025-Trump, 539]    individuals in our | country | — some of               
##   [2025-Trump, 606]           it than any | country | anywhere in the         
##   [2025-Trump, 635]           to hate our | country | despite the love        
##   [2025-Trump, 977]        history of our | country | . As our                
##  [2025-Trump, 1216]        not forget our | country | , we will               
##  [2025-Trump, 1353]       invasion of our | country | . Under the             
##  [2025-Trump, 1442]         to defend our | country | from threats and        
##  [2025-Trump, 1571]            gas of any | country | on earth —              
##  [2025-Trump, 2293]     McKinley made our | country | very rich through       
##  [2025-Trump, 2335]          given to the | country | of Panama after         
##  [2025-Trump, 2898]                ., our | country | was forged and
Phrase dispersion
Phrase dispersion

3.5 A sophisticated measure for corpora: TF-IDF

#TF-IDF Top Keywords of Presidents