##
## Kapcsolódás csomaghoz: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Kapcsolódás csomaghoz: 'igraph'
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
This document includes both content and the output of any embedded R code chunks within the document.
# Abbreviations used for the four gospels.
gospels <- c("Mt", "Mk", "Lk", "Jn")

# Read the workbook Lk_short.xlsx into `evm`.
library(readxl)
evm <- read_xlsx("Lk_short.xlsx")
# evm <- evm$Lemma

# Build a tm corpus from the Lemma column of the workbook.
library(tm)
word.corpus <- Corpus(
  VectorSource(evm[["Lemma"]])
)
# To check that the corpus loaded properly, uncomment:
# inspect(word.corpus)
## # A tibble: 20 × 2
## Lemma n
## <chr> <int>
## 1 ὁ 2646
## 2 καί 1469
## 3 αὐτός 1086
## 4 δέ 542
## 5 λέγω 533
## 6 σύ 446
## 7 ἐν 361
## 8 εἰμί 360
## 9 ἐγώ 282
## 10 οὗτος 229
## 11 εἰς 226
## 12 ὅς 190
## 13 ὅτι 174
## 14 οὐ 172
## 15 πρός 166
## 16 ἐπί 161
## 17 πᾶς 158
## 18 μή 140
## 19 γίνομαι 131
## 20 ἀπό 125
# Turn Lemma into a factor whose levels follow descending count, so the
# bars appear from most to least frequent.
top20$Lemma <- with(
  top20,
  factor(Lemma, levels = Lemma[order(n, decreasing = TRUE)])
)

# Bar chart: one bar per lemma, bar height = count.
library(plotly)
fig <- plot_ly(
  data = top20,
  x = ~Lemma,
  y = ~n,
  type = "bar",
  marker = list(color = "sandybrown")
)

# Add the chart title and the axis titles.
fig <- layout(
  fig,
  title = "Bar Chart Sorted by Value",
  xaxis = list(title = "Lemma"),
  yaxis = list(title = "Előfordulás")
)

# Display the chart.
fig
Ez az ábra interaktív; ha ráhúzzuk az egeret, az aktuális lemmát (szótőt) és annak gyakoriságát mutatja.
#word.counts<-as.matrix(TermDocumentMatrix(word.corpus))
#word.freq<-sort(rowSums(word.counts), decreasing=TRUE)
#Load libraries for wordclouds
library(SnowballC)
library(tm)
library(wordcloud2)
library(RColorBrewer)
# Frequency table of the first 188 entries of the FullWord column.
greek_words <- evm[["FullWord"]][seq_len(188)]
word_freqs <- as.data.frame(table(greek_words))

# Drop every word that appears in the Ancient Greek ("grc") stopword list
# from the "perseus" source.
perzsa <- stopwords::stopwords("grc", source = "perseus")
word_freqs_filtered <- filter(word_freqs, !(greek_words %in% perzsa))

# Draw the word cloud. The seed is fixed with the aim of getting the same
# cloud on every run.
# NOTE(review): confirm that wordcloud2 actually uses R's RNG; if the layout
# is randomised on the JavaScript side the seed has no effect.
set.seed(32)
wc <- wordcloud2(
  data = word_freqs_filtered,
  size = 1,
  gridSize = 8,
  color = "random-dark",
  backgroundColor = "white"
)
wc
![](greek_network_Jn_prologue.png)
#Install the zipfR package
#install.packages("zipfR")
#Load the package
library(zipfR)
#Load necessary libraries
library(ggplot2)
# Parameters of the Zipf distribution.
N <- 100 # number of ranks
s <- 1.5 # exponent applied to the rank

# Unnormalised weight of rank k is 1 / k^s; dividing by the sum of all
# weights turns them into probabilities that add up to 1.
zipf_probs <- 1 / (1:N)^s
zipf_probs <- zipf_probs / sum(zipf_probs)

# One row per rank, with its probability.
zipf_data <- data.frame(
  Rank = seq_len(N),
  Probability = zipf_probs
)

# Show the 20 highest-ranked rows.
head(zipf_data, n = 20)
## Rank Probability
## 1 1 0.414443506
## 2 2 0.146527907
## 3 3 0.079759690
## 4 4 0.051805438
## 5 5 0.037068954
## 6 6 0.028199309
## 7 7 0.022377846
## 8 8 0.018315988
## 9 9 0.015349759
## 10 10 0.013105854
## 11 11 0.011359947
## 12 12 0.009969961
## 13 13 0.008841996
## 14 14 0.007911763
## 15 15 0.007133924
## 16 16 0.006475680
## 17 17 0.005912783
## 18 18 0.005426960
## 19 19 0.005004203
## 20 20 0.004633619
# Basic Zipf distribution plot: Probability against Rank on linear axes,
# drawn as a brown line with the minimal theme.
# The line thickness is set with `linewidth`; using `size` for line geoms
# is deprecated since ggplot2 3.4.0 and triggers a warning
# (`linewidth` requires ggplot2 >= 3.4.0).
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  labs(
    title = "Basic Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()
# Log/log Zipf distribution plot: the same Probability-against-Rank line,
# with both axes on a log10 scale.
# The line thickness is set with `linewidth`; using `size` for line geoms
# is deprecated since ggplot2 3.4.0 and triggers a warning
# (`linewidth` requires ggplot2 >= 3.4.0).
ggplot(zipf_data, aes(x = Rank, y = Probability)) +
  geom_line(color = "brown", linewidth = 0.75) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Log/log Scale Zipf Distribution",
    x = "Rank",
    y = "Probability"
  ) +
  theme_minimal()
# Packages: readxl reads the workbooks, tidyverse supplies count().
library(readxl)
library(tidyverse)

# For each gospel workbook: count every distinct FullWord value (most
# frequent first), keep the first 50 rows, and sum all the counts.

# Mt
evm <- read_xlsx("Mt_short.xlsx")
freqtab1 <- count(evm, FullWord, sort = TRUE)
top50Mt <- freqtab1[seq_len(50), ]
Mt_total <- sum(freqtab1[["n"]])

# Mk
evm <- read_xlsx("Mk_short.xlsx")
freqtab2 <- count(evm, FullWord, sort = TRUE)
top50Mk <- freqtab2[seq_len(50), ]
Mk_total <- sum(freqtab2[["n"]])

# Lk
evm <- read_xlsx("Lk_short.xlsx")
freqtab3 <- count(evm, FullWord, sort = TRUE)
top50Lk <- freqtab3[seq_len(50), ]
Lk_total <- sum(freqtab3[["n"]])

# Jn
evm <- read_xlsx("Jn_short.xlsx")
freqtab4 <- count(evm, FullWord, sort = TRUE)
top50Jn <- freqtab4[seq_len(50), ]
Jn_total <- sum(freqtab4[["n"]])

# Put the four top-50 tables side by side; every gospel contributes a word
# column followed by its count column.
evmtab50 <- setNames(
  cbind(top50Mt, top50Mk, top50Lk, top50Jn),
  c("Szó(Mt)", "n", "Szó(Mk)", "n", "Szó(Lk)", "n", "Szó(Jn)", "n")
)
evmtab50
## Szó(Mt) n Szó(Mk) n Szó(Lk) n Szó(Jn) n
## 1 καὶ 1175 καὶ 1085 καὶ 1466 καὶ 827
## 2 ὁ 493 ὁ 237 δὲ 513 ὁ 565
## 3 δὲ 471 αὐτοῦ 173 ὁ 399 ὅτι 271
## 4 τοῦ 294 εἰς 168 τοῦ 380 τοῦ 243
## 5 ἐν 293 δὲ 155 ἐν 360 τὸν 240
## 6 αὐτοῦ 266 τὸν 150 αὐτοῦ 255 ἐν 226
## 7 τὸ 227 ἐν 135 εἶπεν 229 δὲ 203
## 8 οἱ 224 τοῦ 132 εἰς 225 οὖν 200
## 9 τὸν 221 τὸ 131 τὸ 222 Ἰησοῦς 198
## 10 εἰς 218 τὴν 126 τὸν 216 εἰς 187
## 11 τῶν 206 οἱ 123 οἱ 185 αὐτοῦ 173
## 12 τὴν 203 αὐτῷ 121 τῷ 177 αὐτῷ 173
## 13 αὐτῷ 170 αὐτοῖς 120 ὅτι 174 οὐκ 151
## 14 τῷ 149 αὐτὸν 117 τὴν 171 τὸ 150
## 15 ὅτι 140 τῶν 108 πρὸς 161 ἵνα 145
## 16 μὴ 123 ὅτι 102 αὐτῷ 153 οἱ 144
## 17 τῆς 121 τῆς 80 αὐτὸν 145 τὴν 142
## 18 ἡ 121 τῷ 77 τῇ 136 ἐκ 139
## 19 εἶπεν 119 μὴ 72 μὴ 132 λέγει 123
## 20 Ἰησοῦς 111 οὐκ 66 τῶν 131 ἡ 122
## 21 τὰ 110 ἵνα 64 τῆς 119 τῷ 114
## 22 γὰρ 108 πρὸς 63 τοὺς 118 εἶπεν 112
## 23 τοὺς 108 τοὺς 63 ἐπὶ 116 τῶν 109
## 24 τοῖς 108 λέγει 62 σου 104 οὐ 108
## 25 ὑμῖν 107 αὐτόν 61 τὰ 104 ἐστιν 107
## 26 αὐτοῖς 103 γὰρ 60 ἡ 102 μὴ 106
## 27 τῇ 103 ἡ 60 οὐκ 99 ἐγὼ 103
## 28 αὐτῶν 100 εἶπεν 59 αὐτῶν 98 ὑμῖν 103
## 29 ἐπὶ 99 τῇ 59 ὑμῖν 96 αὐτοῖς 100
## 30 οὐκ 98 Ἰησοῦς 58 αὐτοῖς 91 αὐτὸν 100
## 31 σου 98 τοῖς 56 γὰρ 87 με 99
## 32 αὐτὸν 94 τί 54 μου 87 μου 98
## 33 οὐ 92 τὰ 52 ἀπὸ 83 πρὸς 97
## 34 ἀπὸ 92 ἐπὶ 52 ἦν 75 ἦν 96
## 35 τότε 90 ἐστιν 52 τοῖς 74 τῆς 82
## 36 μου 83 οὐ 45 θεοῦ 72 τὰ 80
## 37 ἐστιν 83 αὐτῶν 42 ἐγένετο 69 αὐτόν 76
## 38 ὑμῶν 76 ἐκ 42 ἐστιν 69 τῇ 72
## 39 ἢ 65 εὐθὺς 41 ὑμῶν 67 ὑμεῖς 68
## 40 τί 64 μου 40 αὐτόν 66 περὶ 67
## 41 λέγω 61 σου 39 οὐ 66 ταῦτα 61
## 42 ἰδοὺ 60 ἦν 38 τί 62 γὰρ 60
## 43 ἐὰν 58 ὑμῖν 37 ἰδοὺ 57 ἀπεκρίθη 57
## 44 οὖν 56 μετὰ 36 Ἰησοῦς 55 τοὺς 55
## 45 λέγει 54 ἀπὸ 36 ὡς 51 ἀλλὰ 52
## 46 διὰ 53 ἢ 33 εἰ 50 τοῦτο 51
## 47 εἰ 53 θεοῦ 31 λέγω 50 ἀλλ’ 50
## 48 ἐκ 52 τὰς 31 ἐκ 50 ἀμὴν 50
## 49 λέγων 49 ἔλεγεν 31 αὐτούς 47 εἰ 49
## 50 ἕως 49 διὰ 30 λέγων 47 τί 48
#install.packages("plotly")
library(plotly)
# One row per position 1..50; y1..y4 hold the counts from the top-50 tables
# of Mt, Mk, Lk and Jn respectively.
datus <- data.frame(
  Roll_number = seq_len(50),
  y1 = top50Mt[["n"]],
  y2 = top50Mk[["n"]],
  y3 = top50Lk[["n"]],
  y4 = top50Jn[["n"]]
)

# Line chart: start with the Mt trace, then add Mk, Jn and Lk in that order.
fig <- plotly::plot_ly(
  data = datus,
  x = ~Roll_number,
  y = ~y1,
  name = "Mt",
  type = "scatter",
  mode = "lines"
)
fig <- add_trace(fig, y = ~y2, name = "Mk")
fig <- add_trace(fig, y = ~y4, name = "Jn")
fig <- add_trace(fig, y = ~y3, name = "Lk")

# Chart title, axis titles and legend title.
# NOTE(review): "Legend Title" looks like a leftover placeholder - confirm
# the intended legend caption.
fig <- layout(
  fig,
  title = "Zipfs law and the gospels",
  xaxis = list(title = "Helyezés"),
  yaxis = list(title = "Előfordulás"),
  legend = list(title = list(text = "Legend Title"))
)
fig
| population | city | country | cca2 | rank |
|---|---|---|---|---|
| 37036200 | Tokyo | Japan | JP | 1 |
| 34665600 | Delhi | India | IN | 2 |
| 30482100 | Shanghai | China | CN | 3 |
| 24652900 | Dhaka | Bangladesh | BD | 4 |
| 23074200 | Cairo | Egypt | EG | 5 |
| 22990000 | Sao Paulo | Brazil | BR | 6 |
| 22752400 | Mexico City | Mexico | MX | 7 |
| 22596500 | Beijing | China | CN | 8 |
| 22089000 | Mumbai | India | IN | 9 |
| 18921600 | Osaka | Japan | JP | 10 |
| 18171200 | Chongqing | China | CN | 11 |
| 18076800 | Karachi | Pakistan | PK | 12 |
| 17778500 | Kinshasa | DR Congo | CD | 13 |
| 17156400 | Lagos | Nigeria | NG | 14 |
| 16236700 | Istanbul | Turkey | TR | 15 |
| 15845200 | Kolkata | India | IN | 16 |
| 15752300 | Buenos Aires | Argentina | AR | 17 |
| 15230600 | Manila | Philippines | PH | 18 |
| 14878700 | Guangzhou | China | CN | 19 |
| 14825800 | Lahore | Pakistan | PK | 20 |
| 14704100 | Tianjin | China | CN | 21 |
| 14395400 | Bangalore | India | IN | 22 |
| 13923200 | Rio de Janeiro | Brazil | BR | 23 |
| 13545400 | Shenzhen | China | CN | 24 |
| 12737400 | Moscow | Russia | RU | 25 |
| 12336000 | Chennai | India | IN | 26 |
| 11795800 | Bogota | Colombia | CO | 27 |
| 11634100 | Jakarta | Indonesia | ID | 28 |
| 11517300 | Lima | Peru | PE | 29 |
| 11391700 | Bangkok | Thailand | TH | 30 |
| 11346800 | Paris | France | FR | 31 |
| 11337900 | Hyderabad | India | IN | 32 |
| 10174900 | Nanjing | China | CN | 33 |
| 10027900 | Luanda | Angola | AO | 34 |
| 10025800 | Seoul | South Korea | KR | 35 |
| 9998870 | Chengdu | China | CN | 36 |
| 9840740 | London | United Kingdom | GB | 37 |
| 9816320 | Ho Chi Minh City | Vietnam | VN | 38 |
| 9729740 | Tehran | Iran | IR | 39 |
| 9534790 | Nagoya | Japan | JP | 40 |
| 9222080 | Xi-an | China | CN | 41 |
| 9061820 | Ahmedabad | India | IN | 42 |
| 9000280 | Kuala Lumpur | Malaysia | MY | 43 |
| 8986480 | Wuhan | China | CN | 44 |
| 8592820 | Suzhou | China | CN | 45 |
| 8591040 | Hangzhou | China | CN | 46 |
| 8581730 | Surat | India | IN | 47 |
| 8561520 | Dar es Salaam | Tanzania | TZ | 48 |
| 8141120 | Baghdad | Iraq | IQ | 49 |
| 7974270 | Shenyang | China | CN | 50 |
| 7952860 | Riyadh | Saudi Arabia | SA | 51 |
| 7936530 | New York City | United States | US | 52 |
| 7817160 | Foshan | China | CN | 53 |
| 7772860 | Dongguan | China | CN | 54 |
| 7768510 | Hong Kong | Hong Kong | HK | 55 |
| 7525720 | Pune | India | IN | 56 |
| 7066860 | Haerbin | China | CN | 57 |
| 6999460 | Santiago | Chile | CL | 58 |
| 6810530 | Madrid | Spain | ES | 59 |
| 6754180 | Khartoum | Sudan | SD | 60 |
| 6491290 | Toronto | Canada | CA | 61 |
| 6444580 | Johannesburg | South Africa | ZA | 62 |
| 6351680 | Belo Horizonte | Brazil | BR | 63 |
| 6347380 | Dalian | China | CN | 64 |
| 6217970 | Qingdao | China | CN | 65 |
| 6157270 | Singapore | Singapore | SG | 66 |
| 6156140 | Zhengzhou | China | CN | 67 |
| 6065850 | Ji nan Shandong | China | CN | 68 |
| 6056880 | Abidjan | Ivory Coast | CI | 69 |
| 5956680 | Addis Ababa | Ethiopia | ET | 70 |
| 5813190 | Yangon | Myanmar | MM | 71 |
| 5807050 | Alexandria | Egypt | EG | 72 |
| 5766990 | Nairobi | Kenya | KE | 73 |
| 5733250 | Barcelona | Spain | ES | 74 |
| 5653490 | Chittagong | Bangladesh | BD | 75 |
| 5602200 | Hanoi | Vietnam | VN | 76 |
| 5597340 | Saint Petersburg | Russia | RU | 77 |
| 5578580 | Guadalajara | Mexico | MX | 78 |
| 5550490 | Ankara | Turkey | TR | 79 |
| 5465920 | Fukuoka | Japan | JP | 80 |
| 5391890 | Melbourne | Australia | AU | 81 |
| 5272360 | Monterrey | Mexico | MX | 82 |
| 5248790 | Sydney | Australia | AU | 83 |
| 5132170 | Urumqi | China | CN | 84 |
| 5128270 | Changsha | China | CN | 85 |
| 5063580 | Cape Town | South Africa | ZA | 86 |
| 5021600 | Jiddah | Saudi Arabia | SA | 87 |
| 4990930 | Brasilia | Brazil | BR | 88 |
| 4955680 | Kunming | China | CN | 89 |
| 4891020 | Changchun | China | CN | 90 |
| 4877020 | Kabul | Afghanistan | AF | 91 |
| 4854260 | Yaounde | Cameroon | CM | 92 |
| 4830170 | Hefei | China | CN | 93 |
| 4770300 | Ningbo | China | CN | 94 |
| 4737590 | Shantou | China | CN | 95 |
| 4645320 | Kano | Nigeria | NG | 96 |
| 4568530 | Tel Aviv | Israel | IL | 97 |
| 4563850 | New Taipei | Taiwan | TW | 98 |
| 4534990 | Shijiazhuang | China | CN | 99 |
| 4411110 | Jaipur | India | IN | 100 |
| 4387410 | Kozhikode | India | IN | 101 |
| 4383600 | Nanning | China | CN | 102 |
| 4377310 | Montreal | Canada | CA | 103 |
| 4347100 | Rome | Italy | IT | 104 |
| 4346420 | Douala | Cameroon | CM | 105 |
| 4344050 | Recife | Brazil | BR | 106 |
| 4343910 | Malappuram | India | IN | 107 |
| 4305290 | Taiyuan Shanxi | China | CN | 108 |
| 4284450 | Fortaleza | Brazil | BR | 109 |
| 4268960 | Porto Alegre | Brazil | BR | 110 |
| 4265160 | Kampala | Uganda | UG | 111 |
| 4262130 | Ekurhuleni | South Africa | ZA | 112 |
| 4228980 | Antananarivo | Madagascar | MG | 113 |
| 4209940 | Abuja | Nigeria | NG | 114 |
| 4180040 | Changzhou | China | CN | 115 |
| 4172810 | Medellin | Colombia | CO | 116 |
| 4144130 | Ibadan | Nigeria | NG | 117 |
| 4132670 | Lucknow | India | IN | 118 |
| 4104120 | Nanchang | China | CN | 119 |
| 4093180 | Wenzhou | China | CN | 120 |
| 4077760 | Xiamen | China | CN | 121 |
| 4072430 | Fuzhou Fujian | China | CN | 122 |
| 4036230 | Kumasi | Ghana | GH | 123 |
| 4029910 | Salvador | Brazil | BR | 124 |
| 4024170 | Tangshan Hebei | China | CN | 125 |
| 4012310 | Casablanca | Morocco | MA | 126 |
| 3926050 | Bekasi | Indonesia | ID | 127 |
| 3892830 | Faisalabad | Pakistan | PK | 128 |
| 3889140 | Curitiba | Brazil | BR | 129 |
| 3793780 | Port Harcourt | Nigeria | NG | 130 |
| 3770958 | Los Angeles | United States | US | 131 |
| 3736730 | Guiyang | China | CN | 132 |
| 3719980 | Thrissur | India | IN | 133 |
| 3658640 | Dakar | Senegal | SN | 134 |
| 3648110 | Santo Domingo | Dominican Republic | DO | 135 |
| 3627220 | Asuncion | Paraguay | PY | 136 |
| 3604550 | Kochi | India | IN | 137 |
| 3580190 | Berlin | Germany | DE | 138 |
| 3559030 | Wuxi | China | CN | 139 |
| 3527430 | Sanaa | Yemen | YE | 140 |
| 3520820 | Ouagadougou | Burkina Faso | BF | 141 |
| 3491580 | Campinas | Brazil | BR | 142 |
| 3484430 | Busan | South Korea | KR | 143 |
| 3482830 | Indore | India | IN | 144 |
| 3470870 | Lusaka | Zambia | ZM | 145 |
| 3460660 | Mashhad | Iran | IR | 146 |
| 3443290 | Puebla | Mexico | MX | 147 |
| 3430880 | Lanzhou | China | CN | 148 |
| 3405000 | Kuwait City | Kuwait | KW | 149 |
| 3345370 | Kanpur | India | IN | 150 |
| 3301090 | Durban | South Africa | ZA | 151 |
| 3244750 | Guayaquil | Ecuador | EC | 152 |
| 3229740 | Guatemala City | Guatemala | GT | 153 |
| 3218290 | Depok | Indonesia | ID | 154 |
| 3209300 | Pyongyang | North Korea | KP | 155 |
| 3180340 | Bamako | Mali | ML | 156 |
| 3170180 | Nagpur | India | IN | 157 |
| 3167450 | Milan | Italy | IT | 158 |
| 3159030 | Handan | China | CN | 159 |
| 3158720 | Coimbatore | India | IN | 160 |
| 3158340 | Mbuji-Mayi | DR Congo | CD | 161 |
| 3155320 | Athens | Greece | GR | 162 |
| 3152090 | Izmir | Turkey | TR | 163 |
| 3152050 | Huaian | China | CN | 164 |
| 3137620 | Surabaya | Indonesia | ID | 165 |
| 3133080 | Port-au-Prince | Haiti | HT | 166 |
| 3094640 | Dubai | United Arab Emirates | AE | 167 |
| 3093870 | Zhongshan | China | CN | 168 |
| 3072530 | Thiruvananthapuram | India | IN | 169 |
| 3064290 | Weifang | China | CN | 170 |
| 3061340 | Lubumbashi | DR Congo | CD | 171 |
| 3028270 | Lisbon | Portugal | PT | 172 |
| 3018160 | Kiev | Ukraine | UA | 173 |
| 3015110 | Caracas | Venezuela | VE | 174 |
| 3004130 | Algiers | Algeria | DZ | 175 |
| 2955410 | Pretoria | South Africa | ZA | 176 |
| 2951330 | Shaoxing | China | CN | 177 |
| 2930170 | Shizuoka | Japan | JP | 178 |
| 2927080 | Goiania | Brazil | BR | 179 |
| 2916790 | Cali | Colombia | CO | 180 |
| 2898490 | Yantai | China | CN | 181 |
| 2890880 | Huizhou | China | CN | 182 |
| 2876180 | Zibo | China | CN | 183 |
| 2873700 | Incheon | South Korea | KR | 184 |
| 2846420 | Mogadishu | Somalia | SO | 185 |
| 2832580 | Manchester | United Kingdom | GB | 186 |
| 2813480 | Brazzaville | Republic of the Congo | CG | 187 |
| 2799960 | Damascus | Syria | SY | 188 |
| 2788380 | Accra | Ghana | GH | 189 |
| 2779200 | Taipei | Taiwan | TW | 190 |
| 2758100 | Bandung | Indonesia | ID | 191 |
| 2725560 | Luoyang | China | CN | 192 |
| 2719520 | Toluca de Lerdo | Mexico | MX | 193 |
| 2707920 | Vancouver | Canada | CA | 194 |
| 2704620 | Birmingham | United Kingdom | GB | 195 |
| 2689540 | Patna | India | IN | 196 |
| 2686290 | Bhopal | India | IN | 197 |
| 2665080 | Tashkent | Uzbekistan | UZ | 198 |
| 2653580 | Sapporo | Japan | JP | 199 |
| 2626610 | Tangerang | Indonesia | ID | 200 |
| 2613750 | Nantong | China | CN | 201 |
| 2611867 | Chicago | United States | US | 202 |
| 2568170 | Brisbane | Australia | AU | 203 |
| 2548930 | Peshawar | Pakistan | PK | 204 |
| 2545030 | Tunis | Tunisia | TN | 205 |
| 2543540 | Gujranwala | Pakistan | PK | 206 |
| 2521590 | Medan | Indonesia | ID | 207 |
| 2500940 | Hohhot | China | CN | 208 |
| 2496500 | Baku | Azerbaijan | AZ | 209 |
| 2486560 | Rawalpindi | Pakistan | PK | 210 |
| 2478340 | Agra | India | IN | 211 |
| 2465140 | Kannur | India | IN | 212 |
| 2453800 | Belem | Brazil | BR | 213 |
| 2447620 | Liuzhou | China | CN | 214 |
| 2440420 | Visakhapatnam | India | IN | 215 |
| 2438480 | Aleppo | Syria | SY | 216 |
| 2434640 | Manaus | Brazil | BR | 217 |
| 2433680 | San Juan | Puerto Rico | PR | 218 |
| 2432440 | Maracaibo | Venezuela | VE | 219 |
| 2425800 | Phnom Penh | Cambodia | KH | 220 |
| 2425700 | Baotou | China | CN | 221 |
| 2424920 | Vadodara | India | IN | 222 |
| 2396400 | Barranquilla | Colombia | CO | 223 |
| 2379330 | Beirut | Lebanon | LB | 224 |
| 2376040 | Xuzhou | China | CN | 225 |
| 2356610 | Taoyuan | Taiwan | TW | 226 |
| 2351140 | Nashik | India | IN | 227 |
| 2350700 | Vijayawada | India | IN | 228 |
| 2337590 | Sendai | Japan | JP | 229 |
| 2333220 | Tijuana | Mexico | MX | 230 |
| 2327990 | Esfahan | Iran | IR | 231 |
| 2324082 | Houston | United States | US | 232 |
| 2315020 | Putian | China | CN | 233 |
| 2273240 | Amman | Jordan | JO | 234 |
| 2258380 | Multan | Pakistan | PK | 235 |
| 2257210 | Wuhu Anhui | China | CN | 236 |
| 2252170 | Kollam | India | IN | 237 |
| 2251590 | Conakry | Guinea | GN | 238 |
| 2220560 | Grande Vitoria | Brazil | BR | 239 |
| 2218580 | Mecca | Saudi Arabia | SA | 240 |
| 2216800 | Yangzhou | China | CN | 241 |
| 2198540 | Taizhou Zhejiang | China | CN | 242 |
| 2190060 | Baoding | China | CN | 243 |
| 2182170 | Naples | Italy | IT | 244 |
| 2181080 | Daegu | South Korea | KR | 245 |
| 2169190 | Perth | Australia | AU | 246 |
| 2161430 | Linyi Shandong | China | CN | 247 |
| 2156350 | Havana | Cuba | CU | 248 |
| 2150330 | Rajkot | India | IN | 249 |
| 2142600 | Bursa | Turkey | TR | 250 |
| 2141520 | Brussels | Belgium | BE | 251 |
| 2108740 | Lome | Togo | TG | 252 |
| 2095420 | Haikou | China | CN | 253 |
| 2085470 | Daqing | China | CN | 254 |
| 2080810 | Lianyungang | China | CN | 255 |
| 2073240 | Yancheng Jiangsu | China | CN | 256 |
| 2070930 | Minsk | Belarus | BY | 257 |
| 2058730 | Hyderabad | Pakistan | PK | 258 |
| 2058190 | Hiroshima | Japan | JP | 259 |
| 2054540 | Panama City | Panama | PA | 260 |
| 2052410 | Semarang | Indonesia | ID | 261 |
| 2044650 | Benin City | Nigeria | NG | 262 |
| 2042040 | Almaty | Kazakhstan | KZ | 263 |
| 2033990 | Davao City | Philippines | PH | 264 |
| 2030790 | Valencia | Venezuela | VE | 265 |
| 2028680 | Ludhiana | India | IN | 266 |
| 2020970 | Rabat | Morocco | MA | 267 |
| 2017260 | Quito | Ecuador | EC | 268 |
| 2007500 | Can Tho | Vietnam | VN | 269 |
| 2005500 | Vienna | Austria | AT | 270 |
| 1997370 | La Paz | Bolivia | BO | 271 |
| 1984660 | Matola | Mozambique | MZ | 272 |
| 1981790 | Baixada Santista | Brazil | BR | 273 |
| 1968620 | Zhuhai | China | CN | 274 |
| 1966630 | Quanzhou | China | CN | 275 |
| 1956470 | West Yorkshire | United Kingdom | GB | 276 |
| 1951440 | Datong | China | CN | 277 |
| 1950390 | Leon de los Aldamas | Mexico | MX | 278 |
| 1923440 | Raipur | India | IN | 279 |
| 1911650 | Madurai | India | IN | 280 |
| 1909930 | Sharjah | United Arab Emirates | AE | 281 |
| 1904140 | Mosul | Iraq | IQ | 282 |
| 1891230 | Santa Cruz | Bolivia | BO | 283 |
| 1887700 | Palembang | Indonesia | ID | 284 |
| 1885950 | Cixi | China | CN | 285 |
| 1876820 | Adana | Turkey | TR | 286 |
| 1874890 | Meerut | India | IN | 287 |
| 1866580 | La Laguna | Mexico | MX | 288 |
| 1858910 | Batam | Indonesia | ID | 289 |
| 1858710 | Gaziantep | Turkey | TR | 290 |
| 1826520 | Jiangmen | China | CN | 291 |
| 1826010 | Varanasi | India | IN | 292 |
| 1815750 | Kananga | DR Congo | CD | 293 |
| 1809850 | Turin | Italy | IT | 294 |
| 1801750 | Xiangyang | China | CN | 295 |
| 1800420 | Yichang | China | CN | 296 |
| 1800230 | Warsaw | Poland | PL | 297 |
| 1795680 | Yinchuan | China | CN | 298 |
| 1794650 | Monrovia | Liberia | LR | 299 |
| 1788170 | Montevideo | Uruguay | UY | 300 |
| 1787710 | Hamburg | Germany | DE | 301 |
| 1787230 | Lyon | France | FR | 302 |
| 1783690 | Tiruppur | India | IN | 303 |
| 1782240 | Budapest | Hungary | HU | 304 |
| 1779600 | Suqian | China | CN | 305 |
| 1777610 | Srinagar | India | IN | 306 |
| 1767760 | Jamshedpur | India | IN | 307 |
| 1767330 | Aurangabad | India | IN | 308 |
| 1767110 | Onitsha | Nigeria | NG | 309 |
| 1763900 | Shiraz | Iran | IR | 310 |
| 1762820 | Qinhuangdao | China | CN | 311 |
| 1758700 | Bucharest | Romania | RO | 312 |
| 1740860 | Xining | China | CN | 313 |
| 1739050 | Hengyang | China | CN | 314 |
| 1738620 | Anyang | China | CN | 315 |
| 1738430 | Anshan | China | CN | 316 |
| 1737760 | Stockholm | Sweden | SE | 317 |
| 1737390 | Makassar | Indonesia | ID | 318 |
| 1724890 | Ulaanbaatar | Mongolia | MN | 319 |
| 1722780 | N-Djamena | Chad | TD | 320 |
| 1719310 | Jilin | China | CN | 321 |
| 1718940 | Glasgow | United Kingdom | GB | 322 |
| 1711130 | Auckland | New Zealand | NZ | 323 |
| 1706660 | Novosibirsk | Russia | RU | 324 |
| 1702510 | Muscat | Oman | OM | 325 |
| 1695670 | Tabriz | Iran | IR | 326 |
| 1689400 | Qiqihaer | China | CN | 327 |
| 1687900 | Calgary | Canada | CA | 328 |
| 1675144 | Phoenix | United States | US | 329 |
| 1672900 | Kathmandu | Nepal | NP | 330 |
| 1664420 | Jodhpur | India | IN | 331 |
| 1650280 | Tegucigalpa | Honduras | HN | 332 |
| 1644610 | Marseille | France | FR | 333 |
| 1640600 | Cordoba | Argentina | AR | 334 |
| 1633020 | Harare | Zimbabwe | ZW | 335 |
| 1631090 | Rosario | Argentina | AR | 336 |
| 1625980 | Ciudad Juarez | Mexico | MX | 337 |
| 1624660 | Medina | Saudi Arabia | SA | 338 |
| 1622270 | Jining Shandong | China | CN | 339 |
| 1621720 | Ranchi | India | IN | 340 |
| 1618740 | Abu Dhabi | United Arab Emirates | AE | 341 |
| 1614660 | Karaj | Iran | IR | 342 |
| 1612940 | Nouakchott | Mauritania | MR | 343 |
| 1599350 | Kota | India | IN | 344 |
| 1598460 | Zhangjiakou | China | CN | 345 |
| 1594300 | Mandalay | Myanmar | MM | 346 |
| 1590890 | Munich | Germany | DE | 347 |
| 1588600 | Edmonton | Canada | CA | 348 |
| 1586180 | Daejon | South Korea | KR | 349 |
| 1582200 | Jabalpur | India | IN | 350 |
| 1575050 | Natal | Brazil | BR | 351 |
| 1565860 | Gaoxiong | Taiwan | TW | 352 |
| 1565300 | Asansol | India | IN | 353 |
| 1564320 | Huainan | China | CN | 354 |
| 1563820 | Yiwu | China | CN | 355 |
| 1563600 | Homs | Syria | SY | 356 |
| 1561490 | Niamey | Niger | NE | 357 |
| 1553460 | Mombasa | Kenya | KE | 358 |
| 1550560 | Ganzhou | China | CN | 359 |
| 1548210 | Grande Sao Luis | Brazil | BR | 360 |
| 1546690 | Kisangani | DR Congo | CD | 361 |
| 1545080 | Chaozhou | China | CN | 362 |
| 1543500 | Gwalior | India | IN | 363 |
| 1537000 | Yekaterinburg | Russia | RU | 364 |
| 1536730 | Gwangju | South Korea | KR | 365 |
| 1532653 | San Antonio | United States | US | 366 |
| 1524650 | Basra | Iraq | IQ | 367 |
| 1523670 | Allahabad | India | IN | 368 |
| 1517114 | Philadelphia | United States | US | 369 |
| 1511450 | Jiaxing | China | CN | 370 |
| 1511110 | Amritsar | India | IN | 371 |
| 1506700 | Taizhou Jiangsu | China | CN | 372 |
| 1503910 | Hai Phong | Vietnam | VN | 373 |
| 1502640 | San Jose | Costa Rica | CR | 374 |
| 1499360 | Weihai | China | CN | 375 |
| 1491180 | Chon Buri | Thailand | TH | 376 |
| 1476370 | Liuyang | China | CN | 377 |
| 1471240 | Liuan | China | CN | 378 |
| 1467460 | Kaifeng | China | CN | 379 |
| 1467160 | Taian Shandong | China | CN | 380 |
| 1465780 | Ottawa | Canada | CA | 381 |
| 1460280 | Cochabamba | Bolivia | BO | 382 |
| 1459290 | Queretaro | Mexico | MX | 383 |
| 1458890 | Rizhao | China | CN | 384 |
| 1457020 | Uyo | Nigeria | NG | 385 |
| 1455100 | Zurich | Switzerland | CH | 386 |
| 1450150 | Konya | Turkey | TR | 387 |
| 1447780 | Joao Pessoa | Brazil | BR | 388 |
| 1447340 | Mwanza | Tanzania | TZ | 389 |
| 1444820 | Nanchong | China | CN | 390 |
| 1441800 | Dhanbad | India | IN | 391 |
| 1440090 | Dongying | China | CN | 392 |
| 1434030 | Zunyi | China | CN | 393 |
| 1425480 | Zhanjiang | China | CN | 394 |
| 1423630 | Pointe-Noire | Republic of the Congo | CG | 395 |
| 1418040 | Shiyan | China | CN | 396 |
| 1415820 | Kharkiv | Ukraine | UA | 397 |
| 1413250 | Bareilly | India | IN | 398 |
| 1412950 | Belgrade | Serbia | RS | 399 |
| 1411010 | Bucaramanga | Colombia | CO | 400 |
| 1400850 | Mianyang Sichuan | China | CN | 401 |
| 1400740 | Copenhagen | Denmark | DK | 402 |
| 1394750 | Tengzhou | China | CN | 403 |
| 1394590 | Antalya | Turkey | TR | 404 |
| 1393410 | Samut Prakan | Thailand | TH | 405 |
| 1393190 | Taizhong | Taiwan | TW | 406 |
| 1393010 | Lilongwe | Malawi | MW | 407 |
| 1392940 | Adelaide | Australia | AU | 408 |
| 1392280 | Qom | Iran | IR | 409 |
| 1389672 | San Diego | United States | US | 410 |
| 1388800 | Suweon | South Korea | KR | 411 |
| 1387920 | Maceio | Brazil | BR | 412 |
| 1387360 | Freetown | Sierra Leone | SL | 413 |
| 1385370 | Yingkou | China | CN | 414 |
| 1385210 | Suzhou | China | CN | 415 |
| 1381230 | Tanger | Morocco | MA | 416 |
| 1379560 | Aligarh | India | IN | 417 |
| 1375600 | Ad-Dammam | Saudi Arabia | SA | 418 |
| 1375220 | Abomey-Calavi | Benin | BJ | 419 |
| 1374630 | Joinville | Brazil | BR | 420 |
| 1369880 | Moradabad | India | IN | 421 |
| 1369430 | Bukavu | DR Congo | CD | 422 |
| 1364640 | Pekan Baru | Indonesia | ID | 423 |
| 1361880 | Maoming | China | CN | 424 |
| 1361840 | Nnewi | Nigeria | NG | 425 |
| 1354930 | Jieyang | China | CN | 426 |
| 1354890 | Helsinki | Finland | FI | 427 |
| 1352560 | Astana | Kazakhstan | KZ | 428 |
| 1350150 | Bujumbura | Burundi | BI | 429 |
| 1345720 | Mysore | India | IN | 430 |
| 1344890 | Ruian | China | CN | 431 |
| 1336960 | Fes | Morocco | MA | 432 |
| 1334200 | Porto | Portugal | PT | 433 |
| 1332270 | Fushun Liaoning | China | CN | 434 |
| 1331940 | Prague | Czech Republic | CZ | 435 |
| 1330500 | Port Elizabeth | South Africa | ZA | 436 |
| 1329990 | Jinhua | China | CN | 437 |
| 1329830 | Kigali | Rwanda | RW | 438 |
| 1324800 | Ahvaz | Iran | IR | 439 |
| 1323850 | Florianopolis | Brazil | BR | 440 |
| 1320910 | Bhubaneswar | India | IN | 441 |
| 1317340 | Baoji | China | CN | 442 |
| 1315330 | Durg-Bhilainagar | India | IN | 443 |
| 1312850 | Pingdingshan Henan | China | CN | 444 |
| 1310720 | San Luis Potosi | Mexico | MX | 445 |
| 1309840 | Liupanshui | China | CN | 446 |
| 1309420 | Puning | China | CN | 447 |
| 1305090 | Chifeng | China | CN | 448 |
| 1302638 | Dallas | United States | US | 449 |
| 1301130 | Islamabad | Pakistan | PK | 450 |
| 1299580 | Kazan | Russia | RU | 451 |
| 1299480 | Zhuzhou | China | CN | 452 |
| 1299130 | Zhenjiang Jiangsu | China | CN | 453 |
| 1299110 | Dublin | Ireland | IE | 454 |
| 1297900 | Tasikmalaya | Indonesia | ID | 455 |
| 1291440 | Huaibei | China | CN | 456 |
| 1288560 | Xiongan | China | CN | 457 |
| 1286460 | Sofia | Bulgaria | BG | 458 |
| 1286000 | Da Nang | Vietnam | VN | 459 |
| 1282270 | Pizhou | China | CN | 460 |
| 1281730 | Barquisimeto | Venezuela | VE | 461 |
| 1281010 | Bogor | Indonesia | ID | 462 |
| 1278770 | Luohe | China | CN | 463 |
| 1274780 | Aba | Nigeria | NG | 464 |
| 1274630 | Nanyang Henan | China | CN | 465 |
| 1273560 | Xiangtan Hunan | China | CN | 466 |
| 1270320 | Maracay | Venezuela | VE | 467 |
| 1269900 | Tiruchirappalli | India | IN | 468 |
| 1268080 | Bazhong | China | CN | 469 |
| 1266010 | Chandigarh | India | IN | 470 |
| 1263650 | Binzhou | China | CN | 471 |
| 1260660 | Jinzhou | China | CN | 472 |
| 1259940 | Kaduna | Nigeria | NG | 473 |
| 1258230 | Merida | Mexico | MX | 474 |
| 1258120 | Benxi | China | CN | 475 |
| 1257180 | Mendoza | Argentina | AR | 476 |
| 1253110 | Quetta | Pakistan | PK | 477 |
| 1249760 | Nizhniy Novgorod | Russia | RU | 478 |
| 1246210 | Chelyabinsk | Russia | RU | 479 |
| 1244190 | Chiang Mai | Thailand | TH | 480 |
| 1243530 | Bobo-Dioulasso | Burkina Faso | BF | 481 |
| 1242490 | Saharanpur | India | IN | 482 |
| 1238950 | Guilin | China | CN | 483 |
| 1231020 | Hubli-Dharwad | India | IN | 484 |
| 1230070 | Maputo | Mozambique | MZ | 485 |
| 1229960 | Yueqing | China | CN | 486 |
| 1227620 | Hargeysa | Somalia | SO | 487 |
| 1224170 | Guwahati | India | IN | 488 |
| 1220710 | Salem | India | IN | 489 |
| 1215000 | Mexicali | Mexico | MX | 490 |
| 1210200 | Bandar Lampung | Indonesia | ID | 491 |
| 1203890 | Shimkent | Kazakhstan | KZ | 492 |
| 1203390 | Tripoli | Libya | LY | 493 |
| 1199980 | Haifa | Israel | IL | 494 |
| 1196820 | Ikorodu | Nigeria | NG | 495 |
| 1196770 | Aguascalientes | Mexico | MX | 496 |
| 1191400 | Siliguri | India | IN | 497 |
| 1189200 | Amsterdam | Netherlands | NL | 498 |
| 1185920 | Tshikapa | DR Congo | CD | 499 |
| 1184900 | Wenling | China | CN | 500 |
| 1180820 | Omsk | Russia | RU | 501 |
| 1180750 | Xinxiang | China | CN | 502 |
| 1177840 | Krasnoyarsk | Russia | RU | 503 |
| 1174750 | Bien Hoa | Vietnam | VN | 504 |
| 1170240 | Fuyang | China | CN | 505 |
| 1168490 | Zaozhuang | China | CN | 506 |
| 1167600 | Jalandhar | India | IN | 507 |
| 1165590 | Ma’anshan | China | CN | 508 |
| 1163590 | Panjin | China | CN | 509 |
| 1159940 | Fuzhou Jiangxi | China | CN | 510 |
| 1159780 | Sekondi Takoradi | Ghana | GH | 511 |
| 1159390 | Yichun Jiangxi | China | CN | 512 |
| 1157920 | Yongin | South Korea | KR | 513 |
| 1156360 | Cuernavaca | Mexico | MX | 514 |
| 1154760 | Samarinda | Indonesia | ID | 515 |
| 1154410 | Aden | Yemen | YE | 516 |
| 1153710 | Samara | Russia | RU | 517 |
| 1153600 | Shangrao | China | CN | 518 |
| 1153540 | Chihuahua | Mexico | MX | 519 |
| 1153230 | Cologne | Germany | DE | 520 |
| 1152180 | Asmara | Eritrea | ER | 521 |
| 1151220 | Bishkek | Kyrgyzstan | KG | 522 |
| 1150530 | Chenzhou | China | CN | 523 |
| 1150480 | Zhaoqing | China | CN | 524 |
| 1148400 | Ufa | Russia | RU | 525 |
| 1145590 | Nyala | Sudan | SD | 526 |
| 1142870 | Leshan | China | CN | 527 |
| 1140390 | Rostov-on-Don | Russia | RU | 528 |
| 1133630 | Dezhou | China | CN | 529 |
| 1132420 | San Salvador | El Salvador | SV | 530 |
| 1128360 | Diyarbakir | Turkey | TR | 531 |
| 1128190 | Kirkuk | Iraq | IQ | 532 |
| 1127520 | Johor Bahru | Malaysia | MY | 533 |
| 1126220 | Jingzhou Hubei | China | CN | 534 |
| 1123540 | Changshu | China | CN | 535 |
| 1121440 | Goyang | South Korea | KR | 536 |
| 1120900 | Managua | Nicaragua | NI | 537 |
| 1117360 | Kermanshah | Iran | IR | 538 |
| 1117100 | Xuchang | China | CN | 539 |
| 1115580 | Oslo | Norway | NO | 540 |
| 1114370 | Huzhou | China | CN | 541 |
| 1114090 | Blantyre-Limbe | Malawi | MW | 542 |
| 1108180 | Solapur | India | IN | 543 |
| 1105540 | Cartagena | Colombia | CO | 544 |
| 1100240 | Yerevan | Armenia | AM | 545 |
| 1099960 | Ilorin | Nigeria | NG | 546 |
| 1099040 | Mersin | Turkey | TR | 547 |
| 1097310 | Denpasar | Indonesia | ID | 548 |
| 1092200 | Qujing | China | CN | 549 |
| 1091590 | Lille | France | FR | 550 |
| 1087020 | Tbilisi | Georgia | GE | 551 |
| 1086960 | Guiping | China | CN | 552 |
| 1086000 | Voronezh | Russia | RU | 553 |
| 1085990 | Perm | Russia | RU | 554 |
| 1085330 | Marrakech | Morocco | MA | 555 |
| 1081930 | Aracaju | Brazil | BR | 556 |
| 1080490 | Warangal | India | IN | 557 |
| 1080180 | Toulouse | France | FR | 558 |
| 1079980 | Yueyang | China | CN | 559 |
| 1078650 | Hamah | Syria | SY | 560 |
| 1077740 | Tampico | Mexico | MX | 561 |
| 1076120 | Warri | Nigeria | NG | 562 |
| 1073060 | Xintai | China | CN | 563 |
| 1072560 | Padang | Indonesia | ID | 564 |
| 1068550 | Teresina | Brazil | BR | 565 |
| 1066100 | Saltillo | Mexico | MX | 566 |
| 1065400 | Cancun | Mexico | MX | 567 |
| 1064870 | Antwerp | Belgium | BE | 568 |
| 1063680 | Owerri | Nigeria | NG | 569 |
| 1061620 | Cebu City | Philippines | PH | 570 |
| 1057290 | Nampula | Mozambique | MZ | 571 |
| 1056980 | Changwon | South Korea | KR | 572 |
| 1052320 | Chengde | China | CN | 573 |
| 1051040 | San Miguel de Tucuman | Argentina | AR | 574 |
| 1047810 | Lubango | Angola | AO | 575 |
| 1046710 | Acapulco de Juarez | Mexico | MX | 576 |
| 1045070 | Zhucheng | China | CN | 577 |
| 1043580 | Leiyang | China | CN | 578 |
| 1042120 | Pingxiang Jiangxi | China | CN | 579 |
| 1041060 | Dehradun | India | IN | 580 |
| 1039900 | Dushanbe | Tajikistan | TJ | 581 |
| 1037250 | Kayseri | Turkey | TR | 582 |
| 1035090 | Jos | Nigeria | NG | 583 |
| 1034680 | Misratah | Libya | LY | 584 |
| 1034200 | San Pedro Sula | Honduras | HN | 585 |
| 1032980 | Sylhet | Bangladesh | BD | 586 |
| 1031440 | Laiwu | China | CN | 587 |
| 1030550 | Songkhla | Thailand | TH | 588 |
| 1026390 | Nonthaburi | Thailand | TH | 589 |
| 1026250 | Rotterdam | Netherlands | NL | 590 |
| 1024970 | Jixi Heilongjiang | China | CN | 591 |
| 1024430 | Valparaiso | Chile | CL | 592 |
| 1024430 | Jiujiang | China | CN | 593 |
| 1018070 | Bordeaux | France | FR | 594 |
| 1017660 | Najaf | Iraq | IQ | 595 |
| 1017530 | Krasnodar | Russia | RU | 596 |
| 1017030 | Agadir | Morocco | MA | 597 |
| 1016230 | Morelia | Mexico | MX | 598 |
| 1016150 | Bangui | Central African Republic | CF | 599 |
| 1015045 | Fort Worth | United States | US | 600 |
| 1011500 | Guigang | China | CN | 601 |
| 1009450 | Taiz | Yemen | YE | 602 |
| 1009260 | Mudanjiang | China | CN | 603 |
| 1008750 | Hengshui | China | CN | 604 |
| 1008485 | Jacksonville | United States | US | 605 |
| 1007020 | Rajshahi | Bangladesh | BD | 606 |
| 1006700 | Odesa | Ukraine | UA | 607 |
| 1003580 | Xinyu | China | CN | 608 |
| 1002710 | Linfen | China | CN | 609 |
| 1002450 | Zhangzhou | China | CN | 610 |
| 1000720 | Tianmen | China | CN | 611 |
| 1000410 | Liling | China | CN | 612 |
| 996732 | Jerusalem | Israel | IL | 613 |
| 995970 | Yangjiang | China | CN | 614 |
| 995027 | Zamboanga City | Philippines | PH | 615 |
| 992268 | Volgograd | Russia | RU | 616 |
| 991388 | Ciudad Guayana | Venezuela | VE | 617 |
| 990695 | Cabinda | Angola | AO | 618 |
| 990439 | Umuahia | Nigeria | NG | 619 |
| 989859 | Antipolo | Philippines | PH | 620 |
| 989252 | Austin | United States | US | 621 |
| 985078 | Deyang | China | CN | 622 |
| 983715 | Arequipa | Peru | PE | 623 |
| 983121 | Reynosa | Mexico | MX | 624 |
| 983008 | Baishan | China | CN | 625 |
| 982563 | Jiangyin | China | CN | 626 |
| 981688 | Cucuta | Colombia | CO | 627 |
| 981223 | Bogra | Bangladesh | BD | 628 |
| 981069 | Veracruz | Mexico | MX | 629 |
| 980200 | Khulna | Bangladesh | BD | 630 |
| 975189 | Bengbu | China | CN | 631 |
| 974195 | Pathum Thani | Thailand | TH | 632 |
| 972373 | Villahermosa | Mexico | MX | 633 |
| 970074 | Bahawalpur | Pakistan | PK | 634 |
| 966741 | Southampton | United Kingdom | GB | 635 |
| 966494 | Oran | Algeria | DZ | 636 |
| 962403 | Guntur | India | IN | 637 |
| 961625 | West Rand | South Africa | ZA | 638 |
| 956195 | Nice | France | FR | 639 |
| 949309 | Changzhi | China | CN | 640 |
| 948572 | Malang | Indonesia | ID | 641 |
| 947488 | Dandong | China | CN | 642 |
| 944348 | Hermosillo | Mexico | MX | 643 |
| 943921 | Bhiwandi | India | IN | 644 |
| 943458 | Seongnam | South Korea | KR | 645 |
| 943313 | Campo Grande | Brazil | BR | 646 |
| 943212 | San Jose | United States | US | 647 |
| 942459 | Londrina | Brazil | BR | 648 |
| 941758 | Firozabad | India | IN | 649 |
| 941130 | Ashgabat | Turkmenistan | TM | 650 |
| 940911 | Puducherry | India | IN | 651 |
| 940560 | Erbil | Iraq | IQ | 652 |
| 938279 | Changde | China | CN | 653 |
| 935079 | Shangqiu | China | CN | 654 |
| 935017 | Charlotte | United States | US | 655 |
| 934011 | Kuerle | China | CN | 656 |
| 933193 | La Plata | Argentina | AR | 657 |
| 931630 | Liaoyang | China | CN | 658 |
| 931255 | Dnipro | Ukraine | UA | 659 |
| 931092 | Lokoja | Nigeria | NG | 660 |
| 931015 | Cherthala | India | IN | 661 |
| 929772 | Quzhou | China | CN | 662 |
| 929357 | Concepcion | Chile | CL | 663 |
| 929188 | Trujillo | Peru | PE | 664 |
| 928997 | Liverpool | United Kingdom | GB | 665 |
| 927758 | Tuxtla Gutierrez | Mexico | MX | 666 |
| 926519 | Tyumen | Russia | RU | 667 |
| 923221 | Bergamo | Italy | IT | 668 |
| 921011 | Xingtai | China | CN | 669 |
| 919748 | Soshanguve | South Africa | ZA | 670 |
| 918494 | Culiacan | Mexico | MX | 671 |
| 917679 | Columbus | United States | US | 672 |
| 916236 | Ulsan | South Korea | KR | 673 |
| 908873 | Huangshi | China | CN | 674 |
| 906658 | Enugu | Nigeria | NG | 675 |
| 904031 | Yongzhou | China | CN | 676 |
| 903628 | Fuxin | China | CN | 677 |
| 902334 | Xinghua | China | CN | 678 |
| 900351 | Bunia | DR Congo | CD | 679 |
| 899225 | Libreville | Gabon | GA | 680 |
| 898535 | Maiduguri | Nigeria | NG | 681 |
| 897493 | Yibin | China | CN | 682 |
| 897262 | Hufuf-Mubarraz | Saudi Arabia | SA | 683 |
| 892519 | Huaihua | China | CN | 684 |
| 890375 | Xinyang | China | CN | 685 |
| 887939 | Xiaogan | China | CN | 686 |
| 887711 | Ipoh | Malaysia | MY | 687 |
| 886584 | Yangquan | China | CN | 688 |
| 885396 | Kottayam | India | IN | 689 |
| 884046 | Luzhou | China | CN | 690 |
| 881897 | Banghazi | Libya | LY | 691 |
| 880865 | Tainan | Taiwan | TW | 692 |
| 878063 | Tianshui | China | CN | 693 |
| 878004 | Benguela | Angola | AO | 694 |
| 877072 | Bozhou | China | CN | 695 |
| 876063 | Donetsk | Ukraine | UA | 696 |
| 874037 | Indianapolis | United States | US | 697 |
| 873982 | Zanzibar | Tanzania | TZ | 698 |
| 873638 | Kunshan | China | CN | 699 |
| 869441 | Zhuji | China | CN | 700 |
| 863282 | Jincheng | China | CN | 701 |
| 861380 | Malanje | Angola | AO | 702 |
| 860518 | Eskisehir | Turkey | TR | 703 |
| 860169 | Orumiyeh | Iran | IR | 704 |
| 858853 | Nellore | India | IN | 705 |
| 858207 | Quebec City | Canada | CA | 706 |
| 857367 | Winnipeg | Canada | CA | 707 |
| 853869 | Bikaner | India | IN | 708 |
| 851474 | Heze | China | CN | 709 |
| 851392 | Zhumadian | China | CN | 710 |
| 851282 | Palermo | Italy | IT | 711 |
| 851156 | Huludao | China | CN | 712 |
| 848379 | Taixing | China | CN | 713 |
| 848129 | Haicheng | China | CN | 714 |
| 847608 | Anqiu | China | CN | 715 |
| 847350 | Ibb | Yemen | YE | 716 |
| 846841 | Gebze | Turkey | TR | 717 |
| 846517 | Sulaimaniya | Iraq | IQ | 718 |
| 846438 | Langfang | China | CN | 719 |
| 845854 | Liaocheng | China | CN | 720 |
| 845827 | Barcelona Puerto La Cruz | Venezuela | VE | 721 |
| 841911 | Gaomi | China | CN | 722 |
| 841354 | Valencia | Spain | ES | 723 |
| 839309 | Hanchuan | China | CN | 724 |
| 839131 | Dasmarinas | Philippines | PH | 725 |
| 838870 | Kayamkulam | India | IN | 726 |
| 837650 | Cagayan de Oro City | Philippines | PH | 727 |
| 837335 | Meishan | China | CN | 728 |
| 837323 | Saratov | Russia | RU | 729 |
| 836997 | Muzaffarnagar | India | IN | 730 |
| 834581 | Xalapa | Mexico | MX | 731 |
| 834242 | Newcastle upon Tyne | United Kingdom | GB | 732 |
| 831689 | Ar-Rayyan | Qatar | QA | 733 |
| 829276 | Danyang | China | CN | 734 |
| 829049 | Sorocaba | Brazil | BR | 735 |
| 828008 | Merca | Somalia | SO | 736 |
| 827603 | Bucheon | South Korea | KR | 737 |
| 824965 | Amravati | India | IN | 738 |
| 823625 | Kitwe | Zambia | ZM | 739 |
| 823407 | Gaza | Palestine | PS | 740 |
| 822497 | Oshogbo | Nigeria | NG | 741 |
| 821187 | Nakhon Ratchasima | Thailand | TH | 742 |
| 820064 | Jiaozuo | China | CN | 743 |
| 819854 | Goma | DR Congo | CD | 744 |
| 819334 | Nottingham | United Kingdom | GB | 745 |
| 819297 | Bologna | Italy | IT | 746 |
| 816586 | Gorakhpur | India | IN | 747 |
| 815472 | Thessaloniki | Greece | GR | 748 |
| 813794 | Linhai | China | CN | 749 |
| 813599 | Yan’an | China | CN | 750 |
| 812913 | Vereeniging | South Africa | ZA | 751 |
| 812244 | Erduosi-Ordoss | China | CN | 752 |
| 811560 | Anqing | China | CN | 753 |
| 809652 | Zaria | Nigeria | NG | 754 |
| 808111 | Bur Sa’id | Egypt | EG | 755 |
| 805079 | Shaoguan | China | CN | 756 |
| 804914 | Cuttack | India | IN | 757 |
| 804189 | Dengzhou | China | CN | 758 |
| 803758 | Frankfurt | Germany | DE | 759 |
| 803062 | Akure | Nigeria | NG | 760 |
| 799729 | Belgaum | India | IN | 761 |
| 799708 | Malegaon | India | IN | 762 |
| 799706 | Banjarmasin | Indonesia | ID | 763 |
| 797302 | Tirupati | India | IN | 764 |
| 797246 | Yuncheng | China | CN | 765 |
| 795748 | Yuxi | China | CN | 766 |
| 793974 | Niigata | Japan | JP | 767 |
| 793083 | Hamilton | Canada | CA | 768 |
| 792664 | Yanji | China | CN | 769 |
| 791579 | Zigong | China | CN | 770 |
| 790986 | Shaoyang | China | CN | 771 |
| 790694 | Qingyuan | China | CN | 772 |
| 789918 | Maturin | Venezuela | VE | 773 |
| 789386 | Sialkot | Pakistan | PK | 774 |
| 788345 | Tongliao | China | CN | 775 |
| 788238 | Nay Pyi Taw | Myanmar | MM | 776 |
| 786000 | Tongling | China | CN | 777 |
| 785116 | Dazhou | China | CN | 778 |
| 784856 | Al-Hudaydah | Yemen | YE | 779 |
| 784420 | Tamale | Ghana | GH | 780 |
| 784261 | Wuzhou | China | CN | 781 |
| 780842 | Suining Sichuan | China | CN | 782 |
| 778685 | Mangalore | India | IN | 783 |
| 778052 | Amara | Iraq | IQ | 784 |
| 777336 | Huambo | Angola | AO | 785 |
| 775312 | Zhangjiagang | China | CN | 786 |
| 775091 | Sargodha | Pakistan | PK | 787 |
| 774229 | Bacoor | Philippines | PH | 788 |
| 774118 | Jiamusi | China | CN | 789 |
| 773030 | Dongtai | China | CN | 790 |
| 772996 | Nanded Waghala | India | IN | 791 |
| 771029 | Ansan | South Korea | KR | 792 |
| 769375 | Krakow | Poland | PL | 793 |
| 767968 | San Francisco | United States | US | 794 |
| 767534 | Lattakia | Syria | SY | 795 |
| 766887 | Bhavnagar | India | IN | 796 |
| 766403 | Ankang | China | CN | 797 |
| 765748 | Xianyang Shaanxi | China | CN | 798 |
| 765324 | Kurnool | India | IN | 799 |
| 764938 | Taicang | China | CN | 800 |
| 764753 | Seattle | United States | US | 801 |
| 764500 | Sao Jose dos Campos | Brazil | BR | 802 |
| 761755 | Panzhihua | China | CN | 803 |
| 761331 | Sokoto | Nigeria | NG | 804 |
| 760857 | Uvira | DR Congo | CD | 805 |
| 758893 | Zarqa | Jordan | JO | 806 |
| 758680 | Yuyao | China | CN | 807 |
| 757989 | Cotonou | Benin | BJ | 808 |
| 757765 | Rasht | Iran | IR | 809 |
| 757604 | Ribeirao Preto | Brazil | BR | 810 |
| 757292 | Oaxaca de Juarez | Mexico | MX | 811 |
| 756783 | Sheffield | United Kingdom | GB | 812 |
| 756507 | Buffalo City | South Africa | ZA | 813 |
| 755941 | Vientiane | Laos | LA | 814 |
| 754296 | Yulin Shaanxi | China | CN | 815 |
| 753877 | Bali | Indonesia | ID | 816 |
| 752910 | Herat | Afghanistan | AF | 817 |
| 752672 | Longyan | China | CN | 818 |
| 752570 | Gold Coast | Australia | AU | 819 |
| 751802 | Gulbarga | India | IN | 820 |
| 751521 | Jammu | India | IN | 821 |
| 751457 | Miluo | China | CN | 822 |
library(plotly)
library(dplyr)
# Line chart of city population against rank, drawn from the `bigs` table.
# Source: https://en.wikipedia.org/wiki/List_of_largest_cities
slope <- -1
intercept <- 1000
fig <- bigs %>%
  plot_ly(
    x = ~rank,
    y = ~population,
    text = ~city,
    name = "Biggest cities of the world",
    type = "scatter",
    mode = "lines"
  )
fig
# English text of Hamlet's "To be, or not to be" soliloquy, stored as a single
# multi-line character string. The literal begins with a line break because
# the text starts on the line after the opening quote.
Hamlet <- "
To be, or not to be: that is the question:
Whether ’tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing end them? To die: to sleep;
No more; and, by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to, ’tis a consummation
Devoutly to be wish’d. To die, to sleep;
To sleep: perchance to dream: ay, there’s the rub;
For in that sleep of death what dreams may come
When we have shuffled off this mortal coil,
Must give us pause. There’s the respect
That makes calamity of so long life;
For who would bear the whips and scorns of time,
The oppressor’s wrong, the proud man’s contumely,
The pangs of dispriz’d love, the law’s delay,
The insolence of office, and the spurns
That patient merit of the unworthy takes,
When he himself might his quietus make
With a bare bodkin? who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscover’d country from whose bourn
No traveller returns, puzzles the will,
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience does make cowards of us all;
And thus the native hue of resolution
Is sicklied o’er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry,
And lose the name of action. Soft you now!
The fair Ophelia! Nymph, in thy orisons
Be all my sins remember’d."
# Auto-print the string; line breaks are shown as "\n" escapes in the output.
Hamlet
## [1] "\nTo be, or not to be: that is the question:\nWhether ’tis nobler in the mind to suffer\nThe slings and arrows of outrageous fortune,\nOr to take arms against a sea of troubles,\nAnd by opposing end them? To die: to sleep;\nNo more; and, by a sleep to say we end\nThe heart-ache and the thousand natural shocks\nThat flesh is heir to, ’tis a consummation\nDevoutly to be wish’d. To die, to sleep;\nTo sleep: perchance to dream: ay, there’s the rub;\nFor in that sleep of death what dreams may come\nWhen we have shuffled off this mortal coil,\nMust give us pause. There’s the respect\nThat makes calamity of so long life;\nFor who would bear the whips and scorns of time,\nThe oppressor’s wrong, the proud man’s contumely,\nThe pangs of dispriz’d love, the law’s delay,\nThe insolence of office, and the spurns\nThat patient merit of the unworthy takes,\nWhen he himself might his quietus make\nWith a bare bodkin? who would fardels bear,\nTo grunt and sweat under a weary life,\nBut that the dread of something after death,\nThe undiscover’d country from whose bourn\nNo traveller returns, puzzles the will,\nAnd makes us rather bear those ills we have\nThan fly to others that we know not of?\nThus conscience does make cowards of us all;\nAnd thus the native hue of resolution\nIs sicklied o’er with the pale cast of thought,\nAnd enterprises of great pith and moment\nWith this regard their currents turn awry,\nAnd lose the name of action. Soft you now!\nThe fair Ophelia! Nymph, in thy orisons\nBe all my sins remember’d."
# Hungarian text of the same soliloquy, stored as a single multi-line
# character string (the variable name suggests the Arany translation --
# NOTE(review): confirm the attribution).
Arany <- "Lenni vagy nem lenni: az itt a kérdés.
Akkor nemesb-e a lélek, ha tűri
Balsorsa minden nyűgét s nyilait;
Vagy ha kiszáll tenger fájdalma ellen,
S fegyvert ragadva véget vet neki?
Meghalni – elszunnyadni – semmi több;
S egy álom által elvégezni mind
A szív keservét, a test eredendő,
Természetes rázkódtatásait
Oly cél, minőt óhajthat a kegyes.
Meghalni – elszunnyadni – és alunni!
Talán álmodni: ez a bökkenő;
Mert hogy mi álmok jőnek a halálban,
Ha majd leráztuk mind e földi bajt,
Ez visszadöbbent. E meggondolás az,
Mi a nyomort oly hosszan élteti
Mert ki viselné a kor gúny-csapásit,
Zsarnok bosszúját, gőgös ember dölyfét,
Útált szerelme kínját, pör-halasztást,
A hivatalnak packázásait,
S mind a rugást, mellyel méltatlanok
Bántalmazzák a tűrő érdemet
Ha nyúgalomba küldhetné magát
Egy puszta tőrrel? – Ki hordaná e terheket,
Izzadva, nyögve élte fáradalmin,
Ha rettegésünk egy halál utáni
Valamitől – a nem ismert tartomány,
Melyből nem tér meg utazó – le nem
Lohasztja kedvünk, inkább tűrni a
Jelen gonoszt, mint ismeretlenek
Felé sietni? – Ekképp az öntudat
Belőlünk mind gyávát csinál,
S az elszántság természetes szinét
A gondolat halványra betegíti;
Ily kétkedés által sok nagyszerű,
Fontos merény kifordul medriből
S elveszti »tett« nevét. – De csöndesen!
A szép Ophelia jő. – Szép hölgy, imádba
Legyenek foglalva minden bűneim."
# Auto-print the string; line breaks are shown as "\n" escapes in the output.
Arany
## [1] "Lenni vagy nem lenni: az itt a kérdés.\nAkkor nemesb-e a lélek, ha tűri\nBalsorsa minden nyűgét s nyilait;\nVagy ha kiszáll tenger fájdalma ellen,\nS fegyvert ragadva véget vet neki?\nMeghalni – elszunnyadni – semmi több;\nS egy álom által elvégezni mind\nA szív keservét, a test eredendő,\nTermészetes rázkódtatásait\nOly cél, minőt óhajthat a kegyes.\nMeghalni – elszunnyadni – és alunni!\nTalán álmodni: ez a bökkenő;\nMert hogy mi álmok jőnek a halálban,\nHa majd leráztuk mind e földi bajt,\nEz visszadöbbent. E meggondolás az,\nMi a nyomort oly hosszan élteti\nMert ki viselné a kor gúny-csapásit,\nZsarnok bosszúját, gőgös ember dölyfét,\nÚtált szerelme kínját, pör-halasztást,\nA hivatalnak packázásait,\nS mind a rugást, mellyel méltatlanok\nBántalmazzák a tűrő érdemet\nHa nyúgalomba küldhetné magát\nEgy puszta tőrrel? – Ki hordaná e terheket,\nIzzadva, nyögve élte fáradalmin,\nHa rettegésünk egy halál utáni\nValamitől – a nem ismert tartomány,\nMelyből nem tér meg utazó – le nem\nLohasztja kedvünk, inkább tűrni a\nJelen gonoszt, mint ismeretlenek\nFelé sietni? – Ekképp az öntudat\nBelőlünk mind gyávát csinál,\nS az elszántság természetes szinét\nA gondolat halványra betegíti;\nIly kétkedés által sok nagyszerű,\nFontos merény kifordul medriből\nS elveszti »tett« nevét. – De csöndesen!\nA szép Ophelia jő. – Szép hölgy, imádba\nLegyenek foglalva minden bűneim."
## Corpus consisting of 60 documents, showing 60 documents:
##
## Text Types Tokens Sentences Year President FirstName
## 1789-Washington 625 1537 23 1789 Washington George
## 1793-Washington 96 147 4 1793 Washington George
## 1797-Adams 826 2577 37 1797 Adams John
## 1801-Jefferson 717 1923 41 1801 Jefferson Thomas
## 1805-Jefferson 804 2380 45 1805 Jefferson Thomas
## 1809-Madison 535 1261 21 1809 Madison James
## 1813-Madison 541 1302 33 1813 Madison James
## 1817-Monroe 1040 3677 121 1817 Monroe James
## 1821-Monroe 1259 4886 131 1821 Monroe James
## 1825-Adams 1003 3147 74 1825 Adams John Quincy
## 1829-Jackson 517 1208 25 1829 Jackson Andrew
## 1833-Jackson 499 1267 29 1833 Jackson Andrew
## 1837-VanBuren 1315 4158 95 1837 Van Buren Martin
## 1841-Harrison 1896 9125 210 1841 Harrison William Henry
## 1845-Polk 1334 5186 153 1845 Polk James Knox
## 1849-Taylor 496 1178 22 1849 Taylor Zachary
## 1853-Pierce 1165 3636 104 1853 Pierce Franklin
## 1857-Buchanan 945 3083 89 1857 Buchanan James
## 1861-Lincoln 1075 3999 135 1861 Lincoln Abraham
## 1865-Lincoln 360 775 26 1865 Lincoln Abraham
## 1869-Grant 485 1229 40 1869 Grant Ulysses S.
## 1873-Grant 552 1472 43 1873 Grant Ulysses S.
## 1877-Hayes 831 2707 59 1877 Hayes Rutherford B.
## 1881-Garfield 1021 3209 111 1881 Garfield James A.
## 1885-Cleveland 676 1816 44 1885 Cleveland Grover
## 1889-Harrison 1352 4721 157 1889 Harrison Benjamin
## 1893-Cleveland 821 2125 58 1893 Cleveland Grover
## 1897-McKinley 1232 4353 130 1897 McKinley William
## 1901-McKinley 854 2437 100 1901 McKinley William
## 1905-Roosevelt 404 1079 33 1905 Roosevelt Theodore
## 1909-Taft 1437 5821 158 1909 Taft William Howard
## 1913-Wilson 658 1882 68 1913 Wilson Woodrow
## 1917-Wilson 549 1652 59 1917 Wilson Woodrow
## 1921-Harding 1169 3719 148 1921 Harding Warren G.
## 1925-Coolidge 1220 4440 196 1925 Coolidge Calvin
## 1929-Hoover 1090 3860 158 1929 Hoover Herbert
## 1933-Roosevelt 743 2057 85 1933 Roosevelt Franklin D.
## 1937-Roosevelt 725 1989 96 1937 Roosevelt Franklin D.
## 1941-Roosevelt 526 1519 68 1941 Roosevelt Franklin D.
## 1945-Roosevelt 275 633 27 1945 Roosevelt Franklin D.
## 1949-Truman 781 2504 116 1949 Truman Harry S.
## 1953-Eisenhower 900 2743 119 1953 Eisenhower Dwight D.
## 1957-Eisenhower 621 1907 92 1957 Eisenhower Dwight D.
## 1961-Kennedy 566 1541 52 1961 Kennedy John F.
## 1965-Johnson 568 1710 93 1965 Johnson Lyndon Baines
## 1969-Nixon 743 2416 103 1969 Nixon Richard Milhous
## 1973-Nixon 544 1995 68 1973 Nixon Richard Milhous
## 1977-Carter 527 1370 52 1977 Carter Jimmy
## 1981-Reagan 902 2781 129 1981 Reagan Ronald
## 1985-Reagan 925 2909 123 1985 Reagan Ronald
## 1989-Bush 795 2674 141 1989 Bush George
## 1993-Clinton 642 1833 81 1993 Clinton Bill
## 1997-Clinton 773 2436 111 1997 Clinton Bill
## 2001-Bush 621 1806 97 2001 Bush George W.
## 2005-Bush 772 2312 99 2005 Bush George W.
## 2009-Obama 938 2689 110 2009 Obama Barack
## 2013-Obama 814 2317 88 2013 Obama Barack
## 2017-Trump 582 1660 88 2017 Trump Donald J.
## 2021-Biden 812 2766 216 2021 Biden Joseph R.
## 2025-Trump 1000 3347 177 2025 Trump Donald J.
## Party
## none
## none
## Federalist
## Democratic-Republican
## Democratic-Republican
## Democratic-Republican
## Democratic-Republican
## Democratic-Republican
## Democratic-Republican
## Democratic-Republican
## Democratic
## Democratic
## Democratic
## Whig
## Whig
## Whig
## Democratic
## Democratic
## Republican
## Republican
## Republican
## Republican
## Republican
## Republican
## Democratic
## Republican
## Democratic
## Republican
## Republican
## Republican
## Republican
## Democratic
## Democratic
## Republican
## Republican
## Republican
## Democratic
## Democratic
## Democratic
## Democratic
## Democratic
## Republican
## Republican
## Democratic
## Democratic
## Republican
## Republican
## Democratic
## Republican
## Republican
## Republican
## Democratic
## Democratic
## Republican
## Republican
## Democratic
## Democratic
## Republican
## Democratic
## Republican
library(ggplot2)
# Number of tokens in each inaugural speech over time: a line through all
# speeches with one label per speech showing the president's name, the label
# background filled by party. Reads the corpus object `korpa` and plots the
# data frame returned by its summary().
korpa %>%
  summary() %>%
  ggplot(aes(x = Year, y = Tokens, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(
    aes(label = President, fill = Party),
    nudge_x = 0.1,
    nudge_y = 0.1
  ) +
  ggtitle("Inauguration Speeches of Presidents - Number of Words") +
  theme_bw() +
  # theme() has to be closed before the next component is added; previously
  # the scale was added to element_text() inside theme(), which is an error.
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold")) +
  # The palette is given as a "package::palette" string. No layer maps the
  # colour aesthetic (only fill is mapped), so this scale does not change the
  # label fills.
  paletteer::scale_color_paletteer_d("nord::frost")
library(quanteda)
library(dplyr)
# Build two tables from the inaugural-speech corpus:
#   mysummary - the corpus summary of `korpa` as a plain data frame
#   ms        - mysummary plus a TTR column (Types divided by Tokens)
library(quanteda)
library(dplyr)
data("data_corpus_inaugural")

# Short alias for the corpus
korpa <- corpus(data_corpus_inaugural)

# Document-level variables and the speech texts as a character vector
docvars_df <- docvars(data_corpus_inaugural)
speech_texts <- as.character(data_corpus_inaugural)

mysummary <- korpa %>%
  summary(verbose = FALSE) %>%
  as.data.frame()
ms <- mutate(mysummary, TTR = Types / Tokens)
#
library(ggplot2)
# Type/token ratio (TTR) of each inaugural speech over time, labelled with the
# president's name and filled by party. Reads the summary table `ms`.
#
# Washington's second speech (document "1793-Washington") is left out. It is
# selected by document name rather than by row position, so the right speech
# is dropped even if the rows of `ms` are reordered or filtered beforehand.
# NOTE(review): presumably excluded because it is very short -- confirm.
ms1 <- ms[ms$Text != "1793-Washington", ]
# options(repr.plot.width = 12, repr.plot.height = 12)
ms1 %>%
  ggplot(aes(x = Year, y = TTR, group = 1)) +
  geom_line() +
  geom_point() +
  geom_label(aes(label = President, fill = Party)) +
  ggtitle("Inauguration Speeches of Presidents - T(ypes)/(T)okens (R)atio") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, face = "bold"))
library(plotly)
library(quanteda)
# Scatter plot of mean sentence length (words per sentence) against mean word
# length (characters per word), one marker per inaugural speech, coloured by
# party and labelled with the president's name.
korpa <- corpus(data_corpus_inaugural) # save the `corpus` to a short obj name
df <- docvars(data_corpus_inaugural)
df$text <- as.character(data_corpus_inaugural)
library(tokenizers)
# One character vector of lower-cased word tokens per speech, punctuation
# stripped, numbers kept.
words <- tokenize_words(
  df$text,
  lowercase = TRUE,
  stopwords = NULL,
  strip_punct = TRUE,
  strip_numeric = FALSE,
  simplify = FALSE
)
wc <- count_words(df$text)
st <- count_sentences(df$text)
# Total characters per speech, including spaces and punctuation. Kept for
# backward compatibility; no longer used for the word-length axis.
wl <- count_characters(df$text)
# Mean word length is taken over the word tokens themselves. Dividing the
# total character count by the word count would include spaces and
# punctuation and overstate the length of a word.
mean_word_length <- vapply(words, function(w) mean(nchar(w)), numeric(1))
# `df` supplies President and Party, so the chart does not depend on a table
# built in another chunk.
fig <- plot_ly(
  data = df, type = "scatter", mode = "markers",
  x = round(wc / st, 2), y = round(mean_word_length, 2),
  text = ~President,
  color = ~Party,
  colors = c("red", "green", "blue", "tomato", "magenta", "seagreen", "salmon")
) %>%
  layout(
    title = "Sentence length vs. word length",
    xaxis = list(title = "Sentence Length in Words"),
    yaxis = list(title = "Word Length in Characters")
  )
fig
library(quanteda)
library(quanteda.textplots)
library(quanteda.textstats)
library(ggplot2)
# Keyword-in-context listing for "country" in the inaugural speeches given
# after the year 2000, with three tokens of context on each side.
corp_us <- corpus(data_corpus_inaugural)
my_corpus <- corpus_subset(corp_us, subset = Year > 2000)
toki <- tokens(x = my_corpus)
kwic_results <- kwic(
  x = toki,
  pattern = "country",
  window = 3
)
kwic_results
## Keyword-in-context with 53 matches.
## [2001-Bush, 25] common in our | country | . With a
## [2001-Bush, 312] creed of our | country | , it is
## [2001-Bush, 375] of our own | country | . The ambitions
## [2001-Bush, 415] but not a | country | . We do
## [2001-Bush, 570] , makes our | country | more, not
## [2001-Bush, 674] . If our | country | does not lead
## [2001-Bush, 948] liberty and our | country | should make no
## [2001-Bush, 1226] Many in our | country | do not know
## [2001-Bush, 1758] to make our | country | more just and
## [2005-Bush, 64] that unite our | country | . I am
## [2005-Bush, 988] of your free | country | . The rulers
## [2005-Bush, 1170] measure. Our | country | has accepted obligations
## [2005-Bush, 1321] devotion to our | country | in deaths that
## [2005-Bush, 1427] wealth of our | country | but to its
## [2005-Bush, 1555] future of our | country | , we will
## [2005-Bush, 1843] . And our | country | must abandon all
## [2005-Bush, 1890] questions before our | country | are many.
## [2009-Obama, 1003] forgotten what this | country | has already done
## [2009-Obama, 2550] city and the | country | , alarmed at
## [2013-Obama, 667] understand that our | country | cannot succeed when
## [2013-Obama, 942] that built this | country | and investing in
## [2013-Obama, 990] that in this | country | freedom is reserved
## [2013-Obama, 1089] that make this | country | great. We
## [2013-Obama, 1831] expelled from our | country | . Our journey
## [2013-Obama, 2098] to God and | country | , not party
## [2017-Trump, 47] to rebuild our | country | and restore its
## [2017-Trump, 255] citizens of our | country | . Their victories
## [2017-Trump, 354] , is your | country | . What truly
## [2017-Trump, 405] women of our | country | will be forgotten
## [2017-Trump, 573] and robbed our | country | of so much
## [2017-Trump, 724] confidence of our | country | has dissipated over
## [2017-Trump, 1016] - rebuilding our | country | with American hands
## [2017-Trump, 1154] loyalty to our | country | , we will
## [2017-Trump, 1369] fail. Our | country | will thrive and
## [2021-Biden, 448] silently stalks the | country | . It's taken
## [2021-Biden, 1870] way, our | country | will be stronger
## [2021-Biden, 2175] and for our | country | . Amen.
## [2021-Biden, 2743] and to this | country | we love with
## [2025-Trump, 80] forward, our | country | will flourish and
## [2025-Trump, 237] is sweeping the | country | , sunlight is
## [2025-Trump, 405] illegally entered our | country | from all over
## [2025-Trump, 443] people. Our | country | can no longer
## [2025-Trump, 539] individuals in our | country | — some of
## [2025-Trump, 606] it than any | country | anywhere in the
## [2025-Trump, 635] to hate our | country | despite the love
## [2025-Trump, 977] history of our | country | . As our
## [2025-Trump, 1216] not forget our | country | , we will
## [2025-Trump, 1353] invasion of our | country | . Under the
## [2025-Trump, 1442] to defend our | country | from threats and
## [2025-Trump, 1571] gas of any | country | on earth —
## [2025-Trump, 2293] McKinley made our | country | very rich through
## [2025-Trump, 2335] given to the | country | of Panama after
## [2025-Trump, 2898] ., our | country | was forged and
#