# Load required libraries
pacman::p_load(pacman, tidytext, dplyr, tidyr, ggplot2, readr, topicmodels, udpipe, gridExtra, wordcloud, RColorBrewer, quanteda, quanteda.textstats)
# Path of the input CSV, resolved relative to the working directory
csv_file <- "Quran_english.csv"
# Read the file without using its first line as column names, so the columns
# arrive with readr's default names (X1..X4) and are renamed afterwards
text_data <- readr::read_csv(file = csv_file, col_names = FALSE)
## Rows: 6237 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): X1, X2, X3, X4
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename columns for easier reference
names(text_data) <- c("id", "surah", "ayah", "text")
# The file is read with col_names = FALSE, so a header line (if the CSV has
# one) ends up as the first data row. The first row is treated as a header
# when none of its id/surah/ayah fields contains a digit, and is dropped so it
# is not tokenised and numbered as if it were real text.
if (nrow(text_data) > 0) {
  first_keys <- unlist(
    text_data[1, c("id", "surah", "ayah")],
    use.names = FALSE
  )
  if (!any(grepl("[0-9]", first_keys))) {
    text_data <- text_data[-1, ]
  }
}
text_data <- text_data %>%
  mutate(
    # The recorded output of this script contains tokens such as
    # "Qur\u0081Lan" and "Allah\u0081Ls": the two-character sequence U+0081 + "L"
    # appears to stand for an apostrophe / single quote. Left alone it splits
    # words and produces artefact tokens such as "ls", so restore a plain "'".
    text = gsub("\u0081L", "'", text, fixed = TRUE),
    # Add document IDs: one id per remaining row, in file order
    document = row_number()
  )
# Make tidytext's stop-word table available as `stop_words`
data("stop_words")
# One row per word of the `text` column, then discard every word that is
# listed in the stop-word table (matched on the `word` column only)
tokens <- text_data %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(stop_words, by = "word")
# Frequency table of the remaining words, most frequent first
word_counts <- count(tokens, word, sort = TRUE)
# Show the first 30 rows, i.e. the 30 most frequent words
word_counts %>%
  head(30) %>%
  print()
## # A tibble: 30 × 2
## word n
## <chr> <int>
## 1 allah 3044
## 2 ye 2013
## 3 lord 975
## 4 thou 771
## 5 thee 615
## 6 day 526
## 7 people 515
## 8 thy 509
## 9 earth 419
## 10 signs 338
## # ℹ 20 more rows
# Plot the most common words
# slice_max() names the ranking column explicitly instead of letting top_n()
# guess it; like top_n(), it keeps ties at the cut-off, so more than 30 rows
# can be returned when counts are tied.
word_counts %>%
  slice_max(n, n = 30) %>%
  ggplot(aes(x = reorder(word, n), y = n)) +
  geom_col() +
  # Flip so the word labels are read horizontally
  coord_flip() +
  labs(
    title = "Most Common Words in The Quran",
    x = "Words",
    y = "Frequency"
  ) +
  theme_minimal()

# Sentiment Analysis using Bing Lexicon
# Join on `word` explicitly. The same word occurs many times in `tokens`, and
# dplyr previously warned that a token also matches more than one lexicon row,
# so the relationship is declared as many-to-many (as is done for the NRC
# join). A token that matches several lexicon rows is counted once per
# matching row.
bing_sentiments <- tokens %>%
  inner_join(
    get_sentiments("bing"),
    by = "word",
    relationship = "many-to-many"
  )
# Tally matched tokens per Bing sentiment and express each tally as a
# percentage of all matched tokens
bing_sentiments_count <- bing_sentiments %>%
  count(sentiment, sort = TRUE) %>%
  mutate(percentage = n / sum(n) * 100)
# One bar per sentiment, labelled with its percentage (one decimal place);
# the chart is stored so it can be arranged next to the NRC chart later
bing_plot <- bing_sentiments_count %>%
  ggplot(aes(x = sentiment, y = n, fill = sentiment)) +
  geom_col() +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), vjust = 1) +
  labs(
    title = "Sentiment Analysis of The Quran using Bing Lexicon",
    x = "Sentiment",
    y = "Count"
  ) +
  theme_minimal()
# Sentiment analysis using the NRC lexicon
# The join key is stated explicitly (consistent with the stop-word anti_join)
# and the relationship is declared many-to-many, so dplyr neither guesses the
# key nor warns when a token matches several lexicon rows.
nrc_sentiments <- tokens %>%
  inner_join(
    get_sentiments("nrc"),
    by = "word",
    relationship = "many-to-many"
  )
# Tally matches per NRC category and add each category's share of all matches
nrc_sentiments_count <- nrc_sentiments %>%
  count(sentiment, sort = TRUE) %>%
  mutate(percentage = n / sum(n) * 100)
# Horizontal bars ordered by count; hjust = 1 right-aligns each percentage
# label against the end of its bar
nrc_plot <- nrc_sentiments_count %>%
  ggplot(aes(x = reorder(sentiment, n), y = n, fill = sentiment)) +
  geom_col() +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), hjust = 1) +
  coord_flip() +
  labs(
    title = "NRC Sentiment Analysis of The Quran",
    x = "Sentiment",
    y = "Count"
  ) +
  theme_minimal()
# Draw the Bing and NRC charts next to each other in a single row
grid.arrange(bing_plot, nrc_plot, nrow = 1)

# Word cloud of the word frequencies
# Fix the RNG seed before drawing the cloud
set.seed(1234)
# Words seen fewer than 5 times are skipped and at most 100 words are drawn
with(
  word_counts,
  wordcloud(
    words = word,
    freq = n,
    min.freq = 5,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.1,
    scale = c(3.5, 0.75),
    colors = brewer.pal(8, "Dark2")
  )
)
# Bold, left-aligned title in the top margin of the base-graphics plot
mtext("Word Cloud of The Quran", side = 3, adj = 0, line = 1, cex = 1, font = 2)

# Topic modelling with LDA
# Number of topics to fit (adjust as needed)
num_topics <- 6 # You can adjust this
# Document-term matrix: one row per document id, one column per word, cell
# values are the per-document word counts
dtm <- cast_dtm(
  count(tokens, document, word),
  document = document,
  term = word,
  value = n
)
# Fit the model with a fixed seed
lda_model <- LDA(dtm, k = num_topics, control = list(seed = 1234))
# Per-topic word probabilities (beta) in tidy form: one row per topic/term
lda_terms <- tidy(lda_model, matrix = "beta")
# Ten highest-beta terms of every topic, ordered by topic and descending beta.
# slice_max(by = ) replaces the superseded top_n() and the separate
# group_by()/ungroup() steps; ties at the cut-off are still kept.
top_terms <- lda_terms %>%
  slice_max(beta, n = 10, by = topic) %>%
  arrange(topic, desc(beta))
# Print one line per topic listing its top terms, strongest first
top_terms %>%
  summarize(terms = paste(term, collapse = ", "), .by = topic) %>%
  print()
## # A tibble: 6 × 2
## topic terms
## <int> <chr>
## 1 1 allah, ye, lord, thy, people, thee, day, follow, unbelievers, hath
## 2 2 allah, thou, lord, earth, forgiving, people, moses, grace, evil, hath
## 3 3 allah, ye, thou, lord, day, wrong, ls, people, thee, unbelievers
## 4 4 ye, thy, allah, thou, verily, earth, day, evil, heavens, lord
## 5 5 ye, lord, day, thee, allah, people, thou, truth, behold, earth
## 6 6 ye, thee, allah, lord, thou, people, thy, faith, evil, hath
# Faceted bar chart of the top terms, one panel per topic
# reorder_within() orders the terms by beta separately inside each topic, and
# scale_x_reordered() removes the suffix it adds from the axis labels
ggplot(
  data = mutate(top_terms, term = reorder_within(term, beta, topic)),
  mapping = aes(x = term, y = beta, fill = as.factor(topic))
) +
  geom_col(show.legend = FALSE) +
  scale_x_reordered() +
  facet_wrap(~ topic, scales = "free_y") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top Terms in Each Topic in The Quran",
    x = "Terms",
    y = "Beta"
  )

# Textual Complexity: Flesch-Kincaid Readability
# Collapse every row's text into one string so the corpus is scored as a
# single document
quran_text <- paste(text_data$text, collapse = " ")
readability <- textstat_readability(quran_text, measure = "Flesch.Kincaid")
# `readability` is a data frame (document id + score). Pasting the whole
# object produced one string per column, including a meaningless "text1"
# line, so only the score column is pasted.
print(paste(
  "Flesch-Kincaid Readability Score; The Quran:",
  readability$Flesch.Kincaid
))
## [1] "Flesch-Kincaid Readability Score; The Quran: 8.23834157110691"
# Bigram analysis
# Split the text into overlapping word pairs, break each pair into its two
# words, and count the pairs that survive the filters below
bigrams <- text_data %>%
  unnest_tokens(output = bigram, input = text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(
    # Drop rows where either half of the pair is missing
    !(is.na(word1) | is.na(word2)),
    # Drop pairs in which either word is a stop word
    !(word1 %in% stop_words$word | word2 %in% stop_words$word)
  ) %>%
  count(word1, word2, sort = TRUE)
# Plot Most Common Bigrams
# slice_max() replaces the superseded top_n(); ties at the cut-off are kept,
# so slightly more than 20 bigrams may be shown.
bigrams %>%
  slice_max(n, n = 20) %>%
  # Re-join the two words into a single axis label, ordered by frequency
  ggplot(aes(x = reorder(paste(word1, word2, sep = " "), n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Most Common Bigrams in The Quran",
    x = "Bigram",
    y = "Frequency"
  ) +
  theme_minimal()

# Named Entity Recognition (NER) using udpipe
# Download and load the English model, unless a model object is already
# present in the workspace
if (!exists("ud_model")) {
  # overwrite = FALSE reuses a model file already present in the download
  # directory instead of fetching it again in every fresh session
  model <- udpipe_download_model(language = "english", overwrite = FALSE)
  # Fail with the downloader's own message rather than with an obscure error
  # from udpipe_load_model() when no model file could be obtained
  if (isTRUE(model$download_failed)) {
    stop(
      "Could not download the udpipe English model: ",
      model$download_message,
      call. = FALSE
    )
  }
  ud_model <- udpipe_load_model(file = model$file_model)
}
## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.5/master/inst/udpipe-ud-2.5-191206/english-ewt-ud-2.5-191206.udpipe to /cloud/project/english-ewt-ud-2.5-191206.udpipe
## - This model has been trained on version 2.5 of data from https://universaldependencies.org
## - The model is distributed under the CC-BY-SA-NC license: https://creativecommons.org/licenses/by-nc-sa/4.0
## - Visit https://github.com/jwijffels/udpipe.models.ud.2.5 for model license details.
## - For a list of all models and their licenses (most models you can download with this package have either a CC-BY-SA or a CC-BY-SA-NC license) read the documentation at ?udpipe_download_model. For building your own models: visit the documentation by typing vignette('udpipe-train', package = 'udpipe')
## Downloading finished, model stored at '/cloud/project/english-ewt-ud-2.5-191206.udpipe'
# Run the udpipe model over every row's text and flatten the result into a
# data frame of annotated tokens
annotations <- udpipe_annotate(object = ud_model, x = text_data$text)
annotations_df <- as.data.frame(annotations)
# "Named entities" here are the tokens tagged as proper nouns (upos PROPN);
# this is a part-of-speech filter used as a proxy, so ordinary capitalised
# words can show up in the list
named_entities <- annotations_df %>%
  filter(upos %in% "PROPN") %>%
  count(token, sort = TRUE)
# Print the full frequency table
print(named_entities)
## token n
## 1 Allah 2935
## 2 Lord 962
## 3 Him 347
## 4 Signs 250
## 5 Book 193
## 6 Moses 143
## 7 Say 137
## 8 Messenger 134
## 9 \u0081Ls 115
## 10 Faith 111
## 11 Merciful 110
## 12 Fire 98
## 13 His 89
## 14 Hell 83
## 15 Pharaoh 79
## 16 Nor 76
## 17 Abraham 74
## 18 Qur\u0081Lan 70
## 19 Mercy 69
## 20 Evil 66
## 21 Garden 52
## 22 Grace 52
## 23 Truth 51
## 24 Satan 45
## 25 Israel 43
## 26 Exalted 42
## 27 Noah 42
## 28 \u0081L 42
## 29 Thy 41
## 30 Islam 39
## 31 Oft 39
## 32 Wisdom 39
## 33 Penalty 38
## 34 Judgment 37
## 35 Way 37
## 36 Joseph 35
## 37 Might 34
## 38 Mary 33
## 39 Such 33
## 40 Power 32
## 41 Glory 30
## 42 Sign 29
## 43 -Forgiving 28
## 44 Cherisher 28
## 45 Nay 26
## 46 Clear 25
## 47 Gardens 25
## 48 Word 25
## 49 Aaron 24
## 50 Adam 24
## 51 Knowledge 24
## 52 Lut 24
## 53 Wise 24
## 54 Full 23
## 55 Soon 23
## 56 Command 22
## 57 Believers 21
## 58 Guidance 21
## 59 Blazing 20
## 60 Righteous 20
## 61 Thee 20
## 62 Will 20
## 63 Children 19
## 64 Hath 19
## 65 Jacob 19
## 66 Straight 19
## 67 \u0081LAd 19
## 68 Solomon 18
## 69 Wrath 18
## 70 David 17
## 71 Jesus 17
## 72 Path 17
## 73 Right 17
## 74 Ark 16
## 75 Hand 16
## 76 Isaac 16
## 77 Sacred 16
## 78 Thamud 16
## 79 Bliss 15
## 80 Gracious 15
## 81 Jews 15
## 82 Pagans 15
## 83 Supreme 15
## 84 Fear 14
## 85 loveth 14
## 86 Christians 13
## 87 Day 13
## 88 Ourselves 13
## 89 god 13
## 90 Glad 12
## 91 Grievous 12
## 92 Guardian 12
## 93 Hereafter 12
## 94 Jinns 12
## 95 People 12
## 96 Tidings 12
## 97 Arabic 11
## 98 Arabs 11
## 99 Hypocrites 11
## 100 Iblis 11
## 101 Shu\u0081Laib 11
## 102 Thou 11
## 103 Death 10
## 104 Light 10
## 105 Mighty 10
## 106 Mim 10
## 107 Mosque 10
## 108 Mount 10
## 109 Muslims 10
## 110 Unseen 10
## 111 Good 9
## 112 Ha 9
## 113 House 9
## 114 Isma\u0081Lil 9
## 115 Knowing 9
## 116 One 9
## 117 Said 9
## 118 Truly 9
## 119 Allah\u0081Ls 8
## 120 Blast 8
## 121 Books 8
## 122 Bounty 8
## 123 Decree 8
## 124 Earth 8
## 125 Enter 8
## 126 Hud 8
## 127 Salih 8
## 128 Throne 8
## 129 Zakariya 8
## 130 grace 8
## 131 guideth 8
## 132 obey 8
## 133 A 7
## 134 Falsehood 7
## 135 Hast 7
## 136 L 7
## 137 Mankind 7
## 138 Message 7
## 139 Obey 7
## 140 Prophet 7
## 141 Shall 7
## 142 Sinai 7
## 143 Chiefs 6
## 144 Egypt 6
## 145 End 6
## 146 Haman 6
## 147 Honour 6
## 148 Madyan 6
## 149 Oft-Forgiving 6
## 150 Prayer 6
## 151 Sin 6
## 152 Warner 6
## 153 Woe 6
## 154 as 6
## 155 All 5
## 156 Charity 5
## 157 Christ 5
## 158 Doth 5
## 159 East 5
## 160 Eternity 5
## 161 Fish 5
## 162 Gospel 5
## 163 High 5
## 164 Left 5
## 165 Life 5
## 166 Medina 5
## 167 Ones 5
## 168 Our 5
## 169 Servants 5
## 170 Time 5
## 171 Unbelief 5
## 172 Who 5
## 173 Assembly 4
## 174 Boiling 4
## 175 Creation 4
## 176 Dawn 4
## 177 Delight 4
## 178 Eat 4
## 179 Enlightenment 4
## 180 Eternal 4
## 181 Heaven 4
## 182 Jinn 4
## 183 M. 4
## 184 Men 4
## 185 Mountains 4
## 186 Muhammad 4
## 187 Nearest 4
## 188 Qarun 4
## 189 Qibla 4
## 190 Retribution 4
## 191 Sabbath 4
## 192 Scripture 4
## 193 Serve 4
## 194 Springs 4
## 195 Strength 4
## 196 Sura 4
## 197 Sustainer 4
## 198 Travel 4
## 199 Unbelievers 4
## 200 Water 4
## 201 Wood 4
## 202 Yahya 4
## 203 Zul 4
## 204 knoweth 4
## 205 thy 4
## 206 unto 4
## 207 Among 3
## 208 Behold 3
## 209 Believe 3
## 210 Beseech 3
## 211 Blessed 3
## 212 Covenant 3
## 213 Creator 3
## 214 Deceiver 3
## 215 Elias 3
## 216 Except 3
## 217 Felicity 3
## 218 Forgiveness 3
## 219 Future 3
## 220 Gabriel 3
## 221 Glorious 3
## 222 Goliath 3
## 223 Irresistible 3
## 224 Isma 3
## 225 Job 3
## 226 Jonah 3
## 227 Justice 3
## 228 Lamp 3
## 229 Living 3
## 230 Luqman 3
## 231 Majesty 3
## 232 Man 3
## 233 Manifest 3
## 234 May 3
## 235 Mocked 3
## 236 New 3
## 237 Sabians 3
## 238 Sad 3
## 239 Samiri 3
## 240 Servant 3
## 241 Sky 3
## 242 Sovereign 3
## 243 Star 3
## 244 Sun 3
## 245 Ta 3
## 246 Tree 3
## 247 Unto 3
## 248 Whether 3
## 249 Zaqqum 3
## 250 Zihar 3
## 251 hath 3
## 252 qarnain 3
## 253 thou 3
## 254 willeth 3
## 255 ye 3
## 256 your 3
## 257 \u0081LIddat 3
## 258 \u0081LImran 3
## 259 -Returning 2
## 260 Achievement 2
## 261 Alas 2
## 262 Ancient 2
## 263 Arrogant 2
## 264 Art 2
## 265 Burning 2
## 266 Christian 2
## 267 Confederates 2
## 268 Deeds 2
## 269 Distress 2
## 270 Dominion 2
## 271 Elisha 2
## 272 Evidence 2
## 273 Fierce 2
## 274 Flame 2
## 275 For 2
## 276 Forgiving 2
## 277 Gog 2
## 278 Guide 2
## 279 Hajj 2
## 280 Holy 2
## 281 Home 2
## 282 Hope 2
## 283 Ill 2
## 284 Jew 2
## 285 Kind 2
## 286 Kindness 2
## 287 King 2
## 288 Knoweth 2
## 289 Lay 2
## 290 Little 2
## 291 Loan 2
## 292 Magog 2
## 293 Make 2
## 294 Makka 2
## 295 Manna 2
## 296 Met 2
## 297 Midian 2
## 298 Misery 2
## 299 Morning 2
## 300 Muhajirs 2
## 301 Parable 2
## 302 Praises 2
## 303 Presence 2
## 304 R 2
## 305 Rass 2
## 306 Returning 2
## 307 Revelation 2
## 308 Reward 2
## 309 Saba 2
## 310 Saying 2
## 311 Scorching 2
## 312 Sijjin 2
## 313 Sinful 2
## 314 Soothing 2
## 315 Strict 2
## 316 Subsisting 2
## 317 Sure 2
## 318 Swear 2
## 319 Talut 2
## 320 Thrones 2
## 321 Till 2
## 322 Trust 2
## 323 Tur 2
## 324 Tuwa 2
## 325 Ways 2
## 326 Weak 2
## 327 Were 2
## 328 Wert 2
## 329 West 2
## 330 Worship 2
## 331 Wrong 2
## 332 Ya 2
## 333 forgiveth 2
## 334 guides 2
## 335 hajj 2
## 336 pledge 2
## 337 suffereth 2
## 338 thee 2
## 339 will 2
## 340 \u0081LAin 2
## 341 \u0081LAllah 2
## 342 \u0081LAziz 2
## 343 \u0081Lil 2
## 344 \u0081Lumra 2
## 345 Abandon 1
## 346 Able 1
## 347 Abode 1
## 348 About 1
## 349 Abundant 1
## 350 Accept 1
## 351 Act 1
## 352 Admonition 1
## 353 Al 1
## 354 Alif 1
## 355 Allahhead 1
## 356 Allah\u0081L 1
## 357 Already 1
## 358 Angel 1
## 359 Appoint 1
## 360 Appointed 1
## 361 Approach 1
## 362 Arafat 1
## 363 Ascent 1
## 364 Associate 1
## 365 Azar 1
## 366 Baal 1
## 367 Badr 1
## 368 Bakka 1
## 369 Baptism 1
## 370 Bear 1
## 371 Bearer 1
## 372 Beautiful 1
## 373 Become 1
## 374 Bedouin 1
## 375 Bedouins 1
## 376 Bee 1
## 377 Being 1
## 378 Believer 1
## 379 Beloved 1
## 380 Besides 1
## 381 Black 1
## 382 Blood 1
## 383 Blow 1
## 384 Brethren 1
## 385 Bring 1
## 386 Bur 1
## 387 Calamity 1
## 388 Calleth 1
## 389 Canopy 1
## 390 Carpets 1
## 391 Cause 1
## 392 Certain 1
## 393 Certainly 1
## 394 Check 1
## 395 Chief 1
## 396 Cities 1
## 397 City 1
## 398 Clamour 1
## 399 Closer 1
## 400 Commanding 1
## 401 Commands 1
## 402 Commission 1
## 403 Companions 1
## 404 Compasseth 1
## 405 Compassion 1
## 406 Conceal 1
## 407 Coral 1
## 408 Cry 1
## 409 Cursed 1
## 410 Dark 1
## 411 Darkness 1
## 412 Deaf 1
## 413 Deem 1
## 414 Deface 1
## 415 Deign 1
## 416 Delights 1
## 417 Deluge 1
## 418 Deny 1
## 419 Descend 1
## 420 Despair 1
## 421 Destination 1
## 422 Destruction 1
## 423 Devotees 1
## 424 Devour 1
## 425 Die 1
## 426 Disciples 1
## 427 Disdain 1
## 428 Disposer 1
## 429 Disputants 1
## 430 Dispute 1
## 431 Divine 1
## 432 Doer 1
## 433 Drive 1
## 434 Easts 1
## 435 Egyptians 1
## 436 Elect 1
## 437 Elephant 1
## 438 Embark 1
## 439 Empire 1
## 440 Enjoy 1
## 441 Equality 1
## 442 Error 1
## 443 Eschew 1
## 444 Establish 1
## 445 Evident 1
## 446 Evolver 1
## 447 Exult 1
## 448 Far 1
## 449 Fasting 1
## 450 Father 1
## 451 Favour 1
## 452 Fealty 1
## 453 Festival 1
## 454 Fig 1
## 455 Fight 1
## 456 Flee 1
## 457 Flood 1
## 458 Forbear 1
## 459 Forbearing 1
## 460 Forts 1
## 461 Friday 1
## 462 Frogs 1
## 463 Fruits 1
## 464 Fulfilleth 1
## 465 Fully 1
## 466 Generous 1
## 467 Giveth 1
## 468 Glass 1
## 469 God 1
## 470 Gomorrah 1
## 471 Grand 1
## 472 Granted 1
## 473 Granteth 1
## 474 Greater 1
## 475 Great\u0081L 1
## 476 Grieve 1
## 477 Groan 1
## 478 Guard 1
## 479 Guardians 1
## 480 Hard 1
## 481 Harut 1
## 482 Haughty 1
## 483 Hearer 1
## 484 Heareth 1
## 485 Hearing 1
## 486 Heavens 1
## 487 Higher 1
## 488 Honourable 1
## 489 Hoopoe 1
## 490 Humiliating 1
## 491 Hunain 1
## 492 Idris 1
## 493 Ignominy 1
## 494 Ignorance 1
## 495 Imams 1
## 496 Into 1
## 497 Invent 1
## 498 Iram 1
## 499 Iron 1
## 500 Israelites 1
## 501 Jizya 1
## 502 John 1
## 503 Join 1
## 504 Jonas 1
## 505 Journey 1
## 506 Joy 1
## 507 Judaism 1
## 508 Judi 1
## 509 Justification 1
## 510 Ka 1
## 511 Kaf 1
## 512 Kafur 1
## 513 Ka\u0081Lba 1
## 514 Kifl 1
## 515 Knewest 1
## 516 Knowledge\u0081L 1
## 517 Labouring 1
## 518 Lam 1
## 519 Lamps 1
## 520 Lat 1
## 521 Law 1
## 522 Learned 1
## 523 Lice 1
## 524 Limit 1
## 525 Listen 1
## 526 Lit 1
## 527 Lot 1
## 528 Lote 1
## 529 Loves 1
## 530 Loveth 1
## 531 Loving 1
## 532 Made 1
## 533 Magians 1
## 534 Magnificent 1
## 535 Maidens 1
## 536 Malik 1
## 537 Manat 1
## 538 March 1
## 539 Marry 1
## 540 Martyr-witnesses 1
## 541 Martyrs 1
## 542 Marut 1
## 543 Marwa 1
## 544 Me 1
## 545 Mead 1
## 546 Mention 1
## 547 Messengers 1
## 548 Michael 1
## 549 Middle 1
## 550 Mine 1
## 551 Minister 1
## 552 Mischief 1
## 553 Miserable 1
## 554 Mission 1
## 555 Month 1
## 556 Months 1
## 557 Moons 1
## 558 Moreover 1
## 559 Muslim 1
## 560 Muslims\u0081L 1
## 561 Nasr\u0081L 1
## 562 Naught 1
## 563 Niche 1
## 564 Nights 1
## 565 Nine 1
## 566 Nineteen 1
## 567 Noah\u0081Ls 1
## 568 Noise 1
## 569 Nun 1
## 570 Obedience 1
## 571 Observant 1
## 572 Ocean 1
## 573 Oceans 1
## 574 Oft-forgiving 1
## 575 Oft-repeated 1
## 576 Omnipotent 1
## 577 Other 1
## 578 Overthrown 1
## 579 Own 1
## 580 Past 1
## 581 Pearls 1
## 582 Pen 1
## 583 Perspicuous 1
## 584 Pilgrimage 1
## 585 Pleasure 1
## 586 Plots 1
## 587 Poetry 1
## 588 Polytheists 1
## 589 Powerful 1
## 590 Pride 1
## 591 Prohibited 1
## 592 Prophets 1
## 593 Prophet\u0081Ls 1
## 594 Provides 1
## 595 Psalms 1
## 596 Punishment 1
## 597 Qaf 1
## 598 Quraish 1
## 599 R. 1
## 600 Rahman 1
## 601 Rain 1
## 602 Raised 1
## 603 Ramadhan 1
## 604 Rank 1
## 605 Ra\u0081Lina 1
## 606 Reach 1
## 607 Ready 1
## 608 Reality 1
## 609 Realm 1
## 610 Record 1
## 611 Rejected 1
## 612 Religion 1
## 613 Repel 1
## 614 Requital 1
## 615 Respite 1
## 616 Rest 1
## 617 Retain 1
## 618 Revile 1
## 619 Righteousness 1
## 620 Robe 1
## 621 Rocky 1
## 622 Roman 1
## 623 Round 1
## 624 Ruler 1
## 625 Sacrifice 1
## 626 Safa 1
## 627 Safety 1
## 628 Salsabil 1
## 629 Salvation 1
## 630 Satisfaction 1
## 631 Sawest 1
## 632 Season 1
## 633 Seest 1
## 634 Seize 1
## 635 Sends 1
## 636 Separatism 1
## 637 Seven 1
## 638 Short 1
## 639 Sight 1
## 640 Sincere 1
## 641 Sing 1
## 642 Sirius 1
## 643 Slay 1
## 644 Smoke 1
## 645 Sodom 1
## 646 Solitary 1
## 647 Solomon\u0081Ls 1
## 648 Son 1
## 649 Soul 1
## 650 Spend 1
## 651 Spirit 1
## 652 Splendour 1
## 653 Stakes 1
## 654 Stand 1
## 655 Steadfast 1
## 656 Steeds 1
## 657 Stones 1
## 658 Stunning 1
## 659 Suckle 1
## 660 Sufficient 1
## 661 Sunset 1
## 662 Sustenance 1
## 663 Suwa\u0081L 1
## 664 Swell 1
## 665 Swifter 1
## 666 Symbols 1
## 667 Tabuk 1
## 668 Taken 1
## 669 Talh 1
## 670 Tasnim 1
## 671 Taurat 1
## 672 Tell 1
## 673 Testing 1
## 674 Than 1
## 675 Therein 1
## 676 Thine 1
## 677 Times 1
## 678 Token 1
## 679 Tract 1
## 680 Traffic 1
## 681 Transgress 1
## 682 True 1
## 683 Tubba 1
## 684 Turn 1
## 685 Turned 1
## 686 Ummat 1
## 687 Unfaith 1
## 688 Universe 1
## 689 Untruth 1
## 690 Us 1
## 691 Vanity 1
## 692 Verily 1
## 693 Very 1
## 694 Victory 1
## 695 Vile 1
## 696 Violate 1
## 697 Violent 1
## 698 Wadd 1
## 699 Warn 1
## 700 Warners 1
## 701 Watcher 1
## 702 Watchful 1
## 703 Wealth 1
## 704 Well 1
## 705 Wherewith 1
## 706 Whisperer 1
## 707 Wilt 1
## 708 Wind 1
## 709 With 1
## 710 World 1
## 711 Wouldst 1
## 712 Written 1
## 713 Yaguth 1
## 714 Yathrib 1
## 715 Ya\u0081Luq 1
## 716 Zaid 1
## 717 Zanjabil 1
## 718 Zodiacal 1
## 719 Zun 1
## 720 angels 1
## 721 apostates 1
## 722 beareth 1
## 723 beckons 1
## 724 begins 1
## 725 belong 1
## 726 belongs 1
## 727 besides 1
## 728 d 1
## 729 doth 1
## 730 dwell 1
## 731 earth 1
## 732 even 1
## 733 fail 1
## 734 fire\u0081L 1
## 735 garden 1
## 736 giveth 1
## 737 glorify 1
## 738 heaven 1
## 739 held 1
## 740 jest 1
## 741 keep 1
## 742 kill 1
## 743 knew 1
## 744 knows 1
## 745 league 1
## 746 marry 1
## 747 non-Arabs 1
## 748 omen 1
## 749 perish 1
## 750 prospereth 1
## 751 satan 1
## 752 seek 1
## 753 sees 1
## 754 sitting 1
## 755 soul 1
## 756 swell 1
## 757 thereunto 1
## 758 \u0081LAbandon 1
## 759 \u0081LBe 1
## 760 \u0081LBesides 1
## 761 \u0081LBy 1
## 762 \u0081LIf 1
## 763 \u0081LIlliyin 1
## 764 \u0081LIlliyun 1
## 765 \u0081LLord 1
## 766 \u0081LMy 1
## 767 \u0081LNay 1
## 768 \u0081LThrow 1
## 769 \u0081LTo 1
## 770 \u0081LTrue 1
## 771 \u0081LUzza 1
## 772 \u0081LWhy 1
## 773 \u0081Liddat 1
## 774 \u0081Lour 1
## 775 \u0081Lworship 1
## 776 \u0081L\u0081L 1
# Plot Named Entities
# slice_max() replaces the superseded top_n(); ties at the cut-off are kept,
# so slightly more than 20 tokens may be shown.
named_entities %>%
  slice_max(n, n = 20) %>%
  ggplot(aes(x = reorder(token, n), y = n)) +
  geom_col() +
  # Flip so the token labels are read horizontally
  coord_flip() +
  labs(
    title = "Most Frequent Named Entities in The Quran",
    x = "Entity",
    y = "Frequency"
  ) +
  theme_minimal()

# Word cloud of the proper-noun tokens counted above
# Tokens seen fewer than 2 times are skipped and at most 100 tokens are drawn
with(
  named_entities,
  wordcloud(
    words = token,
    freq = n,
    min.freq = 2,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.1,
    scale = c(3.5, 0.75),
    colors = brewer.pal(8, "Dark2")
  )
)
# Bold, left-aligned title in the top margin of the base-graphics plot
mtext("Word Cloud of Named Entities in The Quran", side = 3, adj = 0, line = 1, cex = 1, font = 2)
