# Load necessary libraries
pacman::p_load(pacman, tidytext, dplyr, tidyr, ggplot2, readr, topicmodels, gridExtra, wordcloud, RColorBrewer, quanteda, quanteda.textstats)
# Read the dialogue from disk; per the column specification printed below,
# the file holds a single character column
Ion_data <- read_csv(file = "ION_Plato.csv")
## Rows: 180 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ION
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give that column a short, generic name for the steps that follow
names(Ion_data) <- "text"
# Make the tidytext stop-word table available in the workspace
data("stop_words")
# Split the text into word tokens, then drop every token that appears in the
# stop-word table (the join key is inferred from the shared `word` column)
tokens <- Ion_data %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(stop_words)
## Joining with `by = join_by(word)`
# Tally how often each remaining word occurs, most frequent first
word_counts <- count(tokens, word, sort = TRUE)
# Show the 30 most frequent words
head(word_counts, n = 30)
## # A tibble: 30 × 2
## word n
## <chr> <int>
## 1 socrates 113
## 2 ion 112
## 3 art 49
## 4 homer 42
## 5 rhapsode 28
## 6 speak 22
## 7 poets 19
## 8 knowledge 18
## 9 speaking 10
## 10 poet 9
## # ℹ 20 more rows
# Plot the 30 most frequent words as a horizontal bar chart.
# slice_max() replaces the superseded top_n() and names the ranking column
# explicitly rather than silently falling back to the last column. Like
# top_n(), it keeps ties, so more than 30 bars can appear.
word_counts %>%
  slice_max(order_by = n, n = 30) %>%
  # reorder() sorts the bars by frequency instead of alphabetically
  ggplot(aes(x = reorder(word, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Most Common Words in Ion by Plato",
    x = "Words",
    y = "Frequency"
  ) +
  theme_minimal()

# Sentiment Analysis using Bing Lexicon
# Keep only the tokens that have an entry in the Bing lexicon
bing_sentiments <- inner_join(tokens, get_sentiments("bing"))
## Joining with `by = join_by(word)`
# Tally each sentiment and express it as a share of all matched tokens
bing_sentiments_count <- bing_sentiments %>%
  count(sentiment, sort = TRUE) %>%
  mutate(percentage = n / sum(n) * 100)
# Bar chart of the counts, each bar labelled with its percentage
bing_plot <- bing_sentiments_count %>%
  ggplot(aes(x = sentiment, y = n, fill = sentiment)) +
  geom_col() +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), vjust = 1) +
  labs(
    title = "Sentiment Analysis of Ion by Plato using Bing Lexicon",
    x = "Sentiment",
    y = "Count"
  ) +
  theme()
# NRC lexicon: declare the join as many-to-many up front so dplyr does not
# warn about the relationship
nrc_sentiments <- inner_join(
  tokens,
  get_sentiments("nrc"),
  relationship = "many-to-many"
)
## Joining with `by = join_by(word)`
# Tally each NRC category and express it as a share of all matches
nrc_sentiments_count <- nrc_sentiments %>%
  count(sentiment, sort = TRUE) %>%
  mutate(percentage = n / sum(n) * 100)
# Horizontal bar chart ordered by count; percentage labels are right-aligned
# at the end of each bar
nrc_plot <- nrc_sentiments_count %>%
  ggplot(aes(x = reorder(sentiment, n), y = n, fill = sentiment)) +
  geom_col() +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), hjust = 1) +
  coord_flip() +
  labs(
    title = "NRC Sentiment Analysis of Ion by Plato",
    x = "Sentiment",
    y = "Count"
  ) +
  theme()
# Draw the Bing and NRC charts next to each other in a single row
grid.arrange(grobs = list(bing_plot, nrc_plot), nrow = 1)

# Generate Word Cloud
# Seed the RNG before drawing so repeated runs start from the same state
set.seed(1234)
# Only words seen at least 5 times are eligible, capped at 100 words
with(
  word_counts,
  wordcloud(
    words = word,
    freq = n,
    min.freq = 5,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.1,
    scale = c(3.5, 0.8),
    colors = brewer.pal(8, "Dark2")
  )
)
# Bold, left-aligned title in the top margin
mtext(
  "Word Cloud of Ion by Plato",
  side = 3, adj = 0, line = 1, cex = 1, font = 2
)

# Prepare data for Topic Modeling
# Create a document-term matrix with one document per row of the source CSV.
# The row id must be attached BEFORE tokenizing: numbering the rows of
# `tokens` (one row per word) would turn every single word into its own
# document, leaving LDA no word co-occurrence to learn from.
# NOTE: any recorded `##` topic-model output in this file predates this change
# and should be regenerated.
dtm <- Ion_data %>%
  mutate(document = row_number()) %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  count(document, word) %>%
  cast_dtm(document, word, n)
# Set the number of topics
num_topics <- 4
# Run LDA with a fixed seed so the fit is repeatable
lda_model <- LDA(dtm, k = num_topics, control = list(seed = 1234))
# Extract the per-topic term probabilities (beta) as a tidy table
lda_terms <- tidy(lda_model, matrix = "beta")
# Keep the ten highest-beta terms of each topic, sorted within topic.
# slice_max() replaces the superseded top_n(); like top_n(), it keeps ties,
# so a topic can list more than ten terms.
top_terms <- lda_terms %>%
  group_by(topic) %>%
  slice_max(order_by = beta, n = 10) %>%
  ungroup() %>%
  arrange(topic, desc(beta))
# Print one comma-separated line of terms per topic
top_terms %>%
  group_by(topic) %>%
  summarize(terms = paste(term, collapse = ", ")) %>%
  print()
## # A tibble: 4 × 2
## topic terms
## <int> <chr>
## 1 1 ion, socrates, art, rhapsode, speak, poet, knowledge, homer, true, judge
## 2 2 socrates, ion, rhapsode, homer, knowledge, speak, poets, hesiod, god, p…
## 3 3 homer, poets, ion, socrates, rhapsode, words, god, reason, true, physic…
## 4 4 ion, socrates, art, homer, speak, speaking, knowledge, mind, hear, char…
# One facet per topic, showing its top terms as horizontal bars.
# reorder_within() + scale_x_reordered() sort the terms by beta separately
# inside every facet.
top_terms %>%
  mutate(term = reorder_within(term, by = beta, within = topic)) %>%
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free_y") +
  coord_flip() +
  scale_x_reordered() +
  labs(
    title = "Top Terms in Each Topic",
    x = "Terms",
    y = "Beta"
  ) +
  theme()

# Textual Complexity: Flesch-Kincaid Readability
# Collapse all rows into one string so a single score covers the whole text
Ion_text <- paste(Ion_data$text, collapse = " ")
readability <- textstat_readability(Ion_text, measure = "Flesch.Kincaid")
# textstat_readability() returns a data frame (document id plus one column
# per measure). Pasting the whole frame printed one string per column,
# including a bogus "text1" line, so extract the score column first.
print(paste0(
  "Flesch-Kincaid Readability Score: ",
  readability$Flesch.Kincaid
))
## [1] "Flesch-Kincaid Readability Score: 9.3041250890948"