library(tidytext)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.2 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggdendro)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(igraph)
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:lubridate':
##
## %--%, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
library(ggraph)
# Load the abstract export into a tibble. The path is specific to the author's
# machine; empty CSV fields are read as missing values.
papers_df <- read_csv(
  file = "C:/PostDoc Journey_Coky/Content Analysis Study/scopus_abstract_example.csv",
  na = ""
)
## Rows: 68 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): title, text
## dbl (1): year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Count how often each bigram occurs across all abstracts, discarding any
# bigram in which either word is a stop word. The result has one row per
# bigram with its count `n`, sorted from most to least frequent.
bigram_counts <- papers_df %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  # A missing abstract, or one with fewer than two words, produces an NA
  # bigram. Left in place, it survives the stop-word filter (NA is not in the
  # stop-word list) and unite() turns it into the literal string "NA NA".
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)
# Keep only the frequent bigrams: those seen more than 5 times, capped at the
# first 100 rows of `bigram_counts` (adjust the threshold as needed).
bigram_filtered <- slice_head(
  filter(bigram_counts, n > 5),
  n = 100
)
## 1. Bigram Network Visualization
# Split each retained bigram into its two words and use them as the endpoints
# of a graph edge; the count column `n` travels along as an edge attribute.
bigram_network <- graph_from_data_frame(
  separate(bigram_filtered, bigram, c("from", "to"), sep = " ")
)

# Fix the RNG seed before the layout is computed so the plot is repeatable.
set.seed(123)
ggraph(bigram_network, layout = "fr") +
  # Edges: width and transparency both follow the bigram count.
  geom_edge_link(
    aes(edge_width = n, edge_alpha = n),
    color = "darkgray",
    show.legend = FALSE
  ) +
  # Nodes: point size follows the node's degree in the graph.
  geom_node_point(
    aes(size = degree(bigram_network)),
    color = "lightblue",
    alpha = 0.7
  ) +
  geom_node_text(aes(label = name), repel = TRUE, size = 3) +
  scale_edge_width(range = c(0.5, 3)) +
  theme_void() +
  labs(
    title = "Bigram Network Analysis of Islamic Finance Research",
    subtitle = "Node size represents degree centrality, edge width represents co-occurrence frequency"
  )

## 2. Bigram Co-occurrence Matrix
# Document-term matrix restricted to the retained bigrams: one row per paper
# title, one column per bigram, cells holding the per-title bigram count.
bigram_dtm <- papers_df %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  # Discard everything outside the retained set before counting.
  filter(bigram %in% bigram_filtered$bigram) %>%
  count(title, bigram) %>%
  cast_dtm(title, bigram, n)
# Dense base matrix used by the clustering steps below.
bigram_matrix <- as.matrix(bigram_dtm)
## 3. Hierarchical Clustering
# Euclidean distances between bigrams: the matrix is transposed so that each
# bigram is a row, its columns are standardised with scale(), and distances
# are then taken between rows.
dist_matrix <- dist(x = scale(t(bigram_matrix)), method = "euclidean")
# Agglomerative clustering using Ward's criterion ("ward.D2").
hc <- hclust(d = dist_matrix, method = "ward.D2")
# Horizontal dendrogram of the resulting tree.
ggdendrogram(hc, rotate = TRUE, size = 3) +
  labs(
    title = "Hierarchical Clustering of Islamic Finance Bigrams",
    subtitle = "Ward's method with Euclidean distance"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8))

# Number of groups to extract from the tree; revise after inspecting the
# dendrogram.
num_clusters <- 5
# Cluster label for every bigram, obtained by cutting the tree into
# `num_clusters` groups.
bigram_clusters <- cutree(tree = hc, k = num_clusters)
# Redraw the dendrogram with the groups coloured and boxed.
fviz_dend(
  hc,
  k = num_clusters,
  main = "Bigram Clusters in Islamic Finance Research",
  cex = 0.6,
  labels_track_height = 0.8,
  k_colors = "jco",
  rect = TRUE,
  rect_border = "jco",
  rect_fill = TRUE
)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## 4. K-means Clustering
# Partition the bigrams (rows of the transposed matrix) into `num_clusters`
# groups, keeping the best of 25 random starts; the seed makes the starts
# repeatable.
# NOTE(review): unlike the hierarchical step, the matrix is not passed through
# scale() before clustering here -- confirm this is intended.
set.seed(123)
kmeans_result <- kmeans(
  x = t(bigram_matrix),
  centers = num_clusters,
  nstart = 25
)
# Cluster plot of the k-means partition.
fviz_cluster(
  kmeans_result,
  data = t(bigram_matrix),
  ggtheme = theme_minimal(),
  ellipse.type = "norm",
  labelsize = 8,
  repel = TRUE
) +
  labs(
    title = "K-means Clustering of Islamic Finance Bigrams",
    subtitle = paste("Visualization of", num_clusters, "clusters")
  )
## Too few points to calculate an ellipse

## 5. Cluster Interpretation
# One row per bigram with the cluster label it received from each method.
cluster_assignments <- data.frame(
  bigram = colnames(bigram_matrix),
  cluster_hc = bigram_clusters,
  cluster_kmeans = kmeans_result$cluster
)
# Hierarchical clusters: list the member bigrams and the size of each cluster,
# largest cluster first.
cluster_terms_hc <- cluster_assignments %>%
  group_by(cluster_hc) %>%
  summarise(
    terms = paste(bigram, collapse = ", "),
    count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(count))
print(cluster_terms_hc)
## # A tibble: 5 × 3
## cluster_hc terms count
## <int> <chr> <int>
## 1 1 economic development, islamic economics, rights reserved, so… 52
## 2 4 design methodology, limitations implications, methodology ap… 7
## 3 5 islamic banking, islamic banks 2
## 4 2 islamic finance 1
## 5 3 islamic financial 1
# K-means clusters: list the member bigrams and the size of each cluster,
# largest cluster first.
cluster_terms_kmeans <- cluster_assignments %>%
  group_by(cluster_kmeans) %>%
  summarise(
    terms = paste(bigram, collapse = ", "),
    count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(count))
print(cluster_terms_kmeans)
## # A tibble: 5 × 3
## cluster_kmeans terms count
## <int> <chr> <int>
## 1 1 islamic economics, islamic financial, rights reserved, s… 47
## 2 4 economic development, saudi arabia, vision 2030, financi… 7
## 3 2 islamic banks, financial performance, halal income, isla… 4
## 4 5 al shariah, maqasid al, takaful operators, esg principles 4
## 5 3 islamic finance 1
## 6. Heatmap Visualization
# Row permutation that groups bigrams by their hierarchical cluster label.
row_order <- order(cluster_assignments$cluster_hc)
# Bigram-by-document matrix, column-standardised with scale() and then
# rearranged so that bigrams from the same cluster sit together.
heatmap_data <- scale(t(bigram_matrix))[row_order, ]
# Row annotation: the cluster label of each row, in the same order as
# `heatmap_data`.
annotation_row <- data.frame(
  Cluster = factor(cluster_assignments$cluster_hc[row_order])
)
# Draw the annotated heatmap with pheatmap and fall back to base heatmap()
# only when pheatmap raises an error.
library(pheatmap)

# Diverging colour ramp shared by both renderers.
heatmap_palette <- colorRampPalette(c("navy", "white", "firebrick3"))(50)

# TRUE when pheatmap drew the figure, FALSE when it signalled an error.
# The fallback below is gated on this flag, so it must always be assigned.
pheatmap_success <- tryCatch(
  {
    # pheatmap matches annotations to matrix rows by row name.
    rownames(annotation_row) <- rownames(heatmap_data)
    # One colour per cluster level actually present. brewer.pal() needs at
    # least 3 colours, so request that many and keep only what is needed.
    cluster_levels <- levels(annotation_row$Cluster)
    cluster_colors <- RColorBrewer::brewer.pal(
      max(3L, length(cluster_levels)),
      "Set1"
    )[seq_along(cluster_levels)]
    names(cluster_colors) <- cluster_levels
    anno_colors <- list(Cluster = cluster_colors)
    pheatmap(
      heatmap_data,
      cluster_rows = FALSE,
      cluster_cols = TRUE,
      annotation_row = annotation_row,
      annotation_colors = anno_colors,
      show_colnames = FALSE,
      show_rownames = TRUE,
      fontsize_row = 6,
      color = heatmap_palette,
      main = "Bigram Frequency Heatmap with Cluster Annotation"
    )
    TRUE
  },
  error = function(e) {
    # Report the failure instead of swallowing it, then signal the fallback.
    message(
      "pheatmap failed, falling back to base heatmap(): ",
      conditionMessage(e)
    )
    FALSE
  }
)

# Alternative visualization, drawn only if pheatmap failed.
if (!pheatmap_success) {
  heatmap(
    heatmap_data,
    Colv = NA,
    Rowv = NA,
    scale = "none",
    col = heatmap_palette,
    main = "Bigram Frequency Heatmap",
    margins = c(5, 10),
    cexRow = 0.6
  )
  # Add cluster information manually.
  legend(
    "topright",
    legend = paste("Cluster", seq_len(num_clusters)),
    fill = RColorBrewer::brewer.pal(num_clusters, "Set1"),
    border = NA,
    bty = "n",
    cex = 0.8
  )
}
