library(tidytext)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.2 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the abstracts table; empty cells are read as NA.
papers_df <- read_csv(
  file = "C:/PostDoc Journey_Coky/Content Analysis Study/scopus_abstract_example.csv",
  na = ""
)
## Rows: 68 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): title, text
## dbl (1): year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a one-word-per-row table from the `text` column: lower-cased tokens
# with stop words, tokens containing a digit, and tokens of three or fewer
# characters removed.
tidy_papers <- papers_df %>%
  unnest_tokens(word, text, to_lower = TRUE) %>%
  # Name the join key explicitly so the join never silently picks up another
  # column shared with `stop_words`, and no "Joining with ..." message is
  # printed.
  anti_join(stop_words, by = "word") %>%
  filter(
    !grepl("[0-9]", word), # drop tokens that contain any digit
    nchar(word) > 3 # keep only words longer than three characters
  )
# Horizontal bar chart of the 20 words with the highest counts in
# `tidy_papers`, longest bar at the top.
top_words <- tidy_papers %>%
  count(word, sort = TRUE) %>%
  head(20)

ggplot(top_words, aes(x = reorder(word, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() + # put the words on the vertical axis
  theme_minimal() +
  labs(
    title = "Top 20 Most Frequent Words",
    x = "Word",
    y = "Frequency"
  )

# Count two-word phrases (bigrams) in the `text` column, keeping only pairs in
# which neither word is a stop word. Result: one row per bigram with its
# count `n`, most frequent first.
bigram_counts <- papers_df %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>% # n = 2 for pairs
  # Texts that are missing or shorter than two words produce an NA bigram.
  # Drop those rows here: `NA %in% stop_words$word` is FALSE, so they would
  # pass the stop-word filter below and unite() would paste them back
  # together as the literal string "NA NA".
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>% # one word per column
  filter(
    !word1 %in% stop_words$word, # drop pairs containing a stop word
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>% # recombine into one string
  count(bigram, sort = TRUE) # count frequencies
library(ggplot2)
# Horizontal bar chart of the first 20 rows of `bigram_counts`.
top_bigrams <- head(bigram_counts, 20)

ggplot(top_bigrams, aes(x = reorder(bigram, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() + # put the bigrams on the vertical axis
  labs(
    title = "Top 20 Bigrams",
    x = "Word Pair",
    y = "Frequency"
  )

library(stringr)
# Print the complete `text` value of every row whose text contains the phrase
# "islamic finance", matched case-insensitively.
papers_df %>%
  select(text) %>%
  filter(
    str_detect(text, regex("islamic finance", ignore_case = TRUE))
  )
## # A tibble: 55 × 1
## text
## <chr>
## 1 Purpose: Time orientation as a proxy of culture can play an important role i…
## 2 Purpose: This paper aims to highlight resolution of Islamic finance dispute …
## 3 Purpose: Despite the fact that small and medium enterprises (SMEs) play a cr…
## 4 The present study explores avenues for Islamic economics to achieve financia…
## 5 The challenge for Islamic finance institutions is to find solutions and deve…
## 6 Purpose: This study aims to empirically investigates the effect of the COVID…
## 7 Using a hybrid literature review incorporating the TCCM (Theory, Context, Ch…
## 8 Purpose: Conventional insurance creates a gap in the financial system across…
## 9 Ethical investments, such as socially responsible investment (SRI), and fait…
## 10 Purpose: This study aims to develop a comprehensive sustainability performan…
## # ℹ 45 more rows
library(igraph)
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:lubridate':
##
## %--%, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
library(ggraph)
# Turn the more frequent bigrams into a graph: each bigram becomes an edge
# from its first word to its second, and the count `n` is kept as an edge
# attribute.
bigram_network <- bigram_counts %>%
  filter(n > 5) %>% # frequency threshold; adjust to show more or fewer edges
  separate(bigram, into = c("from", "to"), sep = " ") %>%
  graph_from_data_frame(directed = TRUE)
# Plot the bigram network: edge width is mapped to the bigram count `n`, and
# each node is labelled with its word.
# The "fr" (Fruchterman-Reingold) layout starts from random positions, so fix
# the RNG seed to get the same figure on every run.
set.seed(42)
ggraph(bigram_network, layout = "fr") +
  geom_edge_link(aes(edge_width = n), alpha = 0.6) +
  geom_node_point(size = 3, color = "lightblue") +
  geom_node_text(aes(label = name), repel = TRUE) + # repel avoids overlaps
  theme_void()
## Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
## ℹ Please use the `transform` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
