library(tidytext)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.2     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the Scopus abstract export into `papers_df`.
#
# Column types are declared explicitly (title and text as character, year as
# double -- the types readr reported when it guessed them for this file), so
# parsing no longer depends on type guessing and the column-specification
# message is not printed. Empty fields are read as NA (`na = ""`).
# When this script was last run the file had 68 rows and 3 columns.
#
# NOTE(review): the path is absolute and machine-specific; consider a
# project-relative path so the script runs on other machines.
papers_df <- read_csv(
  "C:/PostDoc Journey_Coky/Content Analysis Study/scopus_abstract_example.csv",
  na = "",
  col_types = cols(
    title = col_character(),
    text = col_character(),
    year = col_double()
  )
)
# Tokenise `text` into lower-cased single words (one row per word), then drop
# stop words, tokens containing a digit, and tokens of three characters or
# fewer.
tidy_papers <- papers_df %>%
  unnest_tokens(word, text, to_lower = TRUE) %>%
  # Name the join key explicitly: this avoids the "Joining with ..." message
  # and keeps the join on `word` only, even if papers_df ever gains a column
  # whose name matches another column of stop_words.
  anti_join(stop_words, by = "word") %>%
  filter(
    !grepl("[0-9]", word),  # no digits anywhere in the token
    nchar(word) > 3         # keep words of four or more characters
  )
# Horizontal bar chart of the 20 most frequent words in `tidy_papers`,
# with the most frequent word at the top.
tidy_papers %>%
  count(word, sort = TRUE) %>%
  slice_head(n = 20) %>%
  # Order the factor levels by frequency so the bars are sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +  # words on the vertical axis
  labs(
    x = "Word",
    y = "Frequency",
    title = "Top 20 Most Frequent Words"
  ) +
  theme_minimal()

# Count two-word phrases (bigrams) in `text`, keeping only pairs in which
# neither word is a stop word. The result has one row per bigram with its
# frequency `n`, most frequent first.
bigram_counts <- papers_df %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  # A missing text, or one with fewer than two words, produces an NA bigram.
  # Without this filter the NA passes the stop-word test below and is united
  # into the literal string "NA NA", which would then be counted as a bigram.
  filter(!is.na(bigram)) %>%
  # Split each pair so the two words can be checked independently.
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  # Recombine the surviving pairs and tally them.
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)

library(ggplot2)

# Horizontal bar chart of the first 20 rows of `bigram_counts`, with bars
# ordered by frequency.
bigram_counts %>%
  slice_head(n = 20) %>%
  # Order the factor levels by frequency so the bars are sorted.
  mutate(bigram = reorder(bigram, n)) %>%
  ggplot(aes(x = bigram, y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +  # bigrams on the vertical axis
  labs(
    x = "Word Pair",
    y = "Frequency",
    title = "Top 20 Bigrams"
  )

library(stringr)

# Print the `text` values that contain the phrase "islamic finance",
# matched case-insensitively. Each row is a whole `text` entry, not a single
# sentence.
papers_df %>%
  select(text) %>%
  filter(str_detect(text, regex("islamic finance", ignore_case = TRUE)))
## # A tibble: 55 × 1
##    text                                                                         
##    <chr>                                                                        
##  1 Purpose: Time orientation as a proxy of culture can play an important role i…
##  2 Purpose: This paper aims to highlight resolution of Islamic finance dispute …
##  3 Purpose: Despite the fact that small and medium enterprises (SMEs) play a cr…
##  4 The present study explores avenues for Islamic economics to achieve financia…
##  5 The challenge for Islamic finance institutions is to find solutions and deve…
##  6 Purpose: This study aims to empirically investigates the effect of the COVID…
##  7 Using a hybrid literature review incorporating the TCCM (Theory, Context, Ch…
##  8 Purpose: Conventional insurance creates a gap in the financial system across…
##  9 Ethical investments, such as socially responsible investment (SRI), and fait…
## 10 Purpose: This study aims to develop a comprehensive sustainability performan…
## # ℹ 45 more rows
library(igraph)
## 
## Attaching package: 'igraph'
## 
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## 
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## 
## The following object is masked from 'package:tidyr':
## 
##     crossing
## 
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## 
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union
library(ggraph)

# Build an igraph object from the frequent bigrams: each bigram becomes an
# edge from its first word to its second, and the count `n` is carried along
# as an edge attribute.
bigram_network <- bigram_counts %>%
  filter(n > 5) %>%  # keep bigrams seen more than 5 times; tune as needed
  separate(bigram, into = c("from", "to"), sep = " ") %>%
  graph_from_data_frame()

# Draw the bigram network: edge width reflects the bigram count `n`, and
# every node is labelled with its word.
# NOTE(review): the "fr" layout and repelled labels look randomised, so the
# figure may differ between runs -- confirm whether a fixed seed is wanted.
bigram_network %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_width = n), alpha = 0.6) +
  geom_node_point(size = 3, color = "lightblue") +
  geom_node_text(aes(label = name), repel = TRUE) +  # avoid overlapping labels
  theme_void()
## Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
## ℹ Please use the `transform` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.