Word Cloud Rights-Based

Felipe Melo

2023-04-25

Word Cloud on Literature Search

This word cloud analysis offers a glimpse of the most frequent terms used in the scientific literature on rights. I used the following search terms:

“rights” and “indigenous” and “biodiversity”

This search returned 775 documents.

Data cleaning and wrangling

FOR ABSTRACTS

  # Build the term-document matrix for the abstracts corpus, total each
  # term's count across all documents, and rank the terms by that total.
  dtm.ab <- TermDocumentMatrix(docs)
  m.ab <- as.matrix(dtm.ab)
  v.ab <- sort(rowSums(m.ab), decreasing = TRUE)
  d.ab <- data.frame(word = names(v.ab), freq = v.ab)
  head(d.ab, n = 20) # the 20 most frequent terms
##                        word freq
## indigenous       indigenous 1499
## rights               rights 1113
## biodiversity   biodiversity  901
## conservation   conservation  727
## species             species  708
## knowledge         knowledge  545
## local                 local  495
## communities     communities  475
## resources         resources  461
## forest               forest  452
## traditional     traditional  426
## peoples             peoples  391
## use                     use  386
## management       management  376
## international international  370
## environmental environmental  359
## areas                 areas  357
## development     development  347
## reserved           reserved  332
## diversity         diversity  329
  # Seed the RNG before drawing, then plot the abstract terms
  # (at most 200 words, non-random plotting order).
  set.seed(5)
  wordcloud(
    words = d.ab$word,
    freq = d.ab$freq,
    min.freq = 1,
    max.words = 200,
    random.order = FALSE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )

Now excluding the search terms

# Drop the search terms themselves. They are matched by name rather than by
# row position, so the filter stays correct if the frequency ranking changes.
d2.ab <- d.ab[!(d.ab$word %in% c("indigenous", "rights", "biodiversity")), ]
head(d2.ab, 20)
##                        word freq
## conservation   conservation  727
## species             species  708
## knowledge         knowledge  545
## local                 local  495
## communities     communities  475
## resources         resources  461
## forest               forest  452
## traditional     traditional  426
## peoples             peoples  391
## use                     use  386
## management       management  376
## international international  370
## environmental environmental  359
## areas                 areas  357
## development     development  347
## reserved           reserved  332
## diversity         diversity  329
## natural             natural  318
## land                   land  317
## new                     new  291
 # Word cloud of the abstract terms with the search terms removed.
 wordcloud(
   words = d2.ab$word,
   freq = d2.ab$freq,
   min.freq = 1,
   max.words = 200,
   random.order = FALSE,
   rot.per = 0.35,
   colors = brewer.pal(8, "Dark2")
 )

FOR TITLES (I won’t generate a word cloud just for titles)

 # Same frequency table as for the abstracts, built from the titles corpus:
 # total each term's count across documents and rank by that total.
 dtm.t <- TermDocumentMatrix(docs.t)
 m.t <- as.matrix(dtm.t)
 v.t <- sort(rowSums(m.t), decreasing = TRUE)
 d.t <- data.frame(word = names(v.t), freq = v.t)
 head(d.t, n = 20)
##                      word freq
## indigenous     indigenous  199
## biodiversity biodiversity  134
## conservation conservation  106
## rights             rights  106
## knowledge       knowledge   72
## property         property   62
## intellectual intellectual   56
## traditional   traditional   53
## forest             forest   50
## management     management   48
## diversity       diversity   45
## peoples           peoples   39
## community       community   38
## case                 case   36
## communities   communities   33
## new                   new   33
## marine             marine   33
## local               local   32
## sustainable   sustainable   30
## species           species   30
  # Exclude the search terms by name. Row positions are unreliable here:
  # tied frequencies (e.g. "conservation" and "rights") can swap ranks.
  d2.t <- d.t[!(d.t$word %in% c("indigenous", "rights", "biodiversity")), ]
  head(d2.t, 20)
##                      word freq
## conservation conservation  106
## knowledge       knowledge   72
## property         property   62
## intellectual intellectual   56
## traditional   traditional   53
## forest             forest   50
## management     management   48
## diversity       diversity   45
## peoples           peoples   39
## community       community   38
## case                 case   36
## communities   communities   33
## new                   new   33
## marine             marine   33
## local               local   32
## sustainable   sustainable   30
## species           species   30
## resources       resources   30
## development   development   28
## areas               areas   27

FOR ABSTRACTS AND TITLES

# Combine title and abstract frequencies for the terms present in both tables
# (merge() defaults to an inner join on "word"). The clashing "freq" columns
# are suffixed by merge(): freq.x holds title counts, freq.y abstract counts.
d.all.nexus <- merge(d2.t, d2.ab, by = "word", sort = TRUE)

# Total frequency across titles and abstracts.
d.all.nexus$sum <- rowSums(d.all.nexus[, c("freq.x", "freq.y")])

# Keep the combined total (not the abstract-only count) as the plotted
# frequency, ranked from most to least frequent.
d.all.nexus <- d.all.nexus[order(-d.all.nexus$sum), c("word", "sum")]
colnames(d.all.nexus) <- c("word", "freq")

wordcloud(
  words = d.all.nexus$word,
  freq = d.all.nexus$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)