# NOTE(review): this wipes every object in the global environment of whoever
# runs the script. It is kept so the script's side effects stay unchanged, but
# starting from a fresh R session is the safer way to get a clean workspace.
rm(list = ls())

# Input data ----
# Folder that is searched for the exported PubMed results workbook.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# List the Excel workbooks directly inside `dir_path`. The pattern is anchored
# so that only names ending in ".xlsx" are returned, not every name that
# merely contains the letters "xlsx".
dir_path_name <- list.files(
  path = dir_path,
  pattern = "\\.xlsx$",
  full.names = TRUE,
  recursive = FALSE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\pubmed_search_results.xlsx"
library(openxlsx)
# Select the results workbook by a literal match on the file name (not a regex
# over the whole path) and skip Excel's "~$..." lock files, which appear next
# to the workbook while it is open and would otherwise be matched as well.
workbook_names <- basename(dir_path_name)
is_results_file <-
  grepl("pubmed_search_results.xlsx", workbook_names, fixed = TRUE) &
  !startsWith(workbook_names, "~$")
results_file <- dir_path_name[is_results_file]
# read.xlsx() needs exactly one path; fail early with a readable message
# instead of an obscure error when the file is missing or ambiguous.
if (length(results_file) != 1L) {
  stop(
    "Expected exactly one 'pubmed_search_results.xlsx' in ", dir_path,
    " but found ", length(results_file), ".",
    call. = FALSE
  )
}
data_1 <- read.xlsx(results_file)
# Rows and columns of the imported table.
dim(data_1)
## [1] 222  14
# One-time setup: uncomment to install the package if it is not yet available.
# install.packages('PubMedWordcloud')
# NOTE(review): disabled locale switch; presumably a workaround for
# locale/encoding problems during text processing - confirm before enabling.
# Sys.setlocale(category  = "LC_ALL", locale = "C")
library(PubMedWordcloud) 
## Warning: package 'PubMedWordcloud' was built under R version 4.0.5
# Print the first entry of the AbstractText column to inspect the raw text.
data_1$AbstractText[1]
## [1] "Abnormal sexual differentiation and other reproductive abnormalities in marine animals indicate the presence in seawater of endocrine-disrupting compounds (EDCs) that perturb the function of the sex hormone signaling pathways. However, most studies to date have reported on EDC effects in freshwater and sewage samples, and there is a paucity of bioassay data on the effects of EDCs in marine waters. Our aims in this study were to devise robust methodologies suitable for extracting potential EDCs and to measure their summated effects on activities of androgen receptors (ARs) and estrogen receptors (ER-alpha and ER-beta) in marine samples from Singapore's coastal waters. In this study, we examined the ability of C18, hydrophilic and lipophilic balance, and diol cartridges to extract potential EDCs from seawater samples. Extracts from C18 cartridges exhibited the highest sex hormone bioactivities in reporter gene assays based on a human cell line expressing AR, ER-alpha, and ER-beta. Examination of extracts from 20 coastal locations showed high androgenic and estrogenic agonist activities in confined clusters closest to the main island of Singapore. Sex hormone activity declined rapidly in clusters farther from the main coastline and in more open waters. Unexpectedly, surface and mid-depth samples from the confined high-activity clusters, in the presence of hormone, exhibited AR and ER-alpha activities that were 200-900% higher than those observed for the cognate hormone alone. This enhanced sex hormone activity suggests that analyses of complex seawater mixtures may uncover unusual bioactivities that may not be obvious by studying individual compounds. Our data present a \"snapshot\" of the sex hormone disruptor activity in Singapore's marine environment and indicate that C18 extraction for EDCs used in conjunction with reporter gene bioassays represents a robust and sensitive methodology for measuring summated androgenic and estrogenic activities in seawater."
# Build a word-frequency table from the first abstract only (not from all rows
# of data_1). The warnings recorded below show that the cleaning removes
# punctuation, numbers and English stop words and lower-cases the text.
clean <- cleanAbstracts(data_1$AbstractText[1])
## Warning in tm_map.SimpleCorpus(abstTxt, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(text2.corpus, function(x) removeNumbers(x)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(text2.corpus, tolower): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(text2.corpus, removeWords, stopwords("english")):
## transformation drops documents
# Print the resulting table; the recorded output has the columns `word` and
# `freq`.
clean
##                                    word freq
## hormone                         hormone    7
## edcs                               edcs    5
## sex                                 sex    5
## activities                   activities    4
## marine                           marine    4
## samples                         samples    4
## seawater                       seawater    4
## activity                       activity    3
## clusters                       clusters    3
## effects                         effects    3
## eralpha                         eralpha    3
## waters                           waters    3
## androgenic                   androgenic    2
## bioactivities             bioactivities    2
## cartridges                   cartridges    2
## coastal                         coastal    2
## compounds                     compounds    2
## confined                       confined    2
## data                               data    2
## erbeta                           erbeta    2
## estrogenic                   estrogenic    2
## exhibited                     exhibited    2
## extracts                       extracts    2
## gene                               gene    2
## indicate                       indicate    2
## main                               main    2
## may                                 may    2
## potential                     potential    2
## presence                       presence    2
## receptors                     receptors    2
## reporter                       reporter    2
## robust                           robust    2
## singapores                   singapores    2
## study                             study    2
## summated                       summated    2
## ability                         ability    1
## abnormal                       abnormal    1
## abnormalities             abnormalities    1
## agonist                         agonist    1
## aims                               aims    1
## alone                             alone    1
## analyses                       analyses    1
## androgen                       androgen    1
## animals                         animals    1
## ars                                 ars    1
## assays                           assays    1
## balance                         balance    1
## based                             based    1
## bioassay                       bioassay    1
## bioassays                     bioassays    1
## cell                               cell    1
## closest                         closest    1
## coastline                     coastline    1
## cognate                         cognate    1
## complex                         complex    1
## conjunction                 conjunction    1
## date                               date    1
## declined                       declined    1
## devise                           devise    1
## differentiation         differentiation    1
## diol                               diol    1
## disruptor                     disruptor    1
## edc                                 edc    1
## endocrinedisrupting endocrinedisrupting    1
## enhanced                       enhanced    1
## environment                 environment    1
## estrogen                       estrogen    1
## examination                 examination    1
## examined                       examined    1
## expressing                   expressing    1
## extract                         extract    1
## extracting                   extracting    1
## extraction                   extraction    1
## farther                         farther    1
## freshwater                   freshwater    1
## function                       function    1
## high                               high    1
## highactivity               highactivity    1
## higher                           higher    1
## highest                         highest    1
## however                         however    1
## human                             human    1
## hydrophilic                 hydrophilic    1
## individual                   individual    1
## island                           island    1
## line                               line    1
## lipophilic                   lipophilic    1
## locations                     locations    1
## measure                         measure    1
## measuring                     measuring    1
## methodologies             methodologies    1
## methodology                 methodology    1
## middepth                       middepth    1
## mixtures                       mixtures    1
## observed                       observed    1
## obvious                         obvious    1
## open                               open    1
## pathways                       pathways    1
## paucity                         paucity    1
## perturb                         perturb    1
## present                         present    1
## rapidly                         rapidly    1
## reported                       reported    1
## represents                   represents    1
## reproductive               reproductive    1
## sensitive                     sensitive    1
## sewage                           sewage    1
## sexual                           sexual    1
## showed                           showed    1
## signaling                     signaling    1
## singapore                     singapore    1
## snapshot                       snapshot    1
## studies                         studies    1
## studying                       studying    1
## suggests                       suggests    1
## suitable                       suitable    1
## surface                         surface    1
## uncover                         uncover    1
## unexpectedly               unexpectedly    1
## unusual                         unusual    1
## used                               used    1
# Size of the word-frequency table (one row per distinct word).
dim(clean)
## [1] 121   2
# Word cloud drawn with the PubMedWordcloud helper and its default settings.
plotWordCloud(clean)

library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.0.5
## Loading required package: RColorBrewer
# The same table drawn directly with wordcloud(), most frequent words placed
# first (random.order = FALSE) and 20% of the words rotated.
# `scale` is set explicitly to a smaller font range: the warnings recorded
# after this call come from a run without it, in which more than twenty words
# "could not be fit on page" and were silently left out of the plot.
# NOTE(review): whether every word fits still depends on the size of the
# graphics device - confirm on the target device and shrink further if needed.
wordcloud(
  words = clean$word,
  freq = clean$freq,
  min.freq = 1,
  max.words = 150,
  random.order = FALSE,
  rot.per = 0.2,
  scale = c(3, 0.3),
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## lipophilic could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## measure could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## measuring could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## methodologies could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## methodology could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## middepth could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## obvious could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## pathways could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## perturb could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## reported could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## represents could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## reproductive could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## sewage could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## showed could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## signaling could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## singapore could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## snapshot could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## studying could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## suggests could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## uncover could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## unexpectedly could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## unusual could not be fit on page. It will not be plotted.