# Input data ----
# Locate the PubMed search export on the Desktop and load it into `data_1`.
#
# NOTE: the former `rm(list = ls())` was removed. It only clears objects in the
# global environment (loaded packages and options stay as they were) and it
# wipes the caller's workspace when this file is sourced. Restart the R session
# to get a clean state instead.
library(openxlsx)

dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"

# List only files whose name ends in ".xlsx". The previous pattern ".*xlsx"
# matched the text "xlsx" anywhere in a file name.
dir_path_name <- list.files(
  path = dir_path,
  pattern = "\\.xlsx$",
  full.names = TRUE,
  recursive = FALSE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\pubmed_search_results.xlsx"

# Pick the workbook by exact file name. A regex search for the name would also
# select look-alikes (e.g. Excel lock files such as
# "~$pubmed_search_results.xlsx"), leaving more than one path to read.
results_file <- dir_path_name[
  basename(dir_path_name) == "pubmed_search_results.xlsx"
]
if (length(results_file) != 1L) {
  stop(
    "Expected exactly one 'pubmed_search_results.xlsx' in ", dir_path,
    " but found ", length(results_file), ".",
    call. = FALSE
  )
}

# One row per PubMed record; the recorded run had 222 rows and 14 columns.
data_1 <- read.xlsx(results_file)
dim(data_1)
## [1] 222 14
# One-time setup (disabled): install PubMedWordcloud if it is not available yet.
#install.packages('PubMedWordcloud')
# Disabled locale override; presumably a workaround for text-encoding problems
# -- confirm before re-enabling.
#Sys.setlocale(category = "LC_ALL", locale = "C")
library(PubMedWordcloud)
## Warning: package 'PubMedWordcloud' was built under R version 4.0.5
# Print the first abstract so the raw text can be compared with the cleaned
# word table built further down (recorded output follows).
data_1$AbstractText[1]
## [1] "Abnormal sexual differentiation and other reproductive abnormalities in marine animals indicate the presence in seawater of endocrine-disrupting compounds (EDCs) that perturb the function of the sex hormone signaling pathways. However, most studies to date have reported on EDC effects in freshwater and sewage samples, and there is a paucity of bioassay data on the effects of EDCs in marine waters. Our aims in this study were to devise robust methodologies suitable for extracting potential EDCs and to measure their summated effects on activities of androgen receptors (ARs) and estrogen receptors (ER-alpha and ER-beta) in marine samples from Singapore's coastal waters. In this study, we examined the ability of C18, hydrophilic and lipophilic balance, and diol cartridges to extract potential EDCs from seawater samples. Extracts from C18 cartridges exhibited the highest sex hormone bioactivities in reporter gene assays based on a human cell line expressing AR, ER-alpha, and ER-beta. Examination of extracts from 20 coastal locations showed high androgenic and estrogenic agonist activities in confined clusters closest to the main island of Singapore. Sex hormone activity declined rapidly in clusters farther from the main coastline and in more open waters. Unexpectedly, surface and mid-depth samples from the confined high-activity clusters, in the presence of hormone, exhibited AR and ER-alpha activities that were 200-900% higher than those observed for the cognate hormone alone. This enhanced sex hormone activity suggests that analyses of complex seawater mixtures may uncover unusual bioactivities that may not be obvious by studying individual compounds. Our data present a \"snapshot\" of the sex hormone disruptor activity in Singapore's marine environment and indicate that C18 extraction for EDCs used in conjunction with reporter gene bioassays represents a robust and sensitive methodology for measuring summated androgenic and estrogenic activities in seawater."
# Turn the first abstract into a word-frequency table with cleanAbstracts()
# (presumably provided by PubMedWordcloud, loaded above -- confirm).
# Judging by the warnings recorded below, the cleaning removes punctuation,
# numbers and English stop words and lower-cases the text; the warnings
# themselves are raised by tm_map.SimpleCorpus during those steps.
clean <- cleanAbstracts(data_1$AbstractText[1])
## Warning in tm_map.SimpleCorpus(abstTxt, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(text2.corpus, function(x) removeNumbers(x)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(text2.corpus, tolower): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(text2.corpus, removeWords, stopwords("english")):
## transformation drops documents
# Print the resulting table; the recorded output below shows a `word` and a
# `freq` column, listed from most to least frequent.
clean
## word freq
## hormone hormone 7
## edcs edcs 5
## sex sex 5
## activities activities 4
## marine marine 4
## samples samples 4
## seawater seawater 4
## activity activity 3
## clusters clusters 3
## effects effects 3
## eralpha eralpha 3
## waters waters 3
## androgenic androgenic 2
## bioactivities bioactivities 2
## cartridges cartridges 2
## coastal coastal 2
## compounds compounds 2
## confined confined 2
## data data 2
## erbeta erbeta 2
## estrogenic estrogenic 2
## exhibited exhibited 2
## extracts extracts 2
## gene gene 2
## indicate indicate 2
## main main 2
## may may 2
## potential potential 2
## presence presence 2
## receptors receptors 2
## reporter reporter 2
## robust robust 2
## singapores singapores 2
## study study 2
## summated summated 2
## ability ability 1
## abnormal abnormal 1
## abnormalities abnormalities 1
## agonist agonist 1
## aims aims 1
## alone alone 1
## analyses analyses 1
## androgen androgen 1
## animals animals 1
## ars ars 1
## assays assays 1
## balance balance 1
## based based 1
## bioassay bioassay 1
## bioassays bioassays 1
## cell cell 1
## closest closest 1
## coastline coastline 1
## cognate cognate 1
## complex complex 1
## conjunction conjunction 1
## date date 1
## declined declined 1
## devise devise 1
## differentiation differentiation 1
## diol diol 1
## disruptor disruptor 1
## edc edc 1
## endocrinedisrupting endocrinedisrupting 1
## enhanced enhanced 1
## environment environment 1
## estrogen estrogen 1
## examination examination 1
## examined examined 1
## expressing expressing 1
## extract extract 1
## extracting extracting 1
## extraction extraction 1
## farther farther 1
## freshwater freshwater 1
## function function 1
## high high 1
## highactivity highactivity 1
## higher higher 1
## highest highest 1
## however however 1
## human human 1
## hydrophilic hydrophilic 1
## individual individual 1
## island island 1
## line line 1
## lipophilic lipophilic 1
## locations locations 1
## measure measure 1
## measuring measuring 1
## methodologies methodologies 1
## methodology methodology 1
## middepth middepth 1
## mixtures mixtures 1
## observed observed 1
## obvious obvious 1
## open open 1
## pathways pathways 1
## paucity paucity 1
## perturb perturb 1
## present present 1
## rapidly rapidly 1
## reported reported 1
## represents represents 1
## reproductive reproductive 1
## sensitive sensitive 1
## sewage sewage 1
## sexual sexual 1
## showed showed 1
## signaling signaling 1
## singapore singapore 1
## snapshot snapshot 1
## studies studies 1
## studying studying 1
## suggests suggests 1
## suitable suitable 1
## surface surface 1
## uncover uncover 1
## unexpectedly unexpectedly 1
## unusual unusual 1
## used used 1
# Check the size of the cleaned table: 121 rows by 2 columns in the recorded
# run.
dim(clean) #[1] 121 2
## [1] 121 2
# Draw a word cloud from the table with plotWordCloud(), leaving every optional
# argument at its default.
plotWordCloud(clean)

library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.0.5
## Loading required package: RColorBrewer

# Draw the same word/frequency table directly with wordcloud::wordcloud() so
# the layout options can be controlled explicitly.
#
# * `scale` is set below wordcloud()'s default of c(4, 0.5). The "could not be
#   fit on page" warnings recorded below come from the original run, which used
#   the default scale and therefore silently left those words out of the plot.
#   If warnings still appear, shrink `scale` further or enlarge the device.
# * Word placement and rotation are random even with `random.order = FALSE`,
#   so the seed is fixed to make the figure reproducible.
set.seed(1234)
wordcloud(words = clean$word, freq = clean$freq, min.freq = 1,
  max.words = 150, random.order = FALSE, rot.per = 0.2,
  scale = c(3, 0.3),
  colors = brewer.pal(8, "Dark2"))
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## lipophilic could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## measure could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## measuring could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## methodologies could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## methodology could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## middepth could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## obvious could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## pathways could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## perturb could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## reported could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## represents could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## reproductive could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## sewage could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## showed could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## signaling could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## singapore could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## snapshot could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## studying could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## suggests could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## uncover could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## unexpectedly could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = clean$word, freq = clean$freq, min.freq = 1, :
## unusual could not be fit on page. It will not be plotted.
