Data

The Public Library of Science (PLoS) is a nonprofit open-access science, technology, and medicine publisher. PLoS journals represent an extensive library of open-access journals and other scientific literature under an open-content license. The rplos package interacts with the PLoS journals and allows you to search and retrieve a wide variety of information from PLoS – no API key required!

# Attach everything this chunk uses so it runs on its own: rplos for the PLoS
# search API, tm for the corpus functions, dplyr for the %>% pipe.
library(rplos)
library(tm)
library(dplyr)

# plosabstract() searches article abstracts for the query `q`;
# `fl` names the field to return and `limit` caps the number of records.
plos <- plosabstract(q = "climate change", fl = "abstract", limit = 200)

# The search result is a list with `meta` and `data` elements. Build the corpus
# from the abstract column only, so that each abstract is one document and the
# search metadata is not mined as if it were text.
plosCorpus <- VCorpus(VectorSource(plos$data$abstract)) %>%
  tm_map(stripWhitespace) %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  # Lower-case before stop-word removal so capitalised stop words match.
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

PLoS Climate Change Wordcloud

# Packages for the word cloud (attach order kept as in the original chunk).
library(dplyr)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)

# Draw the 50 most frequent terms of the abstract corpus, most frequent first,
# coloured with a three-colour "Set2" palette.
wordcloud(
  words = plosCorpus,
  scale = c(6, 1),
  max.words = 50,
  random.order = FALSE,
  colors = brewer.pal(n = 3, name = "Set2")
)

Using Text from Titles

What happens if we use the words from the titles of articles instead of the abstracts? Maybe the resulting word cloud will be tighter, more interesting?

Retrieve PLoS Titles

library(dplyr)
library(tm)
library(rplos)

# Same abstract search as before, but `fl` now asks for the article titles
# instead of the abstract text; up to 500 records are requested.
plos_t <- plosabstract(
  q = "climate change",
  fl = "title",
  limit = 500
)

# Peek at the returned object.
head(plos_t)
$meta

$data
NA
# Create a corpus of titles stripped of meaningless words.
# plos_t is a list with `meta` and `data` elements; use the title column so
# that each title is one document and the search metadata is left out.
titleCorpus <- VCorpus(VectorSource(plos_t$data$title)) %>%
  tm_map(stripWhitespace) %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  # Lower-case before stop-word removal so capitalised stop words match.
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

PLoS Climate Change Wordcloud

library(SnowballC)
library(wordcloud)
library(RColorBrewer)

# Word cloud of the title corpus: top 50 terms, most frequent first,
# coloured with a three-colour "Set2" palette.
wordcloud(
  words = titleCorpus,
  scale = c(6, 1),
  max.words = 50,
  random.order = FALSE,
  colors = brewer.pal(n = 3, name = "Set2")
)

Visualizing Research on Climate Change over Time

library(ggplot2)

# Plot PLoS results for the term over time and overlay a thick green line.
# NOTE(review): newer ggplot2 releases deprecate `size` for lines in favour of
# `linewidth`; confirm the installed version before switching.
plot_throughtime(terms = "Climate Change", limit = 800) +
  geom_line(size = 2, color = "springgreen4")

Discussion

The increase in papers published on climate change over time is interesting, and I’d like to look at an even longer range of papers, but PLoS was only founded in 2000, so its data cannot be relied on for years before about 2000.

LS0tCnRpdGxlOiAiSG9tZXdvcmsgMTE6IFRleHQgTWluaW5nIENsaW1hdGUgQ2hhbmdlIFJlc2VhcmNoIgpvdXRwdXQ6IAogIGh0bWxfbm90ZWJvb2s6IAogICAgdGhlbWU6IGx1bWVuCi0tLQoKCiNEYXRhClRoZSBQdWJsaWMgTGlicmFyeSBvZiBTY2llbmNlIChQTG9TKSBpcyBhIG5vbnByb2ZpdCBvcGVuLWFjY2VzcyBzY2llbmNlLCB0ZWNobm9sb2d5LCBhbmQgbWVkaWNpbmUgcHVibGlzaGVyLiBQTG9TIGpvdXJuYWxzIHJlcHJlc2VudCBhbiBleHRlbnNpdmUgbGlicmFyeSBvZiBvcGVuLWFjY2VzcyBqb3VybmFscyBhbmQgb3RoZXIgc2NpZW50aWZpYyBsaXRlcmF0dXJlIHVuZGVyIGFuIG9wZW4tY29udGVudCBsaWNlbnNlLiBUaGUgKnJwbG9zKiBwYWNrYWdlIGludGVyYWN0cyB3aXRoIHRoZSBQTG9TIGpvdXJuYWxzIGFuZCBhbGxvd3MgeW91IHRvIHNlYXJjaCBhbmQgcmV0cmlldmUgYSB3aWRlIHZhcmlldHkgb2YgaW5mb3JtYXRpb24gZnJvbSBQTG9TIC0tIG5vIEFQSSBrZXkgcmVxdWlyZWQhCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KHJwbG9zKQojcGxvc2Fic3RyYWN0IHNlYXJjaGVzIHRoZSBhYnN0cmFjdHMKI3RoZSBmbCBjb21tYW5kIGlkZW50aWZpZXMgd2hhdCB0byByZXRyaWV2ZQpwbG9zID0gcGxvc2Fic3RyYWN0KHE9ICJjbGltYXRlIGNoYW5nZSIsIGZsPSAiYWJzdHJhY3QiLCBsaW1pdCA9IDIwMCkKYGBgCgoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KI2NyZWF0ZSBhIGNvcnB1cyBzdHJpcHBlZCBvZiBtZWFuaW5nbGVzcyB3b3JkcwpwbG9zQ29ycHVzPXdpdGgocGxvcywgVkNvcnB1cyhWZWN0b3JTb3VyY2UocGxvcykpKSU+JQogIHRtX21hcChzdHJpcFdoaXRlc3BhY2UpICAlPiUKICB0bV9tYXAocmVtb3ZlTnVtYmVycykgICU+JQogIHRtX21hcChyZW1vdmVQdW5jdHVhdGlvbikgICU+JQogIHRtX21hcChjb250ZW50X3RyYW5zZm9ybWVyKHRvbG93ZXIpKSAgJT4lCiAgdG1fbWFwKHJlbW92ZVdvcmRzLCBzdG9wd29yZHMoImVuZ2xpc2giKSkKYGBgCgojIFBMb1MgQ2xpbWF0ZSBDaGFuZ2UgV29yZGNsb3VkCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHRtKQpsaWJyYXJ5KCJTbm93YmFsbEMiKQpsaWJyYXJ5KCJ3b3JkY2xvdWQiKQpsaWJyYXJ5KCJSQ29sb3JCcmV3ZXIiKQp3b3JkY2xvdWQocGxvc0NvcnB1cywgbWF4LndvcmRzID0gNTAsIHNjYWxlID0gYyg2LCAxKSwKICAgICAgICAgIGNvbG9ycz1icmV3ZXIucGFsKDMsICJTZXQyIikscmFuZG9tLm9yZGVyID0gRkFMU0UpCmBgYAoKCiNVc2luZyBUZXh0IGZyb20gVGl0bGVzCldoYXQgaGFwcGVucyBpZiB3ZSB1c2UgdGhlIHdvcmRzIGZyb20gdGhlIHRpdGxlcyBvZiBhcnRpY2xlcyBpbnN0ZWFkIG9mIHRoZSBhYnN0cmFjdHM/IE1heWJlIHRoZSByZXN1bHRpbmcgd29yZCBjbG91ZCB3aWxsIGJlIHRpZ2h0ZXIsIG1vcmUgaW50ZXJl
c3Rpbmc/IAoKI1JldHJpZXZlIFBMb1MgVGl0bGVzCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodG0pCmxpYnJhcnkocnBsb3MpCiNwbG9zYWJzdHJhY3Qgc2VhcmNoZXMgdGhlIGFic3RyYWN0cwojdGhlIGZsIGNvbW1hbmQgaWRlbnRpZmllcyB3aGF0IHRvIHJldHJpZXZlCnBsb3NfdCA9IHBsb3NhYnN0cmFjdChxPSAiY2xpbWF0ZSBjaGFuZ2UiLCBmbD0gInRpdGxlIiwgbGltaXQgPSA1MDApCmhlYWQocGxvc190KQpgYGAKCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CiNjcmVhdGUgYSBjb3JwdXMgb2YgdGl0bGVzIHN0cmlwcGVkIG9mIG1lYW5pbmdsZXNzIHdvcmRzCnRpdGxlQ29ycHVzPXdpdGgocGxvc190LCBWQ29ycHVzKFZlY3RvclNvdXJjZShwbG9zX3QpKSklPiUKICB0bV9tYXAoc3RyaXBXaGl0ZXNwYWNlKSAgJT4lCiAgdG1fbWFwKHJlbW92ZU51bWJlcnMpICAlPiUKICB0bV9tYXAocmVtb3ZlUHVuY3R1YXRpb24pICAlPiUKICB0bV9tYXAoY29udGVudF90cmFuc2Zvcm1lcih0b2xvd2VyKSkgICU+JQogIHRtX21hcChyZW1vdmVXb3Jkcywgc3RvcHdvcmRzKCJlbmdsaXNoIikpCmBgYAoKIyBQTG9TIENsaW1hdGUgQ2hhbmdlIFdvcmRjbG91ZAoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeShTbm93YmFsbEMpCmxpYnJhcnkod29yZGNsb3VkKQpsaWJyYXJ5KFJDb2xvckJyZXdlcikKd29yZGNsb3VkKHRpdGxlQ29ycHVzLCBtYXgud29yZHMgPSA1MCwgc2NhbGUgPSBjKDYsIDEpLAogICAgICAgICAgY29sb3JzPWJyZXdlci5wYWwoMywgIlNldDIiKSxyYW5kb20ub3JkZXIgPSBGQUxTRSkKYGBgCgoKCiMgVmlzdWFsaXppbmcgUmVzZWFyY2ggb24gQ2xpbWF0ZSBDaGFuZ2Ugb3ZlciBUaW1lCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KGdncGxvdDIpCnBsb3RfdGhyb3VnaHRpbWUodGVybXMgPSAiQ2xpbWF0ZSBDaGFuZ2UiLCBsaW1pdCA9IDgwMCkgKyBnZW9tX2xpbmUoc2l6ZT0yLCBjb2xvcj0nc3ByaW5nZ3JlZW40JykKYGBgCgojIERpc2N1c3Npb24KClRoZSBpbmNyZWFzZSBpbiBwYXBlcnMgcHVibGlzaGVkIG9uIGNsaW1hdGUgY2hhbmdlIG92ZXIgdGltZSBpcyBpbnRlcmVzdGluZywgYW5kIEknZCBsaWtlIHRvIGxvb2sgYXQgYW4gZXZlbiBsb25nZXIgcmFuZ2Ugb2YgcGFwZXJzLCBidXQgUExvUyB3YXMgb25seSBmb3VuZGVkIGluIDIwMDAgc28gdGhlIGRhdGEgaXNuJ3QgbmVjZXNzYXJpbHkgcm9idXN0IGVhcmxpZXIgdGhhbiAyMDAwaXNoLgoK