# Get data:
library(gapminder)
# load libraries:
library(ggplot2)
library(gganimate)
# Create the text label: only the United States and China get a label; every
# other row gets an empty string so no text is drawn for it.
gapminder$countrylabel <- ifelse(
  gapminder$country == "United States", "US",
  ifelse(gapminder$country == "China", "CN", "")
)

# Animated bubble chart: transition_time(year) produces one frame per year,
# and the country labels are drawn just above their points.
ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop, color = continent)) +
  geom_point() +
  # The text layer inherits the data and the x/y/size/color mappings from the
  # plot. The label column is referenced by bare name (not
  # gapminder$countrylabel) so it is always looked up in the layer's own data.
  # vjust is a constant, so it is set outside aes(); the former `fill = 1`
  # mapping was dropped because geom_text has no fill aesthetic.
  geom_text(aes(label = countrylabel), vjust = -0.5) +
  scale_x_log10() +
  theme_bw() +
  # gganimate specific bits:
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  transition_time(year) +
  ease_aes("linear")
# Save as GIF:
# anim_save("C:\\Users\\hed2\\gganimate1.gif")
# load Packages
require(RColorBrewer)
## Loading required package: RColorBrewer
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.6 ✔ dplyr 1.0.8
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
require(magrittr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
require(ggwordcloud)
## Loading required package: ggwordcloud
library(SnowballC)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# Load the King James Version text and keep only the `text` column of the rows
# whose book code is "joh" (presumably the Gospel of John).
# The data set ships with the 'sacred' package:
# devtools::install_github("JohnCoene/sacred")
scripture <- sacred::king_james_version
john <- dplyr::select(
  dplyr::filter(scripture, book == "joh"),
  text
)
# Transform the text into a tm corpus.
# NOTE(review): `john` is a one-column data frame here, not a character
# vector. The later rename(word = 1, freq = 2) step appears to rely on the
# term-document matrix having a single document column, so confirm before
# changing what is passed to VectorSource().
john.corpus <- tm::VCorpus(tm::VectorSource(john))
# Content transformer that replaces every match of `pattern` with one space.
toSpace <- content_transformer(function(x, pattern) {
  gsub(pattern, " ", x)
})

# Apply the transformer once per pattern, in order.
# NOTE(review): the second pattern is a plain space, so that pass replaces a
# space with a space (a no-op) - possibly another character was intended;
# confirm against the original tutorial.
john.corpus <- Reduce(
  function(corpus, pattern) tm_map(corpus, toSpace, pattern),
  c("/", " ", "\\|"),
  john.corpus
)
# Normalise the corpus: lower-case the text, then strip numbers, English stop
# words, a custom list of archaic/filler words, punctuation and surplus
# whitespace.
john.corpus <- john.corpus %>%
  tm_map(FUN = content_transformer(tolower)) %>% # Convert the text to lower case
  tm_map(FUN = removeNumbers) %>% # Remove numbers
  tm_map(removeWords, stopwords("english")) %>% # Remove English common stopwords
  # The text is already lower-case at this point and removeWords matches
  # case-sensitively, so every entry must be lower-case ("o", not "O") or it
  # can never match.
  tm_map(removeWords, c(
    "ye", "o", "unto", "yet", "thee", "wherein", "neither", "shall",
    "saith", "host", "will", "offer", "say"
  )) %>% # Remove custom words
  tm_map(removePunctuation) %>% # Remove punctuation
  tm_map(stripWhitespace) # Collapse repeated whitespace
# Compute the frequency of each word: corpus -> term-document matrix ->
# matrix -> data.frame, sorted by descending frequency.
# local() keeps the intermediate objects out of the global environment.
john.corpus.tb <- local({
  term_matrix <- tm::TermDocumentMatrix(
    john.corpus,
    control = list(
      removeNumbers = TRUE,
      stopwords = TRUE,
      stemming = TRUE
    )
  )
  counts <- as.data.frame(as.matrix(term_matrix))
  # Terms are the row names of the matrix; move them into a real column.
  counts <- tibble::rownames_to_column(counts)
  # Positional rename: column 1 holds the terms, column 2 the counts of the
  # first document column.
  counts <- dplyr::rename(counts, word = 1, freq = 2)
  dplyr::arrange(counts, dplyr::desc(freq))
})
# Make the word cloud.
# Overwrite the word in the first row (the most frequent one after sorting)
# with the display label "Yeshua".
john.corpus.tb[1, 1] <- "Yeshua"

# Fix the RNG so the random angles drawn below are reproducible.
set.seed(42)
ggplot(john.corpus.tb, aes(label = word, size = freq, color = (freq))) +
  geom_text_wordcloud_area(
    # One random rotation per word, a multiple of 45 degrees between -90 and
    # 90; 0 degrees is weighted four times as heavily as each other angle.
    aes(
      angle = 45 * sample(
        -2:2, nrow(john.corpus.tb),
        replace = TRUE, prob = c(1, 1, 4, 1, 1)
      )
    ),
    # Mask image read from a hard-coded local Windows path.
    mask = png::readPNG("C:\\Users\\hed2\\Downloads\\mask.png"),
    rm_outside = TRUE
  ) +
  scale_size_area(max_size = 20) +
  # scale_color_brewer(palette = "Paired", direction = -1)
  scale_color_gradient(low = "blue", high = "darkred") +
  theme_minimal()
## Some words could not fit on page. They have been removed.
# Starry sky