Preparation

Packages Loading

#===============================
require(stm)
require(tm)
require(SnowballC)
require(LDAvis)
require(stmCorrViz)
require(stmBrowser)
require(ggplot2)
require(topicmodels)
require(wordcloud)   
require(reshape2) # melt()
require(grid) # arrow()
require(ggthemes)
require(lsa)
set.seed(1234)

Data loading

#===============================
# Data Loading
require(XLConnect)               # load XLConnect package 
wk <- loadWorkbook("../DATA/AERCDATA_.xlsx") 
data <- readWorksheet(wk, sheet="DATA") 
data$KeyYesNo<-as.factor(data$KeyYesNo)
data$AbsYesNo<-as.factor(data$AbsYesNo)
data$JournalFa<-as.factor(data$Journal)
dataID <- readWorksheet(wk, sheet="DATAID") 
load("../DATA/DATA.Rdata")

Basic Text Analysis

Text Cleaning

TEXTBASIC<- Corpus(VectorSource(data$TEXT))
CORPUS <- tm_map(TEXTBASIC, removePunctuation)   
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, removeNumbers)   
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, tolower)  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, removeWords, stopwords("english"))  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, removeWords, stopwords("SMART"))  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, stemDocument)
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, stripWhitespace)  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, PlainTextDocument) 
CORPUS <- tm_map(CORPUS, removeWords, c("and", "for", "that",
                                        "this", "are", "is", 
                                        "am","with","their","the","can",
                                        "from","article", "study","research","analysis"))

as.character(inspect(CORPUS[2]))

# 

Building Courps

tdm <- TermDocumentMatrix(CORPUS)
dtm <- DocumentTermMatrix(CORPUS)
dim(tdm)
[1] 7942  579
dim(dtm)
[1]  579 7942

Word Frequency

freq <- sort(colSums(as.matrix(dtm)), decreasing=TRUE)
wf <- data.frame(word=names(freq), freq=freq)
table(freq)
freq
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
3601 1233  631  394  308  206  193  153  124  115   98   57   62   57   40 
  16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
  40   34   36   44   24   20   21   26   22   22   14   11   14   17    8 
  31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
   6   11   13    8    6   11   10   10   11    8    9    3    4    4   10 
  46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
  13    5    2    8    5    6    3    3    6    6    1    3    4    1    6 
  61   62   63   65   66   67   68   69   70   71   72   73   74   75   76 
   4    1    3    3    1    5    1    3    4    1    1    5    4    4    3 
  77   79   82   83   84   85   86   87   88   89   90   91   92   93   94 
   2    2    2    3    1    1    3    2    1    3    1    2    1    1    3 
  98  101  102  104  106  107  108  112  114  116  117  118  119  126  128 
   1    1    1    2    3    1    1    1    1    1    1    1    1    1    1 
 138  142  144  147  148  152  153  155  156  161  166  174  182  184  187 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 201  203  208  214  217  218  227  289  302  306  478  507  787 1190 1716 
   1    1    1    1    1    2    2    1    1    1    1    1    1    1    1 
head(freq)
 learning education     adult  lifelong    social    taylor 
     1716      1190       787       507       478       306 
require(ggplot2)   
p <- ggplot(data=subset(wf, freq>200), aes(reorder(word, freq), freq))    
p <- p + geom_bar(stat="identity")   
p <- p + theme(axis.text.x=element_text(angle=45, hjust=1))   
p 

creating term matrix with TF-IDF weighting

terms_td_idf <-DocumentTermMatrix(CORPUS, control = list(weighting = function(x) weightTfIdf(x, normalize = FALSE)))
terms_td_idf
<<DocumentTermMatrix (documents: 579, terms: 7942)>>
Non-/sparse entries: 38415/4560003
Sparsity           : 99%
Maximal term length: 32
Weighting          : term frequency - inverse document frequency (tf-idf)

Text Network with tf-idf similarity

td.mat <- as.matrix(TermDocumentMatrix(CORPUS))
td.mat.lsa <- lw_bintf(td.mat) * gw_idf(td.mat) # weighting
lsaSpace <- lsa(td.mat.lsa) # create LSA space
dist.mat.lsa <- dist(t(as.textmatrix(lsaSpace))) # compute distance matrix

MDS

fit <- cmdscale(dist.mat.lsa, eig=TRUE, k=2)
points <- data.frame(x=fit$points[, 1], y=fit$points[, 2])
points$DocID<-dataID$NO
qplot(x, y, data = points, geom = "point", alpha = I(1/5))

plot(points$x,points$y)
text(points$x,points$y, points$DocID ,cex=0.6, pos=4, col="red")

library(scatterplot3d)
fit <- cmdscale(dist.mat.lsa, eig = TRUE, k = 3)
colors <- rep(c("blue", "green", "red"), each = 3)
scatterplot3d(fit$points[, 1], fit$points[, 2], fit$points[, 3],
              pch = 16, main = "Semantic Space Scaled to 3D", 
              xlab = "x", ylab = "y",
              zlab = "z", type = "h"
              )

LDA - STM Topic Modeling

stemming/stopword removal, etc.

data$TEXT <- gsub("[^[:alnum:]///' ]", "", data$TEXT)
processed <- textProcessor(data$TEXT, metadata = data)
Building corpus... 
Converting to Lower Case... 
Removing stopwords... 
Removing numbers... 
Removing punctuation... 
Stemming... 
Creating Output... 

structure and index for usage in the stm model. Verify no-missingness.

prep <- prepDocuments(processed$documents, processed$vocab, processed$meta)
Removing 2525 of 5120 terms (2525 of 40505 tokens) due to frequency 
Your corpus now has 579 documents, 2595 terms and 37980 tokens.

output will have object meta, documents, and vocab

docs <- out$documents
vocab <- out$vocab
meta  <-out$meta
plotRemoved(processed$documents, lower.thresh = seq(1, 1000, by = 10))

#prep <- prepDocuments(processed$documents, processed$vocab,processed$meta, 
#                      lower.thresh = 1, 
#                      upper.thresh= 400)
#plotRemoved(prep$documents, lower.thresh = seq(1, 1000, by = 10))

Topic Model Evaluation for search K

heldout <- make.heldout(prep$documents, prep$vocab)
documents <- heldout$documents
vocab <- heldout$voca
K<-c(2:50)
#MODELS <- searchK(documents, vocab, K, seed = 8458159)
plot.searchK(MODELS)
par(mfrow=c(2,2))

qplot(MODELS$results$K, MODELS$results$exclus, geom=c("point", "smooth"))

qplot(MODELS$results$K, MODELS$results$heldout, geom=c("point", "smooth"))

qplot(MODELS$results$K, MODELS$results$semcoh, geom=c("point", "smooth"))

qplot(MODELS$results$K, MODELS$results$em.its, geom=c("point", "smooth"))
par(mfrow=c(1,1))

KRANK_heldout<-as.data.frame(cbind(MODELS$results$K,MODELS$results$heldout))
KRANK_semcoh<-as.data.frame(cbind(MODELS$results$K,MODELS$results$semcoh))
KRANK_heldout
KRANK_semcoh
#SELMODEL <- selectModel(out$documents, out$vocab, K = 12,
#                        prevalence =~ JournalFa + s(Year),
#                        max.em.its = 100,
#                        data = out$meta, runs = 30, 
#                        seed = 8458159)
SELMODEL
$runout
$runout[[1]]
A topic model with 12 topics, 579 documents and a 2595 word dictionary.

$runout[[2]]
A topic model with 12 topics, 579 documents and a 2595 word dictionary.

$runout[[3]]
A topic model with 12 topics, 579 documents and a 2595 word dictionary.

$runout[[4]]
A topic model with 12 topics, 579 documents and a 2595 word dictionary.

$runout[[5]]
A topic model with 12 topics, 579 documents and a 2595 word dictionary.

$runout[[6]]
A topic model with 12 topics, 579 documents and a 2595 word dictionary.


$semcoh
$semcoh[[1]]
 [1]  -87.05865  -59.41714  -71.51104  -82.00782  -79.12326  -89.08245
 [7]  -76.97783 -103.77731  -74.29686  -99.87080  -66.11509  -88.56817

$semcoh[[2]]
 [1] -69.36360 -61.09197 -66.17118 -89.21036 -89.69092 -84.03244 -94.32758
 [8] -63.75269 -82.82261 -79.71068 -68.30266 -94.52467

$semcoh[[3]]
 [1] -66.81428 -87.83997 -75.82347 -73.57541 -67.08413 -83.33052 -92.32675
 [8] -70.32537 -95.09833 -78.20692 -92.40581 -90.20287

$semcoh[[4]]
 [1]  -80.99432  -68.41026 -113.10564  -80.90968  -85.88537  -79.88375
 [7]  -88.19470  -95.23561  -65.62718  -66.72911 -103.87772  -64.96218

$semcoh[[5]]
 [1] -59.10653 -81.88639 -69.66952 -76.12258 -82.46782 -79.90309 -83.49564
 [8] -57.07118 -93.04998 -84.03397 -86.52608 -74.52873

$semcoh[[6]]
 [1] -85.28665 -73.99364 -88.60246 -74.53001 -84.42852 -85.74578 -85.17334
 [8] -81.44334 -62.23362 -78.13769 -67.71376 -75.22818


$exclusivity
$exclusivity[[1]]
 [1] 9.421073 9.424345 9.419363 9.463894 9.515426 9.590383 9.300530 9.523173
 [9] 9.293934 9.539350 9.697545 9.441522

$exclusivity[[2]]
 [1] 9.585271 9.347450 9.176675 9.384348 9.401572 9.416052 9.725567 9.473182
 [9] 9.375482 9.494914 9.352378 9.224919

$exclusivity[[3]]
 [1] 9.298425 9.581715 9.245425 9.026269 9.490519 9.461665 9.390470 9.455644
 [9] 9.324836 9.559014 9.732288 9.613923

$exclusivity[[4]]
 [1] 9.240323 9.809413 9.496921 9.391125 9.511400 9.371661 9.620094 9.454076
 [9] 9.391193 9.508331 9.447692 8.949863

$exclusivity[[5]]
 [1] 9.658110 9.336212 9.332815 9.646949 9.429048 9.301979 9.506108 9.482788
 [9] 9.524420 9.535551 9.546149 9.334337

$exclusivity[[6]]
 [1] 9.611876 9.141486 9.541321 9.428722 9.531174 9.600376 9.384969 9.616844
 [9] 9.064353 9.419461 9.706312 9.524023


$sparsity
$sparsity[[1]]
[1] "Sparsity not calculated for models without content covariates"

$sparsity[[2]]
[1] "Sparsity not calculated for models without content covariates"

$sparsity[[3]]
[1] "Sparsity not calculated for models without content covariates"

$sparsity[[4]]
[1] "Sparsity not calculated for models without content covariates"

$sparsity[[5]]
[1] "Sparsity not calculated for models without content covariates"

$sparsity[[6]]
[1] "Sparsity not calculated for models without content covariates"
plotModels(SELMODEL)

MS1 <- SELMODEL$runout[[1]]
topicQuality(model=MS1, documents=docs)
 [1]  -87.05865  -59.41714  -71.51104  -82.00782  -79.12326  -89.08245
 [7]  -76.97783 -103.77731  -74.29686  -99.87080  -66.11509  -88.56817
 [1] 9.421073 9.424345 9.419363 9.463894 9.515426 9.590383 9.300530 9.523173
 [9] 9.293934 9.539350 9.697545 9.441522

MS1_evalheldout<-eval.heldout(MS1, heldout$missing)
hist(MS1_evalheldout$doc.heldout)

MS1_evalheldout
$expected.heldout
[1] -6.216431

$doc.heldout
 [1] -5.510584 -6.297617 -6.094396 -6.321044 -6.422858 -5.819358 -5.431193
 [8] -6.330077 -6.533379 -5.575992 -6.108958 -6.093024 -6.004971 -6.284460
[15] -6.637358 -6.084366 -6.034673 -6.360354 -6.330268 -6.101589 -5.959203
[22] -6.747699 -6.323365 -6.446081 -6.195463 -6.042336 -6.762732 -6.466730
[29] -6.453595 -6.511990 -6.276536 -6.595945 -6.522674 -5.234419 -6.178808
[36] -5.958154 -6.556319 -5.948685 -6.445488 -6.635783 -6.111792 -6.116628
[43] -6.069014 -6.452958 -5.769283 -6.383192 -6.881477 -6.725529 -6.301543
[50] -6.370689 -6.241868 -4.827986 -6.602414 -6.253014 -6.022395 -6.340751
[57] -6.227529

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
MS2 <- SELMODEL$runout[[2]]
topicQuality(model=MS2, documents=docs)
 [1] -69.36360 -61.09197 -66.17118 -89.21036 -89.69092 -84.03244 -94.32758
 [8] -63.75269 -82.82261 -79.71068 -68.30266 -94.52467
 [1] 9.585271 9.347450 9.176675 9.384348 9.401572 9.416052 9.725567 9.473182
 [9] 9.375482 9.494914 9.352378 9.224919

MS2_evalheldout<-eval.heldout(MS2, heldout$missing)
hist(MS2_evalheldout$doc.heldout)

MS2_evalheldout
$expected.heldout
[1] -6.206362

$doc.heldout
 [1] -5.558018 -6.068438 -6.031112 -6.332154 -6.209974 -5.766385 -5.272792
 [8] -6.677067 -6.393960 -5.742288 -6.248444 -5.985725 -5.809810 -6.288284
[15] -6.622955 -6.320996 -6.211804 -5.877751 -6.729948 -5.828985 -6.054075
[22] -6.810961 -5.992409 -6.511297 -6.020433 -6.026372 -7.093156 -6.427994
[29] -6.306674 -6.273594 -6.301394 -6.307391 -6.691068 -5.380955 -6.399464
[36] -5.908197 -6.501172 -6.015182 -6.591179 -6.701818 -6.235427 -6.236214
[43] -5.978396 -6.306692 -5.831889 -6.591092 -6.774888 -6.806098 -6.308495
[50] -6.224018 -6.174249 -5.044385 -6.463472 -6.275803 -5.795730 -6.164686
[57] -6.259439

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
MS3 <- SELMODEL$runout[[3]]
topicQuality(model=MS3, documents=docs)
 [1] -66.81428 -87.83997 -75.82347 -73.57541 -67.08413 -83.33052 -92.32675
 [8] -70.32537 -95.09833 -78.20692 -92.40581 -90.20287
 [1] 9.298425 9.581715 9.245425 9.026269 9.490519 9.461665 9.390470 9.455644
 [9] 9.324836 9.559014 9.732288 9.613923

MS3_evalheldout<-eval.heldout(MS3, heldout$missing)
hist(MS3_evalheldout$doc.heldout)

MS3_evalheldout
$expected.heldout
[1] -6.230347

$doc.heldout
 [1] -5.758863 -6.412433 -6.146154 -6.506155 -6.332347 -5.846908 -5.585128
 [8] -6.571328 -6.630524 -5.708825 -6.221821 -6.202242 -5.835826 -6.207740
[15] -6.618197 -6.400055 -6.080299 -6.184036 -6.348255 -5.512837 -6.077052
[22] -6.695002 -6.203615 -6.560065 -6.285772 -6.069436 -6.415121 -6.399297
[29] -6.593090 -6.539633 -6.216189 -6.553062 -6.726903 -5.549706 -6.565404
[36] -5.747515 -6.702733 -5.986811 -6.339109 -6.753834 -6.143416 -6.431250
[43] -5.861600 -6.232208 -5.726374 -6.568849 -6.723104 -6.925274 -6.726424
[50] -6.689944 -6.114311 -4.515218 -6.404106 -6.108860 -5.707322 -5.926182
[57] -6.236041

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
MS4 <- SELMODEL$runout[[4]]
topicQuality(model=MS4, documents=docs)
 [1]  -80.99432  -68.41026 -113.10564  -80.90968  -85.88537  -79.88375
 [7]  -88.19470  -95.23561  -65.62718  -66.72911 -103.87772  -64.96218
 [1] 9.240323 9.809413 9.496921 9.391125 9.511400 9.371661 9.620094 9.454076
 [9] 9.391193 9.508331 9.447692 8.949863

MS4_evalheldout<-eval.heldout(MS4, heldout$missing)
hist(MS4_evalheldout$doc.heldout)

MS4_evalheldout
$expected.heldout
[1] -6.253413

$doc.heldout
 [1] -5.547421 -6.348788 -5.951285 -6.398445 -6.382263 -6.175492 -5.565437
 [8] -6.500605 -6.716727 -5.816606 -6.068257 -6.232919 -5.948118 -6.115012
[15] -6.628463 -6.411683 -6.147219 -6.455168 -6.597230 -5.750289 -6.124651
[22] -6.703154 -6.177233 -6.435243 -6.546469 -6.145480 -6.544544 -6.561005
[29] -6.252882 -6.353591 -6.223679 -6.516577 -6.778583 -4.905190 -6.225448
[36] -5.832433 -6.493726 -5.929757 -6.242412 -6.791576 -6.315705 -6.566306
[43] -6.155195 -6.300366 -5.781876 -6.384809 -6.592244 -6.539541 -6.734000
[50] -6.561083 -6.548960 -5.252338 -6.479331 -6.183184 -6.077137 -6.200288
[57] -6.231136

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
MS5 <- SELMODEL$runout[[5]]
topicQuality(model=MS5, documents=docs)
 [1] -59.10653 -81.88639 -69.66952 -76.12258 -82.46782 -79.90309 -83.49564
 [8] -57.07118 -93.04998 -84.03397 -86.52608 -74.52873
 [1] 9.658110 9.336212 9.332815 9.646949 9.429048 9.301979 9.506108 9.482788
 [9] 9.524420 9.535551 9.546149 9.334337

MS5_evalheldout<-eval.heldout(MS5, heldout$missing)
hist(MS5_evalheldout$doc.heldout)

MS5_evalheldout
$expected.heldout
[1] -6.230668

$doc.heldout
 [1] -5.757784 -6.333322 -6.111657 -6.640060 -6.264930 -5.920986 -5.351880
 [8] -6.342519 -6.685615 -5.921120 -5.691588 -6.169200 -5.913724 -6.203328
[15] -6.515082 -6.396366 -5.883360 -6.409520 -6.128259 -5.936402 -6.046711
[22] -6.717631 -6.094611 -6.560396 -6.296929 -6.042945 -6.661286 -6.554439
[29] -6.351716 -6.442477 -6.351104 -6.478583 -6.260696 -5.729056 -6.648963
[36] -5.945381 -6.402547 -6.237331 -6.236717 -6.686005 -6.196169 -6.213280
[43] -6.086453 -6.383583 -5.549048 -6.212460 -7.019121 -6.701370 -6.823452
[50] -6.574287 -6.462147 -4.396197 -6.512477 -6.179842 -6.025346 -6.275679
[57] -6.214927

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
MS6 <- SELMODEL$runout[[6]]
topicQuality(model=MS6, documents=docs)
 [1] -85.28665 -73.99364 -88.60246 -74.53001 -84.42852 -85.74578 -85.17334
 [8] -81.44334 -62.23362 -78.13769 -67.71376 -75.22818
 [1] 9.611876 9.141486 9.541321 9.428722 9.531174 9.600376 9.384969 9.616844
 [9] 9.064353 9.419461 9.706312 9.524023

MS6_evalheldout<-eval.heldout(MS6, heldout$missing)
hist(MS6_evalheldout$doc.heldout)

MS6_evalheldout
$expected.heldout
[1] -6.224699

$doc.heldout
 [1] -5.619660 -6.492202 -6.178650 -6.542955 -6.193185 -6.203928 -5.292074
 [8] -6.370396 -6.790151 -5.639262 -5.762380 -6.256493 -5.820496 -6.280342
[15] -6.464707 -6.385618 -6.138104 -6.126241 -6.238390 -5.774594 -6.061625
[22] -6.884735 -6.219620 -6.491523 -6.309922 -6.059043 -6.589051 -6.283775
[29] -6.293354 -6.421719 -6.309560 -6.503018 -6.639418 -5.236308 -6.512360
[36] -5.839517 -6.637350 -6.076885 -6.214417 -6.685756 -6.027692 -6.211922
[43] -6.102389 -6.381138 -5.842324 -6.296652 -6.838409 -6.663369 -6.868292
[50] -6.379959 -6.136796 -4.936104 -6.521402 -6.173405 -6.253698 -6.134495
[57] -6.200994

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
FINALSELECTED <- SELMODEL$runout[[4]]
topicQuality(model=FINALSELECTED, documents=docs)
 [1]  -80.99432  -68.41026 -113.10564  -80.90968  -85.88537  -79.88375
 [7]  -88.19470  -95.23561  -65.62718  -66.72911 -103.87772  -64.96218
 [1] 9.240323 9.809413 9.496921 9.391125 9.511400 9.371661 9.620094 9.454076
 [9] 9.391193 9.508331 9.447692 8.949863

evalheldout<-eval.heldout(FINALSELECTED, heldout$missing)
hist(evalheldout$doc.heldout)

evalheldout
$expected.heldout
[1] -6.253413

$doc.heldout
 [1] -5.547421 -6.348788 -5.951285 -6.398445 -6.382263 -6.175492 -5.565437
 [8] -6.500605 -6.716727 -5.816606 -6.068257 -6.232919 -5.948118 -6.115012
[15] -6.628463 -6.411683 -6.147219 -6.455168 -6.597230 -5.750289 -6.124651
[22] -6.703154 -6.177233 -6.435243 -6.546469 -6.145480 -6.544544 -6.561005
[29] -6.252882 -6.353591 -6.223679 -6.516577 -6.778583 -4.905190 -6.225448
[36] -5.832433 -6.493726 -5.929757 -6.242412 -6.791576 -6.315705 -6.566306
[43] -6.155195 -6.300366 -5.781876 -6.384809 -6.592244 -6.539541 -6.734000
[50] -6.561083 -6.548960 -5.252338 -6.479331 -6.183184 -6.077137 -6.200288
[57] -6.231136

$index
 [1]   8  12  13  33  47  50  79  82  90 115 129 192 197 206 228 233 242 256
[19] 258 259 268 272 274 283 285 287 291 293 307 364 369 388 398 399 416 417
[37] 425 455 458 464 468 475 482 488 492 498 499 505 511 520 530 541 544 551
[55] 570 573 576

$ntokens
 [1] 39692 29431 47618 49993 37241 45658 35567 40651 42069  6058 29522 62122
[13] 43954 38718 46161 63665 68639 73326 66064  3911 62958 49676 48709 36935
[25] 65056 62553 35824 62778 61506 47434 55881 48757 55958  2454 48739 46668
[37] 41941 24024 37092 54404 37336 49020 70926 45106 37556 29399 62646 55569
[49] 50623 52877 50143  2769 63587 37863 48989 48783 45274
#save(MODELS, SELMODEL, FINALSELECTED, file = "./DATA/DATA.Rdata")

Topic Model Description

Basic Result

#==== Summary
## Label Topics
labelTopics(FINALSELECTED)
Topic 1 Top Words:
     Highest Prob: learn, space, inform, experi, group, peopl, way 
     FREX: everyday, mother, space, onlin, men, theatr, cancer 
     Lift: consumpt, fix, nonhuman, overlap, treatment, victori, wisdom 
     Score: mother, space, everyday, cancer, men, onlin, theatr 
Topic 2 Top Words:
     Highest Prob: research, practic, process, knowledg, chang, context, work 
     FREX: research, action, process, creativ, construct, know, chang 
     Lift: reciproc, recreat, merleauponti, piaget, pragmatist, reaction, proverb 
     Score: research, action, creativ, practic, know, knowledg, embodi 
Topic 3 Top Words:
     Highest Prob: learn, cultur, programm, languag, art, african, report 
     FREX: languag, art, museum, african, programm, english, cultur 
     Lift: ail, britain, confintea, correct, esol, prescrib, shortcom 
     Score: art, languag, african, museum, programm, cultur, visual 
Topic 4 Top Words:
     Highest Prob: learn, train, skill, develop, workplac, career, employ 
     FREX: employe, solv, cps, train, career, skill, workplac 
     Lift: assign, classif, colleagu, confirmatori, domainspecif, employe, financ 
     Score: cps, train, employe, solv, career, workplac, expertis 
Topic 5 Top Words:
     Highest Prob: learn, lifelong, social, capit, immigr, develop, also 
     FREX: lifelong, capit, immigr, vocat, wellb, elder, japan 
     Lift: ecosystem, endors, hivaid, singapor, thailand, devalu, further 
     Score: lifelong, learn, capit, japan, immigr, thailand, elder 
Topic 6 Top Words:
     Highest Prob: educ, adult, student, particip, learner, age, transit 
     FREX: age, transit, graduat, parttim, motiv, student, undergradu 
     Lift: insecur, postschool, workingclass, interviewe, leaver, midlif, multivari 
     Score: student, parttim, older, motiv, transit, age, undergradu 
Topic 7 Top Words:
     Highest Prob: experi, ident, learner, univers, engag, academ, prior 
     FREX: rpl, recognit, academ, interpret, prior, ident, subject 
     Lift: decid, habitus, postgradu, began, bourdieu, classifi, judi 
     Score: rpl, prior, recognit, ident, univers, habitus, academ 
Topic 8 Top Words:
     Highest Prob: educ, women, adult, social, pedagogi, articl, feminist 
     FREX: feminist, women, movement, american, pedagogi, popular, televis 
     Lift: antirac, arab, enforc, garment, husband, marxist, protest 
     Score: women, feminist, pedagogi, educ, adult, movement, american 
Topic 9 Top Words:
     Highest Prob: group, teacher, taylor, educ, franci, llc, copyright 
     FREX: copyright, teacher, llc, authent, apel, group, citizenship 
     Lift: apel, copyright, discrep, driver, eman, horizon, imper 
     Score: teacher, llc, copyright, authent, student, apel, citizenship 
Topic 10 Top Words:
     Highest Prob: learn, adult, educ, transform, theori, critic, reflect 
     FREX: spiritu, radic, transform, theori, critic, reflect, author 
     Lift: autobiographi, alleg, authoritarian, bias, confucian, erasur, haberma 
     Score: transform, theori, adult, learn, critic, spiritu, confucian 
Topic 11 Top Words:
     Highest Prob: communiti, literaci, program, develop, project, adult, health 
     FREX: literaci, communitybas, program, collabor, nonprofit, communiti, health 
     Lift: client, east, heroic, nonetheless, photovoic, sport, user 
     Score: communiti, literaci, program, health, nonprofit, local, sport 
Topic 12 Top Words:
     Highest Prob: educ, polici, higher, countri, adult, european, develop 
     FREX: european, europ, polici, countri, lll, portug, union 
     Lift: efa, export, harmon, latest, lll, shortag, uganda 
     Score: polici, european, countri, higher, bologna, europ, educ 
## Estimating Effect
prepeffect <- estimateEffect(1:12 ~ JournalFa + s(Year), FINALSELECTED, meta = out$meta, uncertainty = "Global")

Summary

#==== Summary
plot.STM(FINALSELECTED, type = "summary")

plot.STM(FINALSELECTED, type = "labels", topics = c(1,2,3))

plot.STM(FINALSELECTED, type = "labels", topics = c(4,5,6))

plot.STM(FINALSELECTED, type = "labels", topics = c(7,8,9))

plot.STM(FINALSELECTED, type = "labels", topics = c(10,11,12))

plot.STM(FINALSELECTED, type = "hist", topics = c(1,2,3,4))

plot.STM(FINALSELECTED, type = "hist", topics = c(5,6,7,8))

plot.STM(FINALSELECTED, type = "hist", topics = c(9,10,11,12))

Summary - Perspective

#==== Summary
plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 4))

plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 5))

plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 12))

plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 9))

Topic and Documents

thoughts1 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =1 )$docs[[1]]
thoughts1
[1] "Self-directed learning and prostate cancer: A thematic analysis of the experiences of twelve patients"
[2] "Talking up learning at work: Cautionary tales in co-opting everyday learning"                         
[3] "The in-between: Exposing everyday learning at work"                                                   
[4] "Invisible theatre, ethics, and the adult educator"                                                    
[5] "Popular education in solidarity economy"                                                              
thoughts2 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =2 )$docs[[1]]
thoughts2
[1] "The Manifestation and Integration of Embodied Knowing Into Social Work Practice"                                             
[2] "ing the complicated matter of what works: Evidence-based research and the problem of practice"                               
[3] "Action Research and Its History as an Adult Education Movement for Social Change"                                            
[4] "Creative Expression as a Way of Knowing in Diabetes Adult Health Education An Action Research "                              
[5] "From autonomy to reciprocity, or vice versa? French personalisms contribution to a new perspective on self-directed learning"
thoughts3 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =3 )$docs[[1]]
thoughts3
[1] "The universal basic education programme and female trafficking in South-South, Nigeria"
[2] "Response to learning through life: Thematic area of poverty reduction"                 
[3] "Generative Learning: Adults Learning Within Ambiguity"                                 
[4] "Learning in the circumstances of practice"                                             
[5] "The perils of confusing lifelong learning with lifelong education"                     
thoughts4 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =4 )$docs[[1]]
thoughts4
[1] "Linking complex problem solving and general mental ability to career advancement: Does a transversal skill reveal incremental predictive validity?"
[2] "Development of the self-directed learning skills scale"                                                                                            
[3] "Development of learning to learn skills in primary school"                                                                                         
[4] "Problem solving in everyday office work—a diary  on differences between experts and novices"                                                     
[5] "What do employers pay for employees’ complex problem solving skills?"                                                                            
thoughts5 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =5 )$docs[[1]]
thoughts5
[1] "Lifelong learning and the social integration of refugees in the UK: The significance of social capital"                                                                     
[2] "The lifelong learning ecosystem in Korea: Evolution of learning capitalism?"                                                                                                
[3] "Bringing learning closer to home: The value and impact of the Lisbon strategy for strengthening the role of local learning centres and partnerships in south-eastern Europe"
[4] "Lifelong learning and vocational training programmes in Northern Aegean (Greece): Weaknesses, possibilities and prospects"                                                  
[5] "An examination of lifelong learning policy rhetoric and practice in Singapore"                                                                                              
thoughts6 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =6 )$docs[[1]]
thoughts6
[1] "Math Anxiety, Math Self-Concept, and Math Self-Efficacy in Adult Learners Compared to Traditional Undergraduate Students"                                     
[2] "First- versus continuing-generation adult students on college perceptions: Are differences actually because of demographic variance?"                         
[3] "The young outsiders: The later life courses of dropout youths"                                                                                                
[4] "Motivation, interest, and positive affect in traditional and nontraditional undergraduate students"                                                           
[5] "Patterns and trends in part-time adult education participation in relation to UK nation, class, place of participation, gender, age and disability, 1998-2003"
thoughts7 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =7 )$docs[[1]]
thoughts7
[1] "Experiential learning in youth work in the UK: A return to Dewey"                                                                                               
[2] "Recognition of prior learning: exploring the knowledge question"                                                                                                
[3] "Outcasts on the inside: academics reinventing themselves online"                                                                                                
[4] "Activists Within the Academy: The Role of Prior Experience in Adult Learners Acquisition of Postgraduate Literacies in a Postapartheid South African University"
[5] "The knowledgeable parenting style: stance takings and subject positions in media encounters"                                                                    
thoughts8 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =8 )$docs[[1]]
thoughts8
[1] "More Important Than Guns: Chinese Adult Education After the Long March"                                                   
[2] "Quien Sabe Mas Lucha Mejor: Adult Educators Care of the Self Practices Within Social Movements in Buenos Aires, Argentina"
[3] "The untold story of foreign devil adult educators in shanghai silk factories (1920 to 1949)"                              
[4] "Adult learning in new social movements: Environmental protest and the struggle for the Clayoquot Sound rainforest"        
[5] "The gendered nature of education under siege: A Palestinian feminist perspective"                                         
thoughts9 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =9 )$docs[[1]]
thoughts9
[1] "Agreement in assessment of prior learning related to higher education: an examination of interrater and intrarater reliability"
[2] "The expanded developmental periphery: Framing the institutional role of university continuing education units"                 
[3] "Teacher professional development for the new school improvement: Botswana"                                                     
[4] "Students as learners through the eyes of their teachers in Rwandan higher education"                                           
[5] "Citizenship and belonging as a moral imperative for lifelong learning"                                                         
thoughts10 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =10 )$docs[[1]]
thoughts10
[1] "Critical Reflection as a Rationalistic Ideal"                                                                 
[2] "A Critical Realist Orientation to Learner Needs"                                                              
[3] "Radical questioning on The Long Walk to Freedom: Nelson Mandela and the practice of critical reflection"      
[4] "Critical Race Theory and Adult Education"                                                                     
[5] "An enabling framework for reflexive learning: Experiential learning and reflexivity in contemporary modernity"
thoughts11 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =11 )$docs[[1]]
thoughts11
[1] "Theyre a lot cleverer than i thought: Challenging perceptions of disability support staff as they tutor in an adult literacy program"
[2] "A framework for lifelong involvement in sport and physical activity: The Irish perspective"                                          
[3] "Are Low-Income Canadians Financially Literate? Placing Financial Literacy in the Context of Personal and Structural Constraints"     
[4] "Reconstructing literacy as an innovation for sustainable development: A policy advocacy for Bangladesh"                              
[5] "The pitfalls of a democracy promotion project for women of Iraq"                                                                     
thoughts12 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =12 )$docs[[1]]
thoughts12
[1] "Second chance routes into higher education: Sweden, Norway and Germany compared"               
[2] "The Bologna Process and higher education in Mercosur: regionalization or Europeanization?"     
[3] "Adult education in transition: Three cases and periods compared"                               
[4] "Does the Celtic Tiger society need to debate the role of higher education and the public good?"
[5] "The lost honour of the Social Dimension: Bologna, exports and the idea of the university"      

Pointestimate of mean of topic in four diffeent

par(mfrow=c(2,2))
plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 10,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 10 (Adult Education)",
                    xlim = c(-.02, .3), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))
plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 5,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 5 (Lifelong Eduction)",
                    xlim = c(-.02, .18), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))
plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 4,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 4 (HRD, Workplace)",
                    xlim = c(-.02, .18), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))
plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 6,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 2 ()",
                    xlim = c(-.02, .18), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))
par(mfrow=c(1,1))

Time Trend Plot with genuine plot

par(mfrow=c(3,2))
P1<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 1,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic1  () \n learn, space, inform, experi, group, peopl, way ")
P2<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 2,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic2  () \n research, practic, process, knowledg, chang, context, work ")
P3<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 3,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic3  () \n learn, cultur, programm, languag, art, african, report ")
P4<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 4,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic4  () \n learn, train, skill, develop, workplac, career, employ ")
P5<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 5,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic5  () \n learn, lifelong, social, capit, immigr, develop")
P6<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 6,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic6  () \n educ, adult, student, particip, learner, age, transit ")
par(mfrow=c(1,1))
par(mfrow=c(3,2))

P7<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 7,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic7  () \n experi, ident, learner, univers, engag, academ, prior ")
P8<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 8,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic8  () \n educ, women, adult, social, pedagogi, articl, feminist ")
P9<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 9,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic9  () \n group, teacher, taylor, educ, franci, llc, copyright ")
P10<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 10,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic10  () \n learn, adult, educ, transform, theori, critic, reflect ")
P11<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 11,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic11  () \n communiti, literaci, program, develop, project, adult, health ")
P12<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 12,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic12  () \n educ, polici, higher, countri, adult, european, develop ")
par(mfrow=c(1,1))

trend plot values

P1
$x
  [1] 2006.000 2006.091 2006.182 2006.273 2006.364 2006.455 2006.545 2006.636
  [9] 2006.727 2006.818 2006.909 2007.000 2007.091 2007.182 2007.273 2007.364
 [17] 2007.455 2007.545 2007.636 2007.727 2007.818 2007.909 2008.000 2008.091
 [25] 2008.182 2008.273 2008.364 2008.455 2008.545 2008.636 2008.727 2008.818
 [33] 2008.909 2009.000 2009.091 2009.182 2009.273 2009.364 2009.455 2009.545
 [41] 2009.636 2009.727 2009.818 2009.909 2010.000 2010.091 2010.182 2010.273
 [49] 2010.364 2010.455 2010.545 2010.636 2010.727 2010.818 2010.909 2011.000
 [57] 2011.091 2011.182 2011.273 2011.364 2011.455 2011.545 2011.636 2011.727
 [65] 2011.818 2011.909 2012.000 2012.091 2012.182 2012.273 2012.364 2012.455
 [73] 2012.545 2012.636 2012.727 2012.818 2012.909 2013.000 2013.091 2013.182
 [81] 2013.273 2013.364 2013.455 2013.545 2013.636 2013.727 2013.818 2013.909
 [89] 2014.000 2014.091 2014.182 2014.273 2014.364 2014.455 2014.545 2014.636
 [97] 2014.727 2014.818 2014.909 2015.000

$topics
[1] 1

$means
$means[[1]]
           1            2            3            4            5            6 
 0.121646610  0.112677540  0.104144860  0.096073307  0.088487620  0.081412536 
           7            8            9           10           11           12 
 0.074872793  0.068893131  0.063498286  0.058712997  0.054562002  0.051070040 
          13           14           15           16           17           18 
 0.048250592  0.046072120  0.044491829  0.043466925  0.042954614  0.042912100 
          19           20           21           22           23           24 
 0.043296591  0.044065290  0.045175404  0.046584139  0.048248699  0.050126290 
          25           26           27           28           29           30 
 0.052174119  0.054349390  0.056609309  0.058911082  0.061211914  0.063469011 
          31           32           33           34           35           36 
 0.065639578  0.067680820  0.069549945  0.071204156  0.072609079  0.073764012 
          37           38           39           40           41           42 
 0.074676674  0.075354783  0.075806057  0.076038214  0.076058972  0.075876050 
          43           44           45           46           47           48 
 0.075497166  0.074930037  0.074182382  0.073275072  0.072281590  0.071288571 
          49           50           51           52           53           54 
 0.070382653  0.069650470  0.069178658  0.069053854  0.069362693  0.070191812 
          55           56           57           58           59           60 
 0.071627846  0.073757431  0.076628549  0.080134567  0.084130196  0.088470151 
          61           62           63           64           65           66 
 0.093009142  0.097601884  0.102103087  0.106367465  0.110249731  0.113604596 
          67           68           69           70           71           72 
 0.116286774  0.118150976  0.119051916  0.118844305  0.117382857  0.114522284 
          73           74           75           76           77           78 
 0.110117299  0.104022613  0.096092940  0.086182993  0.074147482  0.059841122 
          79           80           81           82           83           84 
 0.043400786  0.026091997  0.009462439 -0.004940203 -0.015568247 -0.020874007 
          85           86           87           88           89           90 
-0.019309799 -0.009327939  0.010619256  0.042079472  0.086600392  0.144386346 
          91           92           93           94           95           96 
 0.210268243  0.277733637  0.340270085  0.391365140  0.424506357  0.433181292 
          97           98           99          100 
 0.410877497  0.351082529  0.247283943  0.092969292 


$ci
$ci[[1]]
               1          2           3           4           5           6
2.5%  0.06033089 0.04267653 0.007587055 -0.01993625 -0.03890143 -0.04855318
97.5% 0.18563893 0.18197166 0.198726058  0.21128683  0.21587495  0.21034397
                7           8           9          10          11
2.5%  -0.04941684 -0.04401991 -0.03213293 -0.02086282 -0.00743148
97.5%  0.19668025  0.17892837  0.15807455  0.13368483  0.11249565
                 12          13          14           15           16
2.5%  -0.0008126885 0.003827421 0.002946707 -0.001814473 -0.005471928
97.5%  0.0994601609 0.092417313 0.090738329  0.092046627  0.093386656
                17          18          19           20           21
2.5%  -0.008149204 -0.00829902 -0.00910302 -0.007144584 -0.003585944
97.5%  0.095848254  0.09653464  0.09777119  0.098200580  0.097474366
                 22          23          24          25         26         27
2.5%  -0.0006686304 0.002617926 0.006054669 0.009302451 0.01233992 0.01577919
97.5%  0.0969383961 0.095722812 0.095070783 0.095601111 0.09611446 0.09710053
             28         29         30         31         32         33
2.5%  0.0192271 0.02018445 0.02126717 0.02180828 0.02318449 0.02446694
97.5% 0.0991407 0.10333532 0.10576628 0.10952622 0.11316457 0.11580351
              34         35         36         37         38         39
2.5%  0.02656686 0.03028455 0.03283638 0.03574763 0.03666611 0.03628851
97.5% 0.11652293 0.11697075 0.11641039 0.11551631 0.11518645 0.11603864
              40         41         42         43         44         45
2.5%  0.03441166 0.03202334 0.02926826 0.02655131 0.02378485 0.02276384
97.5% 0.11724366 0.11869381 0.12133242 0.12329735 0.12464132 0.12426431
              46         47         48         49         50         51
2.5%  0.02325486 0.02392923 0.02496968 0.02711446 0.02835949 0.02906568
97.5% 0.12284124 0.12006666 0.11718050 0.11429871 0.11246196 0.11055312
              52         53         54         55         56         57
2.5%  0.02825738 0.02727746 0.02646149 0.02606116 0.02692052 0.02893666
97.5% 0.11080398 0.11213568 0.11338402 0.11622954 0.11889223 0.12162313
              58         59        60         61         62         63
2.5%  0.03403333 0.04031896 0.0466202 0.05235491 0.05848736 0.06442285
97.5% 0.12469454 0.12818195 0.1310538 0.13351312 0.13630548 0.14131187
              64         65         66         67         68         69
2.5%  0.06824009 0.07056416 0.07137705 0.07129732 0.07024584 0.06897428
97.5% 0.14672515 0.15350012 0.15988179 0.16488107 0.17002998 0.17326181
              70         71         72         73         74         75
2.5%  0.06639815 0.06367641 0.06131104 0.05786541 0.05485625 0.04998023
97.5% 0.17490467 0.17481189 0.17195540 0.16541005 0.15502764 0.14282326
              76         77         78          79          80         81
2.5%  0.04305602 0.03095661 0.01271678 -0.01517831 -0.05182833 -0.0872705
97.5% 0.13040367 0.11757329 0.10956937  0.10374215  0.10306541  0.1040658
              82         83         84         85         86          87
2.5%  -0.1205869 -0.1441183 -0.1564773 -0.1510826 -0.1295284 -0.08869126
97.5%  0.1048334  0.1073800  0.1083617  0.1110681  0.1087747  0.10693685
               88         89         90         91         92         93
2.5%  -0.02428702 0.03724353 0.05064816 0.03966253 0.02343571 0.01109009
97.5%  0.10941740 0.14067552 0.24952023 0.39479841 0.54944978 0.69240727
                94          95           96          97         98         99
2.5%  -0.002258241 -0.00992776 -0.008081124 0.001246342 0.01745422 0.04417109
97.5%  0.807250185  0.87913897  0.892233514 0.836660525 0.69926426 0.46058881
             100
2.5%  0.04628183
97.5% 0.14125111
#P2
#P3
#P4
#P5

Time Trend Plot with ggplot

TOPIC 1

P1_1<-P1$x
P1_2<-P1$topics
P1_3<-P1$means[[1]]
P1_4<-P1$ci[[1]]
P1_4<-t(P1_4)
P1C<-as.data.frame(cbind(P1_1,P1_3,P1_4))
colnames(P1C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P1ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 1 Top Words: learn, space, inform, experi, group, peopl, way ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P1GG<-ggplot(P1C) + geom_line(aes(x=P1C$YEAR, y=P1C$Trend), color="black", size=0.5) + labs(title="Topic 1: ") + annotation_custom(P1ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P1GG

Topic 2

P2_1<-P2$x
P2_2<-P2$topics
P2_3<-P2$means[[1]]
P2_4<-P2$ci[[1]]
P2_4<-t(P2_4)
P2C<-as.data.frame(cbind(P2_1,P2_3,P2_4))
colnames(P2C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P2ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 2 Top Words: research, practic, process, knowledg, chang, context, work ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P2GG<-ggplot(P2C) + geom_line(aes(x=P2C$YEAR, y=P2C$Trend), color="black", size=0.5) + labs(title="Topic 1: ") + annotation_custom(P2ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P2GG

Topic 3

P3_1<-P3$x
P3_2<-P3$topics
P3_3<-P3$means[[1]]
P3_4<-P3$ci[[1]]
P3_4<-t(P3_4)
P3C<-as.data.frame(cbind(P3_1,P3_3,P3_4))
colnames(P3C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P3ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 3 Top Words: learn, cultur, programm, languag, art, african, report ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P3GG<-ggplot(P3C) + geom_line(aes(x=P3C$YEAR, y=P3C$Trend), color="black", size=0.5) + labs(title="Topic 3: ") + annotation_custom(P3ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P3GG

Topic 4

P4_1<-P4$x
P4_2<-P4$topics
P4_3<-P4$means[[1]]
P4_4<-P4$ci[[1]]
P4_4<-t(P4_4)
P4C<-as.data.frame(cbind(P4_1,P4_3,P4_4))
colnames(P4C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P4ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 4 Top Words: learn, train, skill, develop, workplac, career, employ ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P4GG<-ggplot(P4C) + geom_line(aes(x=P4C$YEAR, y=P4C$Trend), color="black", size=0.5) + labs(title="Topic 4: ") + annotation_custom(P4ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P4GG

Topic 5

P5_1<-P5$x
P5_2<-P5$topics
P5_3<-P5$means[[1]]
P5_4<-P5$ci[[1]]
P5_4<-t(P5_4)
P5C<-as.data.frame(cbind(P5_1,P5_3,P5_4))
colnames(P5C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P5ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 5 Top Words: learn, lifelong, social, capit, immigr, develop, also ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P5GG<-ggplot(P5C) + geom_line(aes(x=P5C$YEAR, y=P5C$Trend), color="black", size=0.5) + labs(title="Topic 5: ") + annotation_custom(P5ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P5GG

Topic 6

P6_1<-P6$x
P6_2<-P6$topics
P6_3<-P6$means[[1]]
P6_4<-P6$ci[[1]]
P6_4<-t(P6_4)
P6C<-as.data.frame(cbind(P6_1,P6_3,P6_4))
colnames(P6C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P6ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 6 Top Words: educ, adult, student, particip, learner, age, transit ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P6GG<-ggplot(P6C) + geom_line(aes(x=P6C$YEAR, y=P6C$Trend), color="black", size=0.5) + labs(title="Topic 6: ") + annotation_custom(P6ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P6GG

Topic 7

P7_1<-P7$x
P7_2<-P7$topics
P7_3<-P7$means[[1]]
P7_4<-P7$ci[[1]]
P7_4<-t(P7_4)
P7C<-as.data.frame(cbind(P7_1,P7_3,P7_4))
colnames(P7C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P7ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 7 Top Words: experi, ident, learner, univers, engag, academ, prior ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P7GG<-ggplot(P7C) + geom_line(aes(x=P7C$YEAR, y=P7C$Trend), color="black", size=0.5) + labs(title="Topic 7: ") + annotation_custom(P7ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P7GG

Topic 8

P8_1<-P8$x
P8_2<-P8$topics
P8_3<-P8$means[[1]]
P8_4<-P8$ci[[1]]
P8_4<-t(P8_4)
P8C<-as.data.frame(cbind(P8_1,P8_3,P8_4))
colnames(P8C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P8ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 8 Top Words: educ, women, adult, social, pedagogi, articl, feminist ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P8GG<-ggplot(P8C) + geom_line(aes(x=P8C$YEAR, y=P8C$Trend), color="black", size=0.5) + labs(title="Topic 8: ") + annotation_custom(P8ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P8GG

Topic 9

P9_1<-P9$x
P9_2<-P9$topics
P9_3<-P9$means[[1]]
P9_4<-P9$ci[[1]]
P9_4<-t(P9_4)
P9C<-as.data.frame(cbind(P9_1,P9_3,P9_4))
colnames(P9C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P9ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 9 Top Words: group, teacher, taylor, educ, franci, llc, copyright ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P9GG<-ggplot(P9C) + geom_line(aes(x=P9C$YEAR, y=P9C$Trend), color="black", size=0.5) + labs(title="Topic 9: ") + annotation_custom(P9ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P9GG

Topic 10

P10_1<-P10$x
P10_2<-P10$topics
P10_3<-P10$means[[1]]
P10_4<-P10$ci[[1]]
P10_4<-t(P10_4)
P10C<-as.data.frame(cbind(P10_1,P10_3,P10_4))
colnames(P10C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P10ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 10 Top Words: learn, adult, educ, transform, theori, critic, reflect ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P10GG<-ggplot(P10C) + geom_line(aes(x=P10C$YEAR, y=P10C$Trend), color="black", size=0.5) + labs(title="Topic 10: ") + annotation_custom(P10ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P10GG

Topic 11

P11_1<-P11$x
P11_2<-P11$topics
P11_3<-P11$means[[1]]
P11_4<-P11$ci[[1]]
P11_4<-t(P11_4)
P11C<-as.data.frame(cbind(P11_1,P11_3,P11_4))
colnames(P11C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P11ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 11 Top Words: communiti, literaci, program, develop, project, adult, health ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P11GG<-ggplot(P11C) + geom_line(aes(x=P11C$YEAR, y=P11C$Trend), color="black", size=0.5) + labs(title="Topic 11: ") + annotation_custom(P11ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P11GG

Topic 12

P12_1<-P12$x
P12_2<-P12$topics
P12_3<-P12$means[[1]]
P12_4<-P12$ci[[1]]
P12_4<-t(P12_4)
P12C<-as.data.frame(cbind(P12_1,P12_3,P12_4))
colnames(P12C)<-c("YEAR","Trend","LOWCI","HIGHCI")
P12ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
     Topic 12 Top Words: educ, polici, higher, countri, adult, european, develop ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))
P12GG<-ggplot(P12C) + geom_line(aes(x=P12C$YEAR, y=P12C$Trend), color="black", size=0.5) + labs(title="Topic 12: ") + annotation_custom(P12ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P12GG

Time Serise bundle

require(gridExtra)
grid.arrange(P1GG, P2GG, P3GG, P4GG, ncol=2)

grid.arrange(P5GG, P6GG, P7GG, P8GG, ncol=2)

grid.arrange(P9GG, P10GG, P11GG, P12GG, ncol=2)

Additional Analysis

cloud(FINALSELECTED, topic = 1, scale = c(4,.75))

mod.out.corr <- topicCorr(FINALSELECTED)
plot.topicCorr(mod.out.corr)

mod.out.corr
$posadj
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
 [1,]    1    0    0    0    0    0    0    0    0     0     0     0
 [2,]    0    1    0    0    0    0    0    0    1     1     0     0
 [3,]    0    0    1    0    0    0    0    0    0     0     0     0
 [4,]    0    0    0    1    0    0    0    0    0     0     0     0
 [5,]    0    0    0    0    1    0    0    0    0     0     0     1
 [6,]    0    0    0    0    0    1    1    0    0     0     0     0
 [7,]    0    0    0    0    0    1    1    0    0     0     0     0
 [8,]    0    0    0    0    0    0    0    1    0     0     1     0
 [9,]    0    1    0    0    0    0    0    0    1     0     0     0
[10,]    0    1    0    0    0    0    0    0    0     1     0     0
[11,]    0    0    0    0    0    0    0    1    0     0     1     0
[12,]    0    0    0    0    1    0    0    0    0     0     0     1

$poscor
      [,1]       [,2] [,3] [,4]     [,5]       [,6]       [,7]       [,8]
 [1,]    1 0.00000000    0    0 0.000000 0.00000000 0.00000000 0.00000000
 [2,]    0 1.00000000    0    0 0.000000 0.00000000 0.00000000 0.00000000
 [3,]    0 0.00000000    1    0 0.000000 0.00000000 0.00000000 0.00000000
 [4,]    0 0.00000000    0    1 0.000000 0.00000000 0.00000000 0.00000000
 [5,]    0 0.00000000    0    0 1.000000 0.00000000 0.00000000 0.00000000
 [6,]    0 0.00000000    0    0 0.000000 1.00000000 0.02209542 0.00000000
 [7,]    0 0.00000000    0    0 0.000000 0.02209542 1.00000000 0.00000000
 [8,]    0 0.00000000    0    0 0.000000 0.00000000 0.00000000 1.00000000
 [9,]    0 0.04367957    0    0 0.000000 0.00000000 0.00000000 0.00000000
[10,]    0 0.04719925    0    0 0.000000 0.00000000 0.00000000 0.00000000
[11,]    0 0.00000000    0    0 0.000000 0.00000000 0.00000000 0.01451966
[12,]    0 0.00000000    0    0 0.108178 0.00000000 0.00000000 0.00000000
            [,9]      [,10]      [,11]    [,12]
 [1,] 0.00000000 0.00000000 0.00000000 0.000000
 [2,] 0.04367957 0.04719925 0.00000000 0.000000
 [3,] 0.00000000 0.00000000 0.00000000 0.000000
 [4,] 0.00000000 0.00000000 0.00000000 0.000000
 [5,] 0.00000000 0.00000000 0.00000000 0.108178
 [6,] 0.00000000 0.00000000 0.00000000 0.000000
 [7,] 0.00000000 0.00000000 0.00000000 0.000000
 [8,] 0.00000000 0.00000000 0.01451966 0.000000
 [9,] 1.00000000 0.00000000 0.00000000 0.000000
[10,] 0.00000000 1.00000000 0.00000000 0.000000
[11,] 0.00000000 0.00000000 1.00000000 0.000000
[12,] 0.00000000 0.00000000 0.00000000 1.000000

$cor
             [,1]        [,2]        [,3]        [,4]        [,5]        [,6]
 [1,]  1.00000000  0.00000000 -0.04569822 -0.10296952 -0.13983711 -0.11581072
 [2,]  0.00000000  1.00000000 -0.03949244 -0.09343405 -0.08683209 -0.21613649
 [3,] -0.04569822 -0.03949244  1.00000000 -0.08387502 -0.05657482 -0.14884807
 [4,] -0.10296952 -0.09343405 -0.08387502  1.00000000 -0.09694895  0.00000000
 [5,] -0.13983711 -0.08683209 -0.05657482 -0.09694895  1.00000000 -0.10263174
 [6,] -0.11581072 -0.21613649 -0.14884807  0.00000000 -0.10263174  1.00000000
 [7,] -0.04881684 -0.07639733 -0.05613178 -0.10657528 -0.04982322  0.02209542
 [8,] -0.01802237 -0.02284215 -0.04792164 -0.18325683 -0.16339644 -0.13941367
 [9,] -0.10864722  0.04367957 -0.09534453 -0.04761594 -0.04358786 -0.04554520
[10,] -0.09900642  0.04719925 -0.01330170 -0.13685564 -0.17454707 -0.21934259
[11,] -0.06154146  0.00000000  0.00000000 -0.13720859 -0.06073652 -0.16264728
[12,] -0.18693187 -0.23741965 -0.13296600 -0.07298667  0.10817795 -0.08677659
             [,7]        [,8]        [,9]       [,10]       [,11]       [,12]
 [1,] -0.04881684 -0.01802237 -0.10864722 -0.09900642 -0.06154146 -0.18693187
 [2,] -0.07639733 -0.02284215  0.04367957  0.04719925  0.00000000 -0.23741965
 [3,] -0.05613178 -0.04792164 -0.09534453 -0.01330170  0.00000000 -0.13296600
 [4,] -0.10657528 -0.18325683 -0.04761594 -0.13685564 -0.13720859 -0.07298667
 [5,] -0.04982322 -0.16339644 -0.04358786 -0.17454707 -0.06073652  0.10817795
 [6,]  0.02209542 -0.13941367 -0.04554520 -0.21934259 -0.16264728 -0.08677659
 [7,]  1.00000000 -0.14511482 -0.01628195 -0.08235365 -0.10982598 -0.12538456
 [8,] -0.14511482  1.00000000 -0.16849434 -0.05346941  0.01451966 -0.15039534
 [9,] -0.01628195 -0.16849434  1.00000000 -0.10048191 -0.13660523 -0.14584662
[10,] -0.08235365 -0.05346941 -0.10048191  1.00000000 -0.12926363 -0.19871650
[11,] -0.10982598  0.01451966 -0.13660523 -0.12926363  1.00000000 -0.09708651
[12,] -0.12538456 -0.15039534 -0.14584662 -0.19871650 -0.09708651  1.00000000

attr(,"class")
[1] "topicCorr"
---
title: "Data Analysis for AERC proposal"
date: "`r Sys.Date()`"
author: 
  - "Chungil Chae"
  - "Soo Jung Han"
  - "Seung Heun Han"
#output: 
#  rmdformats::readthedown:
#    highlight: kate
output: html_notebook
---

# Preparation
## Packages Loading
```{r}
#===============================

require(stm)
require(tm)
require(SnowballC)
require(LDAvis)
require(stmCorrViz)
require(stmBrowser)
require(ggplot2)
require(topicmodels)
require(wordcloud)   
require(reshape2) # melt()
require(grid) # arrow()
require(ggthemes)
require(lsa)
set.seed(1234)
```

## Data loading
```{r}
#===============================
# Data Loading
require(XLConnect)               # load XLConnect package 
wk <- loadWorkbook("../DATA/AERCDATA_.xlsx") 
data <- readWorksheet(wk, sheet="DATA") 
data$KeyYesNo<-as.factor(data$KeyYesNo)
data$AbsYesNo<-as.factor(data$AbsYesNo)
data$JournalFa<-as.factor(data$Journal)
dataID <- readWorksheet(wk, sheet="DATAID") 
load("../DATA/DATA.Rdata")

```


# Basic Text Analysis
## Text Cleaning

```{r}
TEXTBASIC<- Corpus(VectorSource(data$TEXT))
CORPUS <- tm_map(TEXTBASIC, removePunctuation)   
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, removeNumbers)   
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, tolower)  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, removeWords, stopwords("english"))  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, removeWords, stopwords("SMART"))  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, stemDocument)
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, stripWhitespace)  
as.character(inspect(CORPUS[2]))
CORPUS <- tm_map(CORPUS, PlainTextDocument) 
CORPUS <- tm_map(CORPUS, removeWords, c("and", "for", "that",
                                        "this", "are", "is", 
                                        "am","with","their","the","can",
                                        "from","article", "study","research","analysis"))

as.character(inspect(CORPUS[2]))

# 

```


## Building Courps
```{r}
tdm <- TermDocumentMatrix(CORPUS)
dtm <- DocumentTermMatrix(CORPUS)
dim(tdm)
dim(dtm)

```


## Word Frequency
```{r}
freq <- sort(colSums(as.matrix(dtm)), decreasing=TRUE)
wf <- data.frame(word=names(freq), freq=freq)
table(freq)
head(freq)

require(ggplot2)   
p <- ggplot(data=subset(wf, freq>200), aes(reorder(word, freq), freq))    
p <- p + geom_bar(stat="identity")   
p <- p + theme(axis.text.x=element_text(angle=45, hjust=1))   
p 

```


## creating term matrix with TF-IDF weighting
```{r}
terms_td_idf <-DocumentTermMatrix(CORPUS, control = list(weighting = function(x) weightTfIdf(x, normalize = FALSE)))
terms_td_idf

```



## Text Network with tf-idf similarity
```{r}
td.mat <- as.matrix(TermDocumentMatrix(CORPUS))
td.mat.lsa <- lw_bintf(td.mat) * gw_idf(td.mat) # weighting
lsaSpace <- lsa(td.mat.lsa) # create LSA space
dist.mat.lsa <- dist(t(as.textmatrix(lsaSpace))) # compute distance matrix

```


# MDS
```{r}
fit <- cmdscale(dist.mat.lsa, eig=TRUE, k=2)
points <- data.frame(x=fit$points[, 1], y=fit$points[, 2])
points$DocID<-dataID$NO
qplot(x, y, data = points, geom = "point", alpha = I(1/5))
plot(points$x,points$y)
text(points$x,points$y, points$DocID ,cex=0.6, pos=4, col="red")


library(scatterplot3d)
fit <- cmdscale(dist.mat.lsa, eig = TRUE, k = 3)
colors <- rep(c("blue", "green", "red"), each = 3)
scatterplot3d(fit$points[, 1], fit$points[, 2], fit$points[, 3],
              pch = 16, main = "Semantic Space Scaled to 3D", 
              xlab = "x", ylab = "y",
              zlab = "z", type = "h"
              )

```




# LDA - STM Topic Modeling
## stemming/stopword removal, etc.
```{r}
data$TEXT <- gsub("[^[:alnum:]///' ]", "", data$TEXT)
processed <- textProcessor(data$TEXT, metadata = data)

```



## structure and index for usage in the stm model. Verify no-missingness.
```{r}
prep <- prepDocuments(processed$documents, processed$vocab, processed$meta)

```


## output will have object meta, documents, and vocab
```{r}
docs <- out$documents
vocab <- out$vocab
meta  <-out$meta

plotRemoved(processed$documents, lower.thresh = seq(1, 1000, by = 10))

#prep <- prepDocuments(processed$documents, processed$vocab,processed$meta, 
#                      lower.thresh = 1, 
#                      upper.thresh= 400)
#plotRemoved(prep$documents, lower.thresh = seq(1, 1000, by = 10))
```


## Topic Model Evaluation for search K
```{r}
heldout <- make.heldout(prep$documents, prep$vocab)
documents <- heldout$documents
vocab <- heldout$voca

K<-c(2:50)
#MODELS <- searchK(documents, vocab, K, seed = 8458159)


plot.searchK(MODELS)

par(mfrow=c(2,2))
qplot(MODELS$results$K, MODELS$results$exclus, geom=c("point", "smooth"))
qplot(MODELS$results$K, MODELS$results$heldout, geom=c("point", "smooth"))
qplot(MODELS$results$K, MODELS$results$semcoh, geom=c("point", "smooth"))
qplot(MODELS$results$K, MODELS$results$em.its, geom=c("point", "smooth"))
par(mfrow=c(1,1))

KRANK_heldout<-as.data.frame(cbind(MODELS$results$K,MODELS$results$heldout))
KRANK_semcoh<-as.data.frame(cbind(MODELS$results$K,MODELS$results$semcoh))
KRANK_heldout
KRANK_semcoh

#SELMODEL <- selectModel(out$documents, out$vocab, K = 12,
#                        prevalence =~ JournalFa + s(Year),
#                        max.em.its = 100,
#                        data = out$meta, runs = 30, 
#                        seed = 8458159)
SELMODEL
plotModels(SELMODEL)


MS1 <- SELMODEL$runout[[1]]
topicQuality(model=MS1, documents=docs)
MS1_evalheldout<-eval.heldout(MS1, heldout$missing)
hist(MS1_evalheldout$doc.heldout)
MS1_evalheldout

MS2 <- SELMODEL$runout[[2]]
topicQuality(model=MS2, documents=docs)
MS2_evalheldout<-eval.heldout(MS2, heldout$missing)
hist(MS2_evalheldout$doc.heldout)
MS2_evalheldout

MS3 <- SELMODEL$runout[[3]]
topicQuality(model=MS3, documents=docs)
MS3_evalheldout<-eval.heldout(MS3, heldout$missing)
hist(MS3_evalheldout$doc.heldout)
MS3_evalheldout

MS4 <- SELMODEL$runout[[4]]
topicQuality(model=MS4, documents=docs)
MS4_evalheldout<-eval.heldout(MS4, heldout$missing)
hist(MS4_evalheldout$doc.heldout)
MS4_evalheldout

MS5 <- SELMODEL$runout[[5]]
topicQuality(model=MS5, documents=docs)
MS5_evalheldout<-eval.heldout(MS5, heldout$missing)
hist(MS5_evalheldout$doc.heldout)
MS5_evalheldout

MS6 <- SELMODEL$runout[[6]]
topicQuality(model=MS6, documents=docs)
MS6_evalheldout<-eval.heldout(MS6, heldout$missing)
hist(MS6_evalheldout$doc.heldout)
MS6_evalheldout


FINALSELECTED <- SELMODEL$runout[[4]]
topicQuality(model=FINALSELECTED, documents=docs)
evalheldout<-eval.heldout(FINALSELECTED, heldout$missing)
hist(evalheldout$doc.heldout)
evalheldout

#save(MODELS, SELMODEL, FINALSELECTED, file = "./DATA/DATA.Rdata")

```




# Topic Model Description
## Basic Result
```{r}
#==== Summary
## Label Topics
labelTopics(FINALSELECTED)


## Estimating Effect
prepeffect <- estimateEffect(1:12 ~ JournalFa + s(Year), FINALSELECTED, meta = out$meta, uncertainty = "Global")

```

## Summary
```{r}
#==== Summary
plot.STM(FINALSELECTED, type = "summary")
plot.STM(FINALSELECTED, type = "labels", topics = c(1,2,3))
plot.STM(FINALSELECTED, type = "labels", topics = c(4,5,6))
plot.STM(FINALSELECTED, type = "labels", topics = c(7,8,9))
plot.STM(FINALSELECTED, type = "labels", topics = c(10,11,12))
plot.STM(FINALSELECTED, type = "hist", topics = c(1,2,3,4))
plot.STM(FINALSELECTED, type = "hist", topics = c(5,6,7,8))
plot.STM(FINALSELECTED, type = "hist", topics = c(9,10,11,12))
```

## Summary - Perspective
```{r}
#==== Summary
plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 4))
plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 5))
plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 12))
plot.STM(FINALSELECTED, type = "perspectives", topics = c(10, 9))

```

## Topic and Documents
```{r}
thoughts1 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =1 )$docs[[1]]
thoughts1

thoughts2 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =2 )$docs[[1]]
thoughts2

thoughts3 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =3 )$docs[[1]]
thoughts3

thoughts4 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =4 )$docs[[1]]
thoughts4

thoughts5 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =5 )$docs[[1]]
thoughts5

thoughts6 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =6 )$docs[[1]]
thoughts6

thoughts7 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =7 )$docs[[1]]
thoughts7

thoughts8 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =8 )$docs[[1]]
thoughts8

thoughts9 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =9 )$docs[[1]]
thoughts9

thoughts10 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =10 )$docs[[1]]
thoughts10

thoughts11 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =11 )$docs[[1]]
thoughts11

thoughts12 <- findThoughts(FINALSELECTED, texts = meta$Title, n = 5, topics =12 )$docs[[1]]
thoughts12

```






## Pointestimate of mean of topic in four diffeent
```{r}
par(mfrow=c(2,2))
plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 10,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 10 (Adult Education)",
                    xlim = c(-.02, .3), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))

plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 5,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 5 (Lifelong Eduction)",
                    xlim = c(-.02, .18), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))

plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 4,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 4 (HRD, Workplace)",
                    xlim = c(-.02, .18), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))


plot.estimateEffect(prepeffect, covariate = "JournalFa", topics = 6,
                    model = FINALSELECTED, method = "pointestimate",
                    xlab = "Expected Proportion",
                    main = "TOPIC 2 ()",
                    xlim = c(-.02, .18), 
                    labeltype = "custom",
                    custom.labels = c(
                      'AEQ', 'IJLE'))


par(mfrow=c(1,1))
```

# Time Trend Plot with genuine plot
```{r}
par(mfrow=c(3,2))
P1<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 1,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic1  () \n learn, space, inform, experi, group, peopl, way ")

P2<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 2,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic2  () \n research, practic, process, knowledg, chang, context, work ")

P3<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 3,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic3  () \n learn, cultur, programm, languag, art, african, report ")

P4<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 4,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic4  () \n learn, train, skill, develop, workplac, career, employ ")

P5<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 5,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic5  () \n learn, lifelong, social, capit, immigr, develop")

P6<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 6,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic6  () \n educ, adult, student, particip, learner, age, transit ")

par(mfrow=c(1,1))

par(mfrow=c(3,2))
P7<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 7,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic7  () \n experi, ident, learner, univers, engag, academ, prior ")

P8<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 8,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic8  () \n educ, women, adult, social, pedagogi, articl, feminist ")

P9<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 9,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic9  () \n group, teacher, taylor, educ, franci, llc, copyright ")

P10<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 10,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic10  () \n learn, adult, educ, transform, theori, critic, reflect ")

P11<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 11,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic11  () \n communiti, literaci, program, develop, project, adult, health ")

P12<-plot.estimateEffect(prepeffect, 
                        covariate = "Year",
                        topics = 12,
                        model = FINALSELECTED,
                        method = "continuous", 
                        ylim = c(0, .4),
                        linecol = "blue", 
                        printlegend = F, 
                        main = " Topic12  () \n educ, polici, higher, countri, adult, european, develop ")

par(mfrow=c(1,1))
```






# trend plot values
```{r}
P1
#P2
#P3
#P4
#P5
```


## Time Trend Plot with ggplot
### TOPIC 1
```{r}
P1_1<-P1$x
P1_2<-P1$topics
P1_3<-P1$means[[1]]
P1_4<-P1$ci[[1]]
P1_4<-t(P1_4)
P1C<-as.data.frame(cbind(P1_1,P1_3,P1_4))
colnames(P1C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P1ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 1 Top Words: learn, space, inform, experi, group, peopl, way ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P1GG<-ggplot(P1C) + geom_line(aes(x=P1C$YEAR, y=P1C$Trend), color="black", size=0.5) + labs(title="Topic 1: ") + annotation_custom(P1ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P1GG
```

# Topic 2
```{r}
P2_1<-P2$x
P2_2<-P2$topics
P2_3<-P2$means[[1]]
P2_4<-P2$ci[[1]]
P2_4<-t(P2_4)
P2C<-as.data.frame(cbind(P2_1,P2_3,P2_4))
colnames(P2C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P2ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 2 Top Words: research, practic, process, knowledg, chang, context, work ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P2GG<-ggplot(P2C) + geom_line(aes(x=P2C$YEAR, y=P2C$Trend), color="black", size=0.5) + labs(title="Topic 2: ") + annotation_custom(P2ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P2GG
```

# Topic 3
```{r}
P3_1<-P3$x
P3_2<-P3$topics
P3_3<-P3$means[[1]]
P3_4<-P3$ci[[1]]
P3_4<-t(P3_4)
P3C<-as.data.frame(cbind(P3_1,P3_3,P3_4))
colnames(P3C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P3ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 3 Top Words: learn, cultur, programm, languag, art, african, report ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P3GG<-ggplot(P3C) + geom_line(aes(x=P3C$YEAR, y=P3C$Trend), color="black", size=0.5) + labs(title="Topic 3: ") + annotation_custom(P3ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P3GG
```

# Topic 4
```{r}
P4_1<-P4$x
P4_2<-P4$topics
P4_3<-P4$means[[1]]
P4_4<-P4$ci[[1]]
P4_4<-t(P4_4)
P4C<-as.data.frame(cbind(P4_1,P4_3,P4_4))
colnames(P4C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P4ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 4 Top Words: learn, train, skill, develop, workplac, career, employ ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P4GG<-ggplot(P4C) + geom_line(aes(x=P4C$YEAR, y=P4C$Trend), color="black", size=0.5) + labs(title="Topic 4: ") + annotation_custom(P4ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P4GG
```

# Topic 5
```{r}
P5_1<-P5$x
P5_2<-P5$topics
P5_3<-P5$means[[1]]
P5_4<-P5$ci[[1]]
P5_4<-t(P5_4)
P5C<-as.data.frame(cbind(P5_1,P5_3,P5_4))
colnames(P5C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P5ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
    Topic 5 Top Words: learn, lifelong, social, capit, immigr, develop, also ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P5GG<-ggplot(P5C) + geom_line(aes(x=P5C$YEAR, y=P5C$Trend), color="black", size=0.5) + labs(title="Topic 5: ") + annotation_custom(P5ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P5GG
```

# Topic 6
```{r}
P6_1<-P6$x
P6_2<-P6$topics
P6_3<-P6$means[[1]]
P6_4<-P6$ci[[1]]
P6_4<-t(P6_4)
P6C<-as.data.frame(cbind(P6_1,P6_3,P6_4))
colnames(P6C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P6ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 6 Top Words: educ, adult, student, particip, learner, age, transit ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P6GG<-ggplot(P6C) + geom_line(aes(x=P6C$YEAR, y=P6C$Trend), color="black", size=0.5) + labs(title="Topic 6: ") + annotation_custom(P6ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P6GG
```

# Topic 7
```{r}
P7_1<-P7$x
P7_2<-P7$topics
P7_3<-P7$means[[1]]
P7_4<-P7$ci[[1]]
P7_4<-t(P7_4)
P7C<-as.data.frame(cbind(P7_1,P7_3,P7_4))
colnames(P7C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P7ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 7 Top Words: experi, ident, learner, univers, engag, academ, prior ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P7GG<-ggplot(P7C) + geom_line(aes(x=P7C$YEAR, y=P7C$Trend), color="black", size=0.5) + labs(title="Topic 7: ") + annotation_custom(P7ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P7GG
```

# Topic 8
```{r}
P8_1<-P8$x
P8_2<-P8$topics
P8_3<-P8$means[[1]]
P8_4<-P8$ci[[1]]
P8_4<-t(P8_4)
P8C<-as.data.frame(cbind(P8_1,P8_3,P8_4))
colnames(P8C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P8ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 8 Top Words: educ, women, adult, social, pedagogi, articl, feminist ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P8GG<-ggplot(P8C) + geom_line(aes(x=P8C$YEAR, y=P8C$Trend), color="black", size=0.5) + labs(title="Topic 8: ") + annotation_custom(P8ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P8GG
```

# Topic 9
```{r}
P9_1<-P9$x
P9_2<-P9$topics
P9_3<-P9$means[[1]]
P9_4<-P9$ci[[1]]
P9_4<-t(P9_4)
P9C<-as.data.frame(cbind(P9_1,P9_3,P9_4))
colnames(P9C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P9ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 9 Top Words: group, teacher, taylor, educ, franci, llc, copyright ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P9GG<-ggplot(P9C) + geom_line(aes(x=P9C$YEAR, y=P9C$Trend), color="black", size=0.5) + labs(title="Topic 9: ") + annotation_custom(P9ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P9GG
```

# Topic 10
```{r}
P10_1<-P10$x
P10_2<-P10$topics
P10_3<-P10$means[[1]]
P10_4<-P10$ci[[1]]
P10_4<-t(P10_4)
P10C<-as.data.frame(cbind(P10_1,P10_3,P10_4))
colnames(P10C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P10ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 10 Top Words: learn, adult, educ, transform, theori, critic, reflect ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P10GG<-ggplot(P10C) + geom_line(aes(x=P10C$YEAR, y=P10C$Trend), color="black", size=0.5) + labs(title="Topic 10: ") + annotation_custom(P10ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P10GG
```

# Topic 11
```{r}
P11_1<-P11$x
P11_2<-P11$topics
P11_3<-P11$means[[1]]
P11_4<-P11$ci[[1]]
P11_4<-t(P11_4)
P11C<-as.data.frame(cbind(P11_1,P11_3,P11_4))
colnames(P11C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P11ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 11 Top Words: communiti, literaci, program, develop, project, adult, health ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P11GG<-ggplot(P11C) + geom_line(aes(x=P11C$YEAR, y=P11C$Trend), color="black", size=0.5) + labs(title="Topic 11: ") + annotation_custom(P11ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P11GG
```

# Topic 12
```{r}
P12_1<-P12$x
P12_2<-P12$topics
P12_3<-P12$means[[1]]
P12_4<-P12$ci[[1]]
P12_4<-t(P12_4)
P12C<-as.data.frame(cbind(P12_1,P12_3,P12_4))
colnames(P12C)<-c("YEAR","Trend","LOWCI","HIGHCI")

P12ANNO<-grobTree(
  textGrob(
    "Highest Probability Words: 
	 Topic 12 Top Words: educ, polici, higher, countri, adult, european, develop ",
    x=0.1,  
    y=0.9, 
    hjust=0, 
    gp=gpar(col="black", fontsize=8)))

P12GG<-ggplot(P12C) + geom_line(aes(x=P12C$YEAR, y=P12C$Trend), color="black", size=0.5) + labs(title="Topic 12: ") + annotation_custom(P12ANNO) + ylim(-0.2, 0.7) + xlab("") + ylab("Expected Topic Proportion") + theme_hc() + scale_x_continuous(breaks=c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015))
P12GG
```


# Time Serise bundle
```{r}
require(gridExtra)
grid.arrange(P1GG, P2GG, P3GG, P4GG, ncol=2)
grid.arrange(P5GG, P6GG, P7GG, P8GG, ncol=2)
grid.arrange(P9GG, P10GG, P11GG, P12GG, ncol=2)
```

## Additional Analysis
```{r}
cloud(FINALSELECTED, topic = 1, scale = c(4,.75))
mod.out.corr <- topicCorr(FINALSELECTED)
plot.topicCorr(mod.out.corr)
mod.out.corr

```



