Student ID: 1A182901-2


  1. Set up environment
rm(list=ls(all=TRUE))
setwd("~/Desktop/R/polimetrics")
library(RColorBrewer)
library(readtext)
library(quanteda)
library(ggplot2)
library(cowplot)
setwd("~/Desktop/r/polimetrics/IE")

  1. Read file
myText <- readtext("~/Desktop/r/polimetrics/IE/*.txt",
                   docvarsfrom = "filenames", dvsep = " ", docvarnames = c("Year", "Party"))
testCorpus <- corpus(myText)
testCorpus <- corpus(myText, docid_field = "doc_id")
docnames(testCorpus) <- gsub(".txt", "", docnames(testCorpus))
myDfm <- dfm(testCorpus , remove = stopwords("english"), tolower = TRUE, stem = TRUE,
             remove_punct = TRUE, remove_numbers=TRUE)

  1. Wordfish
wfm <- textmodel_wordfish(myDfm, dir = c(1, 3))
v_wf <- wfm$theta
df_wf <- as.data.frame(v_wf)
df_wf$min <- wfm$theta-wfm$se.theta
df_wf$max <- wfm$theta+wfm$se.theta
df_wf$title <- gsub("19","",wfm$docs)
df_wf$title <- with(df_wf,reorder(title,v_wf))

  1. Wordscore
ws <- textmodel_wordscores(myDfm, c(4.5,13.13,15,6.88,17.63,rep(NA,5)))
#Predict wordscore
pr_raw <- predict(ws, newdata = myDfm[c(6,7,8,9,10), ])
pr_raw_a <- predict(ws, newdata = myDfm[c(1:10), ])
pr_lbg <- predict(ws, rescaling = "lbg", newdata = myDfm[c(6,7,8,9,10), ], interval = "confidence")
#Raw dataframe
v_raw <- c(c(4.5,13.13,15,6.88,17.63),pr_raw)
df_raw <- as.data.frame(v_raw,row.names = docnames(myDfm))
df_raw$title <- gsub("19","",rownames(df_raw))
df_raw$title <- with(df_raw,reorder(title,v_raw))
#All raw dataframe
df_raw_a <- as.data.frame(pr_raw_a)
df_raw_a$title <- gsub("19","",rownames(df_raw_a))
df_raw_a$title <- with(df_raw_a,reorder(title,pr_raw_a))
#LBG dataframe
df_lbg <- as.data.frame(pr_lbg)
df_lbg$title <- gsub("19","",rownames(df_lbg))
colnames(df_lbg) <- c("score","lb","ub","title")
df_lbg$g <- "pr"
df_ref <- data.frame(
  value1 = c(4.5,13.13,15,6.88,17.63),
  value2 = c(4.5,13.13,15,6.88,17.63),
  value3 = c(4.5,13.13,15,6.88,17.63)
)
rownames(df_ref) <- gsub(".txt","",myText$doc_id[1:5])
df_ref$title <- gsub("19","",rownames(df_ref))
colnames(df_ref) <- c("score","lb","ub","title")
df_ref$g <- "ref"
df_lbg_res <- as.data.frame(rbind(df_ref,df_lbg))
df_lbg_res$title <- with(df_lbg_res, reorder(title,score))

  1. Graph dataframe
#Raw
gr <- ggplot(df_raw,aes(y=title,x=v_raw))
gr_res<- gr + geom_point() + ylab("") + xlab("")+theme_light()
#All raw
gr_ra <- ggplot(df_raw_a,aes(y=title,x=pr_raw_a))
gr_ra_res <- gr_ra + geom_point() + ylab("") + xlab("")+theme_light()
#LBG
gr_lbg <- ggplot(df_lbg_res,aes(score,title,color = g)) +
  geom_point()+
  geom_errorbarh(aes(xmin=lb,xmax=ub,height = .1)) +
  xlab("")+
  ylab("")+
  theme_light()+
  theme(legend.position="none")+
  scale_color_manual(values=c("navy", "black"))
#wordfish
gr_wf <- ggplot(df_wf,aes(y=title,x=v_wf))
gr_wf_res <- gr_wf + geom_point() +  ylab("") + xlab("")+theme_light()

  1. Raw wordscores(with reference) versus Wordfish
plot_grid(gr_res, gr_wf_res,labels = c('Raw Wordscores', 'Wordfish'))


  1. ALL wordscores versus Wordfish
plot_grid(gr_ra_res, gr_wf_res,labels = c('All Raw Wordscores', 'Wordfish'))


  1. LBG transformed wordscores(with reference) versus Wordfish
plot_grid(gr_lbg,gr_wf_res,labels = c('LBG Wordscores', 'Wordfish'))


LS0tCnRpdGxlOiAiSG9tZSBBc3NpZ25tZW50IDIiCm91dHB1dDogaHRtbF9ub3RlYm9vawphdXRob3I6IFllbiBDaGVuZyBIc3VhbgotLS0KIyMjI1N0dWRlbnQgSUQ6IDFBMTgyOTAxLTIKKioqCgo+MS4gU2V0IHVwIGVudmlyb25tZW50CgpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0Kcm0obGlzdD1scyhhbGw9VFJVRSkpCnNldHdkKCJ+L0Rlc2t0b3AvUi9wb2xpbWV0cmljcyIpCgpsaWJyYXJ5KFJDb2xvckJyZXdlcikKbGlicmFyeShyZWFkdGV4dCkKbGlicmFyeShxdWFudGVkYSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGNvd3Bsb3QpCgpzZXR3ZCgifi9EZXNrdG9wL3IvcG9saW1ldHJpY3MvSUUiKQpgYGAKKioqCj4yLiBSZWFkIGZpbGUKCmBgYHtyLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpteVRleHQgPC0gcmVhZHRleHQoIn4vRGVza3RvcC9yL3BvbGltZXRyaWNzL0lFLyoudHh0IiwKICAgICAgICAgICAgICAgICAgIGRvY3ZhcnNmcm9tID0gImZpbGVuYW1lcyIsIGR2c2VwID0gIiAiLCBkb2N2YXJuYW1lcyA9IGMoIlllYXIiLCAiUGFydHkiKSkKdGVzdENvcnB1cyA8LSBjb3JwdXMobXlUZXh0KQp0ZXN0Q29ycHVzIDwtIGNvcnB1cyhteVRleHQsIGRvY2lkX2ZpZWxkID0gImRvY19pZCIpCmRvY25hbWVzKHRlc3RDb3JwdXMpIDwtIGdzdWIoIi50eHQiLCAiIiwgZG9jbmFtZXModGVzdENvcnB1cykpCgpteURmbSA8LSBkZm0odGVzdENvcnB1cyAsIHJlbW92ZSA9IHN0b3B3b3JkcygiZW5nbGlzaCIpLCB0b2xvd2VyID0gVFJVRSwgc3RlbSA9IFRSVUUsCiAgICAgICAgICAgICByZW1vdmVfcHVuY3QgPSBUUlVFLCByZW1vdmVfbnVtYmVycz1UUlVFKQpgYGAKKioqCj4zLiBXb3JkZmlzaAoKYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CndmbSA8LSB0ZXh0bW9kZWxfd29yZGZpc2gobXlEZm0sIGRpciA9IGMoMSwgMykpCnZfd2YgPC0gd2ZtJHRoZXRhCmRmX3dmIDwtIGFzLmRhdGEuZnJhbWUodl93ZikKZGZfd2YkbWluIDwtIHdmbSR0aGV0YS13Zm0kc2UudGhldGEKZGZfd2YkbWF4IDwtIHdmbSR0aGV0YSt3Zm0kc2UudGhldGEKCgpkZl93ZiR0aXRsZSA8LSBnc3ViKCIxOSIsIiIsd2ZtJGRvY3MpCmRmX3dmJHRpdGxlIDwtIHdpdGgoZGZfd2YscmVvcmRlcih0aXRsZSx2X3dmKSkKYGBgCioqKgo+NC4gV29yZHNjb3JlCgpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0Kd3MgPC0gdGV4dG1vZGVsX3dvcmRzY29yZXMobXlEZm0sIGMoNC41LDEzLjEzLDE1LDYuODgsMTcuNjMscmVwKE5BLDUpKSkKI1ByZWRpY3Qgd29yZHNjb3JlCnByX3JhdyA8LSBwcmVkaWN0KHdzLCBuZXdkYXRhID0gbXlEZm1bYyg2LDcsOCw5LDEwKSwgXSkKcHJfcmF3X2EgPC0gcHJlZGljdCh3cywgbmV3ZGF0YSA9IG15RGZtW2MoMToxMCksIF0pCnByX2xiZyA8LSBwcmVkaWN0KHdzLCByZXNjYWxpbmcgPSAibGJnIiwgbmV3ZGF0YSA9IG15RGZtW2MoNiw3LDgsOSwxMCksIF0sIGludGVydmFsID0gImNvbmZpZGVuY2UiKQojUmF3IGRhdGFmcmFtZQp2X3JhdyA8LSBjKGMoNC41LDEzLjEzLDE1LDYuODgsMTcuNjMpLHByX3JhdykKZGZfcmF3IDwtIGFzLmRhdGEuZnJhbWUodl9yYXcscm93Lm5hbWVzID0gZG9jbmFtZXMobXlEZm0pKQpkZl9yYXckdGl0bGUgPC0gZ3N1YigiMTkiLCIiLHJvd25hbWVzKGRmX3JhdykpCmRmX3JhdyR0aXRsZSA8LSB3aXRoKGRmX3JhdyxyZW9yZGVyKHRpdGxlLHZfcmF3KSkKI0FsbCByYXcgZGF0YWZyYW1lCmRmX3Jhd19hIDwtIGFzLmRhdGEuZnJhbWUocHJfcmF3X2EpCmRmX3Jhd19hJHRpdGxlIDwtIGdzdWIoIjE5IiwiIixyb3duYW1lcyhkZl9yYXdfYSkpCmRmX3Jhd19hJHRpdGxlIDwtIHdpdGgoZGZfcmF3X2EscmVvcmRlcih0aXRsZSxwcl9yYXdfYSkpCiNMQkcgZGF0YWZyYW1lCmRmX2xiZyA8LSBhcy5kYXRhLmZyYW1lKHByX2xiZykKZGZfbGJnJHRpdGxlIDwtIGdzdWIoIjE5IiwiIixyb3duYW1lcyhkZl9sYmcpKQpjb2xuYW1lcyhkZl9sYmcpIDwtIGMoInNjb3JlIiwibGIiLCJ1YiIsInRpdGxlIikKZGZfbGJnJGcgPC0gInByIgpkZl9yZWYgPC0gZGF0YS5mcmFtZSgKICB2YWx1ZTEgPSBjKDQuNSwxMy4xMywxNSw2Ljg4LDE3LjYzKSwKICB2YWx1ZTIgPSBjKDQuNSwxMy4xMywxNSw2Ljg4LDE3LjYzKSwKICB2YWx1ZTMgPSBjKDQuNSwxMy4xMywxNSw2Ljg4LDE3LjYzKQopCnJvd25hbWVzKGRmX3JlZikgPC0gZ3N1YigiLnR4dCIsIiIsbXlUZXh0JGRvY19pZFsxOjVdKQpkZl9yZWYkdGl0bGUgPC0gZ3N1YigiMTkiLCIiLHJvd25hbWVzKGRmX3JlZikpCmNvbG5hbWVzKGRmX3JlZikgPC0gYygic2NvcmUiLCJsYiIsInViIiwidGl0bGUiKQpkZl9yZWYkZyA8LSAicmVmIgpkZl9sYmdfcmVzIDwtIGFzLmRhdGEuZnJhbWUocmJpbmQoZGZfcmVmLGRmX2xiZykpCmRmX2xiZ19yZXMkdGl0bGUgPC0gd2l0aChkZl9sYmdfcmVzLCByZW9yZGVyKHRpdGxlLHNjb3JlKSkKYGBgCioqKgo+NS4gR3JhcGggZGF0YWZyYW1lCgpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KI1JhdwpnciA8LSBnZ3Bsb3QoZGZfcmF3LGFlcyh5PXRpdGxlLHg9dl9yYXcpKQpncl9yZXM8LSBnciArIGdlb21fcG9pbnQoKSArIHlsYWIoIiIpICsgeGxhYigiIikrdGhlbWVfbGlnaHQoKQojQWxsIHJhdwpncl9yYSA8LSBnZ3Bsb3QoZGZfcmF3X2EsYWVzKHk9dGl0bGUseD1wcl9yYXdfYSkpCmdyX3JhX3JlcyA8LSBncl9yYSArIGdlb21fcG9pbnQoKSArIHlsYWIoIiIpICsgeGxhYigiIikrdGhlbWVfbGlnaHQoKQojTEJHCmdyX2xiZyA8LSBnZ3Bsb3QoZGZfbGJnX3JlcyxhZXMoc2NvcmUsdGl0bGUsY29sb3IgPSBnKSkgKwogIGdlb21fcG9pbnQoKSsKICBnZW9tX2Vycm9yYmFyaChhZXMoeG1pbj1sYix4bWF4PXViLGhlaWdodCA9IC4xKSkgKwogIHhsYWIoIiIpKwogIHlsYWIoIiIpKwogIHRoZW1lX2xpZ2h0KCkrCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikrCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jKCJuYXZ5IiwgImJsYWNrIikpCiN3b3JkZmlzaApncl93ZiA8LSBnZ3Bsb3QoZGZfd2YsYWVzKHk9dGl0bGUseD12X3dmKSkKZ3Jfd2ZfcmVzIDwtIGdyX3dmICsgZ2VvbV9wb2ludCgpICsgIHlsYWIoIiIpICsgeGxhYigiIikrdGhlbWVfbGlnaHQoKQpgYGAKKioqCj42LiBSYXcgd29yZHNjb3Jlcyh3aXRoIHJlZmVyZW5jZSkgdmVyc3VzIFdvcmRmaXNoCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KcGxvdF9ncmlkKGdyX3JlcywgZ3Jfd2ZfcmVzLGxhYmVscyA9IGMoJ1JhdyBXb3Jkc2NvcmVzJywgJ1dvcmRmaXNoJykpCmBgYAoqIFRoZSByZXN1bHQgb2YgcmF3IHNjb3JlcyB3aXRob3V0IGFueSB0cmFuc2Zvcm1hdGlvbiBjb3VsZCBub3QgZGlzcGxheSB0aGUgdHJlbmRzIG9mIHBhcnRpZXMgYmV0d2VlbiAxOTkyIGFuZCAxOTk3LiBUaGUgZm9sbG93aW5nIHR3byBncmFwaHMgYXJlIHR3byBraW5kcyBvZiB0cmFuc2Zvcm1hdGlvbiB2ZXJzdXMgd29yZGZpc2ggcmVzdWx0LgoKKioqCj4gNy4gQUxMIHdvcmRzY29yZXMgdmVyc3VzIFdvcmRmaXNoCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KcGxvdF9ncmlkKGdyX3JhX3JlcywgZ3Jfd2ZfcmVzLGxhYmVscyA9IGMoJ0FsbCBSYXcgV29yZHNjb3JlcycsICdXb3JkZmlzaCcpKQpgYGAKKiBUaGUgcHJlZGljdGlvbiBvZiBhbGwgZG9jdW1lbnRzIHdpdGggcmVmZXJlbmNlIHRleHQgc3RpbGwgcmVtYWluZWQgdGhlIHNhbWUgdHJlbmRzIHRvIHRoZSBwcmV2aW91cyBncmFwaC4gVGhlIExCRyB0cmFuc2Zvcm1hdGlvbiBtYXkgaGF2ZSBmdXJ0aGVyIGluZm9ybWF0aW9uLgoKKioqCj44LiBMQkcgdHJhbnNmb3JtZWQgd29yZHNjb3Jlcyh3aXRoIHJlZmVyZW5jZSkgdmVyc3VzIFdvcmRmaXNoCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KcGxvdF9ncmlkKGdyX2xiZyxncl93Zl9yZXMsbGFiZWxzID0gYygnTEJHIFdvcmRzY29yZXMnLCAnV29yZGZpc2gnKSkKYGBgCgoqIEFjY29yZGluZyB0byB0aGUgV2lraXBlZGlhLCB0aGUgY29hbGl0aW9uIGNoYW5nZWQgc2luY2UgdGhlIGJyZWFrZG93biBvZiBGRi1MYWIgZ292ZXJubWVudCBpbiAxOTk0LiBJbiAxOTk3J3MgZWxlY3Rpb24sIHR3byBjb2FsaXRpb25zLCBGRi1QRHMgYW5kIERMLUxhYi1GRywgd2VyZSBjb21wZXRpbmcgZm9yIHRoZSBtYWpvcml0eS4gVGhpcyBjb21wZXRpdGlvbiBvZiBnb3Zlcm5tZW50IHdhcyBub3QgcmV2ZWFsZWQgYnkgdGhlIHNvY2lhbCBsZWZ0LXJpZ2h0IHNjYWxlLgoKKiBIb3dldmVyLCBpbiB0aGUgcmlnaHQgcGFydCwgdGhlIHJlc3VsdCBvZiB3b3JkZmlzaCBoYWQgc29tZSBwYXR0ZXJucy4gRkcgYW5kIExhYiB3ZXJlIHRoZSB0d28gdGhhdCBjaGFuZ2VkIGEgbG90IGluIHRoZSByZWxhdGl2ZSBwb3NpdGlvbnMsIHRoZXkgd2VyZSBhbHNvIHRoZSBtZW1iZXJzIG9mIHRoZSBuZXcgY29hbGl0aW9uLiBNb3Jlb3ZlciwgdGhlIGNvYWxpdGlvbiBvZiBETC1MYWItRkcgKFJhaW5ib3cgY29hbGl0aW9uKSBpbiAxOTk3IHNoYXJlZCBjbG9zZSByZWxhdGl2ZSBwb3NpdGlvbnMuCgoqIEluIGNvbmNsdXNpb24sIHRoZSB3b3JkZmlzaCByZXN1bHQgcmV2ZWFsZWQgbW9yZSBkeW5hbWljcyBvZiBwb2xpdGljYWwgY29tcGV0aXRpb24gYW5kIGNvYWxpdGlvbi4gT25lIHBvc3NpYmxlIGFyZ3VtZW50IGNvdWxkIGJlIHRoYXQgdGhlIHdvcmRmaXNoIHJldmVhbGVkIHRoZSBnZW5lcmFsLCBtdWx0aXBsZSBzY2FsZXMgb2YgdGhlIGNvbnRleHQsIHdoaWNoIGNvcnJlc3BvbmRlZCBiZXR0ZXIgdG8gdGhlIHJlbGF0aW9uc2hpcCBvZiBwYXJ0aWVzIGluIGVsZWN0aW9ucy4KCioqKgpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgaW5jbHVkZT1GQUxTRX0Kc2V0d2QoIn4vRGVza3RvcC9SL3BvbGltZXRyaWNzIikKYGBgCgo=