Student ID: 1A182901-2


  1. Set up environment
# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this analysis.
rm(list = ls(all.names = TRUE))

# The project folder is specific to the author's machine. Only switch to it
# when it exists, so the script does not abort on other machines.
project_dir <- "~/Desktop/R/polimetrics"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Packages attached for the rest of the script.
# NOTE(review): DT is attached but not called anywhere in the visible code.
library(manifestoR)
library(quanteda)
library(ggplot2)
library(DT)
library(cowplot)
  2. Download Data
# Download Data
# NOTE(review): the API key is embedded in the source; consider loading it
# from a key file or an environment variable instead.
mp_setapikey(key = "e51dc314a21ce75bd8221fcb338e2ee5", key.file = NULL)
mp_use_corpus_version("2017-2")

# Main dataset, plus an extra column holding vanilla() computed with
# invert = TRUE.
cmp <- mp_maindataset()
cmp$vanilla_i <- vanilla(cmp, invert = TRUE)

# Manifesto documents for South Africa dated 201405, converted with corpus().
sa <- mp_corpus(countryname == "South Africa" & date == 201405)
quanteda_sa <- corpus(sa)
  3. Construct dfm
# Build the document-feature matrix: lower-case, stem, and drop punctuation,
# numbers and English stopwords.
dfm_sa <- dfm(
  quanteda_sa,
  tolower = TRUE,
  stem = TRUE,
  remove = stopwords("english"),
  remove_punct = TRUE,
  remove_numbers = TRUE
)

# Group the matrix by the manifesto_id document variable.
dfm_sa2 <- dfm_group(dfm_sa, groups = "manifesto_id")
  4. 5 most frequent words
# Five most frequent features within each manifesto_id group.
ffc <- textstat_frequency(dfm_sa2, n = 5, groups = "manifesto_id")

# CMP rows for the same election (South Africa, date 201405).
sa_cmp <- cmp[which(cmp$countryname == "South Africa" & cmp$date == 201405), ]

# Label each group with its party name. Base-R assignment is used because
# dplyr (the source of mutate()) is never attached by this script.
# NOTE(review): assumes the rows of sa_cmp are in the same order as the sorted
# manifesto ids -- confirm.
ffc$manifesto_id <- factor(ffc$group, labels = sa_cmp$partyname)

# Prefix every feature with its zero-padded row index so that a word appearing
# in several facets still gets a unique axis key. The index follows the actual
# number of rows instead of assuming exactly 25.
ffc$name <- sprintf("%03d%s", seq_len(nrow(ffc)), ffc$feature)

ggplot(
  ffc,
  aes(x = reorder(name, frequency), y = frequency, fill = manifesto_id)
) +
  geom_col(width = 0.8, show.legend = FALSE) +
  theme_bw() +
  labs(x = NULL, y = "5 most frequent words for each party") +
  facet_wrap(vars(manifesto_id), ncol = 2, scales = "free") +
  scale_fill_brewer(palette = "Set1") +
  # Show the bare feature on the axis instead of the prefixed key.
  scale_x_discrete(labels = setNames(ffc$feature, ffc$name)) +
  coord_flip()

  5. Wordfish
# Fit Wordfish on the grouped dfm.
# NOTE(review): dir = c(2, 3) refers to documents by row index -- confirm
# which parties rows 2 and 3 are.
wfmsa <- textmodel_wordfish(dfm_sa2, dir = c(2, 3))

# Estimated positions with a +/- one standard error band and the party name.
v_wf <- wfmsa$theta
df_wf <- as.data.frame(v_wf)
df_wf$min <- v_wf - wfmsa$se.theta
df_wf$max <- v_wf + wfmsa$se.theta
df_wf$title <- sa_cmp$partyname

## Graph: one point per party, ordered by estimated position.
gr_wf <- ggplot(
  df_wf,
  aes(x = v_wf, y = reorder(title, v_wf), color = title)
)
gr_wf_res <- gr_wf +
  geom_point(size = 3) +
  labs(x = "", y = "") +
  theme_light() +
  theme(legend.position = "none")
gr_wf_res

  6. Wordscore
# Fit Wordscores: rows 2 and 3 of the dfm carry the reference scores 5.2 and
# 10.5; the remaining rows are passed as NA.
ws <- textmodel_wordscores(dfm_sa2, y = c(NA, 5.2, 10.5, NA, NA))

# Predict a score for every manifesto, including the reference rows.
pr_raw <- predict(ws, newdata = dfm_sa2)
df_raw <- as.data.frame(pr_raw)
df_raw$title <- sa_cmp$partyname

## Graph: one point per party, ordered by predicted score.
gr_ws <- ggplot(
  df_raw,
  aes(x = pr_raw, y = reorder(title, pr_raw), color = title)
)
gr_ws_res <- gr_ws +
  geom_point(size = 3) +
  labs(x = "", y = "") +
  theme_light() +
  theme(legend.position = "none")
gr_ws_res

  7. CMP data
# CMP category columns summed into the "right" and "left" totals.
right_codes <- c(
  "per104", "per201", "per203", "per305", "per401", "per402", "per407",
  "per414", "per505", "per601", "per603", "per605", "per606"
)
left_codes <- c(
  "per103", "per105", "per106", "per107", "per202", "per403", "per404",
  "per406", "per412", "per413", "per504", "per506", "per701"
)

# Element-wise sums, added left to right in the order listed above.
sa_cmp$right <- Reduce("+", sa_cmp[right_codes])
sa_cmp$left <- Reduce("+", sa_cmp[left_codes])

# RILE is the difference right - left; RATIO divides that difference by
# right + left.
sa_cmp$RILE <- sa_cmp$right - sa_cmp$left
sa_cmp$RATIO <- sa_cmp$RILE / (sa_cmp$right + sa_cmp$left)

# LOGIT comes from manifestoR's logit_rile scaling function via mp_scale().
sa_cmp$LOGIT <- mp_scale(sa_cmp, scalingfun = logit_rile)
  8. Graph CMP result
# One row per party with the four CMP-based scale values.
df_sa <- data.frame(
  party = sa_cmp$partyname,
  rile = sa_cmp$RILE,
  ratio = sa_cmp$RATIO,
  logit = sa_cmp$LOGIT,
  vanilla_i = sa_cmp$vanilla_i
)

# One dot plot per scale. Parties are ordered by their score; the y-axis text
# and the legend are hidden, so colour alone identifies the party.
gr_rl <- ggplot(df_sa, aes(x = rile, y = reorder(party, rile), color = party)) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(y = "")

# Same RILE plot with the legend kept; it is built only so that the legend
# can be extracted below.
gr_rl_l <- ggplot(
  df_sa,
  aes(x = rile, y = reorder(party, rile), color = party)
) +
  geom_point() +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_text(size = 14)
  ) +
  labs(y = "")

gr_ra <- ggplot(
  df_sa,
  aes(x = ratio, y = reorder(party, ratio), color = party)
) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(y = "")

gr_lo <- ggplot(
  df_sa,
  aes(x = logit, y = reorder(party, logit), color = party)
) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(y = "")

gr_va <- ggplot(
  df_sa,
  aes(x = vanilla_i, y = reorder(party, vanilla_i), color = party)
) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(x = "vanilla(invert)", y = "")

# Stack the four panels, labelled with the scale column names, and put the
# shared legend to their right.
gr_res <- plot_grid(
  gr_rl, gr_ra, gr_lo, gr_va,
  labels = names(df_sa)[2:5],
  label_size = 12,
  nrow = 4,
  ncol = 1
)
legend <- get_legend(gr_rl_l)
gr_res2 <- plot_grid(gr_res, legend, rel_widths = c(1, 0.8))
gr_res2

  9. Combine Wordfish, Wordscore, and CMP
# Hide the y-axis ticks and text of the Wordfish and Wordscore plots so they
# match the CMP panels.
gr_wf_res2 <- gr_wf_res +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())
gr_ws_res2 <- gr_ws_res +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

# Right column: Wordfish, Wordscore and the shared legend. It is placed next
# to the stacked CMP panels.
gr_right <- plot_grid(
  gr_wf_res2, gr_ws_res2, legend,
  labels = c("Wordfish", "Wordscore", ""),
  label_size = 12,
  nrow = 3
)
gr_com <- plot_grid(gr_res, gr_right)
gr_com

  10. Discussion

---
title: "Home Assignment 3"
output: html_notebook
author: Yen Cheng Hsuan
---
#### Student ID: 1A182901-2
***

>1. Set up environment

```{r, message=FALSE, warning=FALSE}
# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this analysis.
rm(list = ls(all.names = TRUE))

# The project folder is specific to the author's machine. Only switch to it
# when it exists, so the notebook does not abort on other machines.
project_dir <- "~/Desktop/R/polimetrics"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Packages attached for the rest of the notebook.
# NOTE(review): DT is attached but not called anywhere in the visible code.
library(manifestoR)
library(quanteda)
library(ggplot2)
library(DT)
library(cowplot)
```
>2. Download Data

```{r, echo=TRUE, message=FALSE, warning=FALSE}
# Download Data
# NOTE(review): the API key is embedded in the source; consider loading it
# from a key file or an environment variable instead.
mp_setapikey(key = "e51dc314a21ce75bd8221fcb338e2ee5", key.file = NULL)
mp_use_corpus_version("2017-2")

# Main dataset, plus an extra column holding vanilla() computed with
# invert = TRUE.
cmp <- mp_maindataset()
cmp$vanilla_i <- vanilla(cmp, invert = TRUE)

# Manifesto documents for South Africa dated 201405, converted with corpus().
sa <- mp_corpus(countryname == "South Africa" & date == 201405)
quanteda_sa <- corpus(sa)
```
>3. Construct dfm

```{r, warning=FALSE}
# Build the document-feature matrix: lower-case, stem, and drop punctuation,
# numbers and English stopwords.
dfm_sa <- dfm(
  quanteda_sa,
  tolower = TRUE,
  stem = TRUE,
  remove = stopwords("english"),
  remove_punct = TRUE,
  remove_numbers = TRUE
)

# Group the matrix by the manifesto_id document variable.
dfm_sa2 <- dfm_group(dfm_sa, groups = "manifesto_id")
```
>4. 5 most frequent words

```{r, warning=FALSE}
# Five most frequent features within each manifesto_id group.
ffc <- textstat_frequency(dfm_sa2, n = 5, groups = "manifesto_id")

# CMP rows for the same election (South Africa, date 201405).
sa_cmp <- cmp[which(cmp$countryname == "South Africa" & cmp$date == 201405), ]

# Label each group with its party name. Base-R assignment is used because
# dplyr (the source of mutate()) is never attached by this notebook.
# NOTE(review): assumes the rows of sa_cmp are in the same order as the sorted
# manifesto ids -- confirm.
ffc$manifesto_id <- factor(ffc$group, labels = sa_cmp$partyname)

# Prefix every feature with its zero-padded row index so that a word appearing
# in several facets still gets a unique axis key. The index follows the actual
# number of rows instead of assuming exactly 25.
ffc$name <- sprintf("%03d%s", seq_len(nrow(ffc)), ffc$feature)

ggplot(
  ffc,
  aes(x = reorder(name, frequency), y = frequency, fill = manifesto_id)
) +
  geom_col(width = 0.8, show.legend = FALSE) +
  theme_bw() +
  labs(x = NULL, y = "5 most frequent words for each party") +
  facet_wrap(vars(manifesto_id), ncol = 2, scales = "free") +
  scale_fill_brewer(palette = "Set1") +
  # Show the bare feature on the axis instead of the prefixed key.
  scale_x_discrete(labels = setNames(ffc$feature, ffc$name)) +
  coord_flip()
```
>5. Wordfish

```{r, warning=FALSE}
# Fit Wordfish on the grouped dfm.
# NOTE(review): dir = c(2, 3) refers to documents by row index -- confirm
# which parties rows 2 and 3 are.
wfmsa <- textmodel_wordfish(dfm_sa2, dir = c(2, 3))

# Estimated positions with a +/- one standard error band and the party name.
v_wf <- wfmsa$theta
df_wf <- as.data.frame(v_wf)
df_wf$min <- v_wf - wfmsa$se.theta
df_wf$max <- v_wf + wfmsa$se.theta
df_wf$title <- sa_cmp$partyname

## Graph: one point per party, ordered by estimated position.
gr_wf <- ggplot(
  df_wf,
  aes(x = v_wf, y = reorder(title, v_wf), color = title)
)
gr_wf_res <- gr_wf +
  geom_point(size = 3) +
  labs(x = "", y = "") +
  theme_light() +
  theme(legend.position = "none")
gr_wf_res
```
>6. Wordscore

```{r, message=FALSE, warning=FALSE}
# Fit Wordscores: rows 2 and 3 of the dfm carry the reference scores 5.2 and
# 10.5; the remaining rows are passed as NA.
ws <- textmodel_wordscores(dfm_sa2, y = c(NA, 5.2, 10.5, NA, NA))

# Predict a score for every manifesto, including the reference rows.
pr_raw <- predict(ws, newdata = dfm_sa2)
df_raw <- as.data.frame(pr_raw)
df_raw$title <- sa_cmp$partyname

## Graph: one point per party, ordered by predicted score.
gr_ws <- ggplot(
  df_raw,
  aes(x = pr_raw, y = reorder(title, pr_raw), color = title)
)
gr_ws_res <- gr_ws +
  geom_point(size = 3) +
  labs(x = "", y = "") +
  theme_light() +
  theme(legend.position = "none")
gr_ws_res
```
>7. CMP data

```{r, message=FALSE, warning=FALSE}
# CMP category columns summed into the "right" and "left" totals.
right_codes <- c(
  "per104", "per201", "per203", "per305", "per401", "per402", "per407",
  "per414", "per505", "per601", "per603", "per605", "per606"
)
left_codes <- c(
  "per103", "per105", "per106", "per107", "per202", "per403", "per404",
  "per406", "per412", "per413", "per504", "per506", "per701"
)

# Element-wise sums, added left to right in the order listed above.
sa_cmp$right <- Reduce("+", sa_cmp[right_codes])
sa_cmp$left <- Reduce("+", sa_cmp[left_codes])

# RILE is the difference right - left; RATIO divides that difference by
# right + left.
sa_cmp$RILE <- sa_cmp$right - sa_cmp$left
sa_cmp$RATIO <- sa_cmp$RILE / (sa_cmp$right + sa_cmp$left)

# LOGIT comes from manifestoR's logit_rile scaling function via mp_scale().
sa_cmp$LOGIT <- mp_scale(sa_cmp, scalingfun = logit_rile)
```
>8. Graph CMP result

```{r}
# One row per party with the four CMP-based scale values.
df_sa <- data.frame(
  party = sa_cmp$partyname,
  rile = sa_cmp$RILE,
  ratio = sa_cmp$RATIO,
  logit = sa_cmp$LOGIT,
  vanilla_i = sa_cmp$vanilla_i
)

# One dot plot per scale. Parties are ordered by their score; the y-axis text
# and the legend are hidden, so colour alone identifies the party.
gr_rl <- ggplot(df_sa, aes(x = rile, y = reorder(party, rile), color = party)) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(y = "")

# Same RILE plot with the legend kept; it is built only so that the legend
# can be extracted below.
gr_rl_l <- ggplot(
  df_sa,
  aes(x = rile, y = reorder(party, rile), color = party)
) +
  geom_point() +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_text(size = 14)
  ) +
  labs(y = "")

gr_ra <- ggplot(
  df_sa,
  aes(x = ratio, y = reorder(party, ratio), color = party)
) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(y = "")

gr_lo <- ggplot(
  df_sa,
  aes(x = logit, y = reorder(party, logit), color = party)
) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(y = "")

gr_va <- ggplot(
  df_sa,
  aes(x = vanilla_i, y = reorder(party, vanilla_i), color = party)
) +
  geom_point(size = 3) +
  theme_gray() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(x = "vanilla(invert)", y = "")

# Stack the four panels, labelled with the scale column names, and put the
# shared legend to their right.
gr_res <- plot_grid(
  gr_rl, gr_ra, gr_lo, gr_va,
  labels = names(df_sa)[2:5],
  label_size = 12,
  nrow = 4,
  ncol = 1
)
legend <- get_legend(gr_rl_l)
gr_res2 <- plot_grid(gr_res, legend, rel_widths = c(1, 0.8))
gr_res2
```
>9. Combine Wordfish, Wordscore, and CMP

```{r}
# Hide the y-axis ticks and text of the Wordfish and Wordscore plots so they
# match the CMP panels.
gr_wf_res2 <- gr_wf_res +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())
gr_ws_res2 <- gr_ws_res +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

# Right column: Wordfish, Wordscore and the shared legend. It is placed next
# to the stacked CMP panels.
gr_right <- plot_grid(
  gr_wf_res2, gr_ws_res2, legend,
  labels = c("Wordfish", "Wordscore", ""),
  label_size = 12,
  nrow = 3
)
gr_com <- plot_grid(gr_res, gr_right)
gr_com
```

>10. Discussion

* From the graphs of the Wordfish, Wordscore, and CMP results for South Africa in 2014, we can draw several conclusions.
    + Although the order sometimes changed, the African National Congress (red), Congress of the People (brown), and Economic Freedom Fighters (blue) were on the left in every result, compared to the Democratic Alliance (green) and the Inkatha Freedom Party (pink).
    + In the left part of the graph,
        1. The divergence between the RILE and RATIO results reflects the effect of changing the analytical tool.
        2. The vanilla (inverted) result revealed a distinct pattern, which means that those manifestos contain more than a left-right scale.
    + Comparing CMP with Wordfish and Wordscore,
        1. The RATIO and LOGIT results were similar to the Wordscore result. These three left-right scale analyses showed some robustness, regardless of whether they used coded sentences or whole manifestos.
        2. The two broad methods, vanilla and Wordfish, not only produced patterns that differ from each other, but also gave outcomes distinct from the other left-right analyses. Further implications or discussion would require more information, such as political coalitions or seat shares, or analyses on other scales.


***