SETTING UP
RESEARCH QUESTION 1: How prevalent and stable is the topic of immigration in terms of perceived issue salience during elections?
RESEARCH QUESTION 2: How is immigration discussed in each election period relative to other salient topics (i.e., the economy, healthcare, governance), and which topics were most associated with mentions of immigration? Does this change during each election period?
RESEARCH QUESTION 3: Who is more likely to mention immigration as an important issue, and is there any significance in terms of who mentions immigration with certain topics?
- Plotting proportions of Canadian and foreign-born respondents in the general corpus, and the corpus with only immigration responses
- Plotting the distribution of topic mentions within the general corpus and the immigration corpus, by immigrant status and left/right voters

SETTING UP

Loading Relevant Packages.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidylog)

## 
## Attaching package: 'tidylog'
## 
## The following objects are masked from 'package:dplyr':
## 
##     add_count, add_tally, anti_join, count, distinct, distinct_all,
##     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
##     full_join, group_by, group_by_all, group_by_at, group_by_if,
##     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
##     relocate, rename, rename_all, rename_at, rename_if, rename_with,
##     right_join, sample_frac, sample_n, select, select_all, select_at,
##     select_if, semi_join, slice, slice_head, slice_max, slice_min,
##     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
##     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
##     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
##     transmute_if, ungroup
## 
## The following objects are masked from 'package:tidyr':
## 
##     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
##     spread, uncount
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(tidytext) 
library(textdata)
library(quanteda)

## Package version: 3.3.1
## Unicode version: 14.0
## ICU version: 70.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.

library(quanteda.textstats)
library(quanteda.dictionaries)
library(wesanderson) 
library(knitr)
library(dplyr)
library(haven)
library(ggplot2)
library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp

library(tm)

## Loading required package: NLP
## 
## Attaching package: 'NLP'
## 
## The following objects are masked from 'package:quanteda':
## 
##     meta, meta<-
## 
## The following object is masked from 'package:ggplot2':
## 
##     annotate
## 
## 
## Attaching package: 'tm'
## 
## The following object is masked from 'package:quanteda':
## 
##     stopwords

library(stringr)
library(slam)
library(vader)
library(stringi)
library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

Loading the 2015, 2019, and 2021 CES datasets, and preparing the data.

# Read the three raw survey files from the working directory.
# The 2015 study ships as a CSV of string labels; 2019 and 2021 are Stata files.
ces15 <- read.csv(file = "CES2015_Combined_CSVstrings.csv")
ces19 <- read_dta(file = "2019 Canadian Election Study - Online Survey v1.0.dta")
ces21 <- read_dta(file = "2021 Canadian Election Study v2.0.dta")


###### Preparing the data ######

# Subsetting the 2021 data to the variables of interest (public opinion,
# socio-demographics, political/ideological placement, etc.) and keeping only
# respondents whose survey language was English.
# The names are listed in the order the columns should appear in the subset.
ces21_keep <- c(
  "cps21_imp_iss", "cps21_imp_loc_iss", "cps21_age", "cps21_genderid",
  "cps21_province", "cps21_education", "cps21_demsat", "pes21_service_freq",
  "pes21_rural_urban", "cps21_fed_id", "pes21_fitin", "pes21_immigjobs",
  "cps21_imm", "UserLanguage", "cps21_bornin_canada", "cps21_bornin_other",
  "cps21_imm_year", "cps21_econ_retro", "cps21_own_fin_retro",
  "cps21_fed_gov_sat", "cps21_employment", "cps21_income_cat",
  paste0("cps21_vismin_", 1:12), # cps21_vismin_1 ... cps21_vismin_12
  "pes21_pidtrad", "cps21_language_1", "cps21_language_2",
  "cps21_immig_status"
)

ces21_subset <- ces21 %>%
  select(all_of(ces21_keep)) %>%
  filter(UserLanguage == "EN")

## select: dropped 1,021 variables (cps21_StartDate, cps21_EndDate, Duration__in_seconds_, RecordedDate, cps21_ResponseId, …)

## filter: removed 5,517 rows (26%), 15,451 rows remaining

# Subsetting the 2019 data to the variables of interest (same groups of
# variables as for 2021) and keeping only English responses.
#
# The open-ended text analysed later (cps19_imp_iss) comes from the campaign
# period survey (CPS), so the language filter uses the CPS language only.
# Filtering on "CPS or PES in English" would also keep respondents who took the
# CPS in French and only the post-election survey in English, which would put
# their French answers into the English corpus.
# pes19_Q_Language is still selected so the column remains available.
ces19_subset <- ces19 %>%
  select(c(cps19_imp_iss, cps19_imp_loc_iss, cps19_age, cps19_gender, cps19_province,
           cps19_education, cps19_demsat, pes19_service_freq, pes19_rural_urban, cps19_fed_id,
           pes19_fitin, pes19_immigjobs, cps19_imm, pes19_Q_Language, cps19_Q_Language,
           cps19_bornin_canada, cps19_bornin_other, cps19_imm_year, cps19_econ_retro, cps19_own_fin_retro,
           cps19_fed_gov_sat, cps19_employment, cps19_income_cat,
           cps19_ethnicity_23, cps19_ethnicity_24, cps19_ethnicity_25, cps19_ethnicity_26, cps19_ethnicity_27,
           cps19_ethnicity_28, cps19_ethnicity_29, cps19_ethnicity_30, cps19_ethnicity_31,
           cps19_ethnicity_32, cps19_ethnicity_33, cps19_ethnicity_34, cps19_ethnicity_35, cps19_ethnicity_36,
           cps19_ethnicity_37, cps19_ethnicity_38,
           pes19_pidtrad, cps19_language_68, cps19_language_69)) %>%
  filter(cps19_Q_Language == "EN")

## select: dropped 578 variables (cps19_StartDate, cps19_EndDate, cps19_ResponseId, cps19_consent, cps19_citizenship, …)

## filter: removed 6,419 rows (17%), 31,403 rows remaining

# Turn the numeric cps21_bornin_canada codes into readable labels.
# Codes other than 1, 2 or 3 (and missing values) match no branch and stay NA.
ces21_subset <- mutate(
  ces21_subset,
  immigrant_status = case_when(
    cps21_bornin_canada == 1 ~ "Canadian-Born",
    cps21_bornin_canada == 2 ~ "Foreign-Born",
    cps21_bornin_canada == 3 ~ "DK/PN"
  )
)

## mutate: new variable 'immigrant_status' (character) with 3 unique values and 0% NA

# Recoding the federal party ID variables into one indicator per party.
# (This recoding follows Liddell Hasting's code.)
#
# Each indicator is:
#   1  if the respondent chose the party in cps21_fed_id or in pes21_pidtrad,
#   0  if both answers are present and neither is that party,
#   NA otherwise (one answer missing and the other not matching).
# as.numeric() applied to the logical OR gives exactly this 1 / 0 / NA coding.

ces21_subset <- mutate(
  ces21_subset,
  libsid = as.numeric(cps21_fed_id == 1 | pes21_pidtrad == 1)
)

## mutate: new variable 'libsid' (double) with 3 unique values and 19% NA

ces21_subset <- mutate(
  ces21_subset,
  consid = as.numeric(cps21_fed_id == 2 | pes21_pidtrad == 2)
)

## mutate: new variable 'consid' (double) with 3 unique values and 21% NA

ces21_subset <- mutate(
  ces21_subset,
  ndpid = as.numeric(cps21_fed_id == 3 | pes21_pidtrad == 3)
)

## mutate: new variable 'ndpid' (double) with 3 unique values and 22% NA

ces21_subset <- mutate(
  ces21_subset,
  bqid = as.numeric(cps21_fed_id == 4 | pes21_pidtrad == 4)
)

## mutate: new variable 'bqid' (double) with 3 unique values and 27% NA

ces21_subset <- mutate(
  ces21_subset,
  greenid = as.numeric(cps21_fed_id == 5 | pes21_pidtrad == 5)
)

## mutate: new variable 'greenid' (double) with 3 unique values and 27% NA

ces21_subset <- mutate(
  ces21_subset,
  ppcid = as.numeric(cps21_fed_id == 6 | pes21_pidtrad == 6)
)

## mutate: new variable 'ppcid' (double) with 3 unique values and 27% NA

# Collapse the six indicators into a single party label. case_when() uses the
# first branch that is TRUE, so a respondent flagged for more than one party
# gets the earliest label in this list; respondents with no indicator equal to
# 1 stay NA.
ces21_subset <- mutate(
  ces21_subset,
  fedparty = case_when(
    libsid == 1 ~ "LPC",
    consid == 1 ~ "CPC",
    ndpid == 1 ~ "NDP",
    bqid == 1 ~ "BQ",
    greenid == 1 ~ "GPC",
    ppcid == 1 ~ "PPC"
  )
)

## mutate: new variable 'fedparty' (character) with 7 unique values and 15% NA

### Repeating for 2019

# Turn the numeric cps19_bornin_canada codes into readable labels.
# Codes other than 1, 2 or 3 (and missing values) match no branch and stay NA.
ces19_subset <- mutate(
  ces19_subset,
  immigrant_status = case_when(
    cps19_bornin_canada == 1 ~ "Canadian-Born",
    cps19_bornin_canada == 2 ~ "Foreign-Born",
    cps19_bornin_canada == 3 ~ "DK/PN"
  )
)

## mutate: new variable 'immigrant_status' (character) with 3 unique values and 0% NA

# Recoding the federal party ID variables into one indicator per party.
#
# Each indicator is:
#   1  if the respondent chose the party in cps19_fed_id or in pes19_pidtrad,
#   0  if both answers are present and neither is that party,
#   NA otherwise (one answer missing and the other not matching).
# as.numeric() applied to the logical OR gives exactly this 1 / 0 / NA coding.
ces19_subset <- mutate(
  ces19_subset,
  libsid = as.numeric(cps19_fed_id == 1 | pes19_pidtrad == 1)
)

## mutate: new variable 'libsid' (double) with 3 unique values and 58% NA

ces19_subset <- mutate(
  ces19_subset,
  consid = as.numeric(cps19_fed_id == 2 | pes19_pidtrad == 2)
)

## mutate: new variable 'consid' (double) with 3 unique values and 61% NA

ces19_subset <- mutate(
  ces19_subset,
  ndpid = as.numeric(cps19_fed_id == 3 | pes19_pidtrad == 3)
)

## mutate: new variable 'ndpid' (double) with 3 unique values and 73% NA

ces19_subset <- mutate(
  ces19_subset,
  bqid = as.numeric(cps19_fed_id == 4 | pes19_pidtrad == 4)
)

## mutate: new variable 'bqid' (double) with 3 unique values and 85% NA

ces19_subset <- mutate(
  ces19_subset,
  greenid = as.numeric(cps19_fed_id == 5 | pes19_pidtrad == 5)
)

## mutate: new variable 'greenid' (double) with 3 unique values and 81% NA

ces19_subset <- mutate(
  ces19_subset,
  ppcid = as.numeric(cps19_fed_id == 6 | pes19_pidtrad == 6)
)

## mutate: new variable 'ppcid' (double) with 3 unique values and 84% NA

# Collapse the six indicators into a single party label. case_when() uses the
# first branch that is TRUE, so a respondent flagged for more than one party
# gets the earliest label in this list; respondents with no indicator equal to
# 1 stay NA.
ces19_subset <- mutate(
  ces19_subset,
  fedparty = case_when(
    libsid == 1 ~ "LPC",
    consid == 1 ~ "CPC",
    ndpid == 1 ~ "NDP",
    bqid == 1 ~ "BQ",
    greenid == 1 ~ "GPC",
    ppcid == 1 ~ "PPC"
  )
)

## mutate: new variable 'fedparty' (character) with 7 unique values and 16% NA

2015 Federal Election: Issue Salience of Immigration/Refugees

Looking first at the 2015 CES variable ‘main_issue’, I want to see how many times immigration/refugees were mentioned, as coded by the interviewer in the original open-ended question.

# Viewing the counts of issues identified by respondents as being the
# most important in the 2015 election. Their open-ended responses were coded
# into categories; the table is sorted from highest count to lowest.
# table() already leaves out NA values, but blank strings are counted as a
# category of their own.
table_main_issue <- sort(table(ces15$main_issue), decreasing = TRUE)

# Drop the entries that do not name an issue, so they do not inflate the
# denominator when computing each issue's share:
#   - blank entries (no recorded answer), and
#   - "Don't know / not sure / not paying attention".
# trimws() makes the blank check robust to labels made only of whitespace.
# NOTE(review): "Refused", "None, no issue important ..." and the uncoded
# numeric labels are still kept here; confirm whether they should be dropped too.
non_answer_labels <- c("", "Don't know / not sure / not paying attention")
is_answer <- !(trimws(names(table_main_issue)) %in% non_answer_labels)
table_main_issue_without_NAs <- table_main_issue[is_answer]
print(table_main_issue_without_NAs)

## 
##                                                                     
##                                                                7500 
##                                                             Economy 
##                                                                 843 
##                                                  Health care issues 
##                                                                 519 
##                                            Create jobs / employment 
##                                                                 235 
##                  Environmental / ecological issues / climate change 
##                                                                 202 
##                    Other & multiple responses [not coded elsewhere] 
##                                                                 192 
##                      Seniors: pensions / retirement issues & health 
##                                                                 181 
##                              Educational issues, programs & funding 
##                                                                 172 
##                                      Taxation issues (includes HST) 
##                                                                 149 
##    Ethics&effectiveness: accountability / transparency / leadership 
##                                                                 128 
##                   Foreign affairs / national secuity / US relations 
##                                                                  84 
##                                            Immigration and refugees 
##                                                                  82 
##                         Defeat Conservatives / elect Liberals (NDP) 
##                                                                  71 
##                                    Corruption, dishonesty / honesty 
##                                                                  63 
## Rights / social justice issues: aboriginal, women, immigrants, etc. 
##                                                                  54 
##                                                           Democracy 
##                                                                  48 
##                            General mention: debt, finances, deficit 
##                                                                  48 
##             Family benefits, childcare funding & programs, families 
##                                                                  46 
##                                         Balance the budget / budget 
##                                                                  44 
##                   None, no issue important / too many to single out 
##                                                                  40 
##                                    Health care & education combined 
##                                                                  38 
##                                                              Change 
##                                                                  35 
##                                                             Refused 
##                                                                  31 
##                           Poverty, low incomes, wealth distribution 
##                                                                  30 
##                         Electoral reform & procedural reform issues 
##                                                                  24 
##                                          Canada's future, stability 
##                                                                  23 
##                         Crime / violence, gun crime, justice system 
##                                                                  22 
##                                                        Middle class 
##                                                                  22 
##    Social programs, benefits, services, welfare / health & programs 
##                                                                  22 
##                               Government spending, government waste 
##                                                                  20 
##                Harper/Conservative's contempt of parliament, others 
##                                                                  20 
##                     Military / military spending / spending on vets 
##                                                                  20 
##               Moral issues, family values (regardless of direction) 
##                                                                  19 
##                                                            Bill C51 
##                                                                  17 
##                                              Health & jobs combined 
##                                                                  17 
##                            Aboriginal rights / First Nations issues 
##                                                                  13 
##  Party platform / what the parties stand for  (not coded elsewhere) 
##                                                                  13 
##                             Cost of living, living expenses / wages 
##                                                                  10 
##                                               Abortion (pro or con) 
##                                                                   9 
##                          Defeat Liberals/ elect Conservatives (NDP) 
##                                                                   9 
##                                                              Energy 
##                                                                   9 
##                                                      Infrastructure 
##                                                                   9 
##                                               Economy & environment 
##                                                                   8 
##                 Federal / Provincial relations, "fiscal inequality" 
##                                                                   8 
##                                                              Senate 
##                                                                   8 
##                                  Economy & health / social programs 
##                                                                   7 
##                                                               Niqab 
##                                                                   7 
##                                                         Agriculture 
##                                                                   6 
##                                    Health and  pensions for seniors 
##                                                                   6 
##                                           Health care & environment 
##                                                                   5 
##                                                    Income splitting 
##                                                                   5 
##                                                Legalizing marijuana 
##                                                                   5 
##                                 Negative politics, adds, lies, etc. 
##                                                                   5 
##                                     Education and employment / jobs 
##                                                                   4 
##                                      Quebec sovereignty / interests 
##                                                                   4 
##                                                             Housing 
##                                                                   3 
##                                                                  50 
##                                                                   2 
##                                                                  72 
##                                                                   2 
##                                                    Cost of election 
##                                                                   2 
##                                    Gun control / registry, Bill C68 
##                                                                   2 
##                                             Health & taxes combined 
##                                                                   2 
##                                                                   0 
##                                                                   1 
##                                                                  21 
##                                                                   1 
##                                                                  28 
##                                                                   1 
##                                                                  30 
##                                                                   1 
##                                                                  64 
##                                                                   1 
##                                                                  68 
##                                                                   1 
##                                                                  70 
##                                                                   1 
##                                                                   8 
##                                                                   1 
##                                                                  80 
##                                                                   1 
##                                                      Arts & Culture 
##                                                                   1 
##       Majority government (includes: to get a majority or minority) 
##                                                                   1 
##                                                 Minority government 
##                                                                   1 
##                                             Oil & gas (fuel) prices 
##                                                                   1

# Total number of tallied responses across the remaining categories
print(sum(table_main_issue_without_NAs))

## [1] 11237

As the table shows, immigration/refugees was identified as the main issue in the 2015 election in 82 of the 11,237 tabulated entries (after filtering out the “Don’t know” responses). This is 0.73% of that total. Note, however, that the total still includes 7,500 blank entries with no recorded answer; among the 3,737 non-blank entries, the share is 2.2%. Immigration/refugees is the 11th most frequent non-blank category on the list, from highest counts to lowest. This is only relevant for RQ1 (How prevalent and stable is the topic of immigration in terms of perceived issue salience during elections?).

RESEARCH QUESTION 1: How prevalent and stable is the topic of immigration in terms of perceived issue salience during elections?

First, keyword analysis requires: (1) Determining the unit of analysis (2) Tokenization (3) Counting a specific word over space and/or time (4) Denominating by totals

(1) The relevant unit of analysis for this paper is each document (i.e. each individual response to the open-ended question).

(2) Tokenization.

# Build the 2021 corpus from the subset; the document text is the open-ended
# "most important issue" answer stored in cps21_imp_iss.
corpus_ces21 <- corpus(
  ces21_subset,
  text_field = "cps21_imp_iss",
  unique_docnames = TRUE
)

# Tokenizing and pre-processing the corpus as one pipeline:
#   1. tokenize, dropping numbers, punctuation and URLs;
#   2. drop English stopwords plus the extra word "now";
#   3. drop stand-alone "$" tokens;
#   4. lower-case everything.
ces21_toks <- corpus_ces21 %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE, remove_url = TRUE) %>%
  tokens_remove(c(stopwords("en"), "now"), padding = FALSE) %>%
  tokens_remove("$") %>%
  tokens_tolower()

# Creating a document feature matrix
ces21_dfm <- ces21_toks %>%
  dfm()


#### Repeating for CES2019 ####

# Build the 2019 corpus from the subset; the document text is the open-ended
# "most important issue" answer stored in cps19_imp_iss.
corpus_ces19 <- corpus(ces19_subset,
                        text_field = "cps19_imp_iss",
                        unique_docnames = TRUE)

# Tokenizing and pre-processing the corpus
ces19_toks <- tokens(corpus_ces19,
                     remove_numbers = TRUE, 
                     remove_punct = TRUE, 
                     remove_url = TRUE) 

# Drop English stopwords plus the extra word "now"
ces19_toks <- tokens_remove(ces19_toks,
                            c(stopwords("en"),
                              "now"),
                            padding = FALSE)

# Drop stand-alone "$" tokens, exactly as is done for the 2021 corpus, so that
# both years go through the same pre-processing and their token totals are
# comparable ("$" is a symbol, so remove_punct = TRUE does not remove it).
ces19_toks <- tokens_remove(ces19_toks, "$")
ces19_toks <- tokens_tolower(ces19_toks)


# Creating a document feature matrix
ces19_dfm <- dfm(ces19_toks)

(3) Counting a specific word over space and/or time (i.e. “immigration”)

Here, I will be creating a dictionary of immigration-related words in order to count the number of mentions in the corpus. Later on, I will use this dictionary to filter responses containing immigration-related words (0=No, 1=Yes), to further identify how immigration-related responses were discussed (i.e. in relation to the economy, or to covid, etc.)

I will additionally create dictionaries for the topics that are consistently mentioned and most frequent across the 2015, 2019, and 2021 CES, to fulfill RQ2 (How is immigration discussed in each election period relative to other salient topics (i.e., the economy, healthcare, governance), and which topics are most likely to be associated with mentions of immigration? Does this change during each election period?).

The dictionaries were created by: (a) listing general words related to each topic, and (b) going through the entire corpus (ces19_toks and ces21_toks) and sorting the relevant tokens within each response into the main categories: immigration, the economy, governance/government, health, and the environment. A few words that were misspelled by respondents are also included in the dictionaries for greater reliability, so that those responses are still captured.

# Immigration dictionary: a single key, "immigration", holding general
# immigration vocabulary together with respondents' own wording and
# misspellings (e.g. "imagration", "immigratiom").
# NOTE(review): "work permit" is a two-word entry; it can presumably only match
# when the dictionary is applied to tokens rather than to a single-word dfm.
# Confirm how the dictionary is applied downstream.
# NOTE(review): generic entries such as "enough", "stopping", "dangerous",
# "permanent" and "entering" may also match responses unrelated to immigration.
dictionaryimm <- dictionary(list(
  immigration = c(
    "immigration", "refugees", "refugee", "leeches",
    "permanent", "cultures", "immigrant", "immigrants",
    "diversity", "refuges", "enough", "stopping",
    "dangerous", "immigrates", "vetting", "illegal",
    "imagration", "immagration", "borders", "border",
    "illegally", "immigrantions", "migrant", "migrants",
    "immigratiom", "illiegal", "entering", "muslim",
    "muslims", "asylum", "citizenship", "naturalization",
    "deportation", "detention", "visa", "emigration",
    "relocation", "exile", "repatriation", "integration",
    "settlement", "persecution", "work permit", "foreigner",
    "migration", "undocumented", "foreigners", "multicultural",
    "multiculturalism"
  )
))


# Economy dictionary: a single key, "economy", holding economic vocabulary
# together with respondents' own wording and misspellings (e.g. "ecomony",
# "econmy", "evonomy", "deficites").
# The repeated "rising" entry has been removed; unique() additionally guards
# against accidental repeats if the list is extended later.
# NOTE(review): "free" is also listed in dictionarygov, so a response using it
# counts toward both topics. Confirm this is intended.
dictionaryecon <- dictionary(
  list(economy = unique(c(
    "education", "jobs", "job", "housing", "pension",
    "pensions", "pentios", "deficit", "economy", "economic",
    "taxes", "tax", "price", "pricing", "prices",
    "monetary", "costing", "cost", "costs", "homelessness",
    "homeless", "expensive", "spending", "spend", "university",
    "tuition", "inflation", "living", "finance", "financial",
    "fund", "funding", "resource", "resources", "budget",
    "businesses", "business", "corporation", "corporations", "affordable",
    "affordability", "class", "infrastructure", "fiscal", "ubi",
    "income", "retirees", "retired", "retirement", "student",
    "students", "ressources", "loan", "loans", "wage",
    "wages", "nurses", "subsidy", "subsidized", "money",
    "oas", "rent", "rental", "roof", "unemployment",
    "employment", "taxpayers", "taxation", "food", "buyers",
    "buyer", "cerb", "ecomony", "poverty", "competitive",
    "rise", "rising", "lowering", "lower", "reduced",
    "reduce", "million", "billion", "finances", "taxing",
    "rich", "richer", "richest", "pay", "bills",
    "profit", "benefits", "benefit", "dept", "debt",
    "spent", "excessive", "invest", "investment", "overspending",
    "livable", "free", "osap", "balancing", "cut",
    "cuts", "lowincome", "middleclass", "middle", "bankrupting",
    "wasteful", "banking", "savings", "joblessness", "welfare",
    "breaks", "wasting", "deficites", "cpp",
    "econmy", "recession", "redistribution", "taxed", "groceries",
    "utilities", "evonomy", "deficits", "market", "trade",
    "dollars"
  )))
)


# Governance dictionary: a single key, "governance", holding vocabulary about
# government, parties, leaders, elections and political ethics, together with
# respondents' own wording and misspellings (e.g. "governement", "turdeau",
# "pocitical").
# The repeated entries ("parties", "power", "petty", "dishonesty") have been
# removed; unique() additionally guards against accidental repeats if the list
# is extended later.
dictionarygov <- dictionary(
  list(governance = unique(c(
    "rights", "right", "canadian", "canadians", "rid",
    "get", "getting", "corrupt", "corruption", "trudeau",
    "government", "reform", "election", "drama", "queen",
    "representational", "voting", "vote", "banning", "freedom",
    "freedoms", "speech", "policy", "impact", "understanding",
    "parties", "party", "control", "controlling", "federal",
    "handled", "handle", "handling", "helping", "gets",
    "promise", "promisses", "promises", "far", "national",
    "nation", "country", "service", "services", "honesty",
    "amateur", "honest", "liberal", "affairs", "affair",
    "prime", "minister", "accountability", "accountable", "ballot",
    "ballots", "power", "handouts", "branches", "deal",
    "hypocrisy", "ethics", "ethical", "communist", "justin",
    "democracy", "democratic", "censorship", "manage", "trust",
    "trustworthy", "justice", "overreach", "policies", "coerced",
    "political", "politics", "candidate", "candidates", "integrity",
    "leader", "leadership", "help", "lack", "office",
    "parliamentary", "system", "elitist", "minister's",
    "ministers", "lies", "lie", "lying", "abuse",
    "unacceptable", "mps", "crooked", "competent", "tyranny",
    "order", "orders", "liberals", "booted", "pm",
    "running", "trustable", "truth", "defeating", "centrist",
    "naysayers", "restrictions", "restriction", "restrict", "transparent",
    "transparency", "representation", "proportional", "action", "responsibility",
    "time", "rule", "ruling", "replacing", "replace",
    "elect", "elected", "petty", "remove", "removed",
    "jailed", "ridings", "conservative", "conservatives", "tolerance",
    "dishonesty", "attitude", "weakness", "electoral", "charter",
    "respect", "informed", "ideology", "cares", "free",
    "seats", "platform", "bureaucratic", "provincial", "deceit",
    "snc", "lavalin", "scandal", "cooperating",
    "investigation", "politician", "politicians", "hard-right", "liar",
    "liars", "unethical", "represent", "represents", "reputable",
    "governement", "re-elected", "spoken", "mature", "honestsy",
    "harper", "voter", "voters", "politician's", "governance",
    "govern", "gov't", "interference", "trudeau's", "ruin",
    "reelection", "turdeau", "turd", "cheating", "scheming",
    "douchebags", "campaign", "campaigns", "electing", "democraticly",
    "senate", "defeated", "ousting", "idiot", "governing",
    "officials", "official", "failing", "statements", "competency",
    "scheer", "opposition", "deception", "image",
    "stephen", "scheerer", "competence", "truthfull", "governmental",
    "misinformation", "trudo", "pocitical", "transparence"
  )))
)

# Health dictionary: a single key, "healthcare", holding general health-care
# vocabulary, COVID-19/pandemic vocabulary, and respondents' own misspellings
# (e.g. "heath", "copid", "menral", "hearth").
dictionaryhealth <- dictionary(list(
  healthcare = c(
    "covid-19", "healthcare", "pandemic", "health",
    "care", "covid", "senior", "seniors",
    "senior's", "drug", "covid19", "vaccination",
    "mandating", "mandates", "mandate", "normal",
    "vaccinations", "recovery", "longterm", "long",
    "term", "child", "childcare", "vax",
    "passports", "passport", "safe", "safety",
    "unvaccinated", "vaccinate", "vaccine", "spread",
    "eradicating", "mandatory", "vacxinations", "vaccines",
    "medical", "mental", "coronavirus", "rollout",
    "booster", "shot", "hospitals", "hospital",
    "measures", "dental", "pass", "elderly",
    "pharmacare", "post", "post-pandemic", "lockdown",
    "lockdowns", "virus", "facilities", "heath",
    "copid", "rexovery", "combatting", "beating",
    "mandated", "menral", "masks", "mask",
    "ill", "pharma", "eye", "vision",
    "hearth", "medicare", "prescription", "doctors"
  )
))

# COVID-19 dictionary, intended for the 2021 CES only: one category, `covid`,
# used to detect how many of the 2021 health mentions pertain specifically to
# COVID-19. Any mention of the pandemic/covid/vaccines is included, whether it
# relates to health, the economy, or governance.
# Every entry below also appears in `dictionaryhealth`, so a response flagged
# here is always flagged as a health response too.
# Misspelled entries ("copid", "rexovery", "vacxinations") are presumably
# respondents' own typos -- confirm before changing them.

dictionarycovid <- dictionary( 
  list(covid = c("covid-19","pandemic","covid","covid19","vaccination",
                 "mandating","mandates","mandate","normal","vaccinations",
                 "recovery","vax","passports","passport","safe",
                 "safety","unvaccinated","vaccinate","vaccine","spread",
                 "eradicating","mandatory","vacxinations","vaccines", "coronavirus",
                 "rollout","booster","shot","measures","pass",
                 "post","post-pandemic","lockdown","lockdowns","virus",
                 "copid","rexovery","combatting","beating","mandated",
                 "masks","mask","ill")))


# Environment dictionary: one category, `enviro`, passed to liwcalike()
# further down to flag responses using environment/climate/energy vocabulary.
# Variant spellings ("environemtn", "enviroment", "enironment") and the French
# "environnement" are presumably included to match respondents' own wording --
# confirm before changing them.
# NOTE(review): general words such as "change", "agenda", "line", "lines",
# "sector", "construction" and "construct" may match non-environmental
# responses.
dictionaryenviro <-dictionary( 
  list(enviro = c("environment","environmentalism","environmental","climate","change",
                  "clean","water","agenda","paris","animal",
                  "sustainable","sustainability","plant","minerals","battery",
                  "mining","wind","hydro","carbon","green",
                  "greener","environemtn","warming","global","species",
                  "extinctions","extinct","environnement","pipeline","pipelines",
                  "pipe","line","lines","natural","oil",
                  "gas","footprint","enviroment","energy","sector",
                  "construction","renewable","construct","planet","enironment",
                  "fossil","fuel","pollution","mountain","bee",
                  "electricity", "pollute")))

To determine the most frequent topics across the entire corpus, as used to create the dictionaries above (in addition to immigration), a keyword frequency analysis was conducted.

# Keyword frequency, 2021: horizontal bar chart of the 30 most frequent
# features in ces21_dfm (the cps21_imp_iss corpus), used to determine the most
# salient topics. Bars are ordered and shaded by frequency; no legend is shown.
textstat_frequency(ces21_dfm, n = 30) %>%
  ggplot(aes(
    x = reorder(feature, frequency),
    y = frequency,
    fill = frequency,
    color = frequency
  )) +
  geom_col(alpha = 0.5) +
  coord_flip() +
  scale_x_reordered() +
  scale_fill_distiller(palette = "PuOr") +
  scale_color_distiller(palette = "PuOr") +
  labs(
    title = "2021 CES Keyword Frequency Analysis",
    x = "",
    y = "Frequency",
    fill = "",
    color = ""
  ) +
  theme_minimal() +
  theme(legend.position = "none")

#### Keyword frequency, 2019 ####
# Same chart as for 2021: the 30 most frequent features in ces19_dfm, ordered
# and shaded by frequency, with the legend hidden.
textstat_frequency(ces19_dfm, n = 30) %>%
  ggplot(aes(
    x = reorder(feature, frequency),
    y = frequency,
    fill = frequency,
    color = frequency
  )) +
  geom_col(alpha = 0.5) +
  coord_flip() +
  scale_x_reordered() +
  scale_fill_distiller(palette = "PuOr") +
  scale_color_distiller(palette = "PuOr") +
  labs(
    title = "2019 CES Keyword Frequency Analysis",
    x = "",
    y = "Frequency",
    fill = "",
    color = ""
  ) +
  theme_minimal() +
  theme(legend.position = "none")

It appears that the most frequent keywords mentioned across the 2019 and 2021 CES broadly fall within four categories: the economy, governance/government/politics, health, and the environment. This is why tokens from each corpus were sorted into five broad categories: these four, plus immigration.

(4) Denominating by totals

In this section, we aim to answer RQ1 (How prevalent and stable is the topic of immigration in terms of perceived issue salience during elections?). Thus, we will use the curated dictionaries to determine how often immigration is mentioned relative to the most frequent categories. We first want to see whether each topic was mentioned in each response, using the dictionaries (i.e., the unit of analysis here is each document). Then, after creating a dummy variable indicating whether the category was mentioned or not, we want to see the proportion of total responses within each category, relative to the entire corpus.

## 2021 CES ##

### Share of 2021 responses that use *immigration* vocabulary ###
# A response is flagged (imm_dum = 1) when its `immigration` score from
# liwcalike() is above zero. The corpus-wide count and share are then repeated
# on every row.
imm_mentions <- liwcalike(corpus_ces21, dictionaryimm)
imm_mentions$imm_dum <- as.numeric(imm_mentions$immigration > 0)
imm_mentions <- mutate(
  imm_mentions,
  total_immigration = sum(imm_dum),
  pct_immigration = total_immigration / n()
)

## mutate: new variable 'total_immigration' (double) with one unique value and 0% NA

##         new variable 'pct_immigration' (double) with one unique value and 0% NA

# 0.01495049 or 1.495% of the responses in the corpus mention immigration-
# related words



### Share of 2021 responses that use *economic* vocabulary ###
# Flag responses whose `economy` score is above zero, then store the
# corpus-wide count and share on every row.
econ_mentions <- liwcalike(corpus_ces21, dictionaryecon)
econ_mentions$econ_dum <- as.numeric(econ_mentions$economy > 0)
econ_mentions <- mutate(
  econ_mentions,
  total_economy = sum(econ_dum),
  pct_economy = total_economy / n()
)

## mutate: new variable 'total_economy' (double) with one unique value and 0% NA

##         new variable 'pct_economy' (double) with one unique value and 0% NA

# 0.3861886 or 38.618% of the responses in the corpus mention economic-related
# words


### Share of 2021 responses that use *governance/government* vocabulary ###
# Flag responses whose `governance` score is above zero, then store the
# corpus-wide count and share on every row.
gov_mentions <- liwcalike(corpus_ces21, dictionarygov)
gov_mentions$gov_dum <- as.numeric(gov_mentions$governance > 0)
gov_mentions <- mutate(
  gov_mentions,
  total_gov = sum(gov_dum),
  pct_gov = total_gov / n()
)

## mutate: new variable 'total_gov' (double) with one unique value and 0% NA

##         new variable 'pct_gov' (double) with one unique value and 0% NA

# 0.2245809 or 22.458% of the responses in the corpus mention political-related
# words


### Share of 2021 responses that use *health* vocabulary ###
# Flag responses whose `healthcare` score is above zero, then store the
# corpus-wide count and share on every row.
health_mentions <- liwcalike(corpus_ces21, dictionaryhealth)
health_mentions$health_dum <- as.numeric(health_mentions$healthcare > 0)
health_mentions <- mutate(
  health_mentions,
  total_health = sum(health_dum),
  pct_health = total_health / n()
)

## mutate: new variable 'total_health' (double) with one unique value and 0% NA

##         new variable 'pct_health' (double) with one unique value and 0% NA

# 0.3251569 or 32.515% of the responses in the corpus mention health-related
# words


### Share of 2021 responses that use *COVID-19* vocabulary ###
# Flag responses whose `covid` score is above zero, then store the
# corpus-wide count and share on every row.
covid_mentions <- liwcalike(corpus_ces21, dictionarycovid)
covid_mentions$covid_dum <- as.numeric(covid_mentions$covid > 0)
covid_mentions <- mutate(
  covid_mentions,
  total_covid = sum(covid_dum),
  pct_covid = total_covid / n()
)

## mutate: new variable 'total_covid' (double) with one unique value and 0% NA

##         new variable 'pct_covid' (double) with one unique value and 0% NA

# 0.1772701 or 17.727% of the responses in the corpus mention covid-related
# words. In other words, 54.518% of general health responses in the corpus 
# specifically use covid-related words. 


### Share of 2021 responses that use *environmental* vocabulary ###
# Flag responses whose `enviro` score is above zero, then store the
# corpus-wide count and share on every row.
enviro_mentions <- liwcalike(corpus_ces21, dictionaryenviro)
enviro_mentions$enviro_dum <- as.numeric(enviro_mentions$enviro > 0)
enviro_mentions <- mutate(
  enviro_mentions,
  total_enviro = sum(enviro_dum),
  pct_enviro = total_enviro / n()
)

## mutate: new variable 'total_enviro' (double) with one unique value and 0% NA

##         new variable 'pct_enviro' (double) with one unique value and 0% NA

# 0.1223222 or 12.232% of the responses in the corpus mention environment-
# related words



## Repeating for 2019 CES ##

### Share of 2019 responses that use *immigration* vocabulary ###
# Flag responses whose `immigration` score is above zero, then store the
# corpus-wide count and share on every row.
imm_mentions_19 <- liwcalike(corpus_ces19, dictionaryimm)
imm_mentions_19$imm_dum <- as.numeric(imm_mentions_19$immigration > 0)
imm_mentions_19 <- mutate(
  imm_mentions_19,
  total_immigration = sum(imm_dum),
  pct_immigration = total_immigration / n()
)

## mutate: new variable 'total_immigration' (double) with one unique value and 0% NA

##         new variable 'pct_immigration' (double) with one unique value and 0% NA

# 0.03165303 or 3.165% of the responses in the corpus mention immigration-
# related words


### Share of 2019 responses that use *economic* vocabulary ###
# Flag responses whose `economy` score is above zero, then store the
# corpus-wide count and share on every row.
econ_mentions_19 <- liwcalike(corpus_ces19, dictionaryecon)
econ_mentions_19$econ_dum <- as.numeric(econ_mentions_19$economy > 0)
econ_mentions_19 <- mutate(
  econ_mentions_19,
  total_economy = sum(econ_dum),
  pct_economy = total_economy / n()
)

## mutate: new variable 'total_economy' (double) with one unique value and 0% NA

##         new variable 'pct_economy' (double) with one unique value and 0% NA

# 0.3649651 or 36.496% of the responses in the corpus mention economic-related
# words


### Share of 2019 responses that use *governance/government* vocabulary ###
# Flag responses whose `governance` score is above zero, then store the
# corpus-wide count and share on every row.
gov_mentions_19 <- liwcalike(corpus_ces19, dictionarygov)
gov_mentions_19$gov_dum <- as.numeric(gov_mentions_19$governance > 0)
gov_mentions_19 <- mutate(
  gov_mentions_19,
  total_gov = sum(gov_dum),
  pct_gov = total_gov / n()
)

## mutate: new variable 'total_gov' (double) with one unique value and 0% NA

##         new variable 'pct_gov' (double) with one unique value and 0% NA

# 0.1659714 or 16.597% of the responses in the corpus mention political-related
# words


### Share of 2019 responses that use *health* vocabulary ###
# Flag responses whose `healthcare` score is above zero, then store the
# corpus-wide count and share on every row.
health_mentions_19 <- liwcalike(corpus_ces19, dictionaryhealth)
health_mentions_19$health_dum <- as.numeric(health_mentions_19$healthcare > 0)
health_mentions_19 <- mutate(
  health_mentions_19,
  total_health = sum(health_dum),
  pct_health = total_health / n()
)

## mutate: new variable 'total_health' (double) with one unique value and 0% NA

##         new variable 'pct_health' (double) with one unique value and 0% NA

# 0.1342865 or 13.428% of the responses in the corpus mention health-related
# words


### Share of 2019 responses that use *environmental* vocabulary ###
# Flag responses whose `enviro` score is above zero, then store the
# corpus-wide count and share on every row.
enviro_mentions_19 <- liwcalike(corpus_ces19, dictionaryenviro)
enviro_mentions_19$enviro_dum <- as.numeric(enviro_mentions_19$enviro > 0)
enviro_mentions_19 <- mutate(
  enviro_mentions_19,
  total_enviro = sum(enviro_dum),
  pct_enviro = total_enviro / n()
)

## mutate: new variable 'total_enviro' (double) with one unique value and 0% NA

##         new variable 'pct_enviro' (double) with one unique value and 0% NA

# 0.1713849 or 17.138% of the responses in the corpus mention environment-
# related words



### Side-by-side table of topic shares, 2019 vs. 2021 ###

# Percent of responses mentioning each topic, transcribed by hand from the
# pct_* results reported above. Both vectors use the same order:
# Immigration, Economy, Governance, Health, Environment.
proportions_2019 <- c(3.165, 36.496, 16.597, 13.428, 17.138)
proportions_2021 <- c(1.495, 38.618, 22.458, 32.515, 12.232)

# Relative change from 2019 to 2021, expressed in percent of the 2019 share
percentage_change <-
  (proportions_2021 - proportions_2019) / proportions_2019 * 100

# Assemble the comparison table, one row per topic
comparison_table <- data.frame(
  Topic = c("Immigration", "Economy", "Governance", "Health", "Environment"),
  Proportion_2019 = proportions_2019,
  Proportion_2021 = proportions_2021
)
comparison_table$Percentage_Change <- percentage_change

# Display the table
print(comparison_table)

##         Topic Proportion_2019 Proportion_2021 Percentage_Change
## 1 Immigration           3.165           1.495        -52.764613
## 2     Economy          36.496          38.618          5.814336
## 3  Governance          16.597          22.458         35.313611
## 4      Health          13.428          32.515        142.143283
## 5 Environment          17.138          12.232        -28.626444

RESEARCH QUESTION 2: How is immigration discussed in each election period relative to other salient topics (i.e., the economy, healthcare, governance), and which topics were most associated with mentions of immigration? Does this change during each election period?

# Copy the per-response topic flags (1 = response uses words from that
# dictionary) from the liwcalike() results into the main CES datasets.
# The flags are assigned by position, so this relies on each *_mentions table
# having its rows in the same order as the matching *_subset dataset.
ces21_subset[["imm_dum"]] <- imm_mentions[["imm_dum"]]
ces21_subset[["econ_dum"]] <- econ_mentions[["econ_dum"]]
ces21_subset[["gov_dum"]] <- gov_mentions[["gov_dum"]]
ces21_subset[["health_dum"]] <- health_mentions[["health_dum"]]
ces21_subset[["enviro_dum"]] <- enviro_mentions[["enviro_dum"]]

ces19_subset[["imm_dum"]] <- imm_mentions_19[["imm_dum"]]
ces19_subset[["econ_dum"]] <- econ_mentions_19[["econ_dum"]]
ces19_subset[["gov_dum"]] <- gov_mentions_19[["gov_dum"]]
ces19_subset[["health_dum"]] <- health_mentions_19[["health_dum"]]
ces19_subset[["enviro_dum"]] <- enviro_mentions_19[["enviro_dum"]]

# Keep only the responses flagged as mentioning immigration
immigration_responses_only_21 <- ces21_subset[ces21_subset[["imm_dum"]] == 1, ]
immigration_responses_only_19 <- ces19_subset[ces19_subset[["imm_dum"]] == 1, ]

# Drop the 2019 response whose row name is 443: it contains an unknown
# character and produces an error in the code that follows.
immigration_responses_only_19 <- immigration_responses_only_19[
  rownames(immigration_responses_only_19) != 443, , drop = FALSE
]

### Share of 2021 *immigration* responses that also use economic vocabulary ###
# Score only the immigration responses against the economy dictionary, flag
# those with a score above zero, and store the count and share on every row.
econ_mentions_in_imm <- liwcalike(
  immigration_responses_only_21$cps21_imp_iss,
  dictionaryecon
)
econ_mentions_in_imm$econ_dum_imm <- as.numeric(econ_mentions_in_imm$economy > 0)
econ_mentions_in_imm <- mutate(
  econ_mentions_in_imm,
  total_economy = sum(econ_dum_imm),
  pct_economy = total_economy / n()
)

## mutate: new variable 'total_economy' (double) with one unique value and 0% NA

##         new variable 'pct_economy' (double) with one unique value and 0% NA

# 0.1948052 or 19.480% of the immigration responses in the corpus mention 
# economic-related words


### Share of 2021 *immigration* responses that also use governance vocabulary ###
# Score only the immigration responses against the governance dictionary, flag
# those with a score above zero, and store the count and share on every row.
gov_mentions_in_imm <- liwcalike(
  immigration_responses_only_21$cps21_imp_iss,
  dictionarygov
)
gov_mentions_in_imm$gov_dum_imm <- as.numeric(gov_mentions_in_imm$governance > 0)
gov_mentions_in_imm <- mutate(
  gov_mentions_in_imm,
  total_gov = sum(gov_dum_imm),
  pct_gov = total_gov / n()
)

## mutate: new variable 'total_gov' (double) with one unique value and 0% NA

##         new variable 'pct_gov' (double) with one unique value and 0% NA

# 0.2337662 or 23.376% of the immigration responses in the corpus mention 
# political-related words


### Share of 2021 *immigration* responses that also use health vocabulary ###
# Score only the immigration responses against the health dictionary, flag
# those with a score above zero, and store the count and share on every row.
health_mentions_in_imm <- liwcalike(
  immigration_responses_only_21$cps21_imp_iss,
  dictionaryhealth
)
health_mentions_in_imm$health_dum_imm <-
  as.numeric(health_mentions_in_imm$healthcare > 0)
health_mentions_in_imm <- mutate(
  health_mentions_in_imm,
  total_health = sum(health_dum_imm),
  pct_health = total_health / n()
)

## mutate: new variable 'total_health' (double) with one unique value and 0% NA

##         new variable 'pct_health' (double) with one unique value and 0% NA

# 0.1471861 or 14.718% of the immigration responses in the corpus mention 
# health-related words


### Share of 2021 *immigration* responses that also use COVID-19 vocabulary ###
# Score only the immigration responses against the COVID dictionary, flag
# those with a score above zero, and store the count and share on every row.
covid_mentions_in_imm <- liwcalike(
  immigration_responses_only_21$cps21_imp_iss,
  dictionarycovid
)
covid_mentions_in_imm$covid_dum_imm <- as.numeric(covid_mentions_in_imm$covid > 0)
covid_mentions_in_imm <- mutate(
  covid_mentions_in_imm,
  total_covid = sum(covid_dum_imm),
  pct_covid = total_covid / n()
)

## mutate: new variable 'total_covid' (double) with one unique value and 0% NA

##         new variable 'pct_covid' (double) with one unique value and 0% NA

# 0.08225108 or 8.225% of the immigration responses in the corpus mention covid-
# related words. In other words, 55.882% of the immigration responses containing 
# words from the health dictionary specifically pertain to COVID-19. 


### Share of 2021 *immigration* responses that also use environmental vocabulary ###
# Score only the immigration responses against the environment dictionary,
# flag those with a score above zero, and store the count and share on every
# row.
enviro_mentions_in_imm <- liwcalike(
  immigration_responses_only_21$cps21_imp_iss,
  dictionaryenviro
)
enviro_mentions_in_imm$enviro_dum_imm <-
  as.numeric(enviro_mentions_in_imm$enviro > 0)
enviro_mentions_in_imm <- mutate(
  enviro_mentions_in_imm,
  total_enviro = sum(enviro_dum_imm),
  pct_enviro = total_enviro / n()
)

## mutate: new variable 'total_enviro' (double) with one unique value and 0% NA

##         new variable 'pct_enviro' (double) with one unique value and 0% NA

# 0.02597403 or 2.597% of the immigration responses in the corpus mention 
# environment-related words

# Store the topic flags computed on the immigration-only responses in the 2021
# immigration dataset. This overwrites the econ/gov/health/enviro flags that
# were carried over from ces21_subset.

immigration_responses_only_21[["econ_dum"]] <-
  econ_mentions_in_imm[["econ_dum_imm"]]
immigration_responses_only_21[["gov_dum"]] <-
  gov_mentions_in_imm[["gov_dum_imm"]]
immigration_responses_only_21[["health_dum"]] <-
  health_mentions_in_imm[["health_dum_imm"]]
immigration_responses_only_21[["enviro_dum"]] <-
  enviro_mentions_in_imm[["enviro_dum_imm"]]

### Repeating for 2019 CES ###

### Share of 2019 *immigration* responses that also use economic vocabulary ###
# Score only the immigration responses against the economy dictionary, flag
# those with a score above zero, and store the count and share on every row.
econ_mentions_in_imm_19 <- liwcalike(
  immigration_responses_only_19$cps19_imp_iss,
  dictionaryecon
)
econ_mentions_in_imm_19$econ_dum_imm <-
  as.numeric(econ_mentions_in_imm_19$economy > 0)
econ_mentions_in_imm_19 <- mutate(
  econ_mentions_in_imm_19,
  total_economy = sum(econ_dum_imm),
  pct_economy = total_economy / n()
)

## mutate: new variable 'total_economy' (double) with one unique value and 0% NA

##         new variable 'pct_economy' (double) with one unique value and 0% NA

# 0.183283 or 18.328% of the immigration responses in the corpus mention 
# economic-related words


### Share of 2019 *immigration* responses that also use governance vocabulary ###
# Score only the immigration responses against the governance dictionary, flag
# those with a score above zero, and store the count and share on every row.
gov_mentions_in_imm_19 <- liwcalike(
  immigration_responses_only_19$cps19_imp_iss,
  dictionarygov
)
gov_mentions_in_imm_19$gov_dum_imm <-
  as.numeric(gov_mentions_in_imm_19$governance > 0)
gov_mentions_in_imm_19 <- mutate(
  gov_mentions_in_imm_19,
  total_gov = sum(gov_dum_imm),
  pct_gov = total_gov / n()
)

## mutate: new variable 'total_gov' (double) with one unique value and 0% NA

##         new variable 'pct_gov' (double) with one unique value and 0% NA

# 0.1973817 or 19.738% of the immigration responses in the corpus mention 
# political-related words


### Share of 2019 *immigration* responses that also use health vocabulary ###
# Score only the immigration responses against the health dictionary, flag
# those with a score above zero, and store the count and share on every row.
health_mentions_in_imm_19 <- liwcalike(
  immigration_responses_only_19$cps19_imp_iss,
  dictionaryhealth
)
health_mentions_in_imm_19$health_dum_imm <-
  as.numeric(health_mentions_in_imm_19$healthcare > 0)
health_mentions_in_imm_19 <- mutate(
  health_mentions_in_imm_19,
  total_health = sum(health_dum_imm),
  pct_health = total_health / n()
)

## mutate: new variable 'total_health' (double) with one unique value and 0% NA

##         new variable 'pct_health' (double) with one unique value and 0% NA

# 0.06646526 or 6.646% of the immigration responses in the corpus mention 
# health-related words


### Share of 2019 *immigration* responses that also use environmental vocabulary ###
# Score only the immigration responses against the environment dictionary,
# flag those with a score above zero, and store the count and share on every
# row.
enviro_mentions_in_imm_19 <- liwcalike(
  immigration_responses_only_19$cps19_imp_iss,
  dictionaryenviro
)
enviro_mentions_in_imm_19$enviro_dum_imm <-
  as.numeric(enviro_mentions_in_imm_19$enviro > 0)
enviro_mentions_in_imm_19 <- mutate(
  enviro_mentions_in_imm_19,
  total_enviro = sum(enviro_dum_imm),
  pct_enviro = total_enviro / n()
)

## mutate: new variable 'total_enviro' (double) with one unique value and 0% NA

##         new variable 'pct_enviro' (double) with one unique value and 0% NA

# 0.05438066 or 5.438% of the immigration responses in the corpus mention 
# environment-related words


# Store the topic flags computed on the immigration-only responses in the 2019
# immigration dataset. This overwrites the flags carried over from ces19_subset.
immigration_responses_only_19[["econ_dum"]] <-
  econ_mentions_in_imm_19[["econ_dum_imm"]]
immigration_responses_only_19[["gov_dum"]] <-
  gov_mentions_in_imm_19[["gov_dum_imm"]]
immigration_responses_only_19[["health_dum"]] <-
  health_mentions_in_imm_19[["health_dum_imm"]]
immigration_responses_only_19[["enviro_dum"]] <-
  enviro_mentions_in_imm_19[["enviro_dum_imm"]]

# Side-by-side table: topics co-mentioned in immigration responses, 2019 vs. 2021

# Percent of immigration responses that also mention each topic, transcribed
# by hand from the results reported above. Both vectors use the same order:
# Economy, Governance, Health, Environment.
proportions_2019_imm <- c(18.328, 19.738, 6.646, 5.438)
proportions_2021_imm <- c(19.480, 23.376, 14.718, 2.597)

# Relative change from 2019 to 2021, expressed in percent of the 2019 share
percentage_change_imm <-
  (proportions_2021_imm - proportions_2019_imm) / proportions_2019_imm * 100

# Assemble the comparison table, one row per topic
comparison_table_imm <- data.frame(
  Topic = c("Economy", "Governance", "Health", "Environment"),
  Proportion_2019 = proportions_2019_imm,
  Proportion_2021 = proportions_2021_imm
)
comparison_table_imm$Percentage_Change <- percentage_change_imm

# Display the table
print(comparison_table_imm)

##         Topic Proportion_2019 Proportion_2021 Percentage_Change
## 1     Economy          18.328          19.480          6.285465
## 2  Governance          19.738          23.376         18.431452
## 3      Health           6.646          14.718        121.456515
## 4 Environment           5.438           2.597        -52.243472

RESEARCH QUESTION 3: Who is more likely to mention immigration as an important issue, and is there any significance in terms of who mentions immigration with certain topics?

Plotting proportions of Canadian and foreign-born respondents in the general corpus, and the corpus with only immigration responses

# Immigrant status, 2021: distribution of immigrant_status among all responses
# versus among immigration responses only, drawn as grouped bars.
table_imm_unfiltered <- table(ces21_subset[["immigrant_status"]])
table_imm_filtered <- table(immigration_responses_only_21[["immigrant_status"]])
prop_imm_un <- prop.table(table_imm_unfiltered)
prop_imm_fil <- prop.table(table_imm_filtered)

# NOTE(review): rbind() pairs the two tables by position, so both must list
# the same immigrant_status categories in the same order -- confirm.
barplot(
  rbind(prop_imm_un, prop_imm_fil),
  beside = TRUE,
  main = "Comparing All Responses vs. Immigration Responses",
  xlab = "Canadian vs. Foreign-Born",
  ylab = "Proportion",
  legend.text = c("2021 Corpus", "2021 Corpus - Immigration Responses Only"),
  args.legend = list(x = "top", bty = "n")
)

# Immigrant status, 2019: distribution of immigrant_status among all responses
# versus among immigration responses only, drawn as grouped bars.
table_imm_unfiltered_19 <- table(ces19_subset[["immigrant_status"]])
table_imm_filtered_19 <- table(immigration_responses_only_19[["immigrant_status"]])
prop_imm_un_19 <- prop.table(table_imm_unfiltered_19)
prop_imm_fil_19 <- prop.table(table_imm_filtered_19)

# NOTE(review): rbind() pairs the two tables by position, so both must list
# the same immigrant_status categories in the same order -- confirm.
barplot(
  rbind(prop_imm_un_19, prop_imm_fil_19),
  beside = TRUE,
  main = "Comparing All Responses vs. Immigration Responses",
  xlab = "Canadian vs. Foreign-Born",
  ylab = "Proportion",
  legend.text = c("2019 Corpus", "2019 Corpus - Immigration Responses Only"),
  args.legend = list(x = "topright", bty = "n")
)

Plotting the distribution of topic mentions within the general corpus and the immigration corpus, by immigrant status and left/right voters

# (Re-)attach the topic flags computed on the 2021 immigration responses.
# The same assignment is made right after those flags are computed, so this
# repeat is idempotent.
immigration_responses_only_21[["econ_dum"]] <-
  econ_mentions_in_imm[["econ_dum_imm"]]
immigration_responses_only_21[["gov_dum"]] <-
  gov_mentions_in_imm[["gov_dum_imm"]]
immigration_responses_only_21[["health_dum"]] <-
  health_mentions_in_imm[["health_dum_imm"]]
immigration_responses_only_21[["enviro_dum"]] <-
  enviro_mentions_in_imm[["enviro_dum_imm"]]

# One subset per topic: immigration responses that also mention that topic
econ_imm <- immigration_responses_only_21[
  immigration_responses_only_21[["econ_dum"]] == 1,
]
gov_imm <- immigration_responses_only_21[
  immigration_responses_only_21[["gov_dum"]] == 1,
]
health_imm <- immigration_responses_only_21[
  immigration_responses_only_21[["health_dum"]] == 1,
]
enviro_imm <- immigration_responses_only_21[
  immigration_responses_only_21[["enviro_dum"]] == 1,
]

# Per immigrant_status group: share of immigration responses mentioning each
# topic, reshaped to long format (one row per group x topic) for plotting.
prop_data <- immigration_responses_only_21 %>%
  group_by(immigrant_status) %>%
  summarise(
    prop_econ = sum(econ_dum == 1) / n(),
    prop_gov = sum(gov_dum == 1) / n(),
    prop_health = sum(health_dum == 1) / n(),
    prop_enviro = sum(enviro_dum == 1) / n()
  ) %>%
  pivot_longer(
    cols = c(prop_econ, prop_gov, prop_health, prop_enviro),
    names_to = "variable",
    values_to = "prop"
  )

## group_by: one grouping variable (immigrant_status)

## summarise: now 3 rows and 5 columns, ungrouped

## pivot_longer: reorganized (prop_econ, prop_gov, prop_health, prop_enviro) into (variable, prop) [was 3x5, now 12x3]

# Grouped bars: topic shares within 2021 immigration responses, one cluster
# per immigrant_status group and one bar per topic.
ggplot(prop_data, aes(x = immigrant_status, y = prop, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparing Topic Mentions in 2021 Immigration Responses by Immigrant Status",
    x = "Immigrant Status",
    y = "Proportion of Total Responses",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Per immigrant_status group: share of ALL 2021 responses mentioning each
# topic, reshaped to long format (one row per group x topic) for plotting.
prop_data_total <- ces21_subset %>%
  group_by(immigrant_status) %>%
  summarise(
    prop_econ = sum(econ_dum == 1) / n(),
    prop_gov = sum(gov_dum == 1) / n(),
    prop_health = sum(health_dum == 1) / n(),
    prop_enviro = sum(enviro_dum == 1) / n()
  ) %>%
  pivot_longer(
    cols = c(prop_econ, prop_gov, prop_health, prop_enviro),
    names_to = "variable",
    values_to = "prop"
  )

## group_by: one grouping variable (immigrant_status)

## summarise: now 3 rows and 5 columns, ungrouped

## pivot_longer: reorganized (prop_econ, prop_gov, prop_health, prop_enviro) into (variable, prop) [was 3x5, now 12x3]

# Grouped bars: topic shares within all 2021 responses, one cluster per
# immigrant_status group and one bar per topic.
ggplot(prop_data_total, aes(x = immigrant_status, y = prop, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparing Topic Mentions in 2021 by Immigrant Status",
    x = "Immigrant Status",
    y = "Proportion of Total Responses",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

## Same steps for 2019

# (Re-)attach the topic flags computed on the 2019 immigration responses.
# The same assignment is made right after those flags are computed, so this
# repeat is idempotent.
immigration_responses_only_19[["econ_dum"]] <-
  econ_mentions_in_imm_19[["econ_dum_imm"]]
immigration_responses_only_19[["gov_dum"]] <-
  gov_mentions_in_imm_19[["gov_dum_imm"]]
immigration_responses_only_19[["health_dum"]] <-
  health_mentions_in_imm_19[["health_dum_imm"]]
immigration_responses_only_19[["enviro_dum"]] <-
  enviro_mentions_in_imm_19[["enviro_dum_imm"]]

# One subset per topic: immigration responses that also mention that topic
econ_imm_19 <- immigration_responses_only_19[
  immigration_responses_only_19[["econ_dum"]] == 1,
]
gov_imm_19 <- immigration_responses_only_19[
  immigration_responses_only_19[["gov_dum"]] == 1,
]
health_imm_19 <- immigration_responses_only_19[
  immigration_responses_only_19[["health_dum"]] == 1,
]
enviro_imm_19 <- immigration_responses_only_19[
  immigration_responses_only_19[["enviro_dum"]] == 1,
]

# Per immigrant_status group: share of immigration responses mentioning each
# topic, reshaped to long format (one row per group x topic) for plotting.
prop_data_19 <- immigration_responses_only_19 %>%
  group_by(immigrant_status) %>%
  summarise(
    prop_econ = sum(econ_dum == 1) / n(),
    prop_gov = sum(gov_dum == 1) / n(),
    prop_health = sum(health_dum == 1) / n(),
    prop_enviro = sum(enviro_dum == 1) / n()
  ) %>%
  pivot_longer(
    cols = c(prop_econ, prop_gov, prop_health, prop_enviro),
    names_to = "variable",
    values_to = "prop"
  )

## group_by: one grouping variable (immigrant_status)

## summarise: now 3 rows and 5 columns, ungrouped

## pivot_longer: reorganized (prop_econ, prop_gov, prop_health, prop_enviro) into (variable, prop) [was 3x5, now 12x3]

# Grouped bars: topic shares within 2019 immigration responses, one cluster
# per immigrant_status group and one bar per topic.
ggplot(prop_data_19, aes(x = immigrant_status, y = prop, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparing Topic Mentions in 2019 Immigration Responses by Immigrant Status",
    x = "Immigrant Status",
    y = "Proportion of Total Responses",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Per immigrant_status group: share of ALL 2019 responses mentioning each
# topic, reshaped to long format (one row per group x topic) for plotting.
prop_data_19_total <- ces19_subset %>%
  group_by(immigrant_status) %>%
  summarise(
    prop_econ = sum(econ_dum == 1) / n(),
    prop_gov = sum(gov_dum == 1) / n(),
    prop_health = sum(health_dum == 1) / n(),
    prop_enviro = sum(enviro_dum == 1) / n()
  ) %>%
  pivot_longer(
    cols = c(prop_econ, prop_gov, prop_health, prop_enviro),
    names_to = "variable",
    values_to = "prop"
  )

## group_by: one grouping variable (immigrant_status)

## summarise: now 3 rows and 5 columns, ungrouped

## pivot_longer: reorganized (prop_econ, prop_gov, prop_health, prop_enviro) into (variable, prop) [was 3x5, now 12x3]

# Grouped bars: topic shares within all 2019 responses, one cluster per
# immigrant_status group and one bar per topic.
ggplot(prop_data_19_total, aes(x = immigrant_status, y = prop, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparing Topic Mentions in 2019 Responses by Immigrant Status",
    x = "Immigrant Status",
    y = "Proportion of Total Responses",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

## Same comparison, split by left/right party instead of immigrant_status

# 2021 immigration responses from respondents whose fedparty is CPC or PPC
right_imm <- immigration_responses_only_21 %>%
  filter(fedparty %in% c("CPC", "PPC"))

## filter: removed 151 rows (65%), 80 rows remaining

# 2021 immigration responses from respondents whose fedparty is LPC, NDP or GPC
left_imm <- immigration_responses_only_21 %>%
  filter(fedparty %in% c("LPC", "NDP", "GPC"))

## filter: removed 122 rows (53%), 109 rows remaining

# 2019 immigration responses from respondents whose fedparty is CPC or PPC
right_imm_19 <- immigration_responses_only_19 %>%
  filter(fedparty %in% c("CPC", "PPC"))

## filter: removed 495 rows (50%), 498 rows remaining

# 2019 immigration responses from respondents whose fedparty is LPC, NDP or GPC
left_imm_19 <- immigration_responses_only_19 %>%
  filter(fedparty %in% c("LPC", "NDP", "GPC"))

## filter: removed 617 rows (62%), 376 rows remaining

# Share of immigration responses coming from each bloc, per election year.
# The denominator is every immigration response, including respondents whose
# fedparty falls in neither bloc, so the two shares need not sum to 1.

# 2019
prop_right_19 <- nrow(right_imm_19) / nrow(immigration_responses_only_19)
prop_left_19 <- nrow(left_imm_19) / nrow(immigration_responses_only_19)

# 2021
prop_right_21 <- nrow(right_imm) / nrow(immigration_responses_only_21)
prop_left_21 <- nrow(left_imm) / nrow(immigration_responses_only_21)


# Change from 2019 to 2021 (difference in shares)
diff_right <- prop_right_21 - prop_right_19
diff_left <- prop_left_21 - prop_left_19

# One row per year; the difference columns are NA for the 2019 baseline row
comparison_table_lr <- data.frame(
  Year = c("2019", "2021"),
  Right_Proportion = c(prop_right_19, prop_right_21),
  Left_Proportion = c(prop_left_19, prop_left_21),
  Right_Difference = c(NA, diff_right),
  Left_Difference = c(NA, diff_left)
)

comparison_table_lr

# Save the table to the working directory
write.csv(comparison_table_lr, file = "comparison_table_lr.csv")


# Add a left/right label to the 2021 immigration responses. Respondents whose
# fedparty is in neither list get NA.
immigration_responses_only_21 <- immigration_responses_only_21 %>%
  mutate(lr = case_when(
    fedparty %in% c("CPC", "PPC") ~ "Rightist Voters",
    fedparty %in% c("LPC", "NDP", "GPC") ~ "Leftist Voters"
  ))

## mutate: new variable 'lr' (character) with 3 unique values and 18% NA

# Add a left/right label to the 2019 immigration responses. Respondents whose
# fedparty is in neither list get NA.
immigration_responses_only_19 <- immigration_responses_only_19 %>%
  mutate(lr = case_when(
    fedparty %in% c("CPC", "PPC") ~ "Rightist Voters",
    fedparty %in% c("LPC", "NDP", "GPC") ~ "Leftist Voters"
  ))

## mutate: new variable 'lr' (character) with 3 unique values and 12% NA

# Left/right comparison, 2021: among immigration responses with a left/right
# label, the share in each bloc that also mentions each topic, reshaped to
# long format (one row per bloc x topic) for plotting.
prop_data_lr <- immigration_responses_only_21 %>%
  filter(!is.na(lr)) %>%
  group_by(lr) %>%
  summarise(
    prop_econ = sum(econ_dum == 1) / n(),
    prop_gov = sum(gov_dum == 1) / n(),
    prop_health = sum(health_dum == 1) / n(),
    prop_enviro = sum(enviro_dum == 1) / n()
  ) %>%
  pivot_longer(
    cols = c(prop_econ, prop_gov, prop_health, prop_enviro),
    names_to = "variable",
    values_to = "prop"
  )

## filter: removed 42 rows (18%), 189 rows remaining

## group_by: one grouping variable (lr)

## summarise: now 2 rows and 5 columns, ungrouped

## pivot_longer: reorganized (prop_econ, prop_gov, prop_health, prop_enviro) into (variable, prop) [was 2x5, now 8x3]

# Grouped bars: topic shares within 2021 immigration responses, one cluster
# per left/right bloc and one bar per topic.
ggplot(prop_data_lr, aes(x = lr, y = prop, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparing Topic Mentions in 2021 Immigration Responses by Left/Right Voters",
    x = "Federal Party ID",
    y = "Proportion of Total Responses",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Left/right comparison, 2019: among immigration responses with a left/right
# label, the share in each bloc that also mentions each topic, reshaped to
# long format (one row per bloc x topic) for plotting.
prop_data_lr_19 <- immigration_responses_only_19 %>%
  filter(!is.na(lr)) %>%
  group_by(lr) %>%
  summarise(
    prop_econ = sum(econ_dum == 1) / n(),
    prop_gov = sum(gov_dum == 1) / n(),
    prop_health = sum(health_dum == 1) / n(),
    prop_enviro = sum(enviro_dum == 1) / n()
  ) %>%
  pivot_longer(
    cols = c(prop_econ, prop_gov, prop_health, prop_enviro),
    names_to = "variable",
    values_to = "prop"
  )

## filter: removed 119 rows (12%), 874 rows remaining

## group_by: one grouping variable (lr)

## summarise: now 2 rows and 5 columns, ungrouped

## pivot_longer: reorganized (prop_econ, prop_gov, prop_health, prop_enviro) into (variable, prop) [was 2x5, now 8x3]

# Grouped bars: topic shares within 2019 immigration responses, one cluster
# per left/right bloc and one bar per topic.
ggplot(prop_data_lr_19, aes(x = lr, y = prop, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparing Topic Mentions in 2019 Immigration Responses by Left/Right Voters",
    x = "Federal Party ID",
    y = "Proportion of Total Responses",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

Analyzing Salience and Themes in Immigration Discourse: Comparing the 2019 and 2021 Canadian Federal Elections (Computational Text Analysis)

Anjali Bhaheeratha

2024-04-17