#Lab 2
#Setting up R for Lab 2
#Setting working directory
setwd("/Users/timliu/Desktop/SOC252/Lab 2")
#Loading ces data into R
load("/Users/timliu/Desktop/SOC252/Lab 2/ces.Rdata")
#Seeing the first few rows of ces data to see if it worked properly
head(ces)
## cps21_ResponseId cps21_citizenship cps21_age cps21_genderid cps21_province
## 1 R_001Vw6R3CxCzbcR Canadian citizen 57 A man Quebec
## 2 R_00AJoGE6B8Xifwl Canadian citizen 22 A woman British Columbia
## 3 R_00QYXuUFwGAZLgZ Canadian citizen 28 A woman British Columbia
## 4 R_00Wyqmxv6ek6e3L Canadian citizen 29 A woman Ontario
## 5 R_00dscjzlw5eIgEx Canadian citizen 41 A woman Quebec
## 6 R_00nCP7AU1k5Xlkd Canadian citizen 63 A woman Quebec
## cps21_education
## 1 Some technical, community college, CEGEP, College Classique
## 2 Some university
## 3 Bachelor's degree
## 4 Master's degree
## 5 Completed technical, community college, CEGEP, College Classique
## 6 Some technical, community college, CEGEP, College Classique
## cps21_demsat cps21_votechoice cps21_fed_gov_sat
## 1 Fairly satisfied <NA> Not at all satisfied
## 2 Fairly satisfied ndp Fairly satisfied
## 3 Fairly satisfied Don't know/ Prefer not to answer Not very satisfied
## 4 Very satisfied <NA> Very satisfied
## 5 Fairly satisfied ndp Not very satisfied
## 6 Fairly satisfied Bloc Québécois Not very satisfied
## cps21_party_rating_23 cps21_party_rating_24 cps21_party_rating_25
## 1 12 81 30
## 2 71 9 82
## 3 50 25 31
## 4 100 65 71
## 5 29 64 74
## 6 20 0 21
## cps21_party_rating_26 cps21_party_rating_27 cps21_party_rating_29
## 1 45 25 60
## 2 23 36 38
## 3 70 51 NA
## 4 37 41 58
## 5 44 50 74
## 6 100 20 0
## cps21_spend_educ cps21_spend_env
## 1 Spend more Spend about the same as now
## 2 Spend about the same as now Spend more
## 3 Spend more Spend more
## 4 Spend more Spend more
## 5 Spend more Spend about the same as now
## 6 Spend more Spend more
## cps21_spend_defence cps21_spend_imm_min
## 1 Spend about the same as now Spend less
## 2 Spend about the same as now Spend about the same as now
## 3 Spend about the same as now Spend less
## 4 Spend less Spend more
## 5 Spend about the same as now Spend more
## 6 Spend less Spend about the same as now
## cps21_spend_rec_indi cps21_imm
## 1 Spend about the same as now Fewer immigrants
## 2 Spend more About the same number of immigrants as now
## 3 Spend about the same as now Fewer immigrants
## 4 Spend more About the same number of immigrants as now
## 5 Spend more About the same number of immigrants as now
## 6 Spend about the same as now About the same number of immigrants as now
## cps21_fed_id cps21_imm_year cps21_immig_status cps21_vismin_9
## 1 Conservative <NA> <NA> White
## 2 Liberal <NA> <NA> <NA>
## 3 Liberal <NA> <NA> White
## 4 Liberal 2001 Refugee or protected person <NA>
## 5 Liberal <NA> <NA> <NA>
## 6 Bloc Québécois <NA> <NA> White
## cps21_sexuality cps21_employment cps21_union
## 1 Straight or heterosexual Working for pay full-time Yes
## 2 Straight or heterosexual Student and working for pay Yes
## 3 Straight or heterosexual Working for pay full-time No
## 4 Straight or heterosexual Working for pay full-time No
## 5 Straight or heterosexual Working for pay full-time Yes
## 6 Straight or heterosexual Retired No
## cps21_children cps21_income_number cps21_income_cat
## 1 1 44000 <NA>
## 2 0 130000 <NA>
## 3 0 145000 <NA>
## 4 0 63000 <NA>
## 5 3 90000 <NA>
## 6 0 0 Don't know/ Prefer not to answer
## cps21_marital cps21_party_rating_liberal cps21_party_rating_conservative
## 1 Separated 12 81
## 2 Never Married 71 9
## 3 Married 50 25
## 4 Never Married 100 65
## 5 Married 29 64
## 6 Married 20 0
## cps21_party_rating_ndp cps21_party_rating_bloc cps21_party_rating_green
## 1 30 45 25
## 2 82 23 36
## 3 31 70 51
## 4 71 37 41
## 5 74 44 50
## 6 21 100 20
## cps21_party_rating_ppc cps21_vismin_white
## 1 60 White
## 2 38 Not white
## 3 NA White
## 4 58 Not white
## 5 74 Not white
## 6 0 White
#Load libraries that we will use
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
library(ggplot2)
library(broom)
#Select only the relevant variables we will work with
ces_filtered <- ces %>%
select(cps21_genderid, cps21_age, cps21_education, cps21_income_number, cps21_province,
cps21_fed_id, cps21_spend_educ, cps21_spend_defence,cps21_party_rating_liberal)
#Question 1
#First I will filter and organize all the data we will be working with
#View the distribution of party identification
table(ces_filtered$cps21_fed_id)
##
## Liberal Conservative
## 6395 4800
## ndp Bloc Québécois
## 3113 1832
## Green Another party (please specify)
## 563 439
## None of these Don't know/ Prefer not to answer
## 2108 1718
#Filter out 'Don't know' and 'Prefer not to answer' responses
ces_filtered <- ces_filtered %>%
filter(!cps21_fed_id %in% c("Don't know/ Prefer not to answer"))
#Double check to see if cleaned properly
table(ces_filtered$cps21_fed_id)
##
## Liberal Conservative
## 6395 4800
## ndp Bloc Québécois
## 3113 1832
## Green Another party (please specify)
## 563 439
## None of these Don't know/ Prefer not to answer
## 2108 0
#Separating Liberals and Non-Liberals
ces_filtered <- ces_filtered %>%
mutate(cps21_fed_id = ifelse(cps21_fed_id == "Liberal", "Liberal", "Non-Liberal"))
#Checking to see the distribution of Liberal vs. Non-Liberal
table(ces_filtered$cps21_fed_id)
##
## Liberal Non-Liberal
## 6395 12855
#View the distribution of education level
table(ces_filtered$cps21_education)
##
## No schooling
## 4
## Some elementary school
## 17
## Completed elementary school
## 34
## Some secondary/ high school
## 411
## Completed secondary/ high school
## 2415
## Some technical, community college, CEGEP, College Classique
## 1683
## Completed technical, community college, CEGEP, College Classique
## 4054
## Some university
## 2102
## Bachelor's degree
## 5633
## Master's degree
## 2034
## Professional degree or doctorate
## 841
## Don't know/ Prefer not to answer
## 22
#Filter out 'Don't know' and 'Prefer not to answer' responses
ces_filtered <- ces_filtered %>%
filter(!cps21_education %in% c("Don't know/ Prefer not to answer"))
#Double check to see if cleaned properly
table(ces_filtered$cps21_education)
##
## No schooling
## 4
## Some elementary school
## 17
## Completed elementary school
## 34
## Some secondary/ high school
## 411
## Completed secondary/ high school
## 2415
## Some technical, community college, CEGEP, College Classique
## 1683
## Completed technical, community college, CEGEP, College Classique
## 4054
## Some university
## 2102
## Bachelor's degree
## 5633
## Master's degree
## 2034
## Professional degree or doctorate
## 841
## Don't know/ Prefer not to answer
## 0
#Organize education level into simpler categories
ces_filtered <- ces_filtered %>%
mutate(cps21_education = as.character(cps21_education)) %>%
mutate(cps21_education = case_when(
is.na(cps21_education) ~ "Unknown",
cps21_education %in% c("No schooling", "Some elementary school", "Completed elementary school", "Some secondary/ high school") ~ "Some secondary/ high school or less",
cps21_education == "Completed secondary/ high school" ~ "Completed secondary/ high school",
cps21_education %in% c("Some technical, community college, CEGEP, College Classique", "Completed technical, community college, CEGEP, College Classique", "Some university") ~ "Post-Secondary Education below a Bachelor's degree",
cps21_education == "Bachelor's degree" ~ "Bachelor's degree",
cps21_education %in% c("Master's degree", "Professional degree or doctorate") ~ "Completed graduate/professional",
TRUE ~ cps21_education # This keeps any other categories as they are
))
#Check the new education levels
table(ces_filtered$cps21_education)
##
## Bachelor's degree
## 5633
## Completed graduate/professional
## 2875
## Completed secondary/ high school
## 2415
## Post-Secondary Education below a Bachelor's degree
## 7839
## Some secondary/ high school or less
## 466
#Combine specified provinces into 'Other' and only keeping the 5 biggest provinces
ces_filtered <- ces_filtered %>%
mutate(cps21_province = case_when(
cps21_province %in% c("Northwest Territories", "Nunavut", "Prince Edward Island", "Yukon", "Newfoundland and Labrador", "New Brunswick", "Nova Scotia", "Saskatchewan") ~ "Other",
TRUE ~ cps21_province # Keep other provinces unchanged
))
#Check the distribution of provinces after combining
table(ces_filtered$cps21_province)
##
## Alberta British Columbia Manitoba Ontario
## 2375 2123 724 6709
## Other Quebec
## 1527 5770
#Divide income into categories
ces_filtered <- ces_filtered %>%
mutate(cps21_income_number = cut(cps21_income_number,
breaks = c(-Inf, 10000, 50000, 100000, 250000, Inf),
labels = c("0-10k", "10k-50k", "50k-100k", "100k-250k", "250k+"),
right = FALSE))
#Check the distribution of the new income categories
table(ces_filtered$cps21_income_number)
##
## 0-10k 10k-50k 50k-100k 100k-250k 250k+
## 803 4689 7197 5591 436
#Divide age into categories
ces_filtered <- ces_filtered %>%
mutate(cps21_age = cut(cps21_age,
breaks = c(17, 24, 40, 65, Inf),
labels = c("18-24", "25-40", "40-65", "65+"),
right = TRUE))
#Check the distribution of the new age categories
table(ces_filtered$cps21_age)
##
## 18-24 25-40 40-65 65+
## 1242 4539 8388 5059
#After filtering and organizing all the data we will create tables and visualizations to show the data
#Create a summary table with percentages of all demographic characteristics
summary_table_percentage <- ces_filtered %>%
group_by(cps21_fed_id) %>% # Group by party identification
summarize(
Total_Count = n(), # Total number of respondents in each category
Gender_Distribution = list(prop.table(table(cps21_genderid)) * 100), # Percentage of gender distribution
Age_Distribution = list(prop.table(table(cps21_age)) * 100), # Percentage of age categories
Education_Distribution = list(prop.table(table(cps21_education)) * 100), # Percentage of education levels
Income_Distribution = list(prop.table(table(cps21_income_number)) * 100), # Percentage of income categories
Province_Distribution = list(prop.table(table(cps21_province)) * 100) # Percentage of provinces
)
#View the summary table with percentages
print(summary_table_percentage)
## # A tibble: 2 × 7
## cps21_fed_id Total_Count Gender_Distribution Age_Distribution
## <chr> <int> <list> <list>
## 1 Liberal 6393 <table [4]> <table [4]>
## 2 Non-Liberal 12835 <table [4]> <table [4]>
## # ℹ 3 more variables: Education_Distribution <list>,
## # Income_Distribution <list>, Province_Distribution <list>
#Calculate the counts and percentages for gender by party identification
gender_plot_data <- ces_filtered %>%
group_by(cps21_fed_id, cps21_genderid) %>%
summarize(Count = n()) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
## `summarise()` has grouped output by 'cps21_fed_id'. You can override using the
## `.groups` argument.
#Create the bar plot
gender_plot <- ggplot(gender_plot_data, aes(x = cps21_genderid, y = Percentage, fill = cps21_fed_id)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Gender Distribution by Party Identification",
x = "Gender",
y = "Percentage") +
scale_fill_manual(values = c("Liberal" = "#1f78b4", "Non-Liberal" = "#b2df8a")) +
theme_minimal()
#Display the plot
print(gender_plot)

#Calculate the counts and percentages for age by party identification
age_plot_data <- ces_filtered %>%
group_by(cps21_fed_id, cps21_age) %>%
summarize(Count = n()) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
## `summarise()` has grouped output by 'cps21_fed_id'. You can override using the
## `.groups` argument.
#Create the bar plot
age_plot <- ggplot(age_plot_data, aes(x = cps21_age, y = Percentage, fill = cps21_fed_id)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Age Distribution by Party Identification",
x = "Age Group",
y = "Percentage") +
scale_fill_manual(values = c("Liberal" = "#1f78b4", "Non-Liberal" = "#b2df8a")) +
theme_minimal()
#Display the plot
print(age_plot)

#Calculate the counts and percentages for education level by party identification
education_plot_data <- ces_filtered %>%
group_by(cps21_fed_id, cps21_education) %>%
summarize(Count = n()) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
## `summarise()` has grouped output by 'cps21_fed_id'. You can override using the
## `.groups` argument.
#Create the bar plot for education level
education_plot <- ggplot(education_plot_data, aes(x = cps21_education, y = Percentage, fill = cps21_fed_id)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Education Level Distribution by Party Identification",
x = "Education Level",
y = "Percentage") +
scale_fill_manual(values = c("Liberal" = "#1f78b4", "Non-Liberal" = "#b2df8a")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Display the plot
print(education_plot)

#Calculate the counts and percentages for income level by party identification
income_plot_data <- ces_filtered %>%
group_by(cps21_fed_id, cps21_income_number) %>%
summarize(Count = n()) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
## `summarise()` has grouped output by 'cps21_fed_id'. You can override using the
## `.groups` argument.
#Create the bar plot for income levels
income_plot <- ggplot(income_plot_data, aes(x = cps21_income_number, y = Percentage, fill = cps21_fed_id)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Income Level Distribution by Party Identification",
x = "Income Level",
y = "Percentage") +
scale_fill_manual(values = c("Liberal" = "#1f78b4", "Non-Liberal" = "#b2df8a")) +
theme_minimal()
#Display the plot
print(income_plot)

#Calculate the counts and percentages for province by party identification
province_plot_data <- ces_filtered %>%
group_by(cps21_fed_id, cps21_province) %>%
summarize(Count = n()) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
## `summarise()` has grouped output by 'cps21_fed_id'. You can override using the
## `.groups` argument.
#Create the bar plot for province
province_plot <- ggplot(province_plot_data, aes(x = cps21_province, y = Percentage, fill = cps21_fed_id)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Province of Residence Distribution by Party Identification",
x = "Province of Residence",
y = "Percentage") +
scale_fill_manual(values = c("Liberal" = "#1f78b4", "Non-Liberal" = "#b2df8a")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Display the plot
print(province_plot)

#After creating plots for each of the demographic characteristics we can see some visual differences.
#Firstly, in the gender category we see that women are more liberal and men are more non-liberal.
#In the age category, we see a pretty even distribution of party identification with a slight more non-liberals
#from the ages of 18-65. However, in the 65+ age category we see a more significant discrepancy in there being
#more liberals than non-liberals.
#For education level we seem to see that liberals have higher levels of education as more liberals
#have a bachelor's degree and completed graduate/ professional degrees.
#In terms of income levels we also see the same trend as education levels and this may be directly
#connected, but more analysis would need to be done to see the correlation.
#Finally, for province of residence we see that Ontario has significantly much more liberals.
#In Alberta and Quebec we see that there are more non-liberals and for the other provinces and
#territories it seems pretty evenly distributed.
#Question 2
#Running the linear regression model with selected variables
liberal_rating_model <- lm(cps21_party_rating_liberal ~ cps21_age + cps21_education + cps21_province + cps21_spend_educ + cps21_spend_defence, data = ces_filtered)
#Display the summary of the regression model
summary(liberal_rating_model)
##
## Call:
## lm(formula = cps21_party_rating_liberal ~ cps21_age + cps21_education +
## cps21_province + cps21_spend_educ + cps21_spend_defence,
## data = ces_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66.24 -27.50 2.90 24.95 86.66
##
## Coefficients:
## Estimate
## (Intercept) 25.2428
## cps21_age25-40 -1.7839
## cps21_age40-65 -5.2776
## cps21_age65+ -4.0555
## cps21_educationCompleted graduate/professional 2.4615
## cps21_educationCompleted secondary/ high school -9.4368
## cps21_educationPost-Secondary Education below a Bachelor's degree -6.5371
## cps21_educationSome secondary/ high school or less -10.5530
## cps21_provinceBritish Columbia 9.6696
## cps21_provinceManitoba 7.1776
## cps21_provinceOntario 12.5573
## cps21_provinceOther 12.0748
## cps21_provinceQuebec 7.5440
## cps21_spend_educSpend about the same as now 18.6757
## cps21_spend_educSpend more 24.5168
## cps21_spend_educDon't know/ Prefer not to answer 16.3460
## cps21_spend_defenceSpend about the same as now 3.2492
## cps21_spend_defenceSpend more -9.1050
## cps21_spend_defenceDon't know/ Prefer not to answer -1.6577
## Std. Error
## (Intercept) 1.6308
## cps21_age25-40 1.0132
## cps21_age40-65 0.9603
## cps21_age65+ 1.0030
## cps21_educationCompleted graduate/professional 0.7171
## cps21_educationCompleted secondary/ high school 0.7728
## cps21_educationPost-Secondary Education below a Bachelor's degree 0.5531
## cps21_educationSome secondary/ high school or less 1.5151
## cps21_provinceBritish Columbia 0.9318
## cps21_provinceManitoba 1.3254
## cps21_provinceOntario 0.7456
## cps21_provinceOther 1.0231
## cps21_provinceQuebec 0.7729
## cps21_spend_educSpend about the same as now 1.1832
## cps21_spend_educSpend more 1.1587
## cps21_spend_educDon't know/ Prefer not to answer 1.7682
## cps21_spend_defenceSpend about the same as now 0.5638
## cps21_spend_defenceSpend more 0.6854
## cps21_spend_defenceDon't know/ Prefer not to answer 1.1594
## t value
## (Intercept) 15.479
## cps21_age25-40 -1.761
## cps21_age40-65 -5.496
## cps21_age65+ -4.043
## cps21_educationCompleted graduate/professional 3.433
## cps21_educationCompleted secondary/ high school -12.211
## cps21_educationPost-Secondary Education below a Bachelor's degree -11.819
## cps21_educationSome secondary/ high school or less -6.965
## cps21_provinceBritish Columbia 10.377
## cps21_provinceManitoba 5.415
## cps21_provinceOntario 16.841
## cps21_provinceOther 11.802
## cps21_provinceQuebec 9.761
## cps21_spend_educSpend about the same as now 15.785
## cps21_spend_educSpend more 21.159
## cps21_spend_educDon't know/ Prefer not to answer 9.244
## cps21_spend_defenceSpend about the same as now 5.763
## cps21_spend_defenceSpend more -13.284
## cps21_spend_defenceDon't know/ Prefer not to answer -1.430
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## cps21_age25-40 0.078297 .
## cps21_age40-65 3.94e-08 ***
## cps21_age65+ 5.29e-05 ***
## cps21_educationCompleted graduate/professional 0.000599 ***
## cps21_educationCompleted secondary/ high school < 2e-16 ***
## cps21_educationPost-Secondary Education below a Bachelor's degree < 2e-16 ***
## cps21_educationSome secondary/ high school or less 3.39e-12 ***
## cps21_provinceBritish Columbia < 2e-16 ***
## cps21_provinceManitoba 6.19e-08 ***
## cps21_provinceOntario < 2e-16 ***
## cps21_provinceOther < 2e-16 ***
## cps21_provinceQuebec < 2e-16 ***
## cps21_spend_educSpend about the same as now < 2e-16 ***
## cps21_spend_educSpend more < 2e-16 ***
## cps21_spend_educDon't know/ Prefer not to answer < 2e-16 ***
## cps21_spend_defenceSpend about the same as now 8.38e-09 ***
## cps21_spend_defenceSpend more < 2e-16 ***
## cps21_spend_defenceDon't know/ Prefer not to answer 0.152797
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.08 on 19067 degrees of freedom
## (142 observations deleted due to missingness)
## Multiple R-squared: 0.09212, Adjusted R-squared: 0.09126
## F-statistic: 107.5 on 18 and 19067 DF, p-value: < 2.2e-16
#Tidy summary with confidence intervals
regression_table <- tidy(liberal_rating_model, conf.int = TRUE)
print(regression_table)
## # A tibble: 19 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 25.2 1.63 15.5 1.02e-53 22.0 28.4
## 2 cps21_age25-40 -1.78 1.01 -1.76 7.83e- 2 -3.77 0.202
## 3 cps21_age40-65 -5.28 0.960 -5.50 3.94e- 8 -7.16 -3.40
## 4 cps21_age65+ -4.06 1.00 -4.04 5.29e- 5 -6.02 -2.09
## 5 cps21_educationComp… 2.46 0.717 3.43 5.99e- 4 1.06 3.87
## 6 cps21_educationComp… -9.44 0.773 -12.2 3.64e-34 -11.0 -7.92
## 7 cps21_educationPost… -6.54 0.553 -11.8 4.03e-32 -7.62 -5.45
## 8 cps21_educationSome… -10.6 1.52 -6.97 3.39e-12 -13.5 -7.58
## 9 cps21_provinceBriti… 9.67 0.932 10.4 3.68e-25 7.84 11.5
## 10 cps21_provinceManit… 7.18 1.33 5.42 6.19e- 8 4.58 9.78
## 11 cps21_provinceOntar… 12.6 0.746 16.8 3.49e-63 11.1 14.0
## 12 cps21_provinceOther 12.1 1.02 11.8 4.92e-32 10.1 14.1
## 13 cps21_provinceQuebec 7.54 0.773 9.76 1.87e-22 6.03 9.06
## 14 cps21_spend_educSpe… 18.7 1.18 15.8 8.95e-56 16.4 21.0
## 15 cps21_spend_educSpe… 24.5 1.16 21.2 3.09e-98 22.2 26.8
## 16 cps21_spend_educDon… 16.3 1.77 9.24 2.61e-20 12.9 19.8
## 17 cps21_spend_defence… 3.25 0.564 5.76 8.38e- 9 2.14 4.35
## 18 cps21_spend_defence… -9.10 0.685 -13.3 4.31e-40 -10.4 -7.76
## 19 cps21_spend_defence… -1.66 1.16 -1.43 1.53e- 1 -3.93 0.615
#Intercept (estimate = 25.2) The confidence interval (22.0, 28.4) indicates a high level of precision.
#Ages 40-65 and 65+ have ratings are significantly lower by 5.28 and 4.06 points respectively
#compared to the under-25 group, with a CI (-7.16, -3.40) and (-6.02, -2.09) respecitvely
#indicating confidence in this negative association and showing a clear negative relationship
#with Liberal Party perception as age increases.
#Completed graduate/professional degree: Liberal Party ratings are 2.46 points higher than the reference
#education group. CI (1.06, 3.87) suggests a positive association with higher education.
#Completed secondary/high school: This group rates the Liberal Party significantly lower by 9.44 points.
#CI (-11.0, -7.92) indicates a strong negative relationship.
#Post-secondary below Bachelor’s degree: Ratings are lower by 6.54 points, with CI (-7.62, -5.45),
#indicating a negative relationship.
#Some secondary/high school or less: Ratings are lower by 10.6 points, suggesting that lower educational
#attainment is associated with less favorable views. CI (-13.5, -7.58).
# British Columbia: Ratings are 9.67 points higher than the baseline province, with a CI of (7.84, 11.5),
#showing a strong positive effect.
#Manitoba: Ratings are higher by 7.18 points (CI 4.58, 9.78), indicating a positive association.
#Ontario: Ratings are 12.6 points higher (CI 11.1, 14.0), suggesting the highest positive association
#among provinces.
#Other (grouped provinces): 12.1 points higher, with a CI (10.1, 14.1).
#Quebec: Ratings are 7.54 points higher, CI (6.03, 9.06).
#Federal Spending on Education (cps21_spend_educ)
#Spend about the same as now: This is associated with an 18.7-point higher rating of the Liberal Party,
#with a CI (16.4, 21.0), indicating strong support.
#Spend more: This group rates the Liberal Party 24.5 points higher (CI 22.2, 26.8), suggesting a very
#positive association.
#Federal Spending on Defense (cps21_spend_defence)
#Spend about the same as now: A positive association of 3.25 points (CI 2.14, 4.35).
#Spend more: A negative association of -9.10 points (CI -10.4, -7.76), indicating those favoring
#increased defense spending rate the Liberal Party lower.
#Based on the analysis, certain demographics and opinions have clear associations with Liberal Party ratings.
#Younger people, those with higher education, and residents in provinces like Ontario and British Columbia tend
#to view the Liberal Party more favorably. Additionally, those who believe the federal government should spend
#more on education rate the Liberal Party much higher, while those favoring increased defense spending tend to
#rate it lower.
#Question 3
#Filter for only Liberal and Conservative party identifiers and our variables
ces_filtered2 <- ces %>%
filter(cps21_fed_id %in% c("Liberal", "Conservative")) %>%
select(cps21_age, cps21_education, cps21_province, cps21_spend_educ, cps21_spend_defence, cps21_fed_id)
#Redo the filter for our variables that we did prior
#Filter out 'Don't know' and 'Prefer not to answer' responses
ces_filtered2 <- ces_filtered2 %>%
filter(!cps21_education %in% c("Don't know/ Prefer not to answer")) %>%
filter(!cps21_spend_defence %in% c("Don't know/ Prefer not to answer")) %>%
filter(!cps21_spend_educ %in% c("Don't know/ Prefer not to answer"))
#Double check to see if cleaned properly
table(ces_filtered2$cps21_education)
##
## No schooling
## 3
## Some elementary school
## 9
## Completed elementary school
## 15
## Some secondary/ high school
## 202
## Completed secondary/ high school
## 1251
## Some technical, community college, CEGEP, College Classique
## 917
## Completed technical, community college, CEGEP, College Classique
## 2231
## Some university
## 1073
## Bachelor's degree
## 3254
## Master's degree
## 1137
## Professional degree or doctorate
## 492
## Don't know/ Prefer not to answer
## 0
#Organize education level into simpler categories
ces_filtered2 <- ces_filtered2 %>%
mutate(cps21_education = as.character(cps21_education)) %>%
mutate(cps21_education = case_when(
is.na(cps21_education) ~ "Unknown",
cps21_education %in% c("No schooling", "Some elementary school", "Completed elementary school", "Some secondary/ high school") ~ "Some secondary/ high school or less",
cps21_education == "Completed secondary/ high school" ~ "Completed secondary/ high school",
cps21_education %in% c("Some technical, community college, CEGEP, College Classique", "Completed technical, community college, CEGEP, College Classique", "Some university") ~ "Post-Secondary Education below a Bachelor's degree",
cps21_education == "Bachelor's degree" ~ "Bachelor's degree",
cps21_education %in% c("Master's degree", "Professional degree or doctorate") ~ "Completed graduate/professional",
TRUE ~ cps21_education # This keeps any other categories as they are
))
#Check the new education levels
table(ces_filtered2$cps21_education)
##
## Bachelor's degree
## 3254
## Completed graduate/professional
## 1629
## Completed secondary/ high school
## 1251
## Post-Secondary Education below a Bachelor's degree
## 4221
## Some secondary/ high school or less
## 229
#Combine specified provinces into 'Other' and only keeping the 5 biggest provinces
ces_filtered2 <- ces_filtered2 %>%
mutate(cps21_province = case_when(
cps21_province %in% c("Northwest Territories", "Nunavut", "Prince Edward Island", "Yukon", "Newfoundland and Labrador", "New Brunswick", "Nova Scotia", "Saskatchewan") ~ "Other",
TRUE ~ cps21_province # Keep other provinces unchanged
))
#Check the distribution of provinces after combining
table(ces_filtered2$cps21_province)
##
## Alberta British Columbia Manitoba Ontario
## 1510 1144 418 4232
## Other Quebec
## 832 2448
#Divide age into categories
ces_filtered2 <- ces_filtered2 %>%
mutate(cps21_age = cut(cps21_age,
breaks = c(17, 24, 40, 65, Inf),
labels = c("18-24", "25-40", "40-65", "65+"),
right = TRUE))
#Check the distribution of the new age categories
table(ces_filtered2$cps21_age)
##
## 18-24 25-40 40-65 65+
## 506 2170 4727 3181
#Run the logistic regression model
logit_model <- glm(cps21_fed_id ~ cps21_age + cps21_education + cps21_province + cps21_spend_educ + cps21_spend_defence,
data = ces_filtered2, family = "binomial")
#Display the summary of the logistic regression
summary(logit_model)
##
## Call:
## glm(formula = cps21_fed_id ~ cps21_age + cps21_education + cps21_province +
## cps21_spend_educ + cps21_spend_defence, family = "binomial",
## data = ces_filtered2)
##
## Coefficients:
## Estimate
## (Intercept) 1.17092
## cps21_age25-40 0.19253
## cps21_age40-65 0.32866
## cps21_age65+ 0.23039
## cps21_educationCompleted graduate/professional -0.15967
## cps21_educationCompleted secondary/ high school 0.49995
## cps21_educationPost-Secondary Education below a Bachelor's degree 0.31775
## cps21_educationSome secondary/ high school or less 0.65212
## cps21_provinceBritish Columbia -0.94699
## cps21_provinceManitoba -0.58060
## cps21_provinceOntario -1.02207
## cps21_provinceOther -1.19667
## cps21_provinceQuebec -1.30654
## cps21_spend_educSpend about the same as now -1.16713
## cps21_spend_educSpend more -1.81856
## cps21_spend_defenceSpend about the same as now 0.27841
## cps21_spend_defenceSpend more 1.21111
## Std. Error
## (Intercept) 0.16737
## cps21_age25-40 0.11151
## cps21_age40-65 0.10499
## cps21_age65+ 0.10741
## cps21_educationCompleted graduate/professional 0.06819
## cps21_educationCompleted secondary/ high school 0.07296
## cps21_educationPost-Secondary Education below a Bachelor's degree 0.05177
## cps21_educationSome secondary/ high school or less 0.14878
## cps21_provinceBritish Columbia 0.08529
## cps21_provinceManitoba 0.11852
## cps21_provinceOntario 0.06645
## cps21_provinceOther 0.09479
## cps21_provinceQuebec 0.07399
## cps21_spend_educSpend about the same as now 0.11904
## cps21_spend_educSpend more 0.11825
## cps21_spend_defenceSpend about the same as now 0.06018
## cps21_spend_defenceSpend more 0.06687
## z value
## (Intercept) 6.996
## cps21_age25-40 1.727
## cps21_age40-65 3.130
## cps21_age65+ 2.145
## cps21_educationCompleted graduate/professional -2.342
## cps21_educationCompleted secondary/ high school 6.853
## cps21_educationPost-Secondary Education below a Bachelor's degree 6.138
## cps21_educationSome secondary/ high school or less 4.383
## cps21_provinceBritish Columbia -11.104
## cps21_provinceManitoba -4.899
## cps21_provinceOntario -15.381
## cps21_provinceOther -12.624
## cps21_provinceQuebec -17.658
## cps21_spend_educSpend about the same as now -9.805
## cps21_spend_educSpend more -15.379
## cps21_spend_defenceSpend about the same as now 4.626
## cps21_spend_defenceSpend more 18.112
## Pr(>|z|)
## (Intercept) 2.63e-12 ***
## cps21_age25-40 0.08422 .
## cps21_age40-65 0.00175 **
## cps21_age65+ 0.03195 *
## cps21_educationCompleted graduate/professional 0.01921 *
## cps21_educationCompleted secondary/ high school 7.25e-12 ***
## cps21_educationPost-Secondary Education below a Bachelor's degree 8.34e-10 ***
## cps21_educationSome secondary/ high school or less 1.17e-05 ***
## cps21_provinceBritish Columbia < 2e-16 ***
## cps21_provinceManitoba 9.65e-07 ***
## cps21_provinceOntario < 2e-16 ***
## cps21_provinceOther < 2e-16 ***
## cps21_provinceQuebec < 2e-16 ***
## cps21_spend_educSpend about the same as now < 2e-16 ***
## cps21_spend_educSpend more < 2e-16 ***
## cps21_spend_defenceSpend about the same as now 3.73e-06 ***
## cps21_spend_defenceSpend more < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 14452 on 10583 degrees of freedom
## Residual deviance: 12844 on 10567 degrees of freedom
## AIC: 12878
##
## Number of Fisher Scoring iterations: 4
#Get the coefficients, standard errors, and calculate odds ratios
logit_results <- tidy(logit_model) %>%
mutate(odds_ratio = exp(estimate),
conf.low = exp(estimate - 1.96 * std.error), # 95% CI lower bound
conf.high = exp(estimate + 1.96 * std.error)) # 95% CI upper bound
#Display the table with odds ratios and 95% confidence intervals
logit_results
## # A tibble: 17 × 8
## term estimate std.error statistic p.value odds_ratio conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Interce… 1.17 0.167 7.00 2.63e-12 3.22 2.32 4.48
## 2 cps21_ag… 0.193 0.112 1.73 8.42e- 2 1.21 0.974 1.51
## 3 cps21_ag… 0.329 0.105 3.13 1.75e- 3 1.39 1.13 1.71
## 4 cps21_ag… 0.230 0.107 2.15 3.19e- 2 1.26 1.02 1.55
## 5 cps21_ed… -0.160 0.0682 -2.34 1.92e- 2 0.852 0.746 0.974
## 6 cps21_ed… 0.500 0.0730 6.85 7.25e-12 1.65 1.43 1.90
## 7 cps21_ed… 0.318 0.0518 6.14 8.34e-10 1.37 1.24 1.52
## 8 cps21_ed… 0.652 0.149 4.38 1.17e- 5 1.92 1.43 2.57
## 9 cps21_pr… -0.947 0.0853 -11.1 1.20e-28 0.388 0.328 0.458
## 10 cps21_pr… -0.581 0.119 -4.90 9.65e- 7 0.560 0.444 0.706
## 11 cps21_pr… -1.02 0.0664 -15.4 2.19e-53 0.360 0.316 0.410
## 12 cps21_pr… -1.20 0.0948 -12.6 1.55e-36 0.302 0.251 0.364
## 13 cps21_pr… -1.31 0.0740 -17.7 8.86e-70 0.271 0.234 0.313
## 14 cps21_sp… -1.17 0.119 -9.80 1.08e-22 0.311 0.246 0.393
## 15 cps21_sp… -1.82 0.118 -15.4 2.28e-53 0.162 0.129 0.205
## 16 cps21_sp… 0.278 0.0602 4.63 3.73e- 6 1.32 1.17 1.49
## 17 cps21_sp… 1.21 0.0669 18.1 2.58e-73 3.36 2.94 3.83
#The odds of identifying with the Conservative Party increase with age, particularly for those aged 25-40
#and 40-65, with confidence intervals of 0.974 to 1.51 and 1.13 to 1.71, respectively. Individuals aged
#25-40 show a log-odds increase of 0.193, translating to an odds ratio of 1.21, indicating a 21% higher
#likelihood of identifying as Conservative compared to younger individuals. Those aged 40-65 and 65+ have
#odds ratios of 1.39 and 1.26, respectively, suggesting they are also more likely to identify as
#Conservative compared to the reference group. Individuals with lower education levels, such as those who
#completed secondary school or had some secondary education, exhibit significantly higher odds
#(1.65 and 1.92, respectively) of identifying as Conservative. In contrast, those with graduate or
#professional degrees are less likely to identify as Conservative, with an odds ratio of 0.85
#(confidence interval: 0.746 to 0.974). People from provinces like British Columbia, Manitoba, Ontario,
#and Quebec show lower odds of Conservative identification, with odds ratios ranging from 0.27 to 0.56.
#Regarding government spending, those who believe the government should spend more on education are
#significantly less likely to identify as Conservative (odds ratio of 0.16), while those who favor increased
#defense spending exhibit higher odds of Conservative identification, with an odds ratio of 3.36
#(confidence interval: 2.94 to 3.83).
#In summary, individuals' identification with the Liberal or Conservative Party is significantly influenced
#by demographic factors and political opinions. Older respondents are more likely to identify as
#Conservative. Those with lower educational attainment are more inclined to identify as Conservative,
#while individuals with graduate degrees are more likely to be Liberal. Respondents from British Columbia,
#Manitoba, Ontario, and Quebec exhibit decreased odds of identifying as Conservative. Additionally,
#beliefs about government spending significantly influence party affiliation; those favoring increased
#defense spending are more likely to identify as Conservative, while those supporting increased education
#funding tend to lean towards the Liberal Party.