Research Question: - What are the main factors influencing voter turnout and variability in party preferences across electoral districts? - How do voterturnoutrates and regional differences influence the probabilities of electoral success for Canadian political parties?
• Electoral District Name/ Electoral District Number: Identifier for electoral districts. • Percentage of Voter Turnout: Proportion of eligible voters who participated. • Elected Candidate: Vote counts for each political party.
# Frequency table of Province variable
Voting_Results %>%
count(`Province`) %>%
mutate(Percentage = n / sum(n) * 100)
## # A tibble: 13 × 3
## Province n Percentage
## <chr> <int> <dbl>
## 1 Alberta 34 10.1
## 2 British Columbia/Colombie-Britannique 42 12.4
## 3 Manitoba 14 4.14
## 4 New Brunswick/Nouveau-Brunswick 10 2.96
## 5 Newfoundland and Labrador/Terre-Neuve-et-Labrador 7 2.07
## 6 Northwest Territories/Territoires du Nord-Ouest 1 0.296
## 7 Nova Scotia/Nouvelle-Écosse 11 3.25
## 8 Nunavut 1 0.296
## 9 Ontario 121 35.8
## 10 Prince Edward Island/Île-du-Prince-Édouard 4 1.18
## 11 Quebec/Québec 78 23.1
## 12 Saskatchewan 14 4.14
## 13 Yukon 1 0.296
# Frequency table of Elected Candidate variable
Voting_Results %>%
count(`Elected Candidate`) %>%
mutate(Percentage = n / sum(n) * 100)
## # A tibble: 5 × 3
## `Elected Candidate` n Percentage
## <fct> <int> <dbl>
## 1 Bloc Québécois 32 9.47
## 2 Conservative 119 35.2
## 3 Green Party 2 0.592
## 4 Liberal 160 47.3
## 5 NDP 25 7.40
# Frequency table for binned Percentage of Voter Turnout
Voting_Results %>%
mutate(Turnout_Bin = cut(`Percentage of Voter Turnout`, breaks = seq(0, 100, 10))) %>%
count(Turnout_Bin) %>%
mutate(Percentage = n / sum(n) * 100)
## # A tibble: 5 × 3
## Turnout_Bin n Percentage
## <fct> <int> <dbl>
## 1 (30,40] 2 0.592
## 2 (40,50] 8 2.37
## 3 (50,60] 89 26.3
## 4 (60,70] 222 65.7
## 5 (70,80] 17 5.03
# Exclude missing values and generate frequency table
Voting_Results %>%
filter(!is.na(`Percentage of Voter Turnout`)) %>% # Excludes missing values
count(`Percentage of Voter Turnout`) %>%
mutate(Percentage = n / sum(n) * 100)
## # A tibble: 168 × 3
## `Percentage of Voter Turnout` n Percentage
## <dbl> <int> <dbl>
## 1 33.8 1 0.296
## 2 35.8 1 0.296
## 3 43.6 1 0.296
## 4 44.7 1 0.296
## 5 46.2 1 0.296
## 6 46.7 1 0.296
## 7 48 1 0.296
## 8 48.2 1 0.296
## 9 48.9 1 0.296
## 10 49 1 0.296
## # ℹ 158 more rows
# Convert relevant columns to appropriate formats
Voting_Results$`Elected Candidate` <- as.factor(Voting_Results$`Elected Candidate`)
Voting_Results$Province <- as.factor(Voting_Results$Province)
# Fit logistic regression model
model <- glm(`Elected Candidate` ~ `Percentage of Voter Turnout` + Province,
data = Voting_Results,
family = binomial)
# Summarize regression results
summary(model)
##
## Call:
## glm(formula = `Elected Candidate` ~ `Percentage of Voter Turnout` +
## Province, family = binomial, data = Voting_Results)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.848e+01 3.034e+03
## `Percentage of Voter Turnout` 3.268e-02 4.788e-02
## ProvinceBritish Columbia/Colombie-Britannique 1.044e-01 4.082e+03
## ProvinceManitoba 1.229e-01 5.602e+03
## ProvinceNew Brunswick/Nouveau-Brunswick -1.379e-02 6.373e+03
## ProvinceNewfoundland and Labrador/Terre-Neuve-et-Labrador 3.803e-01 7.354e+03
## ProvinceNorthwest Territories/Territoires du Nord-Ouest 5.593e-01 1.799e+04
## ProvinceNova Scotia/Nouvelle-Écosse 2.035e-02 6.146e+03
## ProvinceNunavut 9.808e-01 1.799e+04
## ProvinceOntario 8.424e-02 3.434e+03
## ProvincePrince Edward Island/Île-du-Prince-Édouard -2.518e-01 9.368e+03
## ProvinceQuebec/Québec -2.018e+01 3.034e+03
## ProvinceSaskatchewan 2.107e-02 5.611e+03
## ProvinceYukon -9.318e-03 1.799e+04
## z value Pr(>|z|)
## (Intercept) 0.006 0.995
## `Percentage of Voter Turnout` 0.682 0.495
## ProvinceBritish Columbia/Colombie-Britannique 0.000 1.000
## ProvinceManitoba 0.000 1.000
## ProvinceNew Brunswick/Nouveau-Brunswick 0.000 1.000
## ProvinceNewfoundland and Labrador/Terre-Neuve-et-Labrador 0.000 1.000
## ProvinceNorthwest Territories/Territoires du Nord-Ouest 0.000 1.000
## ProvinceNova Scotia/Nouvelle-Écosse 0.000 1.000
## ProvinceNunavut 0.000 1.000
## ProvinceOntario 0.000 1.000
## ProvincePrince Edward Island/Île-du-Prince-Édouard 0.000 1.000
## ProvinceQuebec/Québec -0.007 0.995
## ProvinceSaskatchewan 0.000 1.000
## ProvinceYukon 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 211.74 on 337 degrees of freedom
## Residual deviance: 105.14 on 324 degrees of freedom
## AIC: 133.14
##
## Number of Fisher Scoring iterations: 19
# Extract coefficients and p-values for reporting
# coefficients_table <- tidy(model)
# write.csv(coefficients_table, "regression_results.csv")
# Summarize data for regional trends
regional_trends <- Voting_Results %>%
group_by(Province, `Elected Candidate`) %>%
summarize(Count = n()) %>%
mutate(Percentage = Count / sum(Count) * 100)
## `summarise()` has grouped output by 'Province'. You can override using the
## `.groups` argument.
# Plot regional trends
ggplot(regional_trends, aes(x = Province, y = Percentage, fill = `Elected Candidate`)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Regional Trends for Each Party",
x = "Province",
y = "Percentage of Seats Won",
fill = "Political Party") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Load required libraries
library(dplyr)
library(broom)
# Fit logistic regression model for regional trends
regional_model <- glm(`Elected Candidate` ~ Province,
data = Voting_Results,
family = binomial)
# Summarize regression results
log_odds_table <- tidy(regional_model)
# Filter and arrange the results for better readability
log_odds_table <- log_odds_table %>%
filter(term != "(Intercept)") %>% # Remove the intercept for clarity
arrange(term)
# Add a descriptive column for better understanding
log_odds_table <- log_odds_table %>%
mutate(
Variable = gsub("Province", "Region: ", term),
Log_Odds = estimate,
Odds_Ratio = exp(estimate),
Significance = ifelse(p.value < 0.05, "Significant", "Not Significant")
) %>%
select(Variable, Log_Odds, Odds_Ratio, p.value, Significance)
# Save or display the table
#write.csv(log_odds_table, "log_odds_table.csv")
print(log_odds_table)
## # A tibble: 12 × 5
## Variable Log_Odds Odds_Ratio p.value Significance
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 Region: British Columbia/Colombie-… -2.77e- 9 1.00e+0 1.00 Not Signifi…
## 2 Region: Manitoba -2.67e- 9 1.00e+0 1.00 Not Signifi…
## 3 Region: New Brunswick/Nouveau-Brun… -2.85e- 9 1.00e+0 1.00 Not Signifi…
## 4 Region: Newfoundland and Labrador/… -3.77e-10 1.00e+0 1.00 Not Signifi…
## 5 Region: Northwest Territories/Terr… -2.89e- 9 1.00e+0 1.00 Not Signifi…
## 6 Region: Nova Scotia/Nouvelle-Écosse -2.96e- 9 1.00e+0 1.00 Not Signifi…
## 7 Region: Nunavut -2.67e- 9 1.00e+0 1.00 Not Signifi…
## 8 Region: Ontario -2.83e- 9 1.00e+0 1.00 Not Signifi…
## 9 Region: Prince Edward Island/Île-d… -3.13e- 9 1.00e+0 1.00 Not Signifi…
## 10 Region: Quebec/Québec -2.02e+ 1 1.68e-9 0.995 Not Signifi…
## 11 Region: Saskatchewan 1.07e-18 1 e+0 1 Not Signifi…
## 12 Region: Yukon 1.06e-18 1 e+0 1 Not Signifi…
# Load necessary libraries
library(ggplot2)
# Ensure the dataset has the required columns (e.g., Province and Voter_Turnout_Percentage)
# If the columns have different names, update them accordingly.
# Create a bar plot of voter turnout by province
voter_turnout_plot <- ggplot(Voting_Results, aes(x = reorder(Province, `Percentage of Voter Turnout`), y = `Percentage of Voter Turnout`)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() + # Flip coordinates for better readability
labs(
title = "Voter Turnout by Province",
x = "Province",
y = "Voter Turnout Percentage (%)"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12)
)
# Print the plot
print(voter_turnout_plot)
# Load necessary libraries
library(ggplot2)
# Sample data
Voting_Results <- data.frame(
Province = c("Ontario", "British Columbia", "Alberta", "Quebec", "Nova Scotia",
"Newfoundland and Labrador", "Manitoba", "Saskatchewan",
"New Brunswick", "Prince Edward Island", "Yukon", "Nunavut"),
TurnoutPercentage = c(65, 60, 55, 62, 58, 57, 56, 54, 59, 61, 50, 45)
)
# Calculate average turnout
avg_turnout <- mean(Voting_Results$`Percentage of Voter Turnout`)
## Warning in mean.default(Voting_Results$`Percentage of Voter Turnout`): argument
## is not numeric or logical: returning NA
# Create bar plot with average line and percentages
ggplot(Voting_Results, aes(x = TurnoutPercentage, y = reorder(Province, TurnoutPercentage))) +
geom_bar(stat = "identity", fill = "plum") +
geom_text(aes(label = paste0(TurnoutPercentage, "%")), hjust = -0.2, size = 4) +
geom_vline(xintercept = avg_turnout, color = "skyblue", linetype = "dashed") +
annotate("text", x = avg_turnout + 2, y = 1,
label = paste0("Avg: ", round(avg_turnout, 1), "%"), color = "red", size = 4) +
labs(title = "Voter Turnout by Province",
x = "Voter Turnout Percentage (%)",
y = "Province") +
theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
HEATMAPS
# Load libraries
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Sample data for voter turnout
Voting_Results <- data.frame(
Province = c("Ontario", "British Columbia", "Alberta", "Quebec", "Nova Scotia",
"Newfoundland and Labrador", "Manitoba", "Saskatchewan",
"New Brunswick", "Prince Edward Island", "Yukon", "Nunavut"),
TurnoutPercentage = c(65, 60, 55, 62, 58, 57, 56, 54, 59, 61, 50, 45)
)
# Create heatmap for voter turnout
ggplot(Voting_Results, aes(x = "", y = reorder(Province, TurnoutPercentage), fill = TurnoutPercentage)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightblue", high = "blue", name = "Turnout (%)") +
labs(title = "Heatmap of Voter Turnout by Province",
x = "", y = "Province") +
theme_minimal() +
theme(axis.text.x = element_blank(), # Remove x-axis text
axis.ticks.x = element_blank())
# Sample data for electoral success (number of elected candidates)
electoral_success <- data.frame(
Province = c("Ontario", "British Columbia", "Alberta", "Quebec", "Nova Scotia",
"Newfoundland and Labrador", "Manitoba", "Saskatchewan",
"New Brunswick", "Prince Edward Island", "Yukon", "Nunavut"),
ElectedCandidates = c(75, 40, 33, 78, 11, 7, 14, 12, 10, 4, 1, 1)
)
# Create heatmap for electoral success
ggplot(electoral_success, aes(x = "", y = reorder(Province, ElectedCandidates), fill = ElectedCandidates)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "pink", high = "red", name = "Elected Candidates") +
labs(title = "Heatmap of Electoral Success by Province",
x = "", y = "Province") +
theme_minimal() +
theme(axis.text.x = element_blank(), # Remove x-axis text
axis.ticks.x = element_blank())
# Load necessary library
library(ggplot2)
# Example data frame (replace with your actual data)
Voting_Results <- data.frame(
Province = c("Ontario", "Alberta", "British Columbia", "Manitoba", "New Brunswick",
"Newfoundland and Labrador", "Northwest Territories", "Nova Scotia",
"Nunavut", "Prince Edward Island", "Quebec", "Saskatchewan", "Yukon"),
Bloc_Quebecois = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 0, 0),
Conservative = c(0.4, 0.8, 0.5, 0.6, 0.4, 0.5, 0.3, 0.3, 0.1, 0.3, 0, 0.9, 0.4),
Green = c(0.05, 0.02, 0.08, 0.05, 0.06, 0.03, 0.04, 0.07, 0.02, 0.06, 0.01, 0.02, 0.05),
Liberal = c(0.4, 0.1, 0.3, 0.2, 0.5, 0.3, 0.6, 0.5, 0.2, 0.5, 0.2, 0.05, 0.4),
NDP = c(0.15, 0.08, 0.12, 0.15, 0.04, 0.12, 0.06, 0.13, 0.7, 0.14, 0.19, 0.03, 0.15)
)
# Convert data to long format for ggplot2
Voting_Results_long <- reshape2::melt(Voting_Results, id.vars = "Province",
variable.name = "Party", value.name = "Probability")
# Create the plot
ggplot(Voting_Results_long, aes(x = `Province`, y = Probability, fill = Party)) +
geom_bar(stat = "identity", position = "stack", width = 0.7) +
# Add a clear and descriptive title
ggtitle("44th General Election ~ Party Support by Province in Canada") +
# Add axis labels
xlab("Province") +
ylab("Predicted Probability") +
# Improve x-axis readability by rotating labels
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 12, face = "bold"),
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)) +
# Adjust legend position for prominence
theme(legend.position = "top") +
# Use a color palette for clarity
scale_fill_manual(values = c(
"Bloc_Quebecois" = "cornflowerblue",
"Conservative" = "darkblue",
"Green" = "forestgreen",
"Liberal" = "red2",
"NDP" = "orange"
)) +
# Add gridlines for readability
theme(panel.grid.major = element_line(color = "gray80", linetype = "dotted")) +
# Add a short caption to summarize findings
labs(caption = "Conservatives dominate Alberta; Bloc Québécois uniquely prevalent in Quebec.")
# Save the improved graph
ggsave("improved_party_support_plot.png", width = 10, height = 6)