Research Question: - What are the main factors influencing voter turnout and variability in party preferences across electoral districts? - How do voterturnoutrates and regional differences influence the probabilities of electoral success for Canadian political parties?

Variables:

• Electoral District Name/ Electoral District Number: Identifier for electoral districts. • Percentage of Voter Turnout: Proportion of eligible voters who participated. • Elected Candidate: Vote counts for each political party.

# Frequency table of Province variable
Voting_Results %>%
  count(`Province`) %>%
  mutate(Percentage = n / sum(n) * 100)
## # A tibble: 13 × 3
##    Province                                              n Percentage
##    <chr>                                             <int>      <dbl>
##  1 Alberta                                              34     10.1  
##  2 British Columbia/Colombie-Britannique                42     12.4  
##  3 Manitoba                                             14      4.14 
##  4 New Brunswick/Nouveau-Brunswick                      10      2.96 
##  5 Newfoundland and Labrador/Terre-Neuve-et-Labrador     7      2.07 
##  6 Northwest Territories/Territoires du Nord-Ouest       1      0.296
##  7 Nova Scotia/Nouvelle-Écosse                          11      3.25 
##  8 Nunavut                                               1      0.296
##  9 Ontario                                             121     35.8  
## 10 Prince Edward Island/Île-du-Prince-Édouard            4      1.18 
## 11 Quebec/Québec                                        78     23.1  
## 12 Saskatchewan                                         14      4.14 
## 13 Yukon                                                 1      0.296
# Frequency table of Elected Candidate variable
Voting_Results %>%
  count(`Elected Candidate`) %>%
  mutate(Percentage = n / sum(n) * 100)
## # A tibble: 5 × 3
##   `Elected Candidate`     n Percentage
##   <fct>               <int>      <dbl>
## 1 Bloc Québécois         32      9.47 
## 2 Conservative          119     35.2  
## 3 Green Party             2      0.592
## 4 Liberal               160     47.3  
## 5 NDP                    25      7.40
# Frequency table for binned Percentage of Voter Turnout
Voting_Results %>%
  mutate(Turnout_Bin = cut(`Percentage of Voter Turnout`, breaks = seq(0, 100, 10))) %>%
  count(Turnout_Bin) %>%
  mutate(Percentage = n / sum(n) * 100)
## # A tibble: 5 × 3
##   Turnout_Bin     n Percentage
##   <fct>       <int>      <dbl>
## 1 (30,40]         2      0.592
## 2 (40,50]         8      2.37 
## 3 (50,60]        89     26.3  
## 4 (60,70]       222     65.7  
## 5 (70,80]        17      5.03
# Exclude missing values and generate frequency table
Voting_Results %>%
  filter(!is.na(`Percentage of Voter Turnout`)) %>%  # Excludes missing values
  count(`Percentage of Voter Turnout`) %>%
  mutate(Percentage = n / sum(n) * 100)
## # A tibble: 168 × 3
##    `Percentage of Voter Turnout`     n Percentage
##                            <dbl> <int>      <dbl>
##  1                          33.8     1      0.296
##  2                          35.8     1      0.296
##  3                          43.6     1      0.296
##  4                          44.7     1      0.296
##  5                          46.2     1      0.296
##  6                          46.7     1      0.296
##  7                          48       1      0.296
##  8                          48.2     1      0.296
##  9                          48.9     1      0.296
## 10                          49       1      0.296
## # ℹ 158 more rows

R Markdown

# Convert relevant columns to appropriate formats
Voting_Results$`Elected Candidate` <- as.factor(Voting_Results$`Elected Candidate`)
Voting_Results$Province <- as.factor(Voting_Results$Province)




# Fit logistic regression model
model <- glm(`Elected Candidate` ~ `Percentage of Voter Turnout` + Province, 
             data = Voting_Results, 
             family = binomial)

# Summarize regression results
summary(model)
## 
## Call:
## glm(formula = `Elected Candidate` ~ `Percentage of Voter Turnout` + 
##     Province, family = binomial, data = Voting_Results)
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                1.848e+01  3.034e+03
## `Percentage of Voter Turnout`                              3.268e-02  4.788e-02
## ProvinceBritish Columbia/Colombie-Britannique              1.044e-01  4.082e+03
## ProvinceManitoba                                           1.229e-01  5.602e+03
## ProvinceNew Brunswick/Nouveau-Brunswick                   -1.379e-02  6.373e+03
## ProvinceNewfoundland and Labrador/Terre-Neuve-et-Labrador  3.803e-01  7.354e+03
## ProvinceNorthwest Territories/Territoires du Nord-Ouest    5.593e-01  1.799e+04
## ProvinceNova Scotia/Nouvelle-Écosse                        2.035e-02  6.146e+03
## ProvinceNunavut                                            9.808e-01  1.799e+04
## ProvinceOntario                                            8.424e-02  3.434e+03
## ProvincePrince Edward Island/Île-du-Prince-Édouard        -2.518e-01  9.368e+03
## ProvinceQuebec/Québec                                     -2.018e+01  3.034e+03
## ProvinceSaskatchewan                                       2.107e-02  5.611e+03
## ProvinceYukon                                             -9.318e-03  1.799e+04
##                                                           z value Pr(>|z|)
## (Intercept)                                                 0.006    0.995
## `Percentage of Voter Turnout`                               0.682    0.495
## ProvinceBritish Columbia/Colombie-Britannique               0.000    1.000
## ProvinceManitoba                                            0.000    1.000
## ProvinceNew Brunswick/Nouveau-Brunswick                     0.000    1.000
## ProvinceNewfoundland and Labrador/Terre-Neuve-et-Labrador   0.000    1.000
## ProvinceNorthwest Territories/Territoires du Nord-Ouest     0.000    1.000
## ProvinceNova Scotia/Nouvelle-Écosse                         0.000    1.000
## ProvinceNunavut                                             0.000    1.000
## ProvinceOntario                                             0.000    1.000
## ProvincePrince Edward Island/Île-du-Prince-Édouard          0.000    1.000
## ProvinceQuebec/Québec                                      -0.007    0.995
## ProvinceSaskatchewan                                        0.000    1.000
## ProvinceYukon                                               0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 211.74  on 337  degrees of freedom
## Residual deviance: 105.14  on 324  degrees of freedom
## AIC: 133.14
## 
## Number of Fisher Scoring iterations: 19
# Extract coefficients and p-values for reporting
# coefficients_table <- tidy(model)
# write.csv(coefficients_table, "regression_results.csv")
# Summarize data for regional trends
regional_trends <- Voting_Results %>%
  group_by(Province, `Elected Candidate`) %>%
  summarize(Count = n()) %>%
  mutate(Percentage = Count / sum(Count) * 100)
## `summarise()` has grouped output by 'Province'. You can override using the
## `.groups` argument.
# Plot regional trends
ggplot(regional_trends, aes(x = Province, y = Percentage, fill = `Elected Candidate`)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Regional Trends for Each Party",
       x = "Province",
       y = "Percentage of Seats Won",
       fill = "Political Party") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Load required libraries
library(dplyr)
library(broom)

# Fit logistic regression model for regional trends
regional_model <- glm(`Elected Candidate` ~ Province, 
                      data = Voting_Results, 
                      family = binomial)

# Summarize regression results
log_odds_table <- tidy(regional_model)

# Filter and arrange the results for better readability
log_odds_table <- log_odds_table %>%
  filter(term != "(Intercept)") %>%  # Remove the intercept for clarity
  arrange(term)

# Add a descriptive column for better understanding
log_odds_table <- log_odds_table %>%
  mutate(
    Variable = gsub("Province", "Region: ", term),
    Log_Odds = estimate,
    Odds_Ratio = exp(estimate),
    Significance = ifelse(p.value < 0.05, "Significant", "Not Significant")
  ) %>%
  select(Variable, Log_Odds, Odds_Ratio, p.value, Significance)

# Save or display the table
#write.csv(log_odds_table, "log_odds_table.csv")
print(log_odds_table)
## # A tibble: 12 × 5
##    Variable                             Log_Odds Odds_Ratio p.value Significance
##    <chr>                                   <dbl>      <dbl>   <dbl> <chr>       
##  1 Region: British Columbia/Colombie-… -2.77e- 9    1.00e+0   1.00  Not Signifi…
##  2 Region: Manitoba                    -2.67e- 9    1.00e+0   1.00  Not Signifi…
##  3 Region: New Brunswick/Nouveau-Brun… -2.85e- 9    1.00e+0   1.00  Not Signifi…
##  4 Region: Newfoundland and Labrador/… -3.77e-10    1.00e+0   1.00  Not Signifi…
##  5 Region: Northwest Territories/Terr… -2.89e- 9    1.00e+0   1.00  Not Signifi…
##  6 Region: Nova Scotia/Nouvelle-Écosse -2.96e- 9    1.00e+0   1.00  Not Signifi…
##  7 Region: Nunavut                     -2.67e- 9    1.00e+0   1.00  Not Signifi…
##  8 Region: Ontario                     -2.83e- 9    1.00e+0   1.00  Not Signifi…
##  9 Region: Prince Edward Island/Île-d… -3.13e- 9    1.00e+0   1.00  Not Signifi…
## 10 Region: Quebec/Québec               -2.02e+ 1    1.68e-9   0.995 Not Signifi…
## 11 Region: Saskatchewan                 1.07e-18    1   e+0   1     Not Signifi…
## 12 Region: Yukon                        1.06e-18    1   e+0   1     Not Signifi…
# Load necessary libraries
library(ggplot2)

# Ensure the dataset has the required columns (e.g., Province and Voter_Turnout_Percentage)
# If the columns have different names, update them accordingly.

# Create a bar plot of voter turnout by province
voter_turnout_plot <- ggplot(Voting_Results, aes(x = reorder(Province, `Percentage of Voter Turnout`), y = `Percentage of Voter Turnout`)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  coord_flip() +  # Flip coordinates for better readability
  labs(
    title = "Voter Turnout by Province",
    x = "Province",
    y = "Voter Turnout Percentage (%)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12)
  )

# Print the plot
print(voter_turnout_plot)

# Load necessary libraries
library(ggplot2)

# Sample data
Voting_Results <- data.frame(
  Province = c("Ontario", "British Columbia", "Alberta", "Quebec", "Nova Scotia", 
               "Newfoundland and Labrador", "Manitoba", "Saskatchewan", 
               "New Brunswick", "Prince Edward Island", "Yukon", "Nunavut"),
  TurnoutPercentage = c(65, 60, 55, 62, 58, 57, 56, 54, 59, 61, 50, 45)
)

# Calculate average turnout
avg_turnout <- mean(Voting_Results$`Percentage of Voter Turnout`)
## Warning in mean.default(Voting_Results$`Percentage of Voter Turnout`): argument
## is not numeric or logical: returning NA
# Create bar plot with average line and percentages
ggplot(Voting_Results, aes(x = TurnoutPercentage, y = reorder(Province, TurnoutPercentage))) +
  geom_bar(stat = "identity", fill = "plum") +
  geom_text(aes(label = paste0(TurnoutPercentage, "%")), hjust = -0.2, size = 4) +
  geom_vline(xintercept = avg_turnout, color = "skyblue", linetype = "dashed") +
  annotate("text", x = avg_turnout + 2, y = 1, 
           label = paste0("Avg: ", round(avg_turnout, 1), "%"), color = "red", size = 4) +
  labs(title = "Voter Turnout by Province", 
       x = "Voter Turnout Percentage (%)", 
       y = "Province") +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

HEATMAPS

# Load libraries
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Sample data for voter turnout
Voting_Results <- data.frame(
  Province = c("Ontario", "British Columbia", "Alberta", "Quebec", "Nova Scotia", 
               "Newfoundland and Labrador", "Manitoba", "Saskatchewan", 
               "New Brunswick", "Prince Edward Island", "Yukon", "Nunavut"),
  TurnoutPercentage = c(65, 60, 55, 62, 58, 57, 56, 54, 59, 61, 50, 45)
)

# Create heatmap for voter turnout
ggplot(Voting_Results, aes(x = "", y = reorder(Province, TurnoutPercentage), fill = TurnoutPercentage)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "lightblue", high = "blue", name = "Turnout (%)") +
  labs(title = "Heatmap of Voter Turnout by Province", 
       x = "", y = "Province") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),  # Remove x-axis text
        axis.ticks.x = element_blank())

# Sample data for electoral success (number of elected candidates)
electoral_success <- data.frame(
  Province = c("Ontario", "British Columbia", "Alberta", "Quebec", "Nova Scotia", 
               "Newfoundland and Labrador", "Manitoba", "Saskatchewan", 
               "New Brunswick", "Prince Edward Island", "Yukon", "Nunavut"),
  ElectedCandidates = c(75, 40, 33, 78, 11, 7, 14, 12, 10, 4, 1, 1)
)

# Create heatmap for electoral success
ggplot(electoral_success, aes(x = "", y = reorder(Province, ElectedCandidates), fill = ElectedCandidates)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "pink", high = "red", name = "Elected Candidates") +
  labs(title = "Heatmap of Electoral Success by Province", 
       x = "", y = "Province") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),  # Remove x-axis text
        axis.ticks.x = element_blank())

# Load necessary library
library(ggplot2)

# Example data frame (replace with your actual data)
Voting_Results <- data.frame(
  Province = c("Ontario", "Alberta", "British Columbia", "Manitoba", "New Brunswick", 
               "Newfoundland and Labrador", "Northwest Territories", "Nova Scotia", 
               "Nunavut", "Prince Edward Island", "Quebec", "Saskatchewan", "Yukon"),
  Bloc_Quebecois = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 0, 0),
  Conservative = c(0.4, 0.8, 0.5, 0.6, 0.4, 0.5, 0.3, 0.3, 0.1, 0.3, 0, 0.9, 0.4),
  Green = c(0.05, 0.02, 0.08, 0.05, 0.06, 0.03, 0.04, 0.07, 0.02, 0.06, 0.01, 0.02, 0.05),
  Liberal = c(0.4, 0.1, 0.3, 0.2, 0.5, 0.3, 0.6, 0.5, 0.2, 0.5, 0.2, 0.05, 0.4),
  NDP = c(0.15, 0.08, 0.12, 0.15, 0.04, 0.12, 0.06, 0.13, 0.7, 0.14, 0.19, 0.03, 0.15)
)

# Convert data to long format for ggplot2
Voting_Results_long <- reshape2::melt(Voting_Results, id.vars = "Province", 
                            variable.name = "Party", value.name = "Probability")



# Create the plot
ggplot(Voting_Results_long, aes(x = `Province`, y = Probability, fill = Party)) +
  geom_bar(stat = "identity", position = "stack", width = 0.7) +
  # Add a clear and descriptive title
  ggtitle("44th General Election ~ Party Support by Province in Canada") +
  # Add axis labels
  xlab("Province") + 
  ylab("Predicted Probability") +
  # Improve x-axis readability by rotating labels
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 10)) +
  # Adjust legend position for prominence
  theme(legend.position = "top") +
  # Use a color palette for clarity
  scale_fill_manual(values = c(
    "Bloc_Quebecois" = "cornflowerblue",
    "Conservative" = "darkblue",
    "Green" = "forestgreen",
    "Liberal" = "red2",
    "NDP" = "orange"
  )) +
  # Add gridlines for readability
  theme(panel.grid.major = element_line(color = "gray80", linetype = "dotted")) +
  # Add a short caption to summarize findings
  labs(caption = "Conservatives dominate Alberta; Bloc Québécois uniquely prevalent in Quebec.")

# Save the improved graph
ggsave("improved_party_support_plot.png", width = 10, height = 6)