# Pull the CSV from the web into a temporary data frame
FetchedData <- read.csv(
  file = "https://drkblake.com/wp-content/uploads/2023/09/TopIssue.csv"
)
# Write a local copy to the working directory, without a row-name column
write.csv(x = FetchedData, file = "TopIssue.csv", row.names = FALSE)
# Drop the temporary data frame from the environment
rm(FetchedData)

# ------------------------------
# Setup: Install and load packages
# ------------------------------
# Install each package only if it is not already installed.
# requireNamespace() checks availability without attaching the package and
# without the warning require() emits when the package is missing; the
# library() calls below do the actual attaching and fail loudly on error.
# tidyverse: data wrangling & plotting
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
# gmodels: crosstabs
if (!requireNamespace("gmodels", quietly = TRUE)) install.packages("gmodels")
# gt: table formatting
if (!requireNamespace("gt", quietly = TRUE)) install.packages("gt")

library(tidyverse)
library(gmodels)
library(gt)

# ------------------------------
# Load the data
# ------------------------------
# Replace "TopIssue.csv" with your dataset name
mydata <- read.csv("TopIssue.csv") #Edit

# ------------------------------
# Define Dependent (DV) and Independent (IV) variables
# ------------------------------
# Replace the two column names below with actual column names in your data
dv_name <- "Immigration" #Edit
iv_name <- "PreferredNetwork" #Edit

# Fail early with a clear message if either column is absent. Without this
# check, assigning a non-existent column (NULL) would silently create no
# DV/IV column and the later steps would fail with an unrelated error.
missing_cols <- setdiff(c(dv_name, iv_name), names(mydata))
if (length(missing_cols) > 0) {
  stop(
    "Column(s) not found in the data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# [[ ]] matches names exactly (no partial matching, unlike $)
mydata$DV <- mydata[[dv_name]]
mydata$IV <- mydata[[iv_name]]

# ------------------------------
# Visualization: Stacked bar chart of IV by DV
# ------------------------------
# One bar per IV category, stacked and coloured by DV category;
# bar heights are raw counts (geom_bar's default stat).
graph <- ggplot(data = mydata, mapping = aes(x = IV, fill = DV)) +
  geom_bar(color = "black") +
  ggtitle("Distribution of DV by IV") +
  xlab("Independent Variable") +
  ylab("Count") +
  labs(fill = "Dependent Variable") +
  scale_fill_brewer(palette = "Paired")

# Display the chart
graph

# ------------------------------
# Crosstabulation of DV by IV (DV = rows, IV = columns)
# ------------------------------

# Build a wide crosstab: one row per DV category, one column per IV category.
# Each cell holds the count and, in parentheses, the column percentage
# (the count as a share of all cases in that IV category).
crosstab <- mydata %>%
  count(DV, IV) %>%
  group_by(IV) %>%
  # Grouping by IV makes sum(n) the column total, so this is a column percent
  mutate(ColPct = 100 * n / sum(n)) %>%
  ungroup() %>%
  mutate(Cell = paste0(n, "\n(", round(ColPct, 1), "%)")) %>%
  select(DV, IV, Cell) %>%
  # count() omits DV/IV combinations with no cases; show them as zero cells
  # in the same "n\n(pct%)" format instead of NA
  pivot_wider(
    names_from = IV,
    values_from = Cell,
    values_fill = "0\n(0%)"
  )

# Format into gt table
# DV is used as the row-label (stub) column, so its heading is set with
# tab_stubhead(), the gt function for labelling the stub.
crosstab_table <- crosstab %>%
  gt(rowname_col = "DV") %>%
  tab_header(
    title = "Crosstabulation of DV by IV",
    subtitle = "Counts and (Column Percentages)"
  ) %>%
  tab_stubhead(label = "Dependent Variable")

# Show the polished crosstab table
crosstab_table
# Rendered output (pasted from a previous run):
#   Crosstabulation of DV by IV
#   Counts and (Column Percentages)
#                     CNN            Fox
#   1 Top issue       35 (11.7%)     115 (38.3%)
#   2 Not top issue   265 (88.3%)    185 (61.7%)
# ------------------------------
# Chi-squared test of independence
# ------------------------------
# Strongly prefer fixed over scientific notation when printing numbers
options(scipen = 999)

# Chi-squared test on the DV-by-IV contingency table (chisq.test defaults)
chitestresults <- chisq.test(x = mydata$DV, y = mydata$IV)

# ------------------------------
# Collect the test results in a one-row tibble
# ------------------------------
chitest_summary <- tibble(
  Test   = "Chi-squared Test of Independence",
  Chi_sq = chitestresults[["statistic"]],  # test statistic
  df     = chitestresults[["parameter"]],  # degrees of freedom
  p      = chitestresults[["p.value"]]     # p-value
)

# Turn the one-row summary into a formatted gt table.
chitest_table <- chitest_summary %>%
  gt() %>%
  # Round the chi-squared statistic and p-value to 3 decimals, df to integer
  fmt_number(columns = c(Chi_sq, p), decimals = 3) %>%
  fmt_number(columns = df, decimals = 0) %>%
  # A p-value below 0.001 would otherwise round to a misleading "0.000";
  # show it as "<0.001" instead
  sub_small_vals(columns = p, threshold = 0.001) %>%
  tab_header(
    title = "Chi-squared Test Results",
    subtitle = "Test of Independence between DV and IV"
  ) %>%
  cols_label(
    Test   = "Test",
    Chi_sq = "Chi-squared Statistic",
    df     = "Degrees of Freedom",
    p      = "p-value"
  )

# Show the formatted results table
chitest_table
# Rendered output (pasted from a previous run):
#   Chi-squared Test Results
#   Test of Independence between DV and IV
#   Test                              Chi-squared Statistic  Degrees of Freedom  p-value
#   Chi-squared Test of Independence  55.476                 1                   0.000