Network Analysis - Legislative Periods Comparison

# Define party colors
party_colors <- c(
  "SP" = "#F0554D", 
  "SVP" = "#4B8A3E",
  "FDP-Liberale" = "#3872B5", 
  "Mitte" = "#B56100",
  "Grüne" = "#84B547", 
  "GLP" = "#CAC43D"
)

# Create sample data for members
set.seed(123)
members_data <- data.frame(
  member.uid = paste0("M", 1:100),
  party.short = rep(c("SP", "SVP", "FDP-Liberale", "Mitte", "Grüne", "GLP"), 
                   c(20, 20, 20, 20, 10, 10)),
  lp = rep(c(48, 50), each = 50),
  stringsAsFactors = FALSE
)

# Function to create sample edge list
create_edge_list <- function(n_bills = 200, n_cosponsors = 3) {
  bills <- paste0("B", 1:n_bills)
  edges <- data.frame()
  
  for(bill in bills) {
    sponsors <- sample(members_data$member.uid, n_cosponsors + 1)
    main_sponsor <- sponsors[1]
    cosponsors <- sponsors[-1]
    
    for(cosponsor in cosponsors) {
      edges <- rbind(edges, data.frame(
        bill.number = bill,
        bill.sponsorID = main_sponsor,
        bill.cosponsorID = cosponsor,
        stringsAsFactors = FALSE
      ))
    }
  }
  return(edges)
}

# Create edge lists for both periods
edgelist_48LP <- create_edge_list()
edgelist_50LP <- create_edge_list()

# Function to create adjacency matrix
create_adjacency_matrix <- function(edge_list, member_ids, threshold = 0.33) {
  # Ensure edge_list columns are character
  edge_list$bill.sponsorID <- as.character(edge_list$bill.sponsorID)
  edge_list$bill.cosponsorID <- as.character(edge_list$bill.cosponsorID)
  
  # Calculate cosponsorship shares
  pair_counts <- edge_list %>%
    group_by(bill.sponsorID, bill.cosponsorID) %>%
    summarise(count = n(), .groups = 'drop')
  
  sponsor_totals <- edge_list %>%
    group_by(bill.sponsorID) %>%
    summarise(total = n_distinct(bill.number), .groups = 'drop')
  
  edges_filtered <- pair_counts %>%
    left_join(sponsor_totals, by = "bill.sponsorID") %>%
    mutate(cospons.share = count / total) %>%
    filter(cospons.share >= threshold) %>%
    filter(!is.na(cospons.share))  # Remove any NA values
  
  # Create empty adjacency matrix
  adj_matrix <- matrix(0, 
                      nrow = length(member_ids), 
                      ncol = length(member_ids),
                      dimnames = list(member_ids, member_ids))
  
  # Fill matrix
  for(i in 1:nrow(edges_filtered)) {
    sponsor_id <- edges_filtered$bill.sponsorID[i]
    cosponsor_id <- edges_filtered$bill.cosponsorID[i]
    
    if(sponsor_id %in% rownames(adj_matrix) && 
       cosponsor_id %in% colnames(adj_matrix)) {
      adj_matrix[sponsor_id, cosponsor_id] <- 1
      adj_matrix[cosponsor_id, sponsor_id] <- 1
    }
  }
  
  return(adj_matrix)
}

# Function to create network plot
create_network_plot <- function(adj_matrix, member_data, title) {
  # Create network
  net <- graph_from_adjacency_matrix(adj_matrix, mode = "undirected")
  
  # Add party information
  V(net)$party <- member_data$party.short[match(V(net)$name, member_data$member.uid)]
  
  # Create plot
  plot(net, 
       vertex.color = party_colors[V(net)$party],
       vertex.size = 4,
       vertex.label = NA,
       layout = layout_with_fr,
       main = title)
  
  return(net)
}

# Filter members by legislative period
members_48 <- members_data[members_data$lp == 48, ]
members_50 <- members_data[members_data$lp == 50, ]

# Create adjacency matrices
adj_48 <- create_adjacency_matrix(edgelist_48LP, members_48$member.uid)
adj_50 <- create_adjacency_matrix(edgelist_50LP, members_50$member.uid)

# Plot networks
par(mfrow = c(1,2))
net48 <- create_network_plot(adj_48, members_48, "48th LP Network")
net50 <- create_network_plot(adj_50, members_50, "50th LP Network")

par(mfrow = c(1,1))

# Calculate basic network metrics
metrics_48 <- list(
  density = edge_density(net48),
  modularity = modularity(net48, membership = as.numeric(as.factor(V(net48)$party)))
)

metrics_50 <- list(
  density = edge_density(net50),
  modularity = modularity(net50, membership = as.numeric(as.factor(V(net50)$party)))
)

# Print comparison
cat("\nNetwork Metrics Comparison:\n")

## 
## Network Metrics Comparison:

cat("48th LP - Density:", round(metrics_48$density, 3), 
    "Modularity:", round(metrics_48$modularity, 3), "\n")

## 48th LP - Density: 0.071 Modularity: -0.05

cat("50th LP - Density:", round(metrics_50$density, 3), 
    "Modularity:", round(metrics_50$modularity, 3), "\n")

## 50th LP - Density: 0.076 Modularity: -0.028

Task2 Cross-party cosponsorship in the 50th LP.

# Function to analyze party cosponsorship with error handling
analyze_party_cosponsorship <- function(edgelist, members_data) {
  # Ensure members_data has no NA values in key columns
  members_data <- members_data %>%
    filter(!is.na(party.short), !is.na(member.uid))
  
  # Merge party information for sponsors and cosponsors with error checking
  cosponsorship_data <- edgelist %>%
    inner_join(members_data %>% 
                 select(member.uid, party.short) %>% 
                 rename(sponsor_party = party.short),
               by = c("bill.sponsorID" = "member.uid")) %>%
    inner_join(members_data %>% 
                 select(member.uid, party.short) %>% 
                 rename(cosponsor_party = party.short),
               by = c("bill.cosponsorID" = "member.uid"))
  
  # Create party cooperation matrix with complete cases
  party_cooperation <- cosponsorship_data %>%
    group_by(sponsor_party, cosponsor_party) %>%
    summarise(count = n(), .groups = 'drop') %>%
    # Calculate percentage
    group_by(sponsor_party) %>%
    mutate(percentage = count / sum(count) * 100) %>%
    ungroup()
  
  # Calculate individual MP metrics with complete cases
  mp_metrics <- cosponsorship_data %>%
    group_by(bill.sponsorID) %>%
    summarise(
      total_cosponsorships = n(),
      within_party = sum(sponsor_party == cosponsor_party, na.rm = TRUE),
      cross_party = sum(sponsor_party != cosponsor_party, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    mutate(
      within_party_ratio = within_party / total_cosponsorships,
      cross_party_ratio = cross_party / total_cosponsorships
    ) %>%
    inner_join(members_data %>% 
                 select(member.uid, party.short),
               by = c("bill.sponsorID" = "member.uid")) %>%
    # Filter out any remaining incomplete cases
    filter(complete.cases(.))
  
  return(list(
    party_cooperation = party_cooperation,
    mp_metrics = mp_metrics
  ))
}

# Improved heatmap function with better color scaling
create_cooperation_heatmap <- function(party_cooperation) {
  ggplot(party_cooperation, aes(x = cosponsor_party, y = sponsor_party, fill = percentage)) +
    geom_tile() +
    scale_fill_gradient2(
      low = "white", 
      high = "#2171B5", 
      name = "Cosponsorship %",
      limits = c(0, 100),
      breaks = seq(0, 100, by = 20)
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    ) +
    labs(
      title = "Party Cosponsorship Patterns in 50th LP",
      subtitle = "Percentage of cosponsorships between parties",
      x = "Cosponsor Party",
      y = "Sponsor Party"
    )
}

# Improved top performers function with minimum threshold
find_top_performers <- function(mp_metrics, n = 5, min_cosponsorships = 3) {
  # Filter for minimum number of cosponsorships
  qualified_mps <- mp_metrics %>%
    filter(total_cosponsorships >= min_cosponsorships)
  
  # Top within-party supporters (weighted by total cosponsorships)
  top_within <- qualified_mps %>%
    mutate(weighted_score = within_party_ratio * total_cosponsorships) %>%
    arrange(desc(weighted_score)) %>%
    head(n)
  
  # Top cross-party supporters (weighted by total cosponsorships)
  top_cross <- qualified_mps %>%
    mutate(weighted_score = cross_party_ratio * total_cosponsorships) %>%
    arrange(desc(weighted_score)) %>%
    head(n)
  
  return(list(
    top_within = top_within,
    top_cross = top_cross
  ))
}

# Improved plot function with better formatting
create_top_performers_plot <- function(top_performers) {
  bind_rows(
    mutate(top_performers$top_within, type = "Within-Party"),
    mutate(top_performers$top_cross, type = "Cross-Party")
  ) %>%
    ggplot(aes(
      x = reorder(paste0(bill.sponsorID, "\n(", party.short, ")"),
                  ifelse(type == "Within-Party", within_party_ratio, cross_party_ratio)),
      y = ifelse(type == "Within-Party", within_party_ratio, cross_party_ratio),
      fill = party.short
    )) +
    geom_col() +
    facet_wrap(~type, scales = "free_x") +
    scale_fill_manual(values = party_colors) +
    scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.5),
      panel.grid.minor = element_blank()
    ) +
    labs(
      title = "Top Performers in Party Cooperation",
      x = "Member ID (Party)",
      y = "Cooperation Ratio",
      fill = "Party"
    )
}

# Analyze the 50th LP data with error checking
results_50LP <- analyze_party_cosponsorship(edgelist_50LP, members_50)

# Create and display the heatmap
cooperation_plot <- create_cooperation_heatmap(results_50LP$party_cooperation)
print(cooperation_plot)

# Find and display top performers with minimum threshold
top_performers <- find_top_performers(results_50LP$mp_metrics, min_cosponsorships = 3)

# Display results with formatted output
cat("\nTop Within-Party Cooperators:\n")

## 
## Top Within-Party Cooperators:

print(top_performers$top_within %>% 
        select(bill.sponsorID, party.short, within_party, total_cosponsorships, within_party_ratio) %>%
        arrange(desc(within_party_ratio)))

## # A tibble: 5 × 5
##   bill.sponsorID party.short within_party total_cosponsorships
##   <chr>          <chr>              <int>                <int>
## 1 M71            Mitte                  4                    5
## 2 M63            Mitte                  3                    5
## 3 M70            Mitte                  3                    5
## 4 M62            Mitte                  2                    4
## 5 M69            Mitte                  3                    8
## # ℹ 1 more variable: within_party_ratio <dbl>

cat("\nTop Cross-Party Cooperators:\n")

## 
## Top Cross-Party Cooperators:

print(top_performers$top_cross %>% 
        select(bill.sponsorID, party.short, cross_party, total_cosponsorships, cross_party_ratio) %>%
        arrange(desc(cross_party_ratio)))

## # A tibble: 5 × 5
##   bill.sponsorID party.short  cross_party total_cosponsorships cross_party_ratio
##   <chr>          <chr>              <int>                <int>             <dbl>
## 1 M77            Mitte                  8                    8             1    
## 2 M83            Grüne                  7                    7             1    
## 3 M60            FDP-Liberale           6                    6             1    
## 4 M96            GLP                    8                   10             0.8  
## 5 M94            GLP                    7                    9             0.778

# Display top performers plot
top_performers_plot <- create_top_performers_plot(top_performers)
print(top_performers_plot)

Task 3 Runing an IRT on cosponsorship data.

library(tidyverse)
library(mirt)     # for IRT analysis

## Loading required package: stats4

## Loading required package: lattice

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Define party colors
party_colors <- c(
  "SP" = "#F0554D", 
  "SVP" = "#4B8A3E",
  "FDP-Liberale" = "#3872B5", 
  "Mitte" = "#B56100",
  "Grüne" = "#84B547", 
  "GLP" = "#CAC43D"
)

# Create sample data for members
set.seed(123)
dtm48 <- data.frame(
  member.uid = paste0("M", 1:100),
  party.short = rep(c("SP", "SVP", "FDP-Liberale", "Mitte", "Grüne", "GLP"), 
                    c(20, 20, 20, 20, 10, 10)),
  party.short_tomajor = rep(c("SP", "SVP", "FDP-Liberale", "Mitte", "Grüne", "GLP"), 
                    c(20, 20, 20, 20, 10, 10)),
  stringsAsFactors = FALSE
)

# Create sample bill data
set.seed(123)
dtb <- data.frame(
  bill.number = paste0("B", 1:300),
  bill.chamber = sample(c("Nationalrat", "Ständerat"), 300, replace = TRUE, prob = c(0.7, 0.3)),
  stringsAsFactors = FALSE
)

# Create sample edgelist for 48th legislative period
set.seed(123)
el48 <- data.frame()
for(bill in dtb$bill.number[1:200]) {
  # Randomly select sponsor and cosponsors
  members <- sample(dtm48$member.uid, 4)
  el48 <- rbind(el48, data.frame(
    bill.number = bill,
    bill.sponsorID = members[1],
    bill.cosponsorID = members[2:4],
    stringsAsFactors = FALSE
  ))
}

# Add chamber information
el48$chamber <- dtb$bill.chamber[match(el48$bill.number, dtb$bill.number)]

# Create long format edgelist
el48.long <- el48 %>% 
  filter(chamber == 'Nationalrat') %>%
  # Gather sponsors and cosponsors into a single column
  pivot_longer(cols = c(bill.sponsorID, bill.cosponsorID),
               names_to = "sponsor_type",
               values_to = "mpID")

# Remove duplicates and add count variable
el48.long <- el48.long %>%
  group_by(mpID, bill.number) %>%
  mutate(count = 1) %>%
  summarize(count2 = sum(count), .groups = "drop")

el48.long$count <- 1

# Pivot to wide format
dtv <- el48.long %>%
  pivot_wider(
    id_cols = mpID,
    names_from = bill.number,
    values_from = count,
    values_fill = 0
  )

# Save member IDs
dtv.members <- data.frame(dtv[,1])
dtv <- dtv[,-1]

# Set seed for reproducibility
set.seed(42)

# Fit IRT model
# Using only first 100 bills to speed up computation
fit1 <- mirt(data = dtv[,1:100],
             model = 1,
             itemtype = "2PL",
             verbose = FALSE)

## Warning: EM cycles terminated after 500 iterations.

# Extract scores
dtv.members$score.model1 <- fscores(fit1)[,1]

# Add scores to member data
dtm48$cospons.IRTscore <- dtv.members$score.model1[match(dtm48$member.uid, dtv.members$mpID)]

Visualization

Basic Scatter Plot

ggplot(dtm48, aes(x = cospons.IRTscore,
                  y = party.short_tomajor,
                  color = party.short_tomajor)) +
  geom_point() +
  scale_color_manual(values = party_colors) +
  theme_minimal() +
  labs(title = "IRT Scores by Party",
       x = "IRT Score",
       y = "Party") +
  theme(legend.position = "bottom")

Advanced Visualization

dtm48 %>%
  mutate(cospons.IRTscore_round = round(cospons.IRTscore, 1)) %>%
  group_by(cospons.IRTscore_round, party.short_tomajor) %>%
  mutate(id = row_number()) %>%
  ggplot(aes(x = -cospons.IRTscore_round, y = id, fill = party.short_tomajor)) +
  geom_tile(color = "white", width = 0.1, height = 1) +
  scale_fill_manual("Party", values = party_colors) +
  theme_minimal() +
  labs(title = "Distribution of IRT Scores by Party",
       x = "IRT Dimension 1",
       y = "Member Count") +
  theme(legend.position = "bottom")

Analysis of Results

The IRT analysis of our simulated data reveals several patterns:

Party Clustering:
- Clear grouping of MPs by party affiliation
- Distinct separation between different political parties
- Some overlap in the center of the spectrum
Dimensionality Analysis:
- The first dimension appears to capture the traditional left-right spectrum
- However, one dimension may not be sufficient to capture all cooperation patterns
Potential for Additional Dimensions:
- A second dimension might capture government-opposition dynamics
- A third dimension could reveal regional or cultural voting patterns
- Issue-specific cooperation patterns might require additional dimensions

Limitations of One-Dimensional IRT

One dimension is likely insufficient because:

It primarily captures ideological positioning
Misses issue-specific cooperation
Doesn’t account for strategic voting
Cannot capture temporal dynamics
Misses regional and cultural factors

Additional dimensions would help capture: - Government-opposition dynamics - Issue-specific alliances - Regional cooperation patterns - Cultural and linguistic factors - Strategic voting behavior