First, we load the Supreme Court Database's justice-centered data and select the columns we'll need.

load("SCDB_2019_01_justiceCentered_Citation.Rdata")
justice_votes <- as_tibble(SCDB_2019_01_justiceCentered_Citation) %>%
    dplyr::select(c(caseId, term, issueArea, justiceName, direction))

Because we’d like to group the cases by issue area, we filter out those cases that don’t have an issue area, as well as votes that don’t have a direction assigned.

justice_votes_filtered <- justice_votes %>%
    drop_na(issueArea, direction)

The data is currently in a long format, with each justice's vote on a case occupying a single row. We pivot to a wider format in which the cases become columns. We also remove cases that don't have all nine justices voting on them, as we won't be able to apply MDS to these.

wide_justice_votes <- justice_votes_filtered %>%
    dplyr::select(caseId, issueArea, justiceName, direction) %>%
    group_by(issueArea) %>%
    pivot_wider(names_from = caseId, values_from = direction)
# keep the id columns (no NAs) and the case columns with exactly nine recorded votes
n_missing <- colSums(is.na(wide_justice_votes))
wide_justice_votes_filt <- wide_justice_votes[, (n_missing == nrow(wide_justice_votes) - 9) | (n_missing == 0)]

We pivot back to a longer format to build a legend mapping each case to its issue area.

long_justice_votes_filt <- wide_justice_votes_filt %>%
    pivot_longer(-c(issueArea, justiceName), names_to = "caseId", values_to = "direction")

issue_legend <- long_justice_votes_filt %>%
    na.omit() %>%
    dplyr::select(c(caseId, issueArea)) %>%
    distinct()

issue_legend <- issue_legend[order(issue_legend$caseId),]

We split the cases by issue area (creating a list of data frames), drop the case columns that don't belong to each issue area, and recode the direction variable from the SCDB coding (1 = “conservative”, 2 = “liberal”) to 0 = “conservative” and 1 = “liberal.” Finally, we add back the justices that are missing from an issue area and alphabetize the justice names.

votes_by_issue <- group_split(wide_justice_votes_filt) %>%
    # drop the case columns that are entirely NA within each issue area
    # (i.e., cases that belong to other issue areas)
    lapply(function(x) x[, colSums(is.na(x)) != nrow(x)]) %>%
    # recode direction: 1 ("conservative") -> 0, then 2 ("liberal") -> 1
    lapply(function(x) {
        x <- anchors::replace.value(x, colnames(x)[3:ncol(x)], from = 1, to = 0)
        x <- anchors::replace.value(x, colnames(x)[3:ncol(x)], from = 2, to = 1)
        x
    })
justices <- unlist(votes_by_issue[[1]][, 2])
# give every issue area a row for each justice (added rows hold only NAs),
# keeping the original voting rows, then alphabetize by justice name
votes_by_issue <- votes_by_issue %>%
    lapply(function(x) add_row(x, issueArea = unlist(x[1, 1]), justiceName = justices)) %>%
    lapply(function(x) distinct(x, issueArea, justiceName, .keep_all = TRUE))
votes_by_issue <- lapply(votes_by_issue, function(x) x[order(x$justiceName), ])
justices_alph <- votes_by_issue[[1]][, 2]
justices_alph <- justices_alph[order(justices_alph$justiceName), ]

Moving to Python for the more computationally heavy, iterative work, we import the justices’ votes by issue area and the issue legend.

import math

# transpose each table imported from R into a list of rows:
# one row per justice, laid out as [issueArea, justiceName, vote on case 1, vote on case 2, ...]
votes_by_issue = r.votes_by_issue
for i in range(len(votes_by_issue)):
    votes_by_issue[i] = list(map(list, zip(*votes_by_issue[i].values())))
# the issue legend becomes a list of [caseId, issueArea] pairs, one per case
issue_legend = list(map(list, zip(*r.issue_legend.values())))

We will average each justice’s votes on a conservative-to-liberal scale in each of the 13 possible issue areas, over a rolling window of 100 cases at a time.
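In symbols, the score computed below for justice $j$ in issue area $i$ is the share of that justice’s votes within the current window that were liberal:

$$p_{j,i} = \frac{L_{j,i}}{L_{j,i} + C_{j,i}},$$

where $L_{j,i}$ and $C_{j,i}$ count justice $j$’s liberal and conservative votes on the window’s cases in issue area $i$; if $L_{j,i} + C_{j,i} = 0$, the score is left undefined (NaN).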

issue_nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14]  # issueArea codes matching the 13 tables in votes_by_issue
start_ind = 0
window = 100   # number of cases in each rolling window
just = 38      # number of justices (rows) in each issue-area table
areas = 13     # number of issue areas
# counts of cases in each issue area falling before / inside the current window
before_per_issue = [0] * areas
during_per_issue = [0] * areas
# seed the window with its first (window - 1) cases; the main loop below adds one case per pass
for i in range(start_ind, start_ind + window - 1):
    iss_num = issue_legend[i][1]
    iss_ind = issue_nums.index(iss_num)
    during_per_issue[iss_ind] += 1

We slide this window across the cases from 1946 through the present, looking at only 100 cases at a time. Then, treating each justice’s per-issue liberal-vote share as a coordinate in 13-dimensional Euclidean space, we compute the pairwise distances between the justices for each snapshot in time.
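Concretely, the distance between justices $a$ and $b$ in a given window is a Euclidean distance restricted to the issue areas where both have a defined score:

$$d(a, b) = \sqrt{\sum_{i \,:\, p_{a,i},\, p_{b,i}\ \text{defined}} \left(p_{a,i} - p_{b,i}\right)^2},$$

and it comes out as NaN when the two justices share no issue areas with defined scores (for example, justices whose tenures don’t overlap within the window).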

def distance(just1, just2):
    allnan = True
    dsum = 0
    for i in range(len(just1)):
        if not (math.isnan(just1[i]) or math.isnan(just2[i])):
            allnan = False
            dsum += (just1[i]-just2[i])**2
    if allnan:
        return float('nan')
    return math.sqrt(dsum)
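
# A quick illustration with made-up scores: the NaN coordinate is skipped,
# so the distance is taken over the remaining issue areas only.
a = [0.8, float('nan'), 0.25]
b = [0.2, 0.90, 0.75]
print(distance(a, b))  # sqrt(0.6**2 + 0.5**2) ≈ 0.781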

pairwises = [0] * (len(issue_legend) - window + 1)
while start_ind + window <= len(issue_legend):
    # add issue area of new case
    iss_num = issue_legend[start_ind + window - 1][1]
    iss_ind = issue_nums.index(iss_num)
    during_per_issue[iss_ind] += 1
    percents = [0] * just
    for j in range(just):
        percents[j] = [0] * areas
    # find percentage of liberal votes
    for i in range(areas):
        bef = before_per_issue[i]
        dur = during_per_issue[i]
        for j in range(just):
            zeroes = 0
            ones = 0
            # the +2 offset skips the issueArea and justiceName entries at the start of each row
            for k in range(bef+2, bef+dur+2):
                v = votes_by_issue[i][j][k]
                if v == 0:
                    zeroes += 1
                elif v == 1:
                    ones += 1
            if (ones+zeroes == 0):
                percents[j][i] = float('nan')
            else:
                percents[j][i] = float(ones)/(ones+zeroes)
    # compute pairwise distances
    pairwises[start_ind] = [0] * just
    for j1 in range(just):
        pairwises[start_ind][j1] = [0] * just
        for j2 in range(just):
            pairwises[start_ind][j1][j2] = distance(percents[j1],percents[j2])
    # remove issue area of old case
    iss_num = issue_legend[start_ind][1]
    iss_ind = issue_nums.index(iss_num)
    before_per_issue[iss_ind] += 1
    during_per_issue[iss_ind] -= 1
    start_ind += 1

We load the data back into R from Python and format it.

pairwises = py$pairwises
pairwises <- pairwises %>%
    lapply(function(x) do.call(rbind.data.frame, x))
pairwises <- lapply(pairwises, setNames, nm=justices_alph$justiceName)

We delete the justices whose rows and columns are entirely NaN. If any NaNs remain after that, we drop those rows and columns as well.

pairwises <- pairwises %>%
    lapply(function(x)
        x[rowSums(is.na(x)) != ncol(x), colSums(is.na(x)) != nrow(x)]) %>%
    lapply(function(x)
        x[rowSums(is.na(x)) == 0, colSums(is.na(x)) == 0])

We import a CSV file with the party of the president who appointed each justice. The file has a justiceName column matching the SCDB justice codes and a party column with values “D” or “R”, which we join on below.

judges_appointed <- read_csv("judges_appointed.csv")

We run Principal Coordinates Analysis (classical multidimensional scaling, via cmdscale()) on each of the snapshots and combine all of the resulting coordinates.
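As a reminder of what this computes: classical MDS double-centers the matrix of squared distances and uses its leading eigenvectors as coordinates,

$$B = -\tfrac{1}{2}\, J D^{(2)} J, \qquad J = I - \tfrac{1}{n}\mathbf{1}\mathbf{1}^{\top},$$

where $D^{(2)}$ holds the squared pairwise distances; the two-dimensional embedding consists of the top two eigenvectors of $B$, each scaled by the square root of its eigenvalue.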

mds <- NULL  # will accumulate the coordinates for every snapshot

for (i in 1:length(pairwises)) {
    pairwise <- pairwises[[i]]
    pairdist <- as.dist(pairwise)
    pairmds <- pairdist %>%
        cmdscale() %>%
        as_tibble()
    pairmds <- cbind(pairmds, colnames(pairwise))
    colnames(pairmds) <- c("Dim1", "Dim2", "justiceName")
    pairmds <- left_join(pairmds, judges_appointed, by="justiceName")
    pairmds$party = factor(pairmds$party, levels = c("D", "R"))
    pairmds$casenum <- i
    mds <- rbind(mds, pairmds)
}

We set up the plot that we’ll be animating.

colors <- c("blue", "red")

plot <- ggplot(mds, aes(Dim1, Dim2, label=justiceName, color=party)) +
    xlim(-0.5,0.5) +
    ylim(-0.5,0.5) +
    geom_text(alpha=0.5) +
    scale_color_manual(values=colors) +
    labs(x="Dim 1", y="Dim 2") +
    theme_minimal()

anim <- plot + transition_time(casenum)

We create the final animation, showing ideological distances between justices over time.

animate(anim, nframes=6609, fps=30, width=500, height=500, renderer=av_renderer())
anim_save("pcoa30.mp4")