Research Project

Load Packages:

suppressWarnings(suppressMessages(library(operators)))
suppressWarnings(suppressMessages(library(plyr)))
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(reshape2)))
suppressWarnings(suppressMessages(library(RCurl)))
suppressWarnings(suppressMessages(library(knitr)))

The data source for congressional results was is Time Magazine’s GitHub Repo. I’m grateful for their work in tidying the data and providing results for each congressional election from 2004-2012 as both as json and csv file. The link I used for the 2012 data below can be found \(\href{https://github.com/TimeMagazine/congressional-election-results.git}{here}\)

First, I loaded and cleanded the data. The file contains records of senate elections, marked as “S” in the district field, and special elections. Special elections are indicated by additional text in the district field. We will excise this info for our purposes. Those congressional districts marked “00” are at-large districts for states with only one seat.

url <- "https://raw.githubusercontent.com/TimeMagazine/congressional-election-results/master/data/results_2012.csv"
cd2012 <- getURL(url)
cd2012 <- read.csv(textConnection(cd2012))
#cd2012 <- read.csv("results_2012.csv",header=T)
cd2012$votes <- as.numeric(as.character(cd2012$votes))

## Warning: NAs introduced by coercion

cd2012 <- subset(cd2012,district != "S")
cd2012$district[cd2012$district == "01 - FULL TERM"] <- "01"
cd2012$district[cd2012$district == "04 - FULL TERM"] <- "04"
cd2012$district[cd2012$district == "10 - FULL TERM"] <- "10"
cd2012$district[cd2012$district == "11 - FULL TERM"] <- "11"
kable(subset(cd2012, state=="CA" & district=="37"))

	id	year	name	state	district	votes	parties
138	2012_CA_37	2012	Bass, Karen R.	CA	37	207039	Democratic
139	2012_CA_37	2012	Osborne, Morgan	CA	37	32541	Write-In,Republican,Republican

By examining the records for the California 37th, we see that some major parties are not simply listed “Democratic” or “Republican”, but some local variation or party alliance. To simplify the results, we will lump all of the Republican and Democratic groups into their umbrella label. This will still leave a significant amount of independent and third parties in our dataset that are aligned with neither of the two.

d <- c("Democratic,Working Families","Working Families,Democratic",
       "Democratic,Progressive,Working Families","Democratic-Farmer Labor",
       "Democratic,Working Families,Independence","Democratic,Independence,Working Families",
       "Democratic-Nonpartisan League","Write-In,Democratic,Democratic","Democratic")
r <- c("Republican,Tax Revolt,Conservative,Independence","Republican,Tax Revolt,Conservative",
       "Republican,Tax Revolt","Republican,Conservative","Conservative,Republican",
       "Republican,Conservative,Libertarian","Republican,Conservative,Independence",
       "Independence,Republican","Independent Party,Republican", "Republican,Constitution",
       "Write-In,Republican,Republican","Conservative,Independence,Republican","Republican")

for (i in 1:length(cd2012$parties)){
  if (cd2012$parties[i] %in% d){
    cd2012$parties[i] <- "Democratic"
  }
  else if (cd2012$parties[i] %in% r){
    cd2012$parties[i] <- "Republican"
  }
}
kable(subset(cd2012, state=="CA" & district=="37"))

	id	year	name	state	district	votes	parties
138	2012_CA_37	2012	Bass, Karen R.	CA	37	207039	Democratic
139	2012_CA_37	2012	Osborne, Morgan	CA	37	32541	Republican

The replacements worked! Now we will perform a a number of operations and aggregations to return useful dataframes.

# calc total vote by state
totalVoteByState <- aggregate(votes ~ state, cd2012, sum)
# calc total vote by district
totalVoteByDistrict <- aggregate(votes ~ state + district, cd2012, sum)
totalVoteByDistrict <- totalVoteByDistrict[order(totalVoteByDistrict$state,totalVoteByDistrict$district),]
# add threshold of simple majority based on each district's vote total
totalVoteByDistrict$simpleMajority <- round(totalVoteByDistrict$votes/2 + 1,0)
kable(head(totalVoteByState,10))

state	votes
AK	288840
AL	1927122
AR	1038054
AZ	2173317
CA	12204357
CO	2450839
CT	1466511
DE	388059
FL	7513534
GA	3553587

kable(head(totalVoteByDistrict,10))

	state	district	votes	simpleMajority
1	AK	00	288840	144421
8	AL	01	196374	98188
51	AL	02	283683	141842
94	AL	03	273447	136724
132	AL	04	268777	134390
167	AL	05	290957	145480
196	AL	06	307529	153766
222	AL	07	306355	153178
9	AR	01	246843	123422
52	AR	02	286598	143300

Next, I created a dataframe for total votes for each party in each district. I added a column for each party’s percentage of the vote in the district as well as a total of the number of votes “wasted” for each party. Our critera for wasted votes are:

Every vote for a losing party
Every vote in excess of the simple majority for a winning party

districtParty <- aggregate(votes ~ state + district + parties, cd2012, sum)
districtParty <- districtParty[order(districtParty$state,districtParty$district),]
districtParty <- join(districtParty,totalVoteByDistrict,by=c("state","district"),type="left")
colnames(districtParty)[5] <- "totalVote"
districtParty <- districtParty[,c(1,2,5,6,4,3)]
districtParty$pctVote <- districtParty$votes/districtParty$totalVote
districtParty$wastedVote <- 0
  
for (i in 1:nrow(districtParty)){
  if (districtParty$votes[i] < districtParty$simpleMajority[i]){
  districtParty$wastedVote[i] <- districtParty$votes[i]
  }
  else if (districtParty$votes[i] >= districtParty$simpleMajority[i]){
  districtParty$wastedVote[i] <- districtParty$votes[i] - districtParty$simpleMajority[i]
  }
}
kable(head(districtParty,10))

state	district	totalVote	simpleMajority	votes	parties	pctVote	wastedVote
AK	00	288840	144421	82927	Democratic	0.2871036	82927
AK	00	288840	144421	15028	Libertarian	0.0520288	15028
AK	00	288840	144421	5589	Non-Affiliated	0.0193498	5589
AK	00	288840	144421	185296	Republican	0.6415178	40875
AL	01	196374	98188	196374	Republican	1.0000000	98186
AL	02	283683	141842	103092	Democratic	0.3634056	103092
AL	02	283683	141842	180591	Republican	0.6365944	38749
AL	03	273447	136724	98141	Democratic	0.3589032	98141
AL	03	273447	136724	175306	Republican	0.6410968	38582
AL	04	268777	134390	69706	Democratic	0.2593451	69706

Next, I created a column to indicate which party won in each district. To do so, I iterated over every district to find which party had the highest percentage of the vote. Winners were marked with “1” in the “win” column so that we could later aggregate results by party.

allStates <- unique(districtParty$state)
allDistricts <- unique(districtParty$district)
districtParty$win <- 0 #create column to indicate wins

for (i in 1:length(allDistricts)){
  for (j in 1:length(allStates)){
    row_to_find <- data.frame(state=allStates[j], district=allDistricts[i])
    if(nrow(merge(row_to_find,districtParty))>0){
      subFrame <- subset(districtParty,state==allStates[j] & district==allDistricts[i])
      w <- which.max(subFrame$pctVote)
      districtParty[districtParty$state==allStates[j] & 
      districtParty$district==allDistricts[i],]$win[w] <- 1
    }
  }
}

Calculate total seats won by state and overall:

seatsWon <- aggregate(win ~ parties,districtParty,sum)
seatsWon <- subset(seatsWon,win!=0)
seatsWon

##       parties win
## 12 Democratic 199
## 39 Republican 234

seatsWonByState <- aggregate(win ~ state + parties,districtParty,sum)
seatsWonByState <- subset(seatsWonByState,win!=0)
seatsWonByState <- seatsWonByState[order(seatsWonByState$state,-seatsWonByState$win),]
kable(head(seatsWonByState,10))

	state	parties	win
156	AK	Republican	1
157	AL	Republican	6
19	AL	Democratic	1
158	AR	Republican	4
21	AZ	Democratic	5
159	AZ	Republican	4
22	CA	Democratic	38
160	CA	Republican	15
161	CO	Republican	4
23	CO	Democratic	3

A quick detour to get a rough comparison of the total votes and wasted votes by party:

totalWastedVoteByParty <- aggregate(wastedVote ~ parties, districtParty, sum)
totalWastedVoteByParty <- totalWastedVoteByParty[order(-totalWastedVoteByParty$wastedVote),]
totalVoteByParty <- aggregate(votes ~ parties, districtParty, sum)
totalVoteByParty <- totalVoteByParty[order(-totalVoteByParty$votes),]
kable(head(totalWastedVoteByParty,6))

	parties	wastedVote
12	Democratic	33763208
39	Republican	25146263
21	Libertarian	1302730
16	Independent	485517
25	No Party Affiliation	374428
14	Green	342912

kable(head(totalVoteByParty,6))

	parties	votes
12	Democratic	59637698
39	Republican	58265212
21	Libertarian	1302730
16	Independent	485517
25	No Party Affiliation	374428
14	Green	342912

Phenomenal! By looking at the at the aggregate, we see that Democrats and Republicans received roughly the same number of votes nationally. Although Democrats received 1,372,486 more votes, Republicans won a 35 seat majority in the House of Representatives. We also see that there are far more wasted votes for Democrats - 8,616,945 more to be precise - just about a third more wasted votes than for Republicans.

Now we will start to create our misapportionment metric by diving into the data for wasted votes for each party by state.

numDistricts <- data.frame(table(totalVoteByDistrict$state))
colnames(numDistricts) <- c("state","number")
numDistricts[9,2] <- 27 # correct, missing two from redistricting
sum(numDistricts$number) # check all 435 are there

## [1] 435

totalWastedVoteByPartyByState <- aggregate(wastedVote ~ state + parties, districtParty, sum)
totalVoteByPartyByState <- aggregate(votes ~ state + parties, districtParty, sum)
totalVoteByPartyByState <- totalVoteByPartyByState[order(totalVoteByPartyByState$state,-totalVoteByPartyByState$votes),]
totalWastedVoteByPartyByState <- totalWastedVoteByPartyByState[order(totalWastedVoteByPartyByState$state,-totalWastedVoteByPartyByState$wastedVote),]
kable(head(totalVoteByPartyByState,10))

	state	parties	votes
156	AK	Republican	185296
18	AK	Democratic	82927
104	AK	Libertarian	15028
146	AK	Non-Affiliated	5589
157	AL	Republican	1233624
19	AL	Democratic	693498
158	AR	Republican	637591
20	AR	Democratic	304770
70	AR	Green	57706
105	AR	Libertarian	37987

kable(head(totalWastedVoteByPartyByState,10))

	state	parties	wastedVote
18	AK	Democratic	82927
156	AK	Republican	40875
104	AK	Libertarian	15028
146	AK	Non-Affiliated	5589
19	AL	Democratic	540320
157	AL	Republican	423234
20	AR	Democratic	304770
158	AR	Republican	118560
70	AR	Green	57706
105	AR	Libertarian	37987

Our first attempt will take a simplified look at the results by ignoring third parties. We will subset the dataframe to only include Democrats and Republicans and then join it with the number of districts and total vote for each state. This will let us create new columns for the efficiency gap and the number of seats misapportioned.

The efficiency gap is calculated by the absolute value in the difference of wasted votes for each party, divided by the total number of votes in the state. The number of misapportioned seats is a product of the efficiency gap and the number of seats/districts in the state.

I also created columns for the advantage given to either party. This advantage is simply the seat misapportionment in their favor. The seat advantage for each state is determined by the party with fewer wasted votes.

twoPartyModel <- subset(totalWastedVoteByPartyByState,parties=="Democratic" | totalWastedVoteByPartyByState$parties=="Republican")
twoPartyModel <- dcast(twoPartyModel,state ~ parties,value.var="wastedVote")
# bind number districts and total vote
twoPartyModel <- join(twoPartyModel,numDistricts,by="state",type="left")
twoPartyModel <- join(twoPartyModel,totalVoteByState,by="state",type="left")
twoPartyModel$efficiency.gap <- abs(twoPartyModel$Democratic-twoPartyModel$Republican)/twoPartyModel$votes
twoPartyModel$seats.misapportioned <- twoPartyModel$efficiency.gap*twoPartyModel$number
colnames(twoPartyModel)[2:5] <- c("w.votes.D","w.votes.R","seats","total.votes")
twoPartyModel$adv.R <- 0 
twoPartyModel$adv.D <- 0

for (i in 1:nrow(twoPartyModel)){
  if (twoPartyModel$w.votes.D[i] < twoPartyModel$w.votes.R[i]){
    twoPartyModel$adv.D[i] <- twoPartyModel$seats.misapportioned[i]
  }
  else if (twoPartyModel$w.votes.R[i] < twoPartyModel$w.votes.D[i]){
    twoPartyModel$adv.R[i] <- twoPartyModel$seats.misapportioned[i]
  }
}


sum(twoPartyModel$adv.R)

## [1] 37.18521

sum(twoPartyModel$adv.D)

## [1] 8.228584

By summing the total advantages for each party, we see that the Republicans have roughly a 29 seat advantage based on the efficiency gap. This measure is remarkably close to the excess number of seats Republicans hold in proportion to their share of the vote.

# Republican Seats in proportional system
(58265212/sum(totalVoteByParty$votes))*435

## [1] 208.982

# Libertarian Seats in proportional system
(1302730/sum(totalVoteByParty$votes))*435

## [1] 4.672549

We calculated above that Republicans won 234 seats in the 2012 election, but we see here that in a strictly proportional system they would have won 209. Of course, seats in the House of Representatives aren’t meant to be proportionally allocated, but here we may be able to disprove the notion that the Republican advantage is the product of political advantage in many smaller, less-populated districts. Our measure of the efficiency gap shows that they hold 29 seats more than they should. This demonstrates how perfect political symmetry - parity in wasted votes - differs from strict proportionality. We demonstrate that Republicans hold 29 seats more than they should, which would actually put them at 4 seats fewer than a strictly proportional allotmment. We see here that symmetry in wasted votes does not manifest strictly proportional outcomes. Some symmetric elections may benefit one of the two major parties at the cost of the other, but it appears in this case that the Republicans picked up most of the five seats Liberatarians would have won.

Now we will add some metrics to our two-party model to see how a symmetric election may have turned out.

twoPartyStateTotals <- subset(totalVoteByPartyByState,parties=="Democratic" | totalVoteByPartyByState$parties=="Republican")
twoPartyStateTotals <- dcast(twoPartyStateTotals,state ~ parties,value.var="votes")
colnames(twoPartyStateTotals)[2:3] <- c("d.votes","r.votes")
twoPartyModel <- join(twoPartyModel,twoPartyStateTotals,by="state",type="left")
# calculate projected seats
twoPartyModel$proj.D.seats <- (twoPartyModel$d.votes/twoPartyModel$total.votes)*twoPartyModel$seats
twoPartyModel$proj.R.seats <- (twoPartyModel$r.votes/twoPartyModel$total.votes)*twoPartyModel$seats
display <- twoPartyModel
#display[,2:9] <- round(display[,2:9],2)
kable(display[,c(1,4,7,12,13)])

state	seats	seats.misapportioned	proj.D.seats	proj.R.seats
AK	1	0.1455893	0.2871036	0.6415178
AL	7	0.4252985	2.5190341	4.4809659
AR	4	0.7175349	1.1743898	2.4568703
AZ	9	0.2580921	3.9216304	4.6863697
CA	53	1.2802878	32.1043754	19.6725347
CO	7	0.3431882	3.0859546	3.2668698
CT	5	0.9623896	3.2433476	1.7057151
DE	1	0.1903164	0.6440593	0.3343744
FL	27	2.9047930	12.1906488	13.7506657
GA	14	0.9293826	5.7080820	8.2894754
HI	2	0.3019603	1.3490258	0.6509742
IA	4	0.1096082	2.0121201	1.8925944
ID	2	0.3749705	0.6558284	1.2808642
IL	18	1.0471468	9.7638073	7.8567970
IN	9	1.3883687	4.0266283	4.7639194
KS	4	0.0627830	0.7393317	2.8021317
KY	6	0.7122713	2.3539121	3.5324701
LA	6	0.7682076	1.2635545	4.0209273
MA	9	0.3879411	6.4761451	2.1714945
MD	8	0.5785490	5.0451608	2.6620387
ME	2	0.5334815	1.2332614	0.7667386
MI	14	2.3112845	7.1244616	6.3863620
MN	8	0.1169094	4.4474870	3.4486441
MO	8	0.9638537	3.3470728	4.3756075
MS	4	0.0277807	1.3620477	2.3295797
MT	1	0.3946763	0.4271876	0.5325134
NC	13	2.7841548	6.5786734	6.3378995
ND	1	0.3682878	0.4178849	0.5495987
NE	3	0.6455137	1.0727520	1.9272480
NH	2	0.4085112	0.9997537	0.9138644
NJ	12	2.0638059	6.5609951	5.2300038
NM	3	0.2758823	1.6976062	1.2998526
NV	4	0.2595020	1.8621360	1.8782758
NY	27	2.0979047	17.2385141	9.3760276
OH	16	3.6932604	7.5091061	8.1559140
OK	5	0.8161184	1.5473006	3.2311991
OR	5	0.7597368	2.7839943	2.0164479
PA	18	4.3056870	9.0498016	8.7794030
RI	2	0.6688927	1.0889133	0.7577967
SC	7	1.4879715	2.9022655	4.0092606
SD	1	0.3510095	0.4255027	0.5744973
TN	9	0.2157263	3.1389991	5.3973430
TX	36	1.8254567	13.8561480	20.8049834
UT	4	0.2132312	1.2986684	2.5943536
VA	11	2.3454557	5.3209992	5.5294050
VT	1	0.0130324	0.7201472	0.2331779
WA	10	0.3250112	5.4443818	4.5556182
WI	8	1.1720342	4.0386575	3.9184214
WV	3	0.0323316	1.2026167	1.7973833
WY	1	0.0486101	0.2386891	0.6900852

Add actual seats won to the model and plot the results:

seatsWonByState <- dcast(seatsWonByState,state ~ parties,value.var="win",fill="0")
seatsWonByState$Democratic <- as.numeric(as.character(seatsWonByState$Democratic))
seatsWonByState$Republican <- as.numeric(as.character(seatsWonByState$Republican))
colnames(seatsWonByState)[2:3] <- c("act.D.seats","act.R.seats")
twoPartyModel <- join(twoPartyModel,seatsWonByState,by="state",type="left")
twoPartyModel$proj.diff <- abs(twoPartyModel$proj.R.seats - twoPartyModel$proj.D.seats)
twoPartyModel$act.diff <- abs(twoPartyModel$act.R.seats - twoPartyModel$act.D.seats)
ggplot(twoPartyModel) + 
  geom_point(aes(x=state, y=proj.D.seats),color="red") +
  geom_point(aes(x=state, y=proj.R.seats),color="black")

ggplot(twoPartyModel) + 
  geom_point(aes(y=efficiency.gap, x=act.diff),color="black") +
  geom_point(aes(y=efficiency.gap, x=proj.diff),color="white")

ggplot(twoPartyModel) + 
  geom_point(aes(x=state, y=seats.misapportioned),color="black") +
  geom_point(aes(x=state, y=adv.R),color="red") +
  geom_point(aes(x=state, y=adv.D),color="blue") +
  ylim(0.5,5) + 
  ggtitle("Misapportioned Congressional Districts Seats By Party")

## Warning: Removed 24 rows containing missing values (geom_point).

## Warning: Removed 32 rows containing missing values (geom_point).

## Warning: Removed 42 rows containing missing values (geom_point).

Now we calculate Democrat:Republican vote ratio and plot against efficiency gap. Now we want to look at the distribution for the efficiency gap based on who has more votes in a state. States with a DR ratio less than 1 have more Republican votes, and those with a ratio greater than 1 have more Demcratic voters

twoPartyModel$marg.victory <- abs(twoPartyModel$d.votes-twoPartyModel$r.votes)/twoPartyModel$total.votes
twoPartyModel$voteratio <- ifelse(twoPartyModel$r.votes<twoPartyModel$d.votes,
                          twoPartyModel$r.votes/twoPartyModel$d.votes,
                          twoPartyModel$d.votes/twoPartyModel$r.votes)
twoPartyModel$majority <- ifelse(twoPartyModel$r.votes>twoPartyModel$d.votes,"R","D")
ggplot(twoPartyModel,aes(x=voteratio, y=efficiency.gap)) + 
  geom_point(aes(colour=majority)) + 
  scale_colour_manual(values=c("blue","red")) + 
  stat_smooth(se=TRUE,level=0.95) + 
  ylim(-0.2,0.4) + xlim(1,4)

## Warning: Removed 50 rows containing non-finite values (stat_smooth).

## Warning: Removed 50 rows containing missing values (geom_point).

ggplot(twoPartyModel,aes(x=voteratio, y=efficiency.gap,colour=majority)) + 
  geom_point() + 
  scale_colour_manual(values=c("blue","red")) + 
  geom_smooth(se=FALSE,method=lm,level=0.95)

ggplot(twoPartyModel,aes(x=marg.victory, y=efficiency.gap,colour=majority)) + 
  geom_point() + 
  scale_colour_manual(values=c("blue","red")) + 
  geom_smooth(se=FALSE,method=lm,level=0.95)

rgap <- subset(twoPartyModel,voteratio<1)
dgap <- subset(twoPartyModel,voteratio>1)
summary(rgap$efficiency.gap)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.006945 0.049450 0.124300 0.133100 0.191900 0.394700

summary(dgap$efficiency.gap)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##

twoPartyModel[,c(1,6,10,11,20)]

##    state efficiency.gap d.votes r.votes majority
## 1     AK    0.145589254   82927  185296        R
## 2     AL    0.060756921  693498 1233624        R
## 3     AR    0.179383731  304770  637591        R
## 4     AZ    0.028676903  946994 1131663        R
## 5     CA    0.024156373 7392703 4530012        D
## 6     CO    0.049026884 1080454 1143796        R
## 7     CT    0.192477929  951281  500290        D
## 8     DE    0.190316421  249933  129757        D
## 9     FL    0.107584926 3392402 3826522        R
## 10    GA    0.066384473 1448869 2104098        R
## 11    HI    0.150980146  285008  137531        D
## 12    IA    0.027402051  772387  726505        D
## 13    ID    0.187485241  208297  406814        R
## 14    IL    0.058174825 2743702 2207818        D
## 15    IN    0.154263188 1142554 1351760        R
## 16    KS    0.015695743  195505  740981        R
## 17    KY    0.118711889  684744 1027582        R
## 18    LA    0.128034606  359190 1143027        R
## 19    MA    0.043104563 2080594  697637        D
## 20    MD    0.072318627 1626872  858406        D
## 21    ME    0.266740751  427819  265982        D
## 22    MI    0.165091749 2327985 2086804        D
## 23    MN    0.014613678 1560984 1210409        D
## 24    MO    0.120481707 1119554 1463586        R
## 25    MS    0.006945186  411398  703635        R
## 26    MT    0.394676283  204939  255468        R
## 27    NC    0.214165756 2218357 2137167        D
## 28    ND    0.368287790  131869  173433        R
## 29    NE    0.215171226  276239  496276        R
## 30    NH    0.204255606  340925  311636        D
## 31    NJ    0.171983824 1794407 1430386        D
## 32    NM    0.091960755  422189  323269        D
## 33    NV    0.064875501  453310  457239        R
## 34    NY    0.077700175 4128031 2245236        D
## 35    OH    0.230828775 2412451 2620251        R
## 36    OK    0.163223687  410324  856872        R
## 37    OR    0.151947354  949660  687839        D
## 38    PA    0.239204835 2793538 2710070        D
## 39    RI    0.334446368  232679  161926        D
## 40    SC    0.212567357  742805 1026129        R
## 41    SD    0.351009465  153789  207640        R
## 42    TN    0.023969590  796513 1369562        R
## 43    TX    0.050707131 2949900 4429270        R
## 44    UT    0.053307799  324309  647873        R
## 45    VA    0.213223247 1806025 1876761        R
## 46    VT    0.013032386  208600   67543        D
## 47    WA    0.032501116 1636726 1369540        D
## 48    WI    0.146504274 1445015 1401995        D
## 49    WV    0.010777199  257101  384253        R
## 50    WY    0.048610103   57573  166452        R

The summary stats for the two efficiency gaps show that the two parties have roughly the same distributions. The mean for the Democratic efficiency gap is a just short of the Republican, but the median efficiency gap for Dems is higher. The Republican efficiency gap, however, is skewed higher given three states where the efficiency gap exceeds 0.35. (Intepretation: roughly one-third of the seats in those states are misapportioned towards Republicans).

Sampling - first create simulated results

sim <- data.frame(a=1:99,b=99:1)
sim$wasted.b <- ifelse(sim$b>=51,sim$b-51,sim$b)
sim$wasted.a <- ifelse(sim$a>=51,sim$a-51,sim$a)
sim$margin <- abs(sim$a-sim$b)
sim$vote.ratio <- ifelse(sim$a<sim$b,sim$a/sim$b,sim$b/sim$a)
sim$efficiency.gap <- abs(sim$wasted.a-sim$wasted.b)/100
# split at 0.33 and 0.93
packed <- sim[sim$vote.ratio<0.33,]
cracked <- sim[sim$vote.ratio>0.33 & sim$vote.ratio<0.93,]
competitive <- sim[sim$vote.ratio>0.93,]
f1 <- lm(efficiency.gap~vote.ratio,packed)
f2 <- lm(efficiency.gap~vote.ratio,cracked)
f3 <- lm(efficiency.gap~vote.ratio,competitive)

The linear regression fits we get for this model gives us the piecewise function:

\[Vote\:Ratio = VR\] \[ Efficiency\:Gap = \begin{cases} 0.467 - 1.51 \times VR, & VR < 0.33 \\ -0.224 + 0.782 \times VR, & 0.33 \leq VR \leq 0.93 \\ 0.510 - 0.510 \times VR, & 0.93 < VR \leq 1 \end{cases} \]

# efficiency gap by district
foo <- dcast(districtParty,state + district + totalVote ~ parties,value.var="wastedVote",fill=0)
foo$efficiency.gap <- abs(foo$Democratic-foo$Republican)/foo$totalVote
foo <- foo[,c("state","district","totalVote","Republican","Democratic","efficiency.gap")]
colnames(foo)[4:5] <- c("w.votes.R","w.votes.D")
bar <- dcast(districtParty,state + district ~ parties,value.var="votes",fill=0)
bar <- bar[,c("state","district","Republican","Democratic")]
colnames(bar)[3:4] <- c("votes.R","votes.D")
bar$vote.ratio <- ifelse(bar$votes.D<bar$votes.R,bar$votes.D/bar$votes.R,bar$votes.R/bar$votes.D)
foo <- join(foo,bar,by=c("state","district"),type="left")
foo$majority <- ifelse(foo$votes.R>foo$votes.D,"R","D")
foo$margin <- (abs(foo$votes.R-foo$votes.D)/foo$totalVote)*100
e2 <- aggregate(efficiency.gap~state,foo,mean) # agg. mean efficiency gap by state based on all districts
colnames(e2)[2] <- "mean.efficiency.gap"
twoPartyModel <- join(twoPartyModel,e2,by="state",type="left")

ggplot(sim) + geom_point(aes(x=margin,y=efficiency.gap))

ggplot(sim) + geom_point(aes(x=vote.ratio,y=efficiency.gap)) # interesting piece-wise function

ggplot() + 
  geom_point(data=sim, aes(x=vote.ratio,y=efficiency.gap)) +
  geom_point(data=foo, aes(x=vote.ratio,y=efficiency.gap,color=majority)) +
  scale_colour_manual(values=c("blue","red")) # +

 # geom_point(data=twoPartyModel, aes(x=voteratio,y=mean.efficiency.gap,color=majority))

sampleState <- function(stateAbbrev){
  record <- subset(twoPartyModel,state==stateAbbrev)
  margin <- record["marg.victory"]
  s <- as.numeric(record["seats"])
  # create vector with "D","R",and "I" each repeated for number of votes in state
  u <- c(rep("D",record["d.votes"]),rep("R",record["r.votes"]),
                          rep("I",record["total.votes"]-record["d.votes"]-record["r.votes"]))
  # randomize
  u <- sample(u)
  # split
  chunk <- split(u, ceiling(seq_along(u)/(length(u)/12))) 
}

x = data.frame(num = 1:26, let = letters, LET = LETTERS) set.seed(10) derp <- split(x, sample(rep(1:2, 13)))

Here we’ll delve briefly into the results for third parties:

thirdParties <- subset(districtParty,districtParty$parties!="Democratic" & districtParty$parties!="Republican")
thirdParties$pctVote <- round(thirdParties$pctVote*100,1)
summary(thirdParties$pctVote)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.800   2.000   3.095   3.375  46.000

There are a remarkable number of independent candidates picked up more than 10% of the vote. California and Florida each have four district where third parties picked up a significant votes. The Green Party also picked up 16% of the vote in the Arizona 3rd and Liberatarian parties fared vary well in districts in AZ, KS, LA, and TX.

kable(subset(thirdParties,thirdParties$pctVote>10))

	state	district	totalVote	simpleMajority	votes	parties	pctVote	wastedVote
26	AR	03	245660	122831	39318	Green	16.0	39318
54	AZ	07	127827	63914	23338	Libertarian	18.3	23338
85	CA	13	288582	144292	38146	No Party Preference	13.2	38146
103	CA	23	216003	108002	57842	No Party Preference	26.8	57842
116	CA	29	150281	75142	38994	No Party Preference	25.9	38994
122	CA	33	318520	159261	146660	No Party Preference	46.0	146660
181	CO	05	307237	153620	53318	Unaffiliated	17.4	53318
221	FL	04	315470	157736	75236	No Party Affiliation	23.8	75236
262	FL	20	244285	122144	29553	No Party Affiliation	12.1	29553
265	FL	21	284400	142201	63137	No Party Affiliation	22.2	63137
271	FL	25	200229	100116	48763	No Party Affiliation	24.4	48763
337	IL	02	297712	148857	40006	Independent	13.4	40006
411	KS	03	293762	146882	92675	Libertarian	31.5	92675
442	LA	04	249531	124766	61637	Libertarian	24.7	61637
445	LA	05	260216	130109	37486	No Party Affiliation	14.4	37486
447	LA	06	306713	153358	32185	Libertarian	10.5	32185
448	LA	06	306713	153358	30975	No Party Affiliation	10.1	30975
469	MA	07	252836	126419	41199	Independent	16.3	41199
630	MS	03	293322	146662	58605	Reform	20.0	58605
993	TN	06	241241	120622	34766	Independent	14.4	34766
1059	TX	17	179262	89632	35978	Libertarian	20.1	35978
1064	TX	19	192063	96032	28824	Libertarian	15.0	28824

Research Project

David Stern

March 22, 2016