Election Analysis: What Happened

Introduction

Unless you weren’t on this planet, you knew November held one of the most fascinating election in US history. This analysis combines American Census demographics and county voting records to dispel or confirm common wisdom spoken about the election and its results. Let’s begin by compiling the independent and dependent variables seperately, from Cook Political Reporting and the ACS API respectively. This will involve joining and transforming elements so that they work nicely together.

The data I gather will be aggregated on the US County Level, so we can determine which factors affect likelihood to vote Democratic (without loss of generality), or likelihood to decrease voter turnout.

knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(UScensus2010)
## Warning: package 'UScensus2010' was built under R version 3.3.2
## Loading required package: maptools
## Warning: package 'maptools' was built under R version 3.3.2
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.3.2
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
## Loading required package: foreign
## 
## Package UScensus2010: US Census 2010 Suite of R Packages
## Version 0.11 created on 2011-11-18.
## Zack Almquist, University of California-Irvine
## ne
## For citation information, type citation("UScensus2010").
## Type help(package=UScensus2010) to get started.
## Compiled by Cook Politcal Reporting, from their github, these are the independent variables: the outcome of the elections:

county<-read.csv(file = "https://raw.githubusercontent.com/tonmcg/County_Level_Election_Results_12-16/master/US_County_Level_Presidential_Results_08-16.csv")

head(county)
##   fips_code             county total_2008 dem_2008 gop_2008 oth_2008
## 1     26041       Delta County      19064     9974     8763      327
## 2     48295    Lipscomb County       1256      155     1093        8
## 3      1127      Walker County      28652     7420    20722      510
## 4     48389      Reeves County       3077     1606     1445       26
## 5     56017 Hot Springs County       2546      619     1834       93
## 6     20043    Doniphan County       3564     1115     2372       77
##   total_2012 dem_2012 gop_2012 oth_2012 total_2016 dem_2016 gop_2016
## 1      18043     8330     9533      180      18467     6431    11112
## 2       1168      119     1044        5       1322      135     1159
## 3      28497     6551    21633      313      29243     4486    24208
## 4       2867     1649     1185       33       3184     1659     1417
## 5       2495      523     1894       78       2535      400     1939
## 6       3369      885     2397       87       3366      584     2601
##   oth_2016
## 1      924
## 2       28
## 3      549
## 4      108
## 5      196
## 6      181
### Add Calcualted Outcome Variables (dependent variables)

Indy<-data.frame(county,"TotalDiff"=(county$total_2016-county$total_2012)/county$total_2012,
                 "DemDiff"=(county$dem_2016-county$dem_2012)/county$dem_2012,
                 "RepDiff"=(county$gop_2016-county$gop_2012)/county$gop_2012,
                 "OthDiff"=(county$oth_2016-county$oth_2012)/county$oth_2012,
                 "Dem_Score_2012"=county$dem_2012/county$total_2012,
                 "Dem_Score_2016"=county$dem_2016/county$total_2016,
                  "Oth_Score_2016"=county$oth_2016/county$total_2016,
                 "Dem_Score_Diff"=county$dem_2016/county$total_2016-county$dem_2012/county$total_2012
                 )

IndyClean<-data.frame(Indy[1],Indy[15:22])



##Now we need demographic information from the Census API, so I registered for a key and it takes some a tiny trick to install the package:  These are the dependent variables

#url2 <- "http://lakshmi.calit2.uci.edu/census2000/R/src/contrib/UScensus2010tract_1.00.tar.gz"
#url3<-"http://lakshmi.calit2.uci.edu/census2000/R/src/contrib/UScensus2010county_1.00.tar.gz"
#install.packages(url3, repos = NULL)
#library(UScensus2010tract)
library(UScensus2010county)
## 
## UScensus2010county: US Census 2010 County Level Shapefiles and Additional
## Demographic Data 
## Version 1.00 created on 2011-11-06 
## copyright (c) 2011, Zack W. Almquist, University of California-Irvine
## Type help(package="UScensus2010county") to get started.
## 
## For citation information, type citation("UScensus2010county").
key<-"3c293c96d558db8fad8e7272e400c7775235bd20"

###Gets Total Population, How Many White, How Many Hispanic:  

#Note all census data from 2010

demvars<-c("P0010001","P0030002","P0040003")
d1<-data.frame(demographics(state="AL",level="county",dem = demvars))
d2<-data.frame(demographics(state="AK",level="county",dem = demvars))
d3<-data.frame(demographics(state="AZ",level="county",dem = demvars))
d4<-data.frame(demographics(state="AR",level="county",dem = demvars))
d5<-data.frame(demographics(state="CA",level="county",dem = demvars))
d6<-data.frame(demographics(state="CO",level="county",dem = demvars))
d7<-data.frame(demographics(state="CT",level="county",dem = demvars))
d8<-data.frame(demographics(state="DE",level="county",dem = demvars))
d9<-data.frame(demographics(state="DC",level="county",dem = demvars))
d10<-data.frame(demographics(state="FL",level="county",dem = demvars))
d11<-data.frame(demographics(state="GA",level="county",dem = demvars))
d12<-data.frame(demographics(state="HI",level="county",dem = demvars))
d13<-data.frame(demographics(state="ID",level="county",dem = demvars))
d14<-data.frame(demographics(state="IL",level="county",dem = demvars))
d15<-data.frame(demographics(state="IN",level="county",dem = demvars))
d16<-data.frame(demographics(state="IA",level="county",dem = demvars))
d17<-data.frame(demographics(state="KS",level="county",dem = demvars))
d18<-data.frame(demographics(state="KY",level="county",dem = demvars))
d19<-data.frame(demographics(state="LA",level="county",dem = demvars))
d20<-data.frame(demographics(state="ME",level="county",dem = demvars))
d21<-data.frame(demographics(state="MD",level="county",dem = demvars))
d22<-data.frame(demographics(state="MA",level="county",dem = demvars))
d23<-data.frame(demographics(state="MI",level="county",dem = demvars))
d24<-data.frame(demographics(state="MN",level="county",dem = demvars))
d25<-data.frame(demographics(state="MS",level="county",dem = demvars))
d26<-data.frame(demographics(state="MO",level="county",dem = demvars))
d27<-data.frame(demographics(state="MT",level="county",dem = demvars))
d28<-data.frame(demographics(state="NE",level="county",dem = demvars))
d29<-data.frame(demographics(state="NV",level="county",dem = demvars))
d30<-data.frame(demographics(state="NH",level="county",dem = demvars))
d31<-data.frame(demographics(state="NJ",level="county",dem = demvars))
d32<-data.frame(demographics(state="NM",level="county",dem = demvars))
d33<-data.frame(demographics(state="NY",level="county",dem = demvars))
d34<-data.frame(demographics(state="NC",level="county",dem = demvars))
d35<-data.frame(demographics(state="ND",level="county",dem = demvars))
d36<-data.frame(demographics(state="OH",level="county",dem = demvars))
d37<-data.frame(demographics(state="OK",level="county",dem = demvars))
d38<-data.frame(demographics(state="OR",level="county",dem = demvars))
d39<-data.frame(demographics(state="PA",level="county",dem = demvars))
d40<-data.frame(demographics(state="RI",level="county",dem = demvars))
d41<-data.frame(demographics(state="SC",level="county",dem = demvars))
d42<-data.frame(demographics(state="SD",level="county",dem = demvars))
d43<-data.frame(demographics(state="TN",level="county",dem = demvars))
d44<-data.frame(demographics(state="TX",level="county",dem = demvars))
d45<-data.frame(demographics(state="UT",level="county",dem = demvars))
d46<-data.frame(demographics(state="VT",level="county",dem = demvars))
d47<-data.frame(demographics(state="VA",level="county",dem = demvars))
d48<-data.frame(demographics(state="WA",level="county",dem = demvars))
d49<-data.frame(demographics(state="WV",level="county",dem = demvars))
d50<-data.frame(demographics(state="WI",level="county",dem = demvars))
d51<-data.frame(demographics(state="WY",level="county",dem = demvars))


alldemo<-rbind(d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18,d19,d20,d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31,d32,d33,d34,d35,d36,d37,d38,d39,d40,d41,d42,d43,d44,d45,d46,d47,d48,d49,d50,d51)

colnames(alldemo)<-c("Population","White","Hispanic")

 Counties <- rownames(alldemo)
 alldemo<-cbind(Counties,alldemo)
 
demo<-data.frame(alldemo,"Per.White"=alldemo$White/alldemo$Population,"Per.Hispanic"=alldemo$Hispanic/alldemo$Population)
head(demo) 
##                        Counties Population  White Hispanic Per.White
## Barbour County   Barbour County      27457  13180     1387 0.4800233
## Clay County         Clay County      13932  11380      399 0.8168246
## Marengo County   Marengo County      21027   9751      352 0.4637371
## Houston County   Houston County     101547  71053     2995 0.6997056
## Cherokee County Cherokee County      25989  24081      320 0.9265843
## Baldwin County   Baldwin County     182265 156153     7992 0.8567361
##                 Per.Hispanic
## Barbour County    0.05051535
## Clay County       0.02863910
## Marengo County    0.01674038
## Houston County    0.02949373
## Cherokee County   0.01231290
## Baldwin County    0.04384824
 ###
# Load FIPS to county name lookup file
fips<-read.csv("http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt",header = FALSE)
fips<-data.frame(fips,"L"=paste(fips$V2,sprintf("%03d", fips$V3),sep = ""))
 head(fips)
##   V1 V2 V3             V4 V5    L
## 1 AL  1  1 Autauga County H1 1001
## 2 AL  1  3 Baldwin County H1 1003
## 3 AL  1  5 Barbour County H1 1005
## 4 AL  1  7    Bibb County H1 1007
## 5 AL  1  9  Blount County H1 1009
## 6 AL  1 11 Bullock County H1 1011
 ##Use the Acs Package with the API to get more in-depth data
 
library(acs)
## Warning: package 'acs' was built under R version 3.3.2
## Loading required package: stringr
## Loading required package: plyr
## Loading required package: XML
## 
## Attaching package: 'acs'
## The following object is masked from 'package:base':
## 
##     apply
 ## Sex data
sex <- acs.fetch(
    geography = geo.make(state = "*", county="*"),
    endyear = 2010, span = 0,
    table.number = "P12", 
    col.names = "pretty",key=key,dataset='sf1')

sexdem<-data.frame(sex@geography$state,sex@geography$county,sex@estimate)
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 118,122,123,131,135,139,145,146,149,150,155,156,159,160,164,166,172,183,215,230,274,275,278,282,283,289,293,306,308,327,330,332,336,338,351,352,353,354,355,359,361,364,368,373,380,383,387,391,392,398,406,409,415,416,417,421,423,426,431,435,439,443,446,447,453,462,463,465,468,470,472,475,476,477,479,483,484,485,487,489,490,491,494,499,501,502,503,504,507,512,516,520,531,533,534,537,541,543,553,563,568,570,571,572,573,577,583,584,595,596,599,602,603,607,608,611,612,614,616,620,621,623,624,626,628,629,632,634,635,636,639,644,646,647,649,653,655,656,657,662,663,664,668,670,671,672,673,674,677,678,681,682,686,689,690,691,692,698,701,703,704,705,706,707,708,709,710,713,714,718,719,721,722,724,725,726,727,730,731,733,734,736,738,739,742,744,745,746,747,748,750,751,752,753,756,759,760,763,764,765,769,770,778,782,783,785,786,788,791,795,797,801,802,803,804,807,809,810,811,812,813,814,816,817,822,823,824,825,826,827,829,830,831,832,833,834,835,838,839,840,841,842,845,850,852,853,855,857,858,861,866,871,872,876,877,878,880,881,882,883,884,887,889,895,896,899,900,901,902,907,908,909,911,912,917,918,921,922,926,931,932,934,937,941,942,943,944,946,947,949,950,951,962,971,972,973,974,975,985,989,994,995,996,1001,1002,1009,1014,1017,1018,1019,1020,1021,1022,1023,1027,1029,1030,1031,1032,1034,1039,1040,1042,1043,1044,1045,1048,1049,1051,1054,1057,1058,1061,1062,1063,1064,1065,1068,1069,1071,1072,1073,1074,1075,1077,1079,1080,1081,1085,1087,1090,1091,1093,1097,1098,1099,1101,1102,1106,1107,1108,1109,1110,1111,1113,1180,1181,1182,1184,1185,1192,1199,1206,1207,1208,1212,1213,1214,1223,1226,1229,1231,1240,1242,1244,1245,1250,1251,1252,1253,1255,1269,1272,1274,1278,1284,1289,1298,1301,1305,1306,1311,1313,1319,1322,1325,1326,1328,1329,1330,1334,1335,1340,1342,1346,1352,1355,1356,1359,1360,1365,1374,1375,1380,1384,1389,1391,1396,1400,1402,1406,1408,1409,1410,1411,1413,1414,1417,1418,1420,1422,1424,1425,1427,1431,1432,1433,1435,1437,1438,1439,1440,1442,1444,1445,1446,1447,1448,1449,1450,1452,1457,1458,1461,1463,1465,1466,1467,1474,1476,1477,1478,1479,1480,1481,1484,1486,1488,1489,1491,1493,1494,1495,1496,1498,1500,1501,1502,1503,1505,1506,1507,1508,1511,1512,1513,1514,1515,1517,1519,1522,1523,1524,1525,1528,1530,1531,1532,1533,1534,1535,1537,1538,1539,1540,1541,1542,1544,1545,1547,1548,1549,1550,1552,1553,1554,1556,1559,1562,1565,1567,1568,1569,1571,1574,1576,1579,1580,1581,1583,1585,1587,1588,1592,1593,1594,1595,1596,1597,1601,1604,1607,1609,1614,1615,1620,1622,1624,1625,1627,1629,1632,1634,1637,1638,1640,1644,1648,1651,1655,1659,1660,1662,1663,1666,1667,1668,1669,1671,1672,1675,1676,1678,1681,1682,1684,1685,1690,1692,1693,1694,1695,1696,1699,1701,1702,1703,1708,1710,1711,1713,1714,1718,1721,1723,1724,1725,1726,1729,1730,1734,1735,1736,1737,1738,1740,1742,1743,1744,1745,1746,1747,1749,1750,1754,1756,1757,1758,1766,1770,1774,1778,1780,1781,1785,1786,1788,1792,1793,1794,1795,1800,1805,1810,1815,1818,1820,1821,1823,1827,1830,1835,1838,1839,1841,1844,1845,1846,1847,1848,1849,1851,1852,1853,1854,1855,1856,1857,1858,1861,1864,1867,1868,1871,1877,1879,1880,1881,1885,1886,1887,1892,1902,1904,1905,1909,1910,1912,1913,1916,1924,1925,1928,1930,1935,1940,1942,1943,1945,1947,1948,1949,1951,1952,1958,1965,1966,1967,1969,1970,1973,1980,1983,1984,1986,1987,1988,1991,1997,1999,2004,2007,2009,2014,2015,2016,2018,2019,2020,2022,2025,2026,2028,2029,2032,2033,2035,2036,2042,2044,2045,2051,2052,2053,2054,2055,2057,2060,2064,2065,2066,2067,2068,2069,2072,2074,2075,2076,2077,2078,2081,2082,2083,2084,2085,2086,2087,2089,2091,2092,2094,2097,2098,2099,2100,2101,2104,2105,2106,2107,2109,2112,2113,2117,2118,2119,2120,2123,2126,2127,2128,2129,2132,2137,2138,2141,2142,2143,2145,2147,2151,2152,2154,2155,2157,2158,2161,2162,2164,2165,2166,2169,2172,2173,2177,2179,2181,2183,2188,2189,2190,2193,2194,2198,2200,2201,2205,2209,2210,2213,2214,2216,2218,2220,2223,2224,2227,2228,2229,2230,2232,2233,2235,2236,2239,2242,2243,2245,2248,2252,2254,2257,2262,2263,2264,2265,2267,2268,2269,2270,2272,2273,2274,2277,2280,2281,2287,2289,2290,2292,2294,2296,2300,2301,2303,2304,2306,2307,2308,2310,2311,2312,2313,2316,2320,2323,2325,2327,2328,2334,2336,2340,2343,2345,2346,2347,2350,2353,2355,2356,2359,2360,2362,2368,2370,2371,2372,2374,2375,2378,2381,2382,2383,2387,2393,2394,2396,2397,2399,2401,2402,2403,2406,2407,2408,2413,2414,2415,2418,2422,2424,2425,2429,2430,2431,2433,2434,2435,2437,2438,2440,2441,2442,2444,2446,2447,2448,2449,2452,2454,2455,2458,2459,2461,2462,2464,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2484,2485,2486,2487,2489,2490,2491,2492,2493,2496,2498,2499,2502,2503,2504,2506,2507,2508,2509,2510,2511,2512,2515,2516,2517,2518,2519,2521,2522,2523,2524,2529,2537,2547,2548,2551,2552,2554,2557,2559,2560,2562,2570,2576,2580,2581,2583,2589,2592,2593,2594,2597,2598,2600,2603,2613,2614,2617,2618,2619,2620,2622,2623,2624,2625,2627,2630,2631,2632,2635,2636,2637,2640,2643,2644,2645,2646,2649,2650,2653,2655,2661,2662,2667,2668,2669,2670,2680,2681,2682,2683,2686,2687,2688,2690,2691,2693,2694,2695,2699,2703,2704,2706,2710,2711,2720,2721,2733,2734,2735,2738,2744,2745,2751,2759,2761,2762,2765,2766,2769,2770,2773,2778,2781,2783,2786,2787,2788,2790,2792,2796,2798,2799,2804,2805,2811,2812,2815,2816,2818,2819,2823,2829,2830,2833,2834,2836,2837,2838,2840,2841,2842,2843,2845,2848,2851,2853,2854,2855,2856,2858,2859,2861,2864,2865,2871,2872,2874,2876,2878,2879,2880,2881,2883,2884,2886,2887,2894,2896,2899,2900,2901,2906,2907,2908,2909,2910,2911,2912,2913,2915,2955,2957,2960,2961,2963,2965,2966,2967,2970,2971,2975,2976,2977,2981,2982,2987,2988,2994,2995,2996,3000,3001,3003,3004,3005,3007,3008,3010,3011,3012,3014,3015,3016,3017,3018,3019,3020,3021,3022,3025,3026,3027,3028,3029,3031,3033,3035,3037,3039,3041,3042,3043,3044,3047,3048,3049,3050,3053,3054,3057,3058,3059,3060,3062,3064,3065,3067,3069,3070,3071,3073,3074,3075,3076,3081,3083,3087,3088,3090,3092,3096,3097,3098,3101,3102,3103,3109,3111,3113,3115,3119,3120,3121,3122,3123,3124,3126,3127,3130,3132,3135,3136,3137,3140
## --> row.names NOT used
sexvars<-data.frame("State"=sexdem$sex.geography.state,
"County"=sexdem$sex.geography.county,
"FIPS"=paste(sexdem$sex.geography.state,sexdem$sex.geography.county,sep=""),
"Male"=sexdem$P12..Sex.By.Age..Male.,
"Female"=sexdem$P12..Sex.By.Age..Total.population-sexdem$P12..Sex.By.Age..Male.,
"Per.Male"=sexdem$P12..Sex.By.Age..Male./sexdem$P12..Sex.By.Age..Total.population
)
head(sexvars)
##   State County FIPS  Male Female  Per.Male
## 1     1    001 1001 26569  28002 0.4868703
## 2     1    003 1003 89196  93069 0.4893754
## 3     1    005 1005 14576  12881 0.5308664
## 4     1    007 1007 12301  10614 0.5368099
## 5     1    009 1009 28362  28960 0.4947839
## 6     1    011 1011  5912   5002 0.5416896
## African American & White

race <- acs.fetch(
    geography = geo.make(state = "*", county="*"),
    endyear = 2010, span = 0,
    table.number = "P6", 
    col.names = "pretty",key=key,dataset='sf1')

racedem<-data.frame(race@geography$state,race@geography$county,race@estimate)
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 118,122,123,131,135,139,145,146,149,150,155,156,159,160,164,166,172,183,215,230,274,275,278,282,283,289,293,306,308,327,330,332,336,338,351,352,353,354,355,359,361,364,368,373,380,383,387,391,392,398,406,409,415,416,417,421,423,426,431,435,439,443,446,447,453,462,463,465,468,470,472,475,476,477,479,483,484,485,487,489,490,491,494,499,501,502,503,504,507,512,516,520,531,533,534,537,541,543,553,563,568,570,571,572,573,577,583,584,595,596,599,602,603,607,608,611,612,614,616,620,621,623,624,626,628,629,632,634,635,636,639,644,646,647,649,653,655,656,657,662,663,664,668,670,671,672,673,674,677,678,681,682,686,689,690,691,692,698,701,703,704,705,706,707,708,709,710,713,714,718,719,721,722,724,725,726,727,730,731,733,734,736,738,739,742,744,745,746,747,748,750,751,752,753,756,759,760,763,764,765,769,770,778,782,783,785,786,788,791,795,797,801,802,803,804,807,809,810,811,812,813,814,816,817,822,823,824,825,826,827,829,830,831,832,833,834,835,838,839,840,841,842,845,850,852,853,855,857,858,861,866,871,872,876,877,878,880,881,882,883,884,887,889,895,896,899,900,901,902,907,908,909,911,912,917,918,921,922,926,931,932,934,937,941,942,943,944,946,947,949,950,951,962,971,972,973,974,975,985,989,994,995,996,1001,1002,1009,1014,1017,1018,1019,1020,1021,1022,1023,1027,1029,1030,1031,1032,1034,1039,1040,1042,1043,1044,1045,1048,1049,1051,1054,1057,1058,1061,1062,1063,1064,1065,1068,1069,1071,1072,1073,1074,1075,1077,1079,1080,1081,1085,1087,1090,1091,1093,1097,1098,1099,1101,1102,1106,1107,1108,1109,1110,1111,1113,1180,1181,1182,1184,1185,1192,1199,1206,1207,1208,1212,1213,1214,1223,1226,1229,1231,1240,1242,1244,1245,1250,1251,1252,1253,1255,1269,1272,1274,1278,1284,1289,1298,1301,1305,1306,1311,1313,1319,1322,1325,1326,1328,1329,1330,1334,1335,1340,1342,1346,1352,1355,1356,1359,1360,1365,1374,1375,1380,1384,1389,1391,1396,1400,1402,1406,1408,1409,1410,1411,1413,1414,1417,1418,1420,1422,1424,1425,1427,1431,1432,1433,1435,1437,1438,1439,1440,1442,1444,1445,1446,1447,1448,1449,1450,1452,1457,1458,1461,1463,1465,1466,1467,1474,1476,1477,1478,1479,1480,1481,1484,1486,1488,1489,1491,1493,1494,1495,1496,1498,1500,1501,1502,1503,1505,1506,1507,1508,1511,1512,1513,1514,1515,1517,1519,1522,1523,1524,1525,1528,1530,1531,1532,1533,1534,1535,1537,1538,1539,1540,1541,1542,1544,1545,1547,1548,1549,1550,1552,1553,1554,1556,1559,1562,1565,1567,1568,1569,1571,1574,1576,1579,1580,1581,1583,1585,1587,1588,1592,1593,1594,1595,1596,1597,1601,1604,1607,1609,1614,1615,1620,1622,1624,1625,1627,1629,1632,1634,1637,1638,1640,1644,1648,1651,1655,1659,1660,1662,1663,1666,1667,1668,1669,1671,1672,1675,1676,1678,1681,1682,1684,1685,1690,1692,1693,1694,1695,1696,1699,1701,1702,1703,1708,1710,1711,1713,1714,1718,1721,1723,1724,1725,1726,1729,1730,1734,1735,1736,1737,1738,1740,1742,1743,1744,1745,1746,1747,1749,1750,1754,1756,1757,1758,1766,1770,1774,1778,1780,1781,1785,1786,1788,1792,1793,1794,1795,1800,1805,1810,1815,1818,1820,1821,1823,1827,1830,1835,1838,1839,1841,1844,1845,1846,1847,1848,1849,1851,1852,1853,1854,1855,1856,1857,1858,1861,1864,1867,1868,1871,1877,1879,1880,1881,1885,1886,1887,1892,1902,1904,1905,1909,1910,1912,1913,1916,1924,1925,1928,1930,1935,1940,1942,1943,1945,1947,1948,1949,1951,1952,1958,1965,1966,1967,1969,1970,1973,1980,1983,1984,1986,1987,1988,1991,1997,1999,2004,2007,2009,2014,2015,2016,2018,2019,2020,2022,2025,2026,2028,2029,2032,2033,2035,2036,2042,2044,2045,2051,2052,2053,2054,2055,2057,2060,2064,2065,2066,2067,2068,2069,2072,2074,2075,2076,2077,2078,2081,2082,2083,2084,2085,2086,2087,2089,2091,2092,2094,2097,2098,2099,2100,2101,2104,2105,2106,2107,2109,2112,2113,2117,2118,2119,2120,2123,2126,2127,2128,2129,2132,2137,2138,2141,2142,2143,2145,2147,2151,2152,2154,2155,2157,2158,2161,2162,2164,2165,2166,2169,2172,2173,2177,2179,2181,2183,2188,2189,2190,2193,2194,2198,2200,2201,2205,2209,2210,2213,2214,2216,2218,2220,2223,2224,2227,2228,2229,2230,2232,2233,2235,2236,2239,2242,2243,2245,2248,2252,2254,2257,2262,2263,2264,2265,2267,2268,2269,2270,2272,2273,2274,2277,2280,2281,2287,2289,2290,2292,2294,2296,2300,2301,2303,2304,2306,2307,2308,2310,2311,2312,2313,2316,2320,2323,2325,2327,2328,2334,2336,2340,2343,2345,2346,2347,2350,2353,2355,2356,2359,2360,2362,2368,2370,2371,2372,2374,2375,2378,2381,2382,2383,2387,2393,2394,2396,2397,2399,2401,2402,2403,2406,2407,2408,2413,2414,2415,2418,2422,2424,2425,2429,2430,2431,2433,2434,2435,2437,2438,2440,2441,2442,2444,2446,2447,2448,2449,2452,2454,2455,2458,2459,2461,2462,2464,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2484,2485,2486,2487,2489,2490,2491,2492,2493,2496,2498,2499,2502,2503,2504,2506,2507,2508,2509,2510,2511,2512,2515,2516,2517,2518,2519,2521,2522,2523,2524,2529,2537,2547,2548,2551,2552,2554,2557,2559,2560,2562,2570,2576,2580,2581,2583,2589,2592,2593,2594,2597,2598,2600,2603,2613,2614,2617,2618,2619,2620,2622,2623,2624,2625,2627,2630,2631,2632,2635,2636,2637,2640,2643,2644,2645,2646,2649,2650,2653,2655,2661,2662,2667,2668,2669,2670,2680,2681,2682,2683,2686,2687,2688,2690,2691,2693,2694,2695,2699,2703,2704,2706,2710,2711,2720,2721,2733,2734,2735,2738,2744,2745,2751,2759,2761,2762,2765,2766,2769,2770,2773,2778,2781,2783,2786,2787,2788,2790,2792,2796,2798,2799,2804,2805,2811,2812,2815,2816,2818,2819,2823,2829,2830,2833,2834,2836,2837,2838,2840,2841,2842,2843,2845,2848,2851,2853,2854,2855,2856,2858,2859,2861,2864,2865,2871,2872,2874,2876,2878,2879,2880,2881,2883,2884,2886,2887,2894,2896,2899,2900,2901,2906,2907,2908,2909,2910,2911,2912,2913,2915,2955,2957,2960,2961,2963,2965,2966,2967,2970,2971,2975,2976,2977,2981,2982,2987,2988,2994,2995,2996,3000,3001,3003,3004,3005,3007,3008,3010,3011,3012,3014,3015,3016,3017,3018,3019,3020,3021,3022,3025,3026,3027,3028,3029,3031,3033,3035,3037,3039,3041,3042,3043,3044,3047,3048,3049,3050,3053,3054,3057,3058,3059,3060,3062,3064,3065,3067,3069,3070,3071,3073,3074,3075,3076,3081,3083,3087,3088,3090,3092,3096,3097,3098,3101,3102,3103,3109,3111,3113,3115,3119,3120,3121,3122,3123,3124,3126,3127,3130,3132,3135,3136,3137,3140
## --> row.names NOT used
racevars<-data.frame("State"=racedem$race.geography.state,
"County"=racedem$race.geography.county,
"FIPS"=paste(racedem$race.geography.state,racedem$race.geography.county,sep=""),
"Afr.Am"=racedem$P6..RACE..TOTAL.RACES.TALLIED...Black.or.African.American.alone.or.in.combination.with.one.or.more.other.races,
"Per.Afr.Am"=racedem$P6..RACE..TOTAL.RACES.TALLIED...Black.or.African.American.alone.or.in.combination.with.one.or.more.other.races/racedem$P6..RACE..TOTAL.RACES.TALLIED...Total.races.tallied,
"Per.White"=racedem$P6..RACE..TOTAL.RACES.TALLIED...White.alone.or.in.combination.with.one.or.more.other.races/racedem$P6..RACE..TOTAL.RACES.TALLIED...Total.races.tallied
)
head(racevars)
##   State County FIPS Afr.Am Per.Afr.Am Per.White
## 1     1    001 1001   9976 0.17974451 0.7860759
## 2     1    003 1003  17996 0.09720159 0.8569739
## 3     1    005 1005  13021 0.46969916 0.4816391
## 4     1    007 1007   5136 0.22208769 0.7587996
## 5     1    009 1009    909 0.01566458 0.9256751
## 6     1    011 1011   7706 0.70029080 0.2323700
## Age data
library(acs)
age <- acs.fetch(
    geography = geo.make(state = "*", county="*"),
    endyear = 2010, span = 0,
    table.number = "P13", 
    col.names = "pretty",key=key,dataset='sf1')

agedem<-data.frame(age@geography$state,age@geography$county,age@estimate)
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 118,122,123,131,135,139,145,146,149,150,155,156,159,160,164,166,172,183,215,230,274,275,278,282,283,289,293,306,308,327,330,332,336,338,351,352,353,354,355,359,361,364,368,373,380,383,387,391,392,398,406,409,415,416,417,421,423,426,431,435,439,443,446,447,453,462,463,465,468,470,472,475,476,477,479,483,484,485,487,489,490,491,494,499,501,502,503,504,507,512,516,520,531,533,534,537,541,543,553,563,568,570,571,572,573,577,583,584,595,596,599,602,603,607,608,611,612,614,616,620,621,623,624,626,628,629,632,634,635,636,639,644,646,647,649,653,655,656,657,662,663,664,668,670,671,672,673,674,677,678,681,682,686,689,690,691,692,698,701,703,704,705,706,707,708,709,710,713,714,718,719,721,722,724,725,726,727,730,731,733,734,736,738,739,742,744,745,746,747,748,750,751,752,753,756,759,760,763,764,765,769,770,778,782,783,785,786,788,791,795,797,801,802,803,804,807,809,810,811,812,813,814,816,817,822,823,824,825,826,827,829,830,831,832,833,834,835,838,839,840,841,842,845,850,852,853,855,857,858,861,866,871,872,876,877,878,880,881,882,883,884,887,889,895,896,899,900,901,902,907,908,909,911,912,917,918,921,922,926,931,932,934,937,941,942,943,944,946,947,949,950,951,962,971,972,973,974,975,985,989,994,995,996,1001,1002,1009,1014,1017,1018,1019,1020,1021,1022,1023,1027,1029,1030,1031,1032,1034,1039,1040,1042,1043,1044,1045,1048,1049,1051,1054,1057,1058,1061,1062,1063,1064,1065,1068,1069,1071,1072,1073,1074,1075,1077,1079,1080,1081,1085,1087,1090,1091,1093,1097,1098,1099,1101,1102,1106,1107,1108,1109,1110,1111,1113,1180,1181,1182,1184,1185,1192,1199,1206,1207,1208,1212,1213,1214,1223,1226,1229,1231,1240,1242,1244,1245,1250,1251,1252,1253,1255,1269,1272,1274,1278,1284,1289,1298,1301,1305,1306,1311,1313,1319,1322,1325,1326,1328,1329,1330,1334,1335,1340,1342,1346,1352,1355,1356,1359,1360,1365,1374,1375,1380,1384,1389,1391,1396,1400,1402,1406,1408,1409,1410,1411,1413,1414,1417,1418,1420,1422,1424,1425,1427,1431,1432,1433,1435,1437,1438,1439,1440,1442,1444,1445,1446,1447,1448,1449,1450,1452,1457,1458,1461,1463,1465,1466,1467,1474,1476,1477,1478,1479,1480,1481,1484,1486,1488,1489,1491,1493,1494,1495,1496,1498,1500,1501,1502,1503,1505,1506,1507,1508,1511,1512,1513,1514,1515,1517,1519,1522,1523,1524,1525,1528,1530,1531,1532,1533,1534,1535,1537,1538,1539,1540,1541,1542,1544,1545,1547,1548,1549,1550,1552,1553,1554,1556,1559,1562,1565,1567,1568,1569,1571,1574,1576,1579,1580,1581,1583,1585,1587,1588,1592,1593,1594,1595,1596,1597,1601,1604,1607,1609,1614,1615,1620,1622,1624,1625,1627,1629,1632,1634,1637,1638,1640,1644,1648,1651,1655,1659,1660,1662,1663,1666,1667,1668,1669,1671,1672,1675,1676,1678,1681,1682,1684,1685,1690,1692,1693,1694,1695,1696,1699,1701,1702,1703,1708,1710,1711,1713,1714,1718,1721,1723,1724,1725,1726,1729,1730,1734,1735,1736,1737,1738,1740,1742,1743,1744,1745,1746,1747,1749,1750,1754,1756,1757,1758,1766,1770,1774,1778,1780,1781,1785,1786,1788,1792,1793,1794,1795,1800,1805,1810,1815,1818,1820,1821,1823,1827,1830,1835,1838,1839,1841,1844,1845,1846,1847,1848,1849,1851,1852,1853,1854,1855,1856,1857,1858,1861,1864,1867,1868,1871,1877,1879,1880,1881,1885,1886,1887,1892,1902,1904,1905,1909,1910,1912,1913,1916,1924,1925,1928,1930,1935,1940,1942,1943,1945,1947,1948,1949,1951,1952,1958,1965,1966,1967,1969,1970,1973,1980,1983,1984,1986,1987,1988,1991,1997,1999,2004,2007,2009,2014,2015,2016,2018,2019,2020,2022,2025,2026,2028,2029,2032,2033,2035,2036,2042,2044,2045,2051,2052,2053,2054,2055,2057,2060,2064,2065,2066,2067,2068,2069,2072,2074,2075,2076,2077,2078,2081,2082,2083,2084,2085,2086,2087,2089,2091,2092,2094,2097,2098,2099,2100,2101,2104,2105,2106,2107,2109,2112,2113,2117,2118,2119,2120,2123,2126,2127,2128,2129,2132,2137,2138,2141,2142,2143,2145,2147,2151,2152,2154,2155,2157,2158,2161,2162,2164,2165,2166,2169,2172,2173,2177,2179,2181,2183,2188,2189,2190,2193,2194,2198,2200,2201,2205,2209,2210,2213,2214,2216,2218,2220,2223,2224,2227,2228,2229,2230,2232,2233,2235,2236,2239,2242,2243,2245,2248,2252,2254,2257,2262,2263,2264,2265,2267,2268,2269,2270,2272,2273,2274,2277,2280,2281,2287,2289,2290,2292,2294,2296,2300,2301,2303,2304,2306,2307,2308,2310,2311,2312,2313,2316,2320,2323,2325,2327,2328,2334,2336,2340,2343,2345,2346,2347,2350,2353,2355,2356,2359,2360,2362,2368,2370,2371,2372,2374,2375,2378,2381,2382,2383,2387,2393,2394,2396,2397,2399,2401,2402,2403,2406,2407,2408,2413,2414,2415,2418,2422,2424,2425,2429,2430,2431,2433,2434,2435,2437,2438,2440,2441,2442,2444,2446,2447,2448,2449,2452,2454,2455,2458,2459,2461,2462,2464,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2484,2485,2486,2487,2489,2490,2491,2492,2493,2496,2498,2499,2502,2503,2504,2506,2507,2508,2509,2510,2511,2512,2515,2516,2517,2518,2519,2521,2522,2523,2524,2529,2537,2547,2548,2551,2552,2554,2557,2559,2560,2562,2570,2576,2580,2581,2583,2589,2592,2593,2594,2597,2598,2600,2603,2613,2614,2617,2618,2619,2620,2622,2623,2624,2625,2627,2630,2631,2632,2635,2636,2637,2640,2643,2644,2645,2646,2649,2650,2653,2655,2661,2662,2667,2668,2669,2670,2680,2681,2682,2683,2686,2687,2688,2690,2691,2693,2694,2695,2699,2703,2704,2706,2710,2711,2720,2721,2733,2734,2735,2738,2744,2745,2751,2759,2761,2762,2765,2766,2769,2770,2773,2778,2781,2783,2786,2787,2788,2790,2792,2796,2798,2799,2804,2805,2811,2812,2815,2816,2818,2819,2823,2829,2830,2833,2834,2836,2837,2838,2840,2841,2842,2843,2845,2848,2851,2853,2854,2855,2856,2858,2859,2861,2864,2865,2871,2872,2874,2876,2878,2879,2880,2881,2883,2884,2886,2887,2894,2896,2899,2900,2901,2906,2907,2908,2909,2910,2911,2912,2913,2915,2955,2957,2960,2961,2963,2965,2966,2967,2970,2971,2975,2976,2977,2981,2982,2987,2988,2994,2995,2996,3000,3001,3003,3004,3005,3007,3008,3010,3011,3012,3014,3015,3016,3017,3018,3019,3020,3021,3022,3025,3026,3027,3028,3029,3031,3033,3035,3037,3039,3041,3042,3043,3044,3047,3048,3049,3050,3053,3054,3057,3058,3059,3060,3062,3064,3065,3067,3069,3070,3071,3073,3074,3075,3076,3081,3083,3087,3088,3090,3092,3096,3097,3098,3101,3102,3103,3109,3111,3113,3115,3119,3120,3121,3122,3123,3124,3126,3127,3130,3132,3135,3136,3137,3140
## --> row.names NOT used
agevars<-data.frame("State"=agedem$age.geography.state,
"County"=agedem$age.geography.county,
"Age"=agedem$P13..MEDIAN.AGE.BY.SEX..3...1.expressed.decimal...Both.sexes,
"FIPS"=paste(agedem$age.geography.state,agedem$age.geography.county,sep=""))

head(agevars)
##   State County  Age FIPS
## 1     1    001 37.0 1001
## 2     1    003 41.1 1003
## 3     1    005 39.0 1005
## 4     1    007 37.8 1007
## 5     1    009 39.0 1009
## 6     1    011 38.5 1011
## Employment data Note the API call isn't working for this table so I downloaded it and put it on Github

emp<-read.csv("https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/Unemployment.csv")
emp<-data.frame("ID"=as.character(emp$Id2),emp$Unempl.Rate)
## Income

txcountys<-geo.make(state="*",county="*")

income.county<-acs.fetch(key=key,endyear = 2010, span = 5, geography = txcountys, table.number = "B19013")

trincome<-data.frame(income=income.county@estimate)
trincome$ids<-paste(income.county@geography$state, sprintf("%03s", income.county@geography$county), sep="")
names(trincome)<-c("Income", "FIPS")
head(trincome)
##                         Income FIPS
## Autauga County, Alabama  53255 1001
## Baldwin County, Alabama  50147 1003
## Barbour County, Alabama  33219 1005
## Bibb County, Alabama     41770 1007
## Blount County, Alabama   45549 1009
## Bullock County, Alabama  31602 1011
##Education
#Create a weighted mean score of education since the data are ordinal

edu.county<-acs.fetch(key=key,endyear = 2010, span = 5, geography = txcountys, table.number = "B16010")
edu.county@acs.colnames
##  [1] "B16010_001" "B16010_002" "B16010_003" "B16010_004" "B16010_005"
##  [6] "B16010_006" "B16010_007" "B16010_008" "B16010_009" "B16010_010"
## [11] "B16010_011" "B16010_012" "B16010_013" "B16010_014" "B16010_015"
## [16] "B16010_016" "B16010_017" "B16010_018" "B16010_019" "B16010_020"
## [21] "B16010_021" "B16010_022" "B16010_023" "B16010_024" "B16010_025"
## [26] "B16010_026" "B16010_027" "B16010_028" "B16010_029" "B16010_030"
## [31] "B16010_031" "B16010_032" "B16010_033" "B16010_034" "B16010_035"
## [36] "B16010_036" "B16010_037" "B16010_038" "B16010_039" "B16010_040"
## [41] "B16010_041" "B16010_042" "B16010_043" "B16010_044" "B16010_045"
## [46] "B16010_046" "B16010_047" "B16010_048" "B16010_049" "B16010_050"
## [51] "B16010_051" "B16010_052" "B16010_053"
tredu1<-data.frame(edu=edu.county@estimate)
tredu<-data.frame("Total"=tredu1$edu.B16010_001,
      "Some.HS"=tredu1$edu.B16010_002,
      "HS"=tredu1$edu.B16010_015,
      "Some.College"=tredu1$edu.B16010_028,
      "BA"=tredu1$edu.B16010_041,
      "FIPS"=paste(edu.county@geography$state, sprintf("%03s", edu.county@geography$county), sep=""),
      "ED_Score"=(tredu1$edu.B16010_002*1+tredu1$edu.B16010_015*2+tredu1$edu.B16010_028*3+tredu1$edu.B16010_041*4)/tredu1$edu.B16010_001
)

head(tredu)
##    Total Some.HS    HS Some.College    BA FIPS ED_Score
## 1  33884    4970 11936         9609  7369 1001 2.571863
## 2 121560   15103 36340        37512 32605 1003 2.720788
## 3  18879    5312  6679         4337  2551 1005 2.218603
## 4  15082    3839  6382         3349  1512 1007 2.168015
## 5  38085    9651 13872         9791  4771 1009 2.254221
## 6   7301    1848  2670         1906   877 1011 2.248185
###Let's combine all our dependent variables from Census into one dataframe:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:acs':
## 
##     combine
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Census<-full_join(racevars,sexvars,by=c("FIPS"="FIPS"))
Census1<-full_join(Census,agevars,by=c("FIPS"="FIPS"))
Census2<-full_join(Census1,tredu,by=c("FIPS"="FIPS"))
Census3<-full_join(Census2,trincome,by=c("FIPS"="FIPS"))
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
Census4<-full_join(Census3,fips,by=c("FIPS"="L"))
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
Census5<-left_join(Census4,demo,by=c("V4"="Counties"))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
Census6<-full_join(Census5,emp,by=c("FIPS"="ID"))
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
##Join Swing States

Swing<-read.csv("https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/swing.csv")
Census7<-left_join(Census6,Swing,by=c("State"="State.FIPS.Code"))

##Now Pair it down

CensusClean<-data.frame(
  "State"=Census7$State.x,
  "County"=Census7$County.x,
  "CountyName"=Census7$V4,
  "FIPS"=Census7$FIPS,
  "Per.White"=Census7$Per.White.x,
  "Per.Hispanic"=Census7$Per.Hispanic,
  "Per.Afr.Am"=Census7$Per.Afr.Am,
  "Per.Male"=Census7$Per.Male,
  "MedAge"=Census7$Age,
  "Income"=Census7$Income,
  "ED_Score"=Census7$ED_Score,
  "Unempl"=Census7$emp.Unempl.Rate,
  "Swing"=Census7$SwingState
)


head(CensusClean)
##   State County     CountyName FIPS Per.White Per.Hispanic Per.Afr.Am
## 1     1    001 Autauga County 1001 0.7860759   0.02400542 0.17974451
## 2     1    003 Baldwin County 1003 0.8569739   0.04384824 0.09720159
## 3     1    005 Barbour County 1005 0.4816391   0.05051535 0.46969916
## 4     1    007    Bibb County 1007 0.7587996   0.01771765 0.22208769
## 5     1    009  Blount County 1009 0.9256751   0.08070200 0.01566458
## 6     1    011 Bullock County 1011 0.2323700   0.07119296 0.70029080
##    Per.Male MedAge Income ED_Score     Unempl Swing
## 1 0.4868703   37.0  53255 2.571863 0.04840857     0
## 2 0.4893754   41.1  50147 2.720788 0.05616936     0
## 3 0.5308664   39.0  33219 2.218603 0.12169014     0
## 4 0.5368099   37.8  41770 2.168015 0.05466667     0
## 5 0.4947839   39.0  45549 2.254221 0.05017336     0
## 6 0.5416896   38.5  31602 2.248185 0.12884585     0

Analysis

Now that we have the data we need, let’s visualize it and perform the analyses we need. In general we mean to correlate via a multivariable linear regression the outcome of “Democrat Shift” (WLOG) to the variables provided to answer questions about what underlying causes of the shifts were. A simplifying assumption we make is that the a couple of percent of “other” votes don’t affect the conclusions. Equivalently, around an equal fraction of them would have voted from Trump as Clinton.

library(ggmap)
## Warning: package 'ggmap' was built under R version 3.3.2
##First we want to plot the change in votes by county on a us map, try to figure out what happened. 

cpoints<-read.csv("https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/countyzip1.csv")

coords1<-left_join(IndyClean,cpoints,by=c("fips_code"="fips"))

usa_center <- as.numeric(geocode("United States"))
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20States&sensor=false
USAMap <- ggmap(get_googlemap(center=usa_center, scale=2, zoom=4), extent="normal")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.09024,-95.712891&zoom=4&size=640x640&scale=2&maptype=terrain&sensor=false
USAMap+geom_point(aes(x=long, y=lat,color=((coords1$Dem_Score_Diff))), data=coords1, alpha=0.5, size=coords1$Dem_Score_Diff*.5) + 
scale_size_continuous(range=range(coords1$Dem_Score_Diff))+
scale_colour_gradient( low="red",high = "blue",name="Shift")+
scale_fill_continuous(low="red",high = "blue",name="Shift")+
  labs(x="Longitude",y="Latitude", title="Democratic Change from 2012 to 2016")+
xlim(-130,-60)
## Warning: Removed 5 rows containing missing values (geom_point).

##Now How People Voted from 2012 compared to 2016
par(mfrow=c(1,2))

USAMap+geom_point(aes(x=long, y=lat,color=((coords1$Dem_Score_2012))), data=coords1, alpha=0.5, size=coords1$Dem_Score_2012*.5) + 
scale_size_continuous(range=range(coords1$Dem_Score_2012))+
scale_colour_gradient( low="red",high = "blue",name="Shift")+
scale_fill_continuous(low="red",high = "blue",name="Shift")+
  labs(x="Longitude",y="Latitude", title="Vote 2012")+
xlim(-130,-60)
## Warning: Removed 5 rows containing missing values (geom_point).

USAMap+geom_point(aes(x=long, y=lat,color=((coords1$Dem_Score_2016))), data=coords1, alpha=0.5, size=coords1$Dem_Score_2016*.5) + 
scale_size_continuous(range=range(coords1$Dem_Score_2016))+
scale_colour_gradient( low="red",high = "blue",name="Shift")+
scale_fill_continuous(low="red",high = "blue",name="Shift")+
  labs(x="Longitude",y="Latitude", title="Vote 2016")+
xlim(-130,-60)
## Warning: Removed 5 rows containing missing values (geom_point).

We see that the coasts have a bluer shift while the heartland, rustbelt region has a redder shift, implying strategic gains for Republicans in an electoral system, since the coasts are not competative to begin with.

Now let’s build a model of voter shift:

IndyClean$fips_code<-as.factor(IndyClean$fips_code)

model_data<-left_join(IndyClean,CensusClean,by=c("fips_code"="FIPS"))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
head(model_data)
##   fips_code    TotalDiff      DemDiff    RepDiff   OthDiff Dem_Score_2012
## 1     26041  0.023499418 -0.227971188 0.16563516 4.1333333      0.4616749
## 2     48295  0.131849315  0.134453782 0.11015326 4.6000000      0.1018836
## 3      1127  0.026178194 -0.315219051 0.11903111 0.7539936      0.2298838
## 4     48389  0.110568539  0.006064281 0.19578059 2.2727273      0.5751657
## 5     56017  0.016032064 -0.235181644 0.02375924 1.5128205      0.2096192
## 6     20043 -0.000890472 -0.340112994 0.08510638 1.0804598      0.2626892
##   Dem_Score_2016 Oth_Score_2016 Dem_Score_Diff State County
## 1      0.3482428     0.05003520  -0.1134320763    26    041
## 2      0.1021180     0.02118003   0.0002344414    48    295
## 3      0.1534042     0.01877372  -0.0764796139     1    127
## 4      0.5210427     0.03391960  -0.0541229648    48    389
## 5      0.1577909     0.07731755  -0.0518283115    56    017
## 6      0.1734997     0.05377302  -0.0891895224    20    043
##           CountyName Per.White Per.Hispanic  Per.Afr.Am  Per.Male MedAge
## 1       Delta County 0.9470906   0.14037865 0.005172186 0.4954814   45.6
## 2    Lipscomb County 0.8610701   0.30496669 0.009459060 0.5033313   37.0
## 3      Walker County 0.9122556   0.01950077 0.062758732 0.4874148   41.2
## 4      Reeves County 0.7736132   0.74243633 0.050903120 0.6008126   35.4
## 5 Hot Springs County 0.9574294   0.02182045 0.005116660 0.4939734   48.6
## 6    Doniphan County 0.9294580   0.02076778 0.038466265 0.5042165   39.6
##   Income ED_Score     Unempl Swing
## 1  41951 2.613803 0.06603993     1
## 2  52566 2.554252 0.03847981     0
## 3  37191 2.250224 0.07441469     0
## 4  32593 1.841421 0.04575062     0
## 5  42469 2.614923 0.02953587     0
## 6  43410 2.514496 0.04570117     0
##Let's checkout our data first to be sure a linear regression works.
hist(IndyClean$Dem_Score_Diff, xlab="",ylab="",main="Dem Shift Percent",col="black")

summary(IndyClean$Dem_Score_Diff)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.23440 -0.10210 -0.06298 -0.06784 -0.03353  0.09346
##Most counties shifted away from dems

par(mfrow=c(2,4))
hist(CensusClean$Per.White,col = 'white', xlab="",ylab="",main="Per White")
hist(CensusClean$Per.Afr.Am,col = 'purple', xlab="",ylab="",main="Per African American")
hist(CensusClean$Per.Hispanic,col = 'blue', xlab="",ylab="",main="Per Hispanic")
hist(CensusClean$Per.Male,col = 'orange', xlab="",ylab="",main="Percent Male")
hist(CensusClean$Income,col = 'green', xlab="",ylab="",main="Income")
hist(CensusClean$MedAge,col = 'grey', xlab="",ylab="",main="Median Age")
hist(CensusClean$ED_Score,col = 'pink', xlab="",ylab="",main="ED Score")
hist(CensusClean$Unempl,col = 'lightgreen', xlab="",ylab="",main="Unemployment")

##Seems pretty good, most are normal 

library(corrplot)
mus<-cor(na.omit(CensusClean[5:12]))
corrplot(mus)

##Note that Per White is Strongly Anticorrelated with Per. Afr Am.  This isn't unexpected in a sense that fractions have to add up to 1 however it is a little concerning to see how this anecdotally suggets seggregation still exits.  We may not want to use both variables in the regression however, given how strongly associated they are. 


##For all of USA

US<-lm(data = model_data,Dem_Score_Diff~Per.White+Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
summary(US)
## 
## Call:
## lm(formula = Dem_Score_Diff ~ Per.White + Per.Hispanic + Per.Afr.Am + 
##     Per.Male + MedAge + Income + ED_Score + Unempl, data = model_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.145011 -0.024934  0.002882  0.026472  0.167259 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.436e-02  2.337e-02  -3.609 0.000312 ***
## Per.White    -4.851e-02  9.368e-03  -5.178 2.38e-07 ***
## Per.Hispanic  9.489e-02  6.125e-03  15.493  < 2e-16 ***
## Per.Afr.Am    1.049e-01  9.846e-03  10.652  < 2e-16 ***
## Per.Male     -1.259e-01  3.289e-02  -3.829 0.000131 ***
## MedAge       -1.412e-03  1.571e-04  -8.984  < 2e-16 ***
## Income        5.696e-07  8.620e-08   6.608 4.56e-11 ***
## ED_Score      5.679e-02  4.072e-03  13.945  < 2e-16 ***
## Unempl       -1.352e-01  3.550e-02  -3.809 0.000142 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03824 on 3101 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.4068, Adjusted R-squared:  0.4053 
## F-statistic: 265.8 on 8 and 3101 DF,  p-value: < 2.2e-16
aov(data = model_data,Dem_Score_Diff~Per.White+Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
## Call:
##    aov(formula = Dem_Score_Diff ~ Per.White + Per.Hispanic + Per.Afr.Am + 
##     Per.Male + MedAge + Income + ED_Score + Unempl, data = model_data)
## 
## Terms:
##                 Per.White Per.Hispanic Per.Afr.Am Per.Male   MedAge
## Sum of Squares   1.345609     0.275660   0.015727 0.088209 0.247513
## Deg. of Freedom         1            1          1        1        1
##                   Income ED_Score   Unempl Residuals
## Sum of Squares  0.798585 0.317265 0.021220  4.534813
## Deg. of Freedom        1        1        1      3101
## 
## Residual standard error: 0.03824096
## Estimated effects may be unbalanced
## 2 observations deleted due to missingness
#All variables are significant, let's pair down the White Variable.

US<-lm(data = model_data,Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
summary(US)
## 
## Call:
## lm(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + Income + ED_Score + Unempl, data = model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14330 -0.02494  0.00298  0.02674  0.16868 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.325e-01  2.153e-02  -6.155 8.49e-10 ***
## Per.Hispanic  1.055e-01  5.796e-03  18.204  < 2e-16 ***
## Per.Afr.Am    1.468e-01  5.617e-03  26.140  < 2e-16 ***
## Per.Male     -1.175e-01  3.299e-02  -3.562 0.000374 ***
## MedAge       -1.659e-03  1.504e-04 -11.032  < 2e-16 ***
## Income        5.656e-07  8.655e-08   6.535 7.43e-11 ***
## ED_Score      5.928e-02  4.060e-03  14.599  < 2e-16 ***
## Unempl       -8.799e-02  3.445e-02  -2.554 0.010692 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0384 on 3102 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.4017, Adjusted R-squared:  0.4003 
## F-statistic: 297.5 on 7 and 3102 DF,  p-value: < 2.2e-16
USa<-aov(data = model_data,Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
USa
## Call:
##    aov(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + Income + ED_Score + Unempl, data = model_data)
## 
## Terms:
##                 Per.Hispanic Per.Afr.Am Per.Male   MedAge   Income
## Sum of Squares      0.439592   1.004823 0.084954 0.381044 0.815650
## Deg. of Freedom            1          1        1        1        1
##                 ED_Score   Unempl Residuals
## Sum of Squares  0.334894 0.009620  4.574024
## Deg. of Freedom        1        1      3102
## 
## Residual standard error: 0.03839974
## Estimated effects may be unbalanced
## 2 observations deleted due to missingness
#R squared is unchanged.
##Check model
par(mfrow=c(2,4))

 plot(y=US$residuals, x=na.omit(model_data)$Income, xlab="Income")
 plot(y=US$residuals, x=na.omit(model_data)$Unempl, xlab="Unempl")
 plot(y=US$residuals, x=na.omit(model_data)$Per.Afr.Am,xlab="Race")
plot(y=US$residuals, x=na.omit(model_data)$ED_Score,xlab="Edu")
plot(y=US$residuals, x=na.omit(model_data)$Per.Male,xlab="Male")
plot(y=US$residuals, x=na.omit(model_data)$Per.Hispanic,xlab="Hispanic")
plot(y=US$residuals, x=na.omit(model_data)$MedAge,xlab="Age")

This Model Says that increased Hispanic Percentage, African American Percentage, Income, of a county means it was more likely to shift positive towards Democrats and that the more Male, the older and the less employed the county was the more it would shift away from Democrats. This seems like it fits a ‘pundit’ model of elections. Let’s see how it plays out in swing states.

####Swing States Model

sw<-lm(data = subset(model_data, Swing==1),Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
summary(sw)
## 
## Call:
## lm(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + Income + ED_Score + Unempl, data = subset(model_data, 
##     Swing == 1))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.087277 -0.021943  0.001238  0.023029  0.091889 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -3.560e-01  4.259e-02  -8.359 4.19e-16 ***
## Per.Hispanic  1.059e-01  1.858e-02   5.696 1.90e-08 ***
## Per.Afr.Am    2.015e-01  1.095e-02  18.392  < 2e-16 ***
## Per.Male      4.703e-03  5.813e-02   0.081   0.9355    
## MedAge       -5.869e-04  2.845e-04  -2.063   0.0396 *  
## Income        3.734e-07  1.615e-07   2.312   0.0211 *  
## ED_Score      1.083e-01  8.276e-03  13.091  < 2e-16 ***
## Unempl       -2.213e-01  8.583e-02  -2.578   0.0102 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0323 on 619 degrees of freedom
## Multiple R-squared:  0.6034, Adjusted R-squared:  0.5989 
## F-statistic: 134.5 on 7 and 619 DF,  p-value: < 2.2e-16
##let's pair down non-significant variables, make it more parisomonious remove age and percent male.
sw<-lm(data = subset(model_data, Swing==1),Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Income+ED_Score+Unempl)
summary(sw)
## 
## Call:
## lm(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Income + 
##     ED_Score + Unempl, data = subset(model_data, Swing == 1))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.090814 -0.022231  0.000632  0.023387  0.100905 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -3.853e-01  1.719e-02 -22.419  < 2e-16 ***
## Per.Hispanic  1.124e-01  1.834e-02   6.126  1.6e-09 ***
## Per.Afr.Am    2.071e-01  1.054e-02  19.645  < 2e-16 ***
## Income        3.665e-07  1.598e-07   2.293  0.02216 *  
## ED_Score      1.115e-01  7.628e-03  14.611  < 2e-16 ***
## Unempl       -2.385e-01  8.559e-02  -2.787  0.00549 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03236 on 621 degrees of freedom
## Multiple R-squared:  0.6006, Adjusted R-squared:  0.5974 
## F-statistic: 186.8 on 5 and 621 DF,  p-value: < 2.2e-16
swa<-aov(data = subset(model_data, Swing==1),Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Income+ED_Score+Unempl)
swa
## Call:
##    aov(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Income + 
##     ED_Score + Unempl, data = subset(model_data, Swing == 1))
## 
## Terms:
##                 Per.Hispanic Per.Afr.Am    Income  ED_Score    Unempl
## Sum of Squares     0.0852619  0.3070411 0.3509347 0.2265266 0.0081314
## Deg. of Freedom            1          1         1         1         1
##                 Residuals
## Sum of Squares  0.6501938
## Deg. of Freedom       621
## 
## Residual standard error: 0.03235755
## Estimated effects may be unbalanced

Now let’s do the same but for what predict GOP vs Dem vs Other Vote, for all of US then .

USdem<-lm(data = model_data,Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score+Unempl)
summary(USdem)
## 
## Call:
## lm(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + ED_Score + Unempl, data = model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39190 -0.06715 -0.00506  0.05755  0.50741 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.3208784  0.0563329  -5.696 1.34e-08 ***
## Per.Hispanic  0.2963517  0.0153014  19.368  < 2e-16 ***
## Per.Afr.Am    0.5414358  0.0148475  36.466  < 2e-16 ***
## Per.Male     -0.4170082  0.0871099  -4.787 1.77e-06 ***
## MedAge       -0.0015044  0.0003953  -3.805 0.000144 ***
## ED_Score      0.2961857  0.0079702  37.162  < 2e-16 ***
## Unempl        1.6492788  0.0909644  18.131  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1016 on 3103 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.5611, Adjusted R-squared:  0.5603 
## F-statistic: 661.2 on 6 and 3103 DF,  p-value: < 2.2e-16
USadem<-aov(data = model_data,Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score+Unempl)
USadem
## Call:
##    aov(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + ED_Score + Unempl, data = model_data)
## 
## Terms:
##                 Per.Hispanic Per.Afr.Am Per.Male   MedAge ED_Score
## Sum of Squares       2.09331   20.89077  1.47304  1.33111 11.79557
## Deg. of Freedom            1          1        1        1        1
##                   Unempl Residuals
## Sum of Squares   3.39554  32.05129
## Deg. of Freedom        1      3103
## 
## Residual standard error: 0.1016323
## Estimated effects may be unbalanced
## 2 observations deleted due to missingness
USdemss<-lm(data = subset(model_data, Swing==1),Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score)
summary(USdemss)
## 
## Call:
## lm(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + ED_Score, data = subset(model_data, Swing == 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22376 -0.05038 -0.00094  0.05074  0.49940 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.0120819  0.0958766   0.126      0.9    
## Per.Hispanic  0.1982604  0.0433410   4.574 5.77e-06 ***
## Per.Afr.Am    0.6136069  0.0235193  26.089  < 2e-16 ***
## Per.Male     -0.5665370  0.1357801  -4.172 3.44e-05 ***
## MedAge       -0.0027011  0.0006701  -4.031 6.24e-05 ***
## ED_Score      0.2658311  0.0144355  18.415  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07643 on 621 degrees of freedom
## Multiple R-squared:  0.6737, Adjusted R-squared:  0.6711 
## F-statistic: 256.4 on 5 and 621 DF,  p-value: < 2.2e-16
USademss<-aov(data = subset(model_data, Swing==1),Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score)
USademss
## Call:
##    aov(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male + 
##     MedAge + ED_Score, data = subset(model_data, Swing == 1))
## 
## Terms:
##                 Per.Hispanic Per.Afr.Am Per.Male   MedAge ED_Score
## Sum of Squares      0.411967   3.845383 0.692605 0.557838 1.980748
## Deg. of Freedom            1          1        1        1        1
##                 Residuals
## Sum of Squares   3.627200
## Deg. of Freedom       621
## 
## Residual standard error: 0.07642579
## Estimated effects may be unbalanced
### 3rd Party is extra!

USoth<-lm(data = model_data,Oth_Score_2016~Per.Afr.Am+Per.Male+MedAge+ED_Score+Income)
summary(USoth)
## 
## Call:
## lm(formula = Oth_Score_2016 ~ Per.Afr.Am + Per.Male + MedAge + 
##     ED_Score + Income, data = model_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.068455 -0.011329 -0.002153  0.007162  0.248357 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9.718e-02  1.185e-02  -8.204 3.36e-16 ***
## Per.Afr.Am  -7.159e-02  2.921e-03 -24.506  < 2e-16 ***
## Per.Male     1.428e-01  1.866e-02   7.654 2.59e-14 ***
## MedAge      -1.166e-03  8.162e-05 -14.282  < 2e-16 ***
## ED_Score     5.839e-02  2.271e-03  25.710  < 2e-16 ***
## Income      -4.392e-07  4.894e-08  -8.975  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02185 on 3106 degrees of freedom
## Multiple R-squared:  0.3878, Adjusted R-squared:  0.3868 
## F-statistic: 393.5 on 5 and 3106 DF,  p-value: < 2.2e-16
USaoth<-aov(data = model_data,Oth_Score_2016~Per.Afr.Am+Per.Male+MedAge+ED_Score+Income)
USaoth
## Call:
##    aov(formula = Oth_Score_2016 ~ Per.Afr.Am + Per.Male + MedAge + 
##     ED_Score + Income, data = model_data)
## 
## Terms:
##                 Per.Afr.Am  Per.Male    MedAge  ED_Score    Income
## Sum of Squares   0.4148398 0.0034119 0.1378478 0.3444769 0.0384364
## Deg. of Freedom          1         1         1         1         1
##                 Residuals
## Sum of Squares  1.4822572
## Deg. of Freedom      3106
## 
## Residual standard error: 0.02184545
## Estimated effects may be unbalanced

Thank you!