Unless you weren’t on this planet, you knew November held one of the most fascinating election in US history. This analysis combines American Census demographics and county voting records to dispel or confirm common wisdom spoken about the election and its results. Let’s begin by compiling the independent and dependent variables seperately, from Cook Political Reporting and the ACS API respectively. This will involve joining and transforming elements so that they work nicely together.
The data I gather will be aggregated on the US County Level, so we can determine which factors affect likelihood to vote Democratic (without loss of generality), or likelihood to decrease voter turnout.
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(UScensus2010)
## Warning: package 'UScensus2010' was built under R version 3.3.2
## Loading required package: maptools
## Warning: package 'maptools' was built under R version 3.3.2
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.3.2
## Checking rgeos availability: FALSE
## Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
## which has a restricted licence. It is disabled by default;
## to enable gpclib, type gpclibPermit()
## Loading required package: foreign
##
## Package UScensus2010: US Census 2010 Suite of R Packages
## Version 0.11 created on 2011-11-18.
## Zack Almquist, University of California-Irvine
## ne
## For citation information, type citation("UScensus2010").
## Type help(package=UScensus2010) to get started.
## Compiled by Cook Politcal Reporting, from their github, these are the independent variables: the outcome of the elections:
county<-read.csv(file = "https://raw.githubusercontent.com/tonmcg/County_Level_Election_Results_12-16/master/US_County_Level_Presidential_Results_08-16.csv")
head(county)
## fips_code county total_2008 dem_2008 gop_2008 oth_2008
## 1 26041 Delta County 19064 9974 8763 327
## 2 48295 Lipscomb County 1256 155 1093 8
## 3 1127 Walker County 28652 7420 20722 510
## 4 48389 Reeves County 3077 1606 1445 26
## 5 56017 Hot Springs County 2546 619 1834 93
## 6 20043 Doniphan County 3564 1115 2372 77
## total_2012 dem_2012 gop_2012 oth_2012 total_2016 dem_2016 gop_2016
## 1 18043 8330 9533 180 18467 6431 11112
## 2 1168 119 1044 5 1322 135 1159
## 3 28497 6551 21633 313 29243 4486 24208
## 4 2867 1649 1185 33 3184 1659 1417
## 5 2495 523 1894 78 2535 400 1939
## 6 3369 885 2397 87 3366 584 2601
## oth_2016
## 1 924
## 2 28
## 3 549
## 4 108
## 5 196
## 6 181
### Add Calcualted Outcome Variables (dependent variables)
Indy<-data.frame(county,"TotalDiff"=(county$total_2016-county$total_2012)/county$total_2012,
"DemDiff"=(county$dem_2016-county$dem_2012)/county$dem_2012,
"RepDiff"=(county$gop_2016-county$gop_2012)/county$gop_2012,
"OthDiff"=(county$oth_2016-county$oth_2012)/county$oth_2012,
"Dem_Score_2012"=county$dem_2012/county$total_2012,
"Dem_Score_2016"=county$dem_2016/county$total_2016,
"Oth_Score_2016"=county$oth_2016/county$total_2016,
"Dem_Score_Diff"=county$dem_2016/county$total_2016-county$dem_2012/county$total_2012
)
IndyClean<-data.frame(Indy[1],Indy[15:22])
##Now we need demographic information from the Census API, so I registered for a key and it takes some a tiny trick to install the package: These are the dependent variables
#url2 <- "http://lakshmi.calit2.uci.edu/census2000/R/src/contrib/UScensus2010tract_1.00.tar.gz"
#url3<-"http://lakshmi.calit2.uci.edu/census2000/R/src/contrib/UScensus2010county_1.00.tar.gz"
#install.packages(url3, repos = NULL)
#library(UScensus2010tract)
library(UScensus2010county)
##
## UScensus2010county: US Census 2010 County Level Shapefiles and Additional
## Demographic Data
## Version 1.00 created on 2011-11-06
## copyright (c) 2011, Zack W. Almquist, University of California-Irvine
## Type help(package="UScensus2010county") to get started.
##
## For citation information, type citation("UScensus2010county").
key<-"3c293c96d558db8fad8e7272e400c7775235bd20"
###Gets Total Population, How Many White, How Many Hispanic:
#Note all census data from 2010
demvars<-c("P0010001","P0030002","P0040003")
d1<-data.frame(demographics(state="AL",level="county",dem = demvars))
d2<-data.frame(demographics(state="AK",level="county",dem = demvars))
d3<-data.frame(demographics(state="AZ",level="county",dem = demvars))
d4<-data.frame(demographics(state="AR",level="county",dem = demvars))
d5<-data.frame(demographics(state="CA",level="county",dem = demvars))
d6<-data.frame(demographics(state="CO",level="county",dem = demvars))
d7<-data.frame(demographics(state="CT",level="county",dem = demvars))
d8<-data.frame(demographics(state="DE",level="county",dem = demvars))
d9<-data.frame(demographics(state="DC",level="county",dem = demvars))
d10<-data.frame(demographics(state="FL",level="county",dem = demvars))
d11<-data.frame(demographics(state="GA",level="county",dem = demvars))
d12<-data.frame(demographics(state="HI",level="county",dem = demvars))
d13<-data.frame(demographics(state="ID",level="county",dem = demvars))
d14<-data.frame(demographics(state="IL",level="county",dem = demvars))
d15<-data.frame(demographics(state="IN",level="county",dem = demvars))
d16<-data.frame(demographics(state="IA",level="county",dem = demvars))
d17<-data.frame(demographics(state="KS",level="county",dem = demvars))
d18<-data.frame(demographics(state="KY",level="county",dem = demvars))
d19<-data.frame(demographics(state="LA",level="county",dem = demvars))
d20<-data.frame(demographics(state="ME",level="county",dem = demvars))
d21<-data.frame(demographics(state="MD",level="county",dem = demvars))
d22<-data.frame(demographics(state="MA",level="county",dem = demvars))
d23<-data.frame(demographics(state="MI",level="county",dem = demvars))
d24<-data.frame(demographics(state="MN",level="county",dem = demvars))
d25<-data.frame(demographics(state="MS",level="county",dem = demvars))
d26<-data.frame(demographics(state="MO",level="county",dem = demvars))
d27<-data.frame(demographics(state="MT",level="county",dem = demvars))
d28<-data.frame(demographics(state="NE",level="county",dem = demvars))
d29<-data.frame(demographics(state="NV",level="county",dem = demvars))
d30<-data.frame(demographics(state="NH",level="county",dem = demvars))
d31<-data.frame(demographics(state="NJ",level="county",dem = demvars))
d32<-data.frame(demographics(state="NM",level="county",dem = demvars))
d33<-data.frame(demographics(state="NY",level="county",dem = demvars))
d34<-data.frame(demographics(state="NC",level="county",dem = demvars))
d35<-data.frame(demographics(state="ND",level="county",dem = demvars))
d36<-data.frame(demographics(state="OH",level="county",dem = demvars))
d37<-data.frame(demographics(state="OK",level="county",dem = demvars))
d38<-data.frame(demographics(state="OR",level="county",dem = demvars))
d39<-data.frame(demographics(state="PA",level="county",dem = demvars))
d40<-data.frame(demographics(state="RI",level="county",dem = demvars))
d41<-data.frame(demographics(state="SC",level="county",dem = demvars))
d42<-data.frame(demographics(state="SD",level="county",dem = demvars))
d43<-data.frame(demographics(state="TN",level="county",dem = demvars))
d44<-data.frame(demographics(state="TX",level="county",dem = demvars))
d45<-data.frame(demographics(state="UT",level="county",dem = demvars))
d46<-data.frame(demographics(state="VT",level="county",dem = demvars))
d47<-data.frame(demographics(state="VA",level="county",dem = demvars))
d48<-data.frame(demographics(state="WA",level="county",dem = demvars))
d49<-data.frame(demographics(state="WV",level="county",dem = demvars))
d50<-data.frame(demographics(state="WI",level="county",dem = demvars))
d51<-data.frame(demographics(state="WY",level="county",dem = demvars))
alldemo<-rbind(d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18,d19,d20,d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31,d32,d33,d34,d35,d36,d37,d38,d39,d40,d41,d42,d43,d44,d45,d46,d47,d48,d49,d50,d51)
colnames(alldemo)<-c("Population","White","Hispanic")
Counties <- rownames(alldemo)
alldemo<-cbind(Counties,alldemo)
demo<-data.frame(alldemo,"Per.White"=alldemo$White/alldemo$Population,"Per.Hispanic"=alldemo$Hispanic/alldemo$Population)
head(demo)
## Counties Population White Hispanic Per.White
## Barbour County Barbour County 27457 13180 1387 0.4800233
## Clay County Clay County 13932 11380 399 0.8168246
## Marengo County Marengo County 21027 9751 352 0.4637371
## Houston County Houston County 101547 71053 2995 0.6997056
## Cherokee County Cherokee County 25989 24081 320 0.9265843
## Baldwin County Baldwin County 182265 156153 7992 0.8567361
## Per.Hispanic
## Barbour County 0.05051535
## Clay County 0.02863910
## Marengo County 0.01674038
## Houston County 0.02949373
## Cherokee County 0.01231290
## Baldwin County 0.04384824
###
# Load FIPS to county name lookup file
fips<-read.csv("http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt",header = FALSE)
fips<-data.frame(fips,"L"=paste(fips$V2,sprintf("%03d", fips$V3),sep = ""))
head(fips)
## V1 V2 V3 V4 V5 L
## 1 AL 1 1 Autauga County H1 1001
## 2 AL 1 3 Baldwin County H1 1003
## 3 AL 1 5 Barbour County H1 1005
## 4 AL 1 7 Bibb County H1 1007
## 5 AL 1 9 Blount County H1 1009
## 6 AL 1 11 Bullock County H1 1011
##Use the Acs Package with the API to get more in-depth data
library(acs)
## Warning: package 'acs' was built under R version 3.3.2
## Loading required package: stringr
## Loading required package: plyr
## Loading required package: XML
##
## Attaching package: 'acs'
## The following object is masked from 'package:base':
##
## apply
## Sex data
sex <- acs.fetch(
geography = geo.make(state = "*", county="*"),
endyear = 2010, span = 0,
table.number = "P12",
col.names = "pretty",key=key,dataset='sf1')
sexdem<-data.frame(sex@geography$state,sex@geography$county,sex@estimate)
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 118,122,123,131,135,139,145,146,149,150,155,156,159,160,164,166,172,183,215,230,274,275,278,282,283,289,293,306,308,327,330,332,336,338,351,352,353,354,355,359,361,364,368,373,380,383,387,391,392,398,406,409,415,416,417,421,423,426,431,435,439,443,446,447,453,462,463,465,468,470,472,475,476,477,479,483,484,485,487,489,490,491,494,499,501,502,503,504,507,512,516,520,531,533,534,537,541,543,553,563,568,570,571,572,573,577,583,584,595,596,599,602,603,607,608,611,612,614,616,620,621,623,624,626,628,629,632,634,635,636,639,644,646,647,649,653,655,656,657,662,663,664,668,670,671,672,673,674,677,678,681,682,686,689,690,691,692,698,701,703,704,705,706,707,708,709,710,713,714,718,719,721,722,724,725,726,727,730,731,733,734,736,738,739,742,744,745,746,747,748,750,751,752,753,756,759,760,763,764,765,769,770,778,782,783,785,786,788,791,795,797,801,802,803,804,807,809,810,811,812,813,814,816,817,822,823,824,825,826,827,829,830,831,832,833,834,835,838,839,840,841,842,845,850,852,853,855,857,858,861,866,871,872,876,877,878,880,881,882,883,884,887,889,895,896,899,900,901,902,907,908,909,911,912,917,918,921,922,926,931,932,934,937,941,942,943,944,946,947,949,950,951,962,971,972,973,974,975,985,989,994,995,996,1001,1002,1009,1014,1017,1018,1019,1020,1021,1022,1023,1027,1029,1030,1031,1032,1034,1039,1040,1042,1043,1044,1045,1048,1049,1051,1054,1057,1058,1061,1062,1063,1064,1065,1068,1069,1071,1072,1073,1074,1075,1077,1079,1080,1081,1085,1087,1090,1091,1093,1097,1098,1099,1101,1102,1106,1107,1108,1109,1110,1111,1113,1180,1181,1182,1184,1185,1192,1199,1206,1207,1208,1212,1213,1214,1223,1226,1229,1231,1240,1242,1244,1245,1250,1251,1252,1253,1255,1269,1272,1274,1278,1284,1289,1298,1301,1305,1306,1311,1313,1319,1322,1325,1326,1328,1329,1330,1334,1335,1340,1342,1346,1352,1355,1356,1359,1360,1365,1374,1375,1380,1384,1389,1391,1396,1400,1402,1406,1408,1409,1410,1411,1413,1414,1417,1418,1420,1422,1424,1425,1427,1431,1432,1433,1435,1437,1438,1439,1440,1442,1444,1445,1446,1447,1448,1449,1450,1452,1457,1458,1461,1463,1465,1466,1467,1474,1476,1477,1478,1479,1480,1481,1484,1486,1488,1489,1491,1493,1494,1495,1496,1498,1500,1501,1502,1503,1505,1506,1507,1508,1511,1512,1513,1514,1515,1517,1519,1522,1523,1524,1525,1528,1530,1531,1532,1533,1534,1535,1537,1538,1539,1540,1541,1542,1544,1545,1547,1548,1549,1550,1552,1553,1554,1556,1559,1562,1565,1567,1568,1569,1571,1574,1576,1579,1580,1581,1583,1585,1587,1588,1592,1593,1594,1595,1596,1597,1601,1604,1607,1609,1614,1615,1620,1622,1624,1625,1627,1629,1632,1634,1637,1638,1640,1644,1648,1651,1655,1659,1660,1662,1663,1666,1667,1668,1669,1671,1672,1675,1676,1678,1681,1682,1684,1685,1690,1692,1693,1694,1695,1696,1699,1701,1702,1703,1708,1710,1711,1713,1714,1718,1721,1723,1724,1725,1726,1729,1730,1734,1735,1736,1737,1738,1740,1742,1743,1744,1745,1746,1747,1749,1750,1754,1756,1757,1758,1766,1770,1774,1778,1780,1781,1785,1786,1788,1792,1793,1794,1795,1800,1805,1810,1815,1818,1820,1821,1823,1827,1830,1835,1838,1839,1841,1844,1845,1846,1847,1848,1849,1851,1852,1853,1854,1855,1856,1857,1858,1861,1864,1867,1868,1871,1877,1879,1880,1881,1885,1886,1887,1892,1902,1904,1905,1909,1910,1912,1913,1916,1924,1925,1928,1930,1935,1940,1942,1943,1945,1947,1948,1949,1951,1952,1958,1965,1966,1967,1969,1970,1973,1980,1983,1984,1986,1987,1988,1991,1997,1999,2004,2007,2009,2014,2015,2016,2018,2019,2020,2022,2025,2026,2028,2029,2032,2033,2035,2036,2042,2044,2045,2051,2052,2053,2054,2055,2057,2060,2064,2065,2066,2067,2068,2069,2072,2074,2075,2076,2077,2078,2081,2082,2083,2084,2085,2086,2087,2089,2091,2092,2094,2097,2098,2099,2100,2101,2104,2105,2106,2107,2109,2112,2113,2117,2118,2119,2120,2123,2126,2127,2128,2129,2132,2137,2138,2141,2142,2143,2145,2147,2151,2152,2154,2155,2157,2158,2161,2162,2164,2165,2166,2169,2172,2173,2177,2179,2181,2183,2188,2189,2190,2193,2194,2198,2200,2201,2205,2209,2210,2213,2214,2216,2218,2220,2223,2224,2227,2228,2229,2230,2232,2233,2235,2236,2239,2242,2243,2245,2248,2252,2254,2257,2262,2263,2264,2265,2267,2268,2269,2270,2272,2273,2274,2277,2280,2281,2287,2289,2290,2292,2294,2296,2300,2301,2303,2304,2306,2307,2308,2310,2311,2312,2313,2316,2320,2323,2325,2327,2328,2334,2336,2340,2343,2345,2346,2347,2350,2353,2355,2356,2359,2360,2362,2368,2370,2371,2372,2374,2375,2378,2381,2382,2383,2387,2393,2394,2396,2397,2399,2401,2402,2403,2406,2407,2408,2413,2414,2415,2418,2422,2424,2425,2429,2430,2431,2433,2434,2435,2437,2438,2440,2441,2442,2444,2446,2447,2448,2449,2452,2454,2455,2458,2459,2461,2462,2464,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2484,2485,2486,2487,2489,2490,2491,2492,2493,2496,2498,2499,2502,2503,2504,2506,2507,2508,2509,2510,2511,2512,2515,2516,2517,2518,2519,2521,2522,2523,2524,2529,2537,2547,2548,2551,2552,2554,2557,2559,2560,2562,2570,2576,2580,2581,2583,2589,2592,2593,2594,2597,2598,2600,2603,2613,2614,2617,2618,2619,2620,2622,2623,2624,2625,2627,2630,2631,2632,2635,2636,2637,2640,2643,2644,2645,2646,2649,2650,2653,2655,2661,2662,2667,2668,2669,2670,2680,2681,2682,2683,2686,2687,2688,2690,2691,2693,2694,2695,2699,2703,2704,2706,2710,2711,2720,2721,2733,2734,2735,2738,2744,2745,2751,2759,2761,2762,2765,2766,2769,2770,2773,2778,2781,2783,2786,2787,2788,2790,2792,2796,2798,2799,2804,2805,2811,2812,2815,2816,2818,2819,2823,2829,2830,2833,2834,2836,2837,2838,2840,2841,2842,2843,2845,2848,2851,2853,2854,2855,2856,2858,2859,2861,2864,2865,2871,2872,2874,2876,2878,2879,2880,2881,2883,2884,2886,2887,2894,2896,2899,2900,2901,2906,2907,2908,2909,2910,2911,2912,2913,2915,2955,2957,2960,2961,2963,2965,2966,2967,2970,2971,2975,2976,2977,2981,2982,2987,2988,2994,2995,2996,3000,3001,3003,3004,3005,3007,3008,3010,3011,3012,3014,3015,3016,3017,3018,3019,3020,3021,3022,3025,3026,3027,3028,3029,3031,3033,3035,3037,3039,3041,3042,3043,3044,3047,3048,3049,3050,3053,3054,3057,3058,3059,3060,3062,3064,3065,3067,3069,3070,3071,3073,3074,3075,3076,3081,3083,3087,3088,3090,3092,3096,3097,3098,3101,3102,3103,3109,3111,3113,3115,3119,3120,3121,3122,3123,3124,3126,3127,3130,3132,3135,3136,3137,3140
## --> row.names NOT used
sexvars<-data.frame("State"=sexdem$sex.geography.state,
"County"=sexdem$sex.geography.county,
"FIPS"=paste(sexdem$sex.geography.state,sexdem$sex.geography.county,sep=""),
"Male"=sexdem$P12..Sex.By.Age..Male.,
"Female"=sexdem$P12..Sex.By.Age..Total.population-sexdem$P12..Sex.By.Age..Male.,
"Per.Male"=sexdem$P12..Sex.By.Age..Male./sexdem$P12..Sex.By.Age..Total.population
)
head(sexvars)
## State County FIPS Male Female Per.Male
## 1 1 001 1001 26569 28002 0.4868703
## 2 1 003 1003 89196 93069 0.4893754
## 3 1 005 1005 14576 12881 0.5308664
## 4 1 007 1007 12301 10614 0.5368099
## 5 1 009 1009 28362 28960 0.4947839
## 6 1 011 1011 5912 5002 0.5416896
## African American & White
race <- acs.fetch(
geography = geo.make(state = "*", county="*"),
endyear = 2010, span = 0,
table.number = "P6",
col.names = "pretty",key=key,dataset='sf1')
racedem<-data.frame(race@geography$state,race@geography$county,race@estimate)
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 118,122,123,131,135,139,145,146,149,150,155,156,159,160,164,166,172,183,215,230,274,275,278,282,283,289,293,306,308,327,330,332,336,338,351,352,353,354,355,359,361,364,368,373,380,383,387,391,392,398,406,409,415,416,417,421,423,426,431,435,439,443,446,447,453,462,463,465,468,470,472,475,476,477,479,483,484,485,487,489,490,491,494,499,501,502,503,504,507,512,516,520,531,533,534,537,541,543,553,563,568,570,571,572,573,577,583,584,595,596,599,602,603,607,608,611,612,614,616,620,621,623,624,626,628,629,632,634,635,636,639,644,646,647,649,653,655,656,657,662,663,664,668,670,671,672,673,674,677,678,681,682,686,689,690,691,692,698,701,703,704,705,706,707,708,709,710,713,714,718,719,721,722,724,725,726,727,730,731,733,734,736,738,739,742,744,745,746,747,748,750,751,752,753,756,759,760,763,764,765,769,770,778,782,783,785,786,788,791,795,797,801,802,803,804,807,809,810,811,812,813,814,816,817,822,823,824,825,826,827,829,830,831,832,833,834,835,838,839,840,841,842,845,850,852,853,855,857,858,861,866,871,872,876,877,878,880,881,882,883,884,887,889,895,896,899,900,901,902,907,908,909,911,912,917,918,921,922,926,931,932,934,937,941,942,943,944,946,947,949,950,951,962,971,972,973,974,975,985,989,994,995,996,1001,1002,1009,1014,1017,1018,1019,1020,1021,1022,1023,1027,1029,1030,1031,1032,1034,1039,1040,1042,1043,1044,1045,1048,1049,1051,1054,1057,1058,1061,1062,1063,1064,1065,1068,1069,1071,1072,1073,1074,1075,1077,1079,1080,1081,1085,1087,1090,1091,1093,1097,1098,1099,1101,1102,1106,1107,1108,1109,1110,1111,1113,1180,1181,1182,1184,1185,1192,1199,1206,1207,1208,1212,1213,1214,1223,1226,1229,1231,1240,1242,1244,1245,1250,1251,1252,1253,1255,1269,1272,1274,1278,1284,1289,1298,1301,1305,1306,1311,1313,1319,1322,1325,1326,1328,1329,1330,1334,1335,1340,1342,1346,1352,1355,1356,1359,1360,1365,1374,1375,1380,1384,1389,1391,1396,1400,1402,1406,1408,1409,1410,1411,1413,1414,1417,1418,1420,1422,1424,1425,1427,1431,1432,1433,1435,1437,1438,1439,1440,1442,1444,1445,1446,1447,1448,1449,1450,1452,1457,1458,1461,1463,1465,1466,1467,1474,1476,1477,1478,1479,1480,1481,1484,1486,1488,1489,1491,1493,1494,1495,1496,1498,1500,1501,1502,1503,1505,1506,1507,1508,1511,1512,1513,1514,1515,1517,1519,1522,1523,1524,1525,1528,1530,1531,1532,1533,1534,1535,1537,1538,1539,1540,1541,1542,1544,1545,1547,1548,1549,1550,1552,1553,1554,1556,1559,1562,1565,1567,1568,1569,1571,1574,1576,1579,1580,1581,1583,1585,1587,1588,1592,1593,1594,1595,1596,1597,1601,1604,1607,1609,1614,1615,1620,1622,1624,1625,1627,1629,1632,1634,1637,1638,1640,1644,1648,1651,1655,1659,1660,1662,1663,1666,1667,1668,1669,1671,1672,1675,1676,1678,1681,1682,1684,1685,1690,1692,1693,1694,1695,1696,1699,1701,1702,1703,1708,1710,1711,1713,1714,1718,1721,1723,1724,1725,1726,1729,1730,1734,1735,1736,1737,1738,1740,1742,1743,1744,1745,1746,1747,1749,1750,1754,1756,1757,1758,1766,1770,1774,1778,1780,1781,1785,1786,1788,1792,1793,1794,1795,1800,1805,1810,1815,1818,1820,1821,1823,1827,1830,1835,1838,1839,1841,1844,1845,1846,1847,1848,1849,1851,1852,1853,1854,1855,1856,1857,1858,1861,1864,1867,1868,1871,1877,1879,1880,1881,1885,1886,1887,1892,1902,1904,1905,1909,1910,1912,1913,1916,1924,1925,1928,1930,1935,1940,1942,1943,1945,1947,1948,1949,1951,1952,1958,1965,1966,1967,1969,1970,1973,1980,1983,1984,1986,1987,1988,1991,1997,1999,2004,2007,2009,2014,2015,2016,2018,2019,2020,2022,2025,2026,2028,2029,2032,2033,2035,2036,2042,2044,2045,2051,2052,2053,2054,2055,2057,2060,2064,2065,2066,2067,2068,2069,2072,2074,2075,2076,2077,2078,2081,2082,2083,2084,2085,2086,2087,2089,2091,2092,2094,2097,2098,2099,2100,2101,2104,2105,2106,2107,2109,2112,2113,2117,2118,2119,2120,2123,2126,2127,2128,2129,2132,2137,2138,2141,2142,2143,2145,2147,2151,2152,2154,2155,2157,2158,2161,2162,2164,2165,2166,2169,2172,2173,2177,2179,2181,2183,2188,2189,2190,2193,2194,2198,2200,2201,2205,2209,2210,2213,2214,2216,2218,2220,2223,2224,2227,2228,2229,2230,2232,2233,2235,2236,2239,2242,2243,2245,2248,2252,2254,2257,2262,2263,2264,2265,2267,2268,2269,2270,2272,2273,2274,2277,2280,2281,2287,2289,2290,2292,2294,2296,2300,2301,2303,2304,2306,2307,2308,2310,2311,2312,2313,2316,2320,2323,2325,2327,2328,2334,2336,2340,2343,2345,2346,2347,2350,2353,2355,2356,2359,2360,2362,2368,2370,2371,2372,2374,2375,2378,2381,2382,2383,2387,2393,2394,2396,2397,2399,2401,2402,2403,2406,2407,2408,2413,2414,2415,2418,2422,2424,2425,2429,2430,2431,2433,2434,2435,2437,2438,2440,2441,2442,2444,2446,2447,2448,2449,2452,2454,2455,2458,2459,2461,2462,2464,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2484,2485,2486,2487,2489,2490,2491,2492,2493,2496,2498,2499,2502,2503,2504,2506,2507,2508,2509,2510,2511,2512,2515,2516,2517,2518,2519,2521,2522,2523,2524,2529,2537,2547,2548,2551,2552,2554,2557,2559,2560,2562,2570,2576,2580,2581,2583,2589,2592,2593,2594,2597,2598,2600,2603,2613,2614,2617,2618,2619,2620,2622,2623,2624,2625,2627,2630,2631,2632,2635,2636,2637,2640,2643,2644,2645,2646,2649,2650,2653,2655,2661,2662,2667,2668,2669,2670,2680,2681,2682,2683,2686,2687,2688,2690,2691,2693,2694,2695,2699,2703,2704,2706,2710,2711,2720,2721,2733,2734,2735,2738,2744,2745,2751,2759,2761,2762,2765,2766,2769,2770,2773,2778,2781,2783,2786,2787,2788,2790,2792,2796,2798,2799,2804,2805,2811,2812,2815,2816,2818,2819,2823,2829,2830,2833,2834,2836,2837,2838,2840,2841,2842,2843,2845,2848,2851,2853,2854,2855,2856,2858,2859,2861,2864,2865,2871,2872,2874,2876,2878,2879,2880,2881,2883,2884,2886,2887,2894,2896,2899,2900,2901,2906,2907,2908,2909,2910,2911,2912,2913,2915,2955,2957,2960,2961,2963,2965,2966,2967,2970,2971,2975,2976,2977,2981,2982,2987,2988,2994,2995,2996,3000,3001,3003,3004,3005,3007,3008,3010,3011,3012,3014,3015,3016,3017,3018,3019,3020,3021,3022,3025,3026,3027,3028,3029,3031,3033,3035,3037,3039,3041,3042,3043,3044,3047,3048,3049,3050,3053,3054,3057,3058,3059,3060,3062,3064,3065,3067,3069,3070,3071,3073,3074,3075,3076,3081,3083,3087,3088,3090,3092,3096,3097,3098,3101,3102,3103,3109,3111,3113,3115,3119,3120,3121,3122,3123,3124,3126,3127,3130,3132,3135,3136,3137,3140
## --> row.names NOT used
racevars<-data.frame("State"=racedem$race.geography.state,
"County"=racedem$race.geography.county,
"FIPS"=paste(racedem$race.geography.state,racedem$race.geography.county,sep=""),
"Afr.Am"=racedem$P6..RACE..TOTAL.RACES.TALLIED...Black.or.African.American.alone.or.in.combination.with.one.or.more.other.races,
"Per.Afr.Am"=racedem$P6..RACE..TOTAL.RACES.TALLIED...Black.or.African.American.alone.or.in.combination.with.one.or.more.other.races/racedem$P6..RACE..TOTAL.RACES.TALLIED...Total.races.tallied,
"Per.White"=racedem$P6..RACE..TOTAL.RACES.TALLIED...White.alone.or.in.combination.with.one.or.more.other.races/racedem$P6..RACE..TOTAL.RACES.TALLIED...Total.races.tallied
)
head(racevars)
## State County FIPS Afr.Am Per.Afr.Am Per.White
## 1 1 001 1001 9976 0.17974451 0.7860759
## 2 1 003 1003 17996 0.09720159 0.8569739
## 3 1 005 1005 13021 0.46969916 0.4816391
## 4 1 007 1007 5136 0.22208769 0.7587996
## 5 1 009 1009 909 0.01566458 0.9256751
## 6 1 011 1011 7706 0.70029080 0.2323700
## Age data
library(acs)
age <- acs.fetch(
geography = geo.make(state = "*", county="*"),
endyear = 2010, span = 0,
table.number = "P13",
col.names = "pretty",key=key,dataset='sf1')
agedem<-data.frame(age@geography$state,age@geography$county,age@estimate)
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 118,122,123,131,135,139,145,146,149,150,155,156,159,160,164,166,172,183,215,230,274,275,278,282,283,289,293,306,308,327,330,332,336,338,351,352,353,354,355,359,361,364,368,373,380,383,387,391,392,398,406,409,415,416,417,421,423,426,431,435,439,443,446,447,453,462,463,465,468,470,472,475,476,477,479,483,484,485,487,489,490,491,494,499,501,502,503,504,507,512,516,520,531,533,534,537,541,543,553,563,568,570,571,572,573,577,583,584,595,596,599,602,603,607,608,611,612,614,616,620,621,623,624,626,628,629,632,634,635,636,639,644,646,647,649,653,655,656,657,662,663,664,668,670,671,672,673,674,677,678,681,682,686,689,690,691,692,698,701,703,704,705,706,707,708,709,710,713,714,718,719,721,722,724,725,726,727,730,731,733,734,736,738,739,742,744,745,746,747,748,750,751,752,753,756,759,760,763,764,765,769,770,778,782,783,785,786,788,791,795,797,801,802,803,804,807,809,810,811,812,813,814,816,817,822,823,824,825,826,827,829,830,831,832,833,834,835,838,839,840,841,842,845,850,852,853,855,857,858,861,866,871,872,876,877,878,880,881,882,883,884,887,889,895,896,899,900,901,902,907,908,909,911,912,917,918,921,922,926,931,932,934,937,941,942,943,944,946,947,949,950,951,962,971,972,973,974,975,985,989,994,995,996,1001,1002,1009,1014,1017,1018,1019,1020,1021,1022,1023,1027,1029,1030,1031,1032,1034,1039,1040,1042,1043,1044,1045,1048,1049,1051,1054,1057,1058,1061,1062,1063,1064,1065,1068,1069,1071,1072,1073,1074,1075,1077,1079,1080,1081,1085,1087,1090,1091,1093,1097,1098,1099,1101,1102,1106,1107,1108,1109,1110,1111,1113,1180,1181,1182,1184,1185,1192,1199,1206,1207,1208,1212,1213,1214,1223,1226,1229,1231,1240,1242,1244,1245,1250,1251,1252,1253,1255,1269,1272,1274,1278,1284,1289,1298,1301,1305,1306,1311,1313,1319,1322,1325,1326,1328,1329,1330,1334,1335,1340,1342,1346,1352,1355,1356,1359,1360,1365,1374,1375,1380,1384,1389,1391,1396,1400,1402,1406,1408,1409,1410,1411,1413,1414,1417,1418,1420,1422,1424,1425,1427,1431,1432,1433,1435,1437,1438,1439,1440,1442,1444,1445,1446,1447,1448,1449,1450,1452,1457,1458,1461,1463,1465,1466,1467,1474,1476,1477,1478,1479,1480,1481,1484,1486,1488,1489,1491,1493,1494,1495,1496,1498,1500,1501,1502,1503,1505,1506,1507,1508,1511,1512,1513,1514,1515,1517,1519,1522,1523,1524,1525,1528,1530,1531,1532,1533,1534,1535,1537,1538,1539,1540,1541,1542,1544,1545,1547,1548,1549,1550,1552,1553,1554,1556,1559,1562,1565,1567,1568,1569,1571,1574,1576,1579,1580,1581,1583,1585,1587,1588,1592,1593,1594,1595,1596,1597,1601,1604,1607,1609,1614,1615,1620,1622,1624,1625,1627,1629,1632,1634,1637,1638,1640,1644,1648,1651,1655,1659,1660,1662,1663,1666,1667,1668,1669,1671,1672,1675,1676,1678,1681,1682,1684,1685,1690,1692,1693,1694,1695,1696,1699,1701,1702,1703,1708,1710,1711,1713,1714,1718,1721,1723,1724,1725,1726,1729,1730,1734,1735,1736,1737,1738,1740,1742,1743,1744,1745,1746,1747,1749,1750,1754,1756,1757,1758,1766,1770,1774,1778,1780,1781,1785,1786,1788,1792,1793,1794,1795,1800,1805,1810,1815,1818,1820,1821,1823,1827,1830,1835,1838,1839,1841,1844,1845,1846,1847,1848,1849,1851,1852,1853,1854,1855,1856,1857,1858,1861,1864,1867,1868,1871,1877,1879,1880,1881,1885,1886,1887,1892,1902,1904,1905,1909,1910,1912,1913,1916,1924,1925,1928,1930,1935,1940,1942,1943,1945,1947,1948,1949,1951,1952,1958,1965,1966,1967,1969,1970,1973,1980,1983,1984,1986,1987,1988,1991,1997,1999,2004,2007,2009,2014,2015,2016,2018,2019,2020,2022,2025,2026,2028,2029,2032,2033,2035,2036,2042,2044,2045,2051,2052,2053,2054,2055,2057,2060,2064,2065,2066,2067,2068,2069,2072,2074,2075,2076,2077,2078,2081,2082,2083,2084,2085,2086,2087,2089,2091,2092,2094,2097,2098,2099,2100,2101,2104,2105,2106,2107,2109,2112,2113,2117,2118,2119,2120,2123,2126,2127,2128,2129,2132,2137,2138,2141,2142,2143,2145,2147,2151,2152,2154,2155,2157,2158,2161,2162,2164,2165,2166,2169,2172,2173,2177,2179,2181,2183,2188,2189,2190,2193,2194,2198,2200,2201,2205,2209,2210,2213,2214,2216,2218,2220,2223,2224,2227,2228,2229,2230,2232,2233,2235,2236,2239,2242,2243,2245,2248,2252,2254,2257,2262,2263,2264,2265,2267,2268,2269,2270,2272,2273,2274,2277,2280,2281,2287,2289,2290,2292,2294,2296,2300,2301,2303,2304,2306,2307,2308,2310,2311,2312,2313,2316,2320,2323,2325,2327,2328,2334,2336,2340,2343,2345,2346,2347,2350,2353,2355,2356,2359,2360,2362,2368,2370,2371,2372,2374,2375,2378,2381,2382,2383,2387,2393,2394,2396,2397,2399,2401,2402,2403,2406,2407,2408,2413,2414,2415,2418,2422,2424,2425,2429,2430,2431,2433,2434,2435,2437,2438,2440,2441,2442,2444,2446,2447,2448,2449,2452,2454,2455,2458,2459,2461,2462,2464,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2484,2485,2486,2487,2489,2490,2491,2492,2493,2496,2498,2499,2502,2503,2504,2506,2507,2508,2509,2510,2511,2512,2515,2516,2517,2518,2519,2521,2522,2523,2524,2529,2537,2547,2548,2551,2552,2554,2557,2559,2560,2562,2570,2576,2580,2581,2583,2589,2592,2593,2594,2597,2598,2600,2603,2613,2614,2617,2618,2619,2620,2622,2623,2624,2625,2627,2630,2631,2632,2635,2636,2637,2640,2643,2644,2645,2646,2649,2650,2653,2655,2661,2662,2667,2668,2669,2670,2680,2681,2682,2683,2686,2687,2688,2690,2691,2693,2694,2695,2699,2703,2704,2706,2710,2711,2720,2721,2733,2734,2735,2738,2744,2745,2751,2759,2761,2762,2765,2766,2769,2770,2773,2778,2781,2783,2786,2787,2788,2790,2792,2796,2798,2799,2804,2805,2811,2812,2815,2816,2818,2819,2823,2829,2830,2833,2834,2836,2837,2838,2840,2841,2842,2843,2845,2848,2851,2853,2854,2855,2856,2858,2859,2861,2864,2865,2871,2872,2874,2876,2878,2879,2880,2881,2883,2884,2886,2887,2894,2896,2899,2900,2901,2906,2907,2908,2909,2910,2911,2912,2913,2915,2955,2957,2960,2961,2963,2965,2966,2967,2970,2971,2975,2976,2977,2981,2982,2987,2988,2994,2995,2996,3000,3001,3003,3004,3005,3007,3008,3010,3011,3012,3014,3015,3016,3017,3018,3019,3020,3021,3022,3025,3026,3027,3028,3029,3031,3033,3035,3037,3039,3041,3042,3043,3044,3047,3048,3049,3050,3053,3054,3057,3058,3059,3060,3062,3064,3065,3067,3069,3070,3071,3073,3074,3075,3076,3081,3083,3087,3088,3090,3092,3096,3097,3098,3101,3102,3103,3109,3111,3113,3115,3119,3120,3121,3122,3123,3124,3126,3127,3130,3132,3135,3136,3137,3140
## --> row.names NOT used
agevars<-data.frame("State"=agedem$age.geography.state,
"County"=agedem$age.geography.county,
"Age"=agedem$P13..MEDIAN.AGE.BY.SEX..3...1.expressed.decimal...Both.sexes,
"FIPS"=paste(agedem$age.geography.state,agedem$age.geography.county,sep=""))
head(agevars)
## State County Age FIPS
## 1 1 001 37.0 1001
## 2 1 003 41.1 1003
## 3 1 005 39.0 1005
## 4 1 007 37.8 1007
## 5 1 009 39.0 1009
## 6 1 011 38.5 1011
## Employment data Note the API call isn't working for this table so I downloaded it and put it on Github
emp<-read.csv("https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/Unemployment.csv")
emp<-data.frame("ID"=as.character(emp$Id2),emp$Unempl.Rate)
## Income
txcountys<-geo.make(state="*",county="*")
income.county<-acs.fetch(key=key,endyear = 2010, span = 5, geography = txcountys, table.number = "B19013")
trincome<-data.frame(income=income.county@estimate)
trincome$ids<-paste(income.county@geography$state, sprintf("%03s", income.county@geography$county), sep="")
names(trincome)<-c("Income", "FIPS")
head(trincome)
## Income FIPS
## Autauga County, Alabama 53255 1001
## Baldwin County, Alabama 50147 1003
## Barbour County, Alabama 33219 1005
## Bibb County, Alabama 41770 1007
## Blount County, Alabama 45549 1009
## Bullock County, Alabama 31602 1011
##Education
#Create a weighted mean score of education since the data are ordinal
edu.county<-acs.fetch(key=key,endyear = 2010, span = 5, geography = txcountys, table.number = "B16010")
edu.county@acs.colnames
## [1] "B16010_001" "B16010_002" "B16010_003" "B16010_004" "B16010_005"
## [6] "B16010_006" "B16010_007" "B16010_008" "B16010_009" "B16010_010"
## [11] "B16010_011" "B16010_012" "B16010_013" "B16010_014" "B16010_015"
## [16] "B16010_016" "B16010_017" "B16010_018" "B16010_019" "B16010_020"
## [21] "B16010_021" "B16010_022" "B16010_023" "B16010_024" "B16010_025"
## [26] "B16010_026" "B16010_027" "B16010_028" "B16010_029" "B16010_030"
## [31] "B16010_031" "B16010_032" "B16010_033" "B16010_034" "B16010_035"
## [36] "B16010_036" "B16010_037" "B16010_038" "B16010_039" "B16010_040"
## [41] "B16010_041" "B16010_042" "B16010_043" "B16010_044" "B16010_045"
## [46] "B16010_046" "B16010_047" "B16010_048" "B16010_049" "B16010_050"
## [51] "B16010_051" "B16010_052" "B16010_053"
tredu1<-data.frame(edu=edu.county@estimate)
tredu<-data.frame("Total"=tredu1$edu.B16010_001,
"Some.HS"=tredu1$edu.B16010_002,
"HS"=tredu1$edu.B16010_015,
"Some.College"=tredu1$edu.B16010_028,
"BA"=tredu1$edu.B16010_041,
"FIPS"=paste(edu.county@geography$state, sprintf("%03s", edu.county@geography$county), sep=""),
"ED_Score"=(tredu1$edu.B16010_002*1+tredu1$edu.B16010_015*2+tredu1$edu.B16010_028*3+tredu1$edu.B16010_041*4)/tredu1$edu.B16010_001
)
head(tredu)
## Total Some.HS HS Some.College BA FIPS ED_Score
## 1 33884 4970 11936 9609 7369 1001 2.571863
## 2 121560 15103 36340 37512 32605 1003 2.720788
## 3 18879 5312 6679 4337 2551 1005 2.218603
## 4 15082 3839 6382 3349 1512 1007 2.168015
## 5 38085 9651 13872 9791 4771 1009 2.254221
## 6 7301 1848 2670 1906 877 1011 2.248185
###Let's combine all our dependent variables from Census into one dataframe:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:acs':
##
## combine
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Census<-full_join(racevars,sexvars,by=c("FIPS"="FIPS"))
Census1<-full_join(Census,agevars,by=c("FIPS"="FIPS"))
Census2<-full_join(Census1,tredu,by=c("FIPS"="FIPS"))
Census3<-full_join(Census2,trincome,by=c("FIPS"="FIPS"))
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
Census4<-full_join(Census3,fips,by=c("FIPS"="L"))
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
Census5<-left_join(Census4,demo,by=c("V4"="Counties"))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
Census6<-full_join(Census5,emp,by=c("FIPS"="ID"))
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
##Join Swing States
Swing<-read.csv("https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/swing.csv")
Census7<-left_join(Census6,Swing,by=c("State"="State.FIPS.Code"))
##Now Pair it down
CensusClean<-data.frame(
"State"=Census7$State.x,
"County"=Census7$County.x,
"CountyName"=Census7$V4,
"FIPS"=Census7$FIPS,
"Per.White"=Census7$Per.White.x,
"Per.Hispanic"=Census7$Per.Hispanic,
"Per.Afr.Am"=Census7$Per.Afr.Am,
"Per.Male"=Census7$Per.Male,
"MedAge"=Census7$Age,
"Income"=Census7$Income,
"ED_Score"=Census7$ED_Score,
"Unempl"=Census7$emp.Unempl.Rate,
"Swing"=Census7$SwingState
)
head(CensusClean)
## State County CountyName FIPS Per.White Per.Hispanic Per.Afr.Am
## 1 1 001 Autauga County 1001 0.7860759 0.02400542 0.17974451
## 2 1 003 Baldwin County 1003 0.8569739 0.04384824 0.09720159
## 3 1 005 Barbour County 1005 0.4816391 0.05051535 0.46969916
## 4 1 007 Bibb County 1007 0.7587996 0.01771765 0.22208769
## 5 1 009 Blount County 1009 0.9256751 0.08070200 0.01566458
## 6 1 011 Bullock County 1011 0.2323700 0.07119296 0.70029080
## Per.Male MedAge Income ED_Score Unempl Swing
## 1 0.4868703 37.0 53255 2.571863 0.04840857 0
## 2 0.4893754 41.1 50147 2.720788 0.05616936 0
## 3 0.5308664 39.0 33219 2.218603 0.12169014 0
## 4 0.5368099 37.8 41770 2.168015 0.05466667 0
## 5 0.4947839 39.0 45549 2.254221 0.05017336 0
## 6 0.5416896 38.5 31602 2.248185 0.12884585 0
Now that we have the data we need, let’s visualize it and perform the analyses we need. In general we mean to correlate via a multivariable linear regression the outcome of “Democrat Shift” (WLOG) to the variables provided to answer questions about what underlying causes of the shifts were. A simplifying assumption we make is that the a couple of percent of “other” votes don’t affect the conclusions. Equivalently, around an equal fraction of them would have voted from Trump as Clinton.
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.3.2
##First we want to plot the change in votes by county on a us map, try to figure out what happened.
cpoints<-read.csv("https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/countyzip1.csv")
coords1<-left_join(IndyClean,cpoints,by=c("fips_code"="fips"))
usa_center <- as.numeric(geocode("United States"))
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20States&sensor=false
USAMap <- ggmap(get_googlemap(center=usa_center, scale=2, zoom=4), extent="normal")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.09024,-95.712891&zoom=4&size=640x640&scale=2&maptype=terrain&sensor=false
USAMap+geom_point(aes(x=long, y=lat,color=((coords1$Dem_Score_Diff))), data=coords1, alpha=0.5, size=coords1$Dem_Score_Diff*.5) +
scale_size_continuous(range=range(coords1$Dem_Score_Diff))+
scale_colour_gradient( low="red",high = "blue",name="Shift")+
scale_fill_continuous(low="red",high = "blue",name="Shift")+
labs(x="Longitude",y="Latitude", title="Democratic Change from 2012 to 2016")+
xlim(-130,-60)
## Warning: Removed 5 rows containing missing values (geom_point).
##Now How People Voted from 2012 compared to 2016
par(mfrow=c(1,2))
USAMap+geom_point(aes(x=long, y=lat,color=((coords1$Dem_Score_2012))), data=coords1, alpha=0.5, size=coords1$Dem_Score_2012*.5) +
scale_size_continuous(range=range(coords1$Dem_Score_2012))+
scale_colour_gradient( low="red",high = "blue",name="Shift")+
scale_fill_continuous(low="red",high = "blue",name="Shift")+
labs(x="Longitude",y="Latitude", title="Vote 2012")+
xlim(-130,-60)
## Warning: Removed 5 rows containing missing values (geom_point).
USAMap+geom_point(aes(x=long, y=lat,color=((coords1$Dem_Score_2016))), data=coords1, alpha=0.5, size=coords1$Dem_Score_2016*.5) +
scale_size_continuous(range=range(coords1$Dem_Score_2016))+
scale_colour_gradient( low="red",high = "blue",name="Shift")+
scale_fill_continuous(low="red",high = "blue",name="Shift")+
labs(x="Longitude",y="Latitude", title="Vote 2016")+
xlim(-130,-60)
## Warning: Removed 5 rows containing missing values (geom_point).
We see that the coasts have a bluer shift while the heartland, rustbelt region has a redder shift, implying strategic gains for Republicans in an electoral system, since the coasts are not competative to begin with.
Now let’s build a model of voter shift:
IndyClean$fips_code<-as.factor(IndyClean$fips_code)
model_data<-left_join(IndyClean,CensusClean,by=c("fips_code"="FIPS"))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
head(model_data)
## fips_code TotalDiff DemDiff RepDiff OthDiff Dem_Score_2012
## 1 26041 0.023499418 -0.227971188 0.16563516 4.1333333 0.4616749
## 2 48295 0.131849315 0.134453782 0.11015326 4.6000000 0.1018836
## 3 1127 0.026178194 -0.315219051 0.11903111 0.7539936 0.2298838
## 4 48389 0.110568539 0.006064281 0.19578059 2.2727273 0.5751657
## 5 56017 0.016032064 -0.235181644 0.02375924 1.5128205 0.2096192
## 6 20043 -0.000890472 -0.340112994 0.08510638 1.0804598 0.2626892
## Dem_Score_2016 Oth_Score_2016 Dem_Score_Diff State County
## 1 0.3482428 0.05003520 -0.1134320763 26 041
## 2 0.1021180 0.02118003 0.0002344414 48 295
## 3 0.1534042 0.01877372 -0.0764796139 1 127
## 4 0.5210427 0.03391960 -0.0541229648 48 389
## 5 0.1577909 0.07731755 -0.0518283115 56 017
## 6 0.1734997 0.05377302 -0.0891895224 20 043
## CountyName Per.White Per.Hispanic Per.Afr.Am Per.Male MedAge
## 1 Delta County 0.9470906 0.14037865 0.005172186 0.4954814 45.6
## 2 Lipscomb County 0.8610701 0.30496669 0.009459060 0.5033313 37.0
## 3 Walker County 0.9122556 0.01950077 0.062758732 0.4874148 41.2
## 4 Reeves County 0.7736132 0.74243633 0.050903120 0.6008126 35.4
## 5 Hot Springs County 0.9574294 0.02182045 0.005116660 0.4939734 48.6
## 6 Doniphan County 0.9294580 0.02076778 0.038466265 0.5042165 39.6
## Income ED_Score Unempl Swing
## 1 41951 2.613803 0.06603993 1
## 2 52566 2.554252 0.03847981 0
## 3 37191 2.250224 0.07441469 0
## 4 32593 1.841421 0.04575062 0
## 5 42469 2.614923 0.02953587 0
## 6 43410 2.514496 0.04570117 0
##Let's checkout our data first to be sure a linear regression works.
hist(IndyClean$Dem_Score_Diff, xlab="",ylab="",main="Dem Shift Percent",col="black")
summary(IndyClean$Dem_Score_Diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.23440 -0.10210 -0.06298 -0.06784 -0.03353 0.09346
##Most counties shifted away from dems
par(mfrow=c(2,4))
hist(CensusClean$Per.White,col = 'white', xlab="",ylab="",main="Per White")
hist(CensusClean$Per.Afr.Am,col = 'purple', xlab="",ylab="",main="Per African American")
hist(CensusClean$Per.Hispanic,col = 'blue', xlab="",ylab="",main="Per Hispanic")
hist(CensusClean$Per.Male,col = 'orange', xlab="",ylab="",main="Percent Male")
hist(CensusClean$Income,col = 'green', xlab="",ylab="",main="Income")
hist(CensusClean$MedAge,col = 'grey', xlab="",ylab="",main="Median Age")
hist(CensusClean$ED_Score,col = 'pink', xlab="",ylab="",main="ED Score")
hist(CensusClean$Unempl,col = 'lightgreen', xlab="",ylab="",main="Unemployment")
##Seems pretty good, most are normal
library(corrplot)
mus<-cor(na.omit(CensusClean[5:12]))
corrplot(mus)
##Note that Per White is Strongly Anticorrelated with Per. Afr Am. This isn't unexpected in a sense that fractions have to add up to 1 however it is a little concerning to see how this anecdotally suggets seggregation still exits. We may not want to use both variables in the regression however, given how strongly associated they are.
##For all of USA
US<-lm(data = model_data,Dem_Score_Diff~Per.White+Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
summary(US)
##
## Call:
## lm(formula = Dem_Score_Diff ~ Per.White + Per.Hispanic + Per.Afr.Am +
## Per.Male + MedAge + Income + ED_Score + Unempl, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.145011 -0.024934 0.002882 0.026472 0.167259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.436e-02 2.337e-02 -3.609 0.000312 ***
## Per.White -4.851e-02 9.368e-03 -5.178 2.38e-07 ***
## Per.Hispanic 9.489e-02 6.125e-03 15.493 < 2e-16 ***
## Per.Afr.Am 1.049e-01 9.846e-03 10.652 < 2e-16 ***
## Per.Male -1.259e-01 3.289e-02 -3.829 0.000131 ***
## MedAge -1.412e-03 1.571e-04 -8.984 < 2e-16 ***
## Income 5.696e-07 8.620e-08 6.608 4.56e-11 ***
## ED_Score 5.679e-02 4.072e-03 13.945 < 2e-16 ***
## Unempl -1.352e-01 3.550e-02 -3.809 0.000142 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03824 on 3101 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.4068, Adjusted R-squared: 0.4053
## F-statistic: 265.8 on 8 and 3101 DF, p-value: < 2.2e-16
aov(data = model_data,Dem_Score_Diff~Per.White+Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
## Call:
## aov(formula = Dem_Score_Diff ~ Per.White + Per.Hispanic + Per.Afr.Am +
## Per.Male + MedAge + Income + ED_Score + Unempl, data = model_data)
##
## Terms:
## Per.White Per.Hispanic Per.Afr.Am Per.Male MedAge
## Sum of Squares 1.345609 0.275660 0.015727 0.088209 0.247513
## Deg. of Freedom 1 1 1 1 1
## Income ED_Score Unempl Residuals
## Sum of Squares 0.798585 0.317265 0.021220 4.534813
## Deg. of Freedom 1 1 1 3101
##
## Residual standard error: 0.03824096
## Estimated effects may be unbalanced
## 2 observations deleted due to missingness
#All variables are significant, let's pair down the White Variable.
US<-lm(data = model_data,Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
summary(US)
##
## Call:
## lm(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + Income + ED_Score + Unempl, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.14330 -0.02494 0.00298 0.02674 0.16868
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.325e-01 2.153e-02 -6.155 8.49e-10 ***
## Per.Hispanic 1.055e-01 5.796e-03 18.204 < 2e-16 ***
## Per.Afr.Am 1.468e-01 5.617e-03 26.140 < 2e-16 ***
## Per.Male -1.175e-01 3.299e-02 -3.562 0.000374 ***
## MedAge -1.659e-03 1.504e-04 -11.032 < 2e-16 ***
## Income 5.656e-07 8.655e-08 6.535 7.43e-11 ***
## ED_Score 5.928e-02 4.060e-03 14.599 < 2e-16 ***
## Unempl -8.799e-02 3.445e-02 -2.554 0.010692 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0384 on 3102 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.4017, Adjusted R-squared: 0.4003
## F-statistic: 297.5 on 7 and 3102 DF, p-value: < 2.2e-16
USa<-aov(data = model_data,Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
USa
## Call:
## aov(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + Income + ED_Score + Unempl, data = model_data)
##
## Terms:
## Per.Hispanic Per.Afr.Am Per.Male MedAge Income
## Sum of Squares 0.439592 1.004823 0.084954 0.381044 0.815650
## Deg. of Freedom 1 1 1 1 1
## ED_Score Unempl Residuals
## Sum of Squares 0.334894 0.009620 4.574024
## Deg. of Freedom 1 1 3102
##
## Residual standard error: 0.03839974
## Estimated effects may be unbalanced
## 2 observations deleted due to missingness
#R squared is unchanged.
##Check model
par(mfrow=c(2,4))
plot(y=US$residuals, x=na.omit(model_data)$Income, xlab="Income")
plot(y=US$residuals, x=na.omit(model_data)$Unempl, xlab="Unempl")
plot(y=US$residuals, x=na.omit(model_data)$Per.Afr.Am,xlab="Race")
plot(y=US$residuals, x=na.omit(model_data)$ED_Score,xlab="Edu")
plot(y=US$residuals, x=na.omit(model_data)$Per.Male,xlab="Male")
plot(y=US$residuals, x=na.omit(model_data)$Per.Hispanic,xlab="Hispanic")
plot(y=US$residuals, x=na.omit(model_data)$MedAge,xlab="Age")
This Model Says that increased Hispanic Percentage, African American Percentage, Income, of a county means it was more likely to shift positive towards Democrats and that the more Male, the older and the less employed the county was the more it would shift away from Democrats. This seems like it fits a ‘pundit’ model of elections. Let’s see how it plays out in swing states.
####Swing States Model
sw<-lm(data = subset(model_data, Swing==1),Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+Income+ED_Score+Unempl)
summary(sw)
##
## Call:
## lm(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + Income + ED_Score + Unempl, data = subset(model_data,
## Swing == 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.087277 -0.021943 0.001238 0.023029 0.091889
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.560e-01 4.259e-02 -8.359 4.19e-16 ***
## Per.Hispanic 1.059e-01 1.858e-02 5.696 1.90e-08 ***
## Per.Afr.Am 2.015e-01 1.095e-02 18.392 < 2e-16 ***
## Per.Male 4.703e-03 5.813e-02 0.081 0.9355
## MedAge -5.869e-04 2.845e-04 -2.063 0.0396 *
## Income 3.734e-07 1.615e-07 2.312 0.0211 *
## ED_Score 1.083e-01 8.276e-03 13.091 < 2e-16 ***
## Unempl -2.213e-01 8.583e-02 -2.578 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0323 on 619 degrees of freedom
## Multiple R-squared: 0.6034, Adjusted R-squared: 0.5989
## F-statistic: 134.5 on 7 and 619 DF, p-value: < 2.2e-16
##let's pair down non-significant variables, make it more parisomonious remove age and percent male.
sw<-lm(data = subset(model_data, Swing==1),Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Income+ED_Score+Unempl)
summary(sw)
##
## Call:
## lm(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Income +
## ED_Score + Unempl, data = subset(model_data, Swing == 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.090814 -0.022231 0.000632 0.023387 0.100905
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.853e-01 1.719e-02 -22.419 < 2e-16 ***
## Per.Hispanic 1.124e-01 1.834e-02 6.126 1.6e-09 ***
## Per.Afr.Am 2.071e-01 1.054e-02 19.645 < 2e-16 ***
## Income 3.665e-07 1.598e-07 2.293 0.02216 *
## ED_Score 1.115e-01 7.628e-03 14.611 < 2e-16 ***
## Unempl -2.385e-01 8.559e-02 -2.787 0.00549 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03236 on 621 degrees of freedom
## Multiple R-squared: 0.6006, Adjusted R-squared: 0.5974
## F-statistic: 186.8 on 5 and 621 DF, p-value: < 2.2e-16
swa<-aov(data = subset(model_data, Swing==1),Dem_Score_Diff~Per.Hispanic+Per.Afr.Am+Income+ED_Score+Unempl)
swa
## Call:
## aov(formula = Dem_Score_Diff ~ Per.Hispanic + Per.Afr.Am + Income +
## ED_Score + Unempl, data = subset(model_data, Swing == 1))
##
## Terms:
## Per.Hispanic Per.Afr.Am Income ED_Score Unempl
## Sum of Squares 0.0852619 0.3070411 0.3509347 0.2265266 0.0081314
## Deg. of Freedom 1 1 1 1 1
## Residuals
## Sum of Squares 0.6501938
## Deg. of Freedom 621
##
## Residual standard error: 0.03235755
## Estimated effects may be unbalanced
Now let’s do the same but for what predict GOP vs Dem vs Other Vote, for all of US then .
USdem<-lm(data = model_data,Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score+Unempl)
summary(USdem)
##
## Call:
## lm(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + ED_Score + Unempl, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39190 -0.06715 -0.00506 0.05755 0.50741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3208784 0.0563329 -5.696 1.34e-08 ***
## Per.Hispanic 0.2963517 0.0153014 19.368 < 2e-16 ***
## Per.Afr.Am 0.5414358 0.0148475 36.466 < 2e-16 ***
## Per.Male -0.4170082 0.0871099 -4.787 1.77e-06 ***
## MedAge -0.0015044 0.0003953 -3.805 0.000144 ***
## ED_Score 0.2961857 0.0079702 37.162 < 2e-16 ***
## Unempl 1.6492788 0.0909644 18.131 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1016 on 3103 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5611, Adjusted R-squared: 0.5603
## F-statistic: 661.2 on 6 and 3103 DF, p-value: < 2.2e-16
USadem<-aov(data = model_data,Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score+Unempl)
USadem
## Call:
## aov(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + ED_Score + Unempl, data = model_data)
##
## Terms:
## Per.Hispanic Per.Afr.Am Per.Male MedAge ED_Score
## Sum of Squares 2.09331 20.89077 1.47304 1.33111 11.79557
## Deg. of Freedom 1 1 1 1 1
## Unempl Residuals
## Sum of Squares 3.39554 32.05129
## Deg. of Freedom 1 3103
##
## Residual standard error: 0.1016323
## Estimated effects may be unbalanced
## 2 observations deleted due to missingness
USdemss<-lm(data = subset(model_data, Swing==1),Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score)
summary(USdemss)
##
## Call:
## lm(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + ED_Score, data = subset(model_data, Swing == 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22376 -0.05038 -0.00094 0.05074 0.49940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0120819 0.0958766 0.126 0.9
## Per.Hispanic 0.1982604 0.0433410 4.574 5.77e-06 ***
## Per.Afr.Am 0.6136069 0.0235193 26.089 < 2e-16 ***
## Per.Male -0.5665370 0.1357801 -4.172 3.44e-05 ***
## MedAge -0.0027011 0.0006701 -4.031 6.24e-05 ***
## ED_Score 0.2658311 0.0144355 18.415 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07643 on 621 degrees of freedom
## Multiple R-squared: 0.6737, Adjusted R-squared: 0.6711
## F-statistic: 256.4 on 5 and 621 DF, p-value: < 2.2e-16
USademss<-aov(data = subset(model_data, Swing==1),Dem_Score_2016~Per.Hispanic+Per.Afr.Am+Per.Male+MedAge+ED_Score)
USademss
## Call:
## aov(formula = Dem_Score_2016 ~ Per.Hispanic + Per.Afr.Am + Per.Male +
## MedAge + ED_Score, data = subset(model_data, Swing == 1))
##
## Terms:
## Per.Hispanic Per.Afr.Am Per.Male MedAge ED_Score
## Sum of Squares 0.411967 3.845383 0.692605 0.557838 1.980748
## Deg. of Freedom 1 1 1 1 1
## Residuals
## Sum of Squares 3.627200
## Deg. of Freedom 621
##
## Residual standard error: 0.07642579
## Estimated effects may be unbalanced
### 3rd Party is extra!
USoth<-lm(data = model_data,Oth_Score_2016~Per.Afr.Am+Per.Male+MedAge+ED_Score+Income)
summary(USoth)
##
## Call:
## lm(formula = Oth_Score_2016 ~ Per.Afr.Am + Per.Male + MedAge +
## ED_Score + Income, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.068455 -0.011329 -0.002153 0.007162 0.248357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.718e-02 1.185e-02 -8.204 3.36e-16 ***
## Per.Afr.Am -7.159e-02 2.921e-03 -24.506 < 2e-16 ***
## Per.Male 1.428e-01 1.866e-02 7.654 2.59e-14 ***
## MedAge -1.166e-03 8.162e-05 -14.282 < 2e-16 ***
## ED_Score 5.839e-02 2.271e-03 25.710 < 2e-16 ***
## Income -4.392e-07 4.894e-08 -8.975 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02185 on 3106 degrees of freedom
## Multiple R-squared: 0.3878, Adjusted R-squared: 0.3868
## F-statistic: 393.5 on 5 and 3106 DF, p-value: < 2.2e-16
USaoth<-aov(data = model_data,Oth_Score_2016~Per.Afr.Am+Per.Male+MedAge+ED_Score+Income)
USaoth
## Call:
## aov(formula = Oth_Score_2016 ~ Per.Afr.Am + Per.Male + MedAge +
## ED_Score + Income, data = model_data)
##
## Terms:
## Per.Afr.Am Per.Male MedAge ED_Score Income
## Sum of Squares 0.4148398 0.0034119 0.1378478 0.3444769 0.0384364
## Deg. of Freedom 1 1 1 1 1
## Residuals
## Sum of Squares 1.4822572
## Deg. of Freedom 3106
##
## Residual standard error: 0.02184545
## Estimated effects may be unbalanced
Thank you!