library(tidyverse)
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.1 ✓ purrr 0.3.4
✓ tibble 3.0.1 ✓ dplyr 1.0.0
✓ tidyr 1.1.0 ✓ stringr 1.4.0
✓ readr 1.3.1 ✓ forcats 0.5.0
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(leaflet)
library(sf)
Linking to GEOS 3.5.1, GDAL 2.2.2, PROJ 4.9.2
library(readxl)
library(DT)
library(plotly)
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(broom)
library(tidycensus)
# First pass: read the entire first sheet (no cell range given) to inspect its layout.
senate_counties <- read_xlsx("Statewide Results.xlsx", sheet = 1)
New names:
* `` -> ...2
* `` -> ...3
* `` -> ...4
* `` -> ...5
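This loads the whole first sheet of the workbook. Most columns come in without names (note the "New names" message), so a narrower cell range is needed.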
# Re-read the sheet restricted to cells B7:E63, overwriting the earlier whole-sheet result.
senate_counties <- read_xlsx("Statewide Results.xlsx", sheet = 1, range = "B7:E63")
This reloads the data, reading only cells B7:E63 so that just the county results table is imported.
# Print each column's name, type, and first few values.
glimpse(senate_counties)
Rows: 56
Columns: 4
$ County <chr> "Beaverhead", "Big Horn", "Blaine", "Broadwater", "Carbon", "Carter", "Cascade", "Chout…
$ `JON TESTER\r\nDemocrat` <dbl> 1876, 3027, 1961, 1071, 2680, 128, 17435, 1275, 1942, 281, 1233, 2892, 281, 1964, 19652…
$ `MATT ROSENDALE\r\nRepublican` <dbl> 2866, 1558, 982, 2086, 3209, 602, 15566, 1312, 2762, 631, 2700, 1208, 951, 3640, 26759,…
$ `RICK BRECKENRIDGE\r\nLibertarian` <dbl> 155, 91, 76, 104, 178, 22, 1008, 70, 179, 29, 140, 136, 57, 189, 1349, 1434, 30, 89, 21…
This shows a preview of the data: 56 rows (one per county) with the vote counts for each of the three candidates.
# Swap the spreadsheet's two-line "NAME\r\nParty" headers for short party
# names, all in a single rename() call.
senate_counties <- rename(
  senate_counties,
  Republican = "MATT ROSENDALE\r\nRepublican",
  Democrat = "JON TESTER\r\nDemocrat",
  Libertarian = "RICK BRECKENRIDGE\r\nLibertarian"
)
# Add the three-candidate vote total and the Republican advantage: the
# Republican vote share minus the Democratic vote share, expressed in
# percentage points and rounded to one decimal place.
senate_counties <- mutate(
  senate_counties,
  total_votes = Republican + Democrat + Libertarian,
  Repub_advantage = Republican / total_votes - Democrat / total_votes,
  Repub_advantage = round(Repub_advantage * 100, 1)
)
# Show the counties ordered from largest to smallest Republican advantage.
arrange(senate_counties, desc(Repub_advantage))
This is a table showing the votes for each candidate by county, along with the Republican advantage in each county.
# Download ACS variable B01003_001 (used below as the county population)
# together with boundary geometry for every Montana county.
# The year and survey are pinned to the 2014-2018 5-year ACS, the dataset
# this analysis was originally run against, so the result does not change
# when the package's default year moves forward.
mt_counties <- get_acs(
  geography = "county",
  variables = "B01003_001",
  state = "MT",
  year = 2018,
  survey = "acs5",
  geometry = TRUE
)
Getting data from the 2014-2018 5-year ACS
Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
Using FIPS code '30' for state 'MT'
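This downloads the population estimate and the boundary geometry for every county in Montana from the American Community Survey.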
# Make the county names in the two tables agree so they can be joined.
#
# The results sheet spells one county with "&" where the Census name uses
# "and". Replace by pattern rather than by row position, so the fix still
# lands on the right county if the sheet is reordered or re-imported.
senate_counties <- senate_counties %>%
  mutate(County = gsub("\\s*&\\s*", " and ", County))

mt_counties <- mt_counties %>%
  # Strip the trailing " County, Montana" from the Census NAME field
  mutate(County = sub(" County, Montana$", "", NAME)) %>%
  # Rename the ACS 'estimate' column to 'Population'
  rename(Population = estimate)
This replaces "&" with "and" in one county name and strips " County, Montana" from the Census names, so that the county names in the two tables match.
# Attach the election results to the county geometries and populations.
# The key is named explicitly so the join cannot silently pick up any other
# column the two tables might come to share, and so no "Joining, by"
# message is emitted.
senate_election <- mt_counties %>%
  full_join(senate_counties, by = "County")
Joining, by = "County"
This joins the data from mt counties with the senate race data.
# Interactive table of the joined data. Converting to a plain tibble first
# lets select() drop the geometry column.
datatable(
  select(
    as_tibble(senate_election),
    County, Population, Democrat, Republican, Libertarian,
    total_votes, Repub_advantage
  )
)
This is a table showing the counties with their population, the votes per candidate, and the Republican advantage.
# Colour scale mapping Republican advantage onto the viridis palette.
vote_colors <- colorNumeric(
  palette = "viridis",
  domain = senate_election$Repub_advantage
)

# Choropleth of Republican advantage by county.
senate_election %>%
  # The Census geometry is NAD83; leaflet expects WGS84 (EPSG:4326) and
  # warns about an inconsistent datum otherwise.
  st_transform(crs = 4326) %>%
  leaflet() %>%
  addTiles() %>%
  addPolygons(
    weight = 1,
    fillColor = ~vote_colors(Repub_advantage),
    label = ~paste0(County, ", Republican advantage = ", Repub_advantage),
    # Full argument name; `highlight =` only worked via partial matching
    highlightOptions = highlightOptions(weight = 2)
  ) %>%
  setView(-110, 47, zoom = 6) %>%
  addLegend(pal = vote_colors, values = ~Repub_advantage)
sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs ).
Need '+proj=longlat +datum=WGS84'
This map shows the Republican advantage by county. Lighter colors (toward yellow) indicate a stronger Republican advantage.
# Scatterplot of Republican advantage against county population, with a
# hover label giving the county, its population, and its advantage.
plot_ly(
  senate_election,
  x = ~Population,
  y = ~Repub_advantage,
  hoverinfo = "text",
  text = ~paste(
    "County:", County, "<br>",
    "Population: ", Population, "<br>",
    "Republican advantage: ", Repub_advantage
  )
) %>%
  add_markers(marker = list(opacity = 0.7)) %>%
  layout(
    title = "Predicting Republican Vote Advantage from Population, by County",
    xaxis = list(title = "County population"),
    yaxis = list(title = "Republican vote advantage")
  )
`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
This is a plotly graph indicating a negative correlation: the larger a county's population, the smaller the Republican advantage.
# Simple linear regression of Republican advantage on county population.
pop_model <- lm(Repub_advantage ~ Population, data = senate_election)
This is a linear regression.
# Print the residual summary, coefficient table, and overall fit statistics.
summary(pop_model)
Call:
lm(formula = Repub_advantage ~ Population, data = senate_election)
Residuals:
Min 1Q Median 3Q Max
-71.228 -12.013 3.247 15.782 47.948
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.2809050 4.0835813 5.946 2.08e-07 ***
Population -0.0003761 0.0001100 -3.418 0.00121 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.44 on 54 degrees of freedom
Multiple R-squared: 0.1779, Adjusted R-squared: 0.1626
F-statistic: 11.68 on 1 and 54 DF, p-value: 0.001207
This is a summary of the regression results.
# broom: the coefficient table as a data frame, one row per model term
tidy(pop_model)
# broom: a one-row data frame of model-level fit statistics
glance(pop_model)
This shows the information in a cleaner way.
# Scatterplot of Republican advantage against county population, overlaid
# with the fitted line from pop_model.
senate_election %>%
  plot_ly(
    x = ~Population,
    y = ~Repub_advantage,
    hoverinfo = "text",
    text = ~paste(
      "County:", County, "<br>",
      "Population: ", Population, "<br>",
      "Republican advantage: ", Repub_advantage
    )
  ) %>%
  # FALSE rather than F: F is an ordinary variable that can be reassigned
  add_markers(showlegend = FALSE, marker = list(opacity = 0.7)) %>%
  layout(
    title = "Predicting Republican Vote Advantage from Population, by County",
    xaxis = list(title = "County population"),
    yaxis = list(title = "Republican vote advantage")
  ) %>%
  # Regression line: fitted values from the population model
  add_lines(y = ~fitted(pop_model))
This adds the fitted regression line to the scatterplot, showing the Republican vote advantage predicted from each county's population.
# Add each county's centroid coordinates as Longitude / Latitude columns.
# The centroids are computed once (instead of once per column) and taken
# from the object's geometry via st_geometry(), so the code does not depend
# on the geometry column being called "geometry".
# NOTE(review): sf warns that st_centroid() on longitude/latitude data is
# not exact; confirm whether that precision matters for this analysis.
centroid_xy <- st_coordinates(st_centroid(st_geometry(senate_election)))
senate_election <- senate_election %>%
  mutate(
    Longitude = unname(centroid_xy[, "X"]),
    Latitude = unname(centroid_xy[, "Y"])
  )
st_centroid does not give correct centroids for longitude/latitude datast_centroid does not give correct centroids for longitude/latitude data
# Map of county outlines with a circle marker at each county's centroid.
senate_election %>%
  # The Census geometry is NAD83; leaflet expects WGS84 (EPSG:4326) and
  # warns about an inconsistent datum otherwise.
  st_transform(crs = 4326) %>%
  leaflet() %>%
  addTiles() %>%
  addPolygons(weight = 1) %>%
  setView(-110, 47, zoom = 6) %>%
  addCircleMarkers(~Longitude, ~Latitude)
sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs ).
Need '+proj=longlat +datum=WGS84'
This is another map, this time marking the centroid (center point) of each county.
# Quick scatterplot of Republican advantage against centroid longitude.
add_markers(
  plot_ly(senate_election, x = ~Longitude, y = ~Repub_advantage)
)
This is a plotly graph of longitude and Republican vote advantage, indicating a positive correlation: the further east a county is, the larger the Republican advantage.
# Simple linear regression of Republican advantage on county longitude.
longitude_lm <- lm(Repub_advantage ~ Longitude, data = senate_election)
# broom: coefficient table, then model-level fit statistics
tidy(longitude_lm)
glance(longitude_lm)
This is a regression predicting republican vote from longitude.
# Scatterplot of Republican advantage against county longitude, overlaid
# with the fitted line from longitude_lm.
senate_election %>%
  plot_ly(
    x = ~Longitude,
    y = ~Repub_advantage,
    hoverinfo = "text",
    text = ~paste(
      "County:", County, "<br>",
      "Longitude: ", Longitude, "<br>",
      "Republican advantage: ", Repub_advantage
    )
  ) %>%
  # FALSE rather than F: F is an ordinary variable that can be reassigned
  add_markers(marker = list(opacity = 0.7), showlegend = FALSE) %>%
  layout(
    title = "Predicting Republican Vote Advantage from Longitude, by County",
    xaxis = list(title = "County longitude"),
    yaxis = list(title = "Republican vote advantage")
  ) %>%
  # Regression line: fitted values from the longitude model
  add_lines(y = ~fitted(longitude_lm))
This is a plotly graph predicting the republican vote advantage from longitude, by county.
# Multiple regression: Republican advantage on both population and longitude.
multiple_lm <- lm(Repub_advantage ~ Population + Longitude, data = senate_election)
# broom: coefficient table, then model-level fit statistics
tidy(multiple_lm)
glance(multiple_lm)
This is a multiple regression, predicting the Republican advantage from both Population and Longitude at the same time.
# 3D scatterplot: longitude (x), population (y), Republican advantage (z),
# with the county name shown on hover.
senate_election %>%
  plot_ly(
    x = ~Longitude,
    y = ~Population,
    z = ~Repub_advantage,
    text = ~County,
    hoverinfo = "text"
  ) %>%
  # FALSE rather than F: F is an ordinary variable that can be reassigned
  add_markers(opacity = 0.7, showlegend = FALSE)
This is a 3D scatterplot showing each county's Republican advantage plotted against its population and its longitude.