library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.1     ✔ dplyr   0.8.1
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dslabs)
library(ggrepel)

Import the US election results data and the US murders data.

# Show the source code of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Load the US murders table from dslabs and preview its first six rows.
data("murders", package = "dslabs")
head(murders, n = 6L)
##        state abb region population total
## 1    Alabama  AL  South    4779736   135
## 2     Alaska  AK   West     710231    19
## 3    Arizona  AZ   West    6392017   232
## 4   Arkansas  AR  South    2915918    93
## 5 California  CA   West   37253956  1257
## 6   Colorado  CO   West    5029196    65
# Loading this dataset also brings in results_us_election_2016.
data("polls_us_election_2016", package = "dslabs")
# Preview the first six polls.
head(polls_us_election_2016, n = 6L)
##   state  startdate    enddate
## 1  U.S. 2016-11-03 2016-11-06
## 2  U.S. 2016-11-01 2016-11-07
## 3  U.S. 2016-11-02 2016-11-06
## 4  U.S. 2016-11-04 2016-11-07
## 5  U.S. 2016-11-03 2016-11-06
## 6  U.S. 2016-11-03 2016-11-06
##                                                     pollster grade
## 1                                   ABC News/Washington Post    A+
## 2                                    Google Consumer Surveys     B
## 3                                                      Ipsos    A-
## 4                                                     YouGov     B
## 5                                           Gravis Marketing    B-
## 6 Fox News/Anderson Robbins Research/Shaw & Company Research     A
##   samplesize population rawpoll_clinton rawpoll_trump rawpoll_johnson
## 1       2220         lv           47.00         43.00            4.00
## 2      26574         lv           38.03         35.69            5.46
## 3       2195         lv           42.00         39.00            6.00
## 4       3677         lv           45.00         41.00            5.00
## 5      16639         rv           47.00         43.00            3.00
## 6       1295         lv           48.00         44.00            3.00
##   rawpoll_mcmullin adjpoll_clinton adjpoll_trump adjpoll_johnson
## 1               NA        45.20163      41.72430        4.626221
## 2               NA        43.34557      41.21439        5.175792
## 3               NA        42.02638      38.81620        6.844734
## 4               NA        45.65676      40.92004        6.069454
## 5               NA        46.84089      42.33184        3.726098
## 6               NA        49.02208      43.95631        3.057876
##   adjpoll_mcmullin
## 1               NA
## 2               NA
## 3               NA
## 4               NA
## 5               NA
## 6               NA
# Preview the first six rows of the state-level election results.
head(results_us_election_2016, n = 6L)
##          state electoral_votes clinton trump others
## 1   California              55    61.7  31.6    6.7
## 2        Texas              38    43.2  52.2    4.5
## 3      Florida              29    47.8  49.0    3.2
## 4     New York              29    59.0  36.5    4.5
## 5     Illinois              20    55.8  38.8    5.4
## 6 Pennsylvania              20    47.9  48.6    3.6

Since we will be using both tables, let's check whether the state columns are in the same order in both datasets.

# Check whether the two tables we are about to combine list the states in the
# same order. The comparison must use the state-level results table (the one
# joined below), not the poll-level table, which is not one row per state.
identical(murders$state, results_us_election_2016$state)
## [1] FALSE

Plot electoral votes versus population.

However, the election dataset has no population column, so we need to join the tables first.

# Attach the election results to the murders table, matching rows on the
# state name; every row of murders is kept.
tab <- murders %>%
  left_join(results_us_election_2016, by = "state")
# Preview the first six rows of the combined table.
head(tab, n = 6L)
##        state abb region population total electoral_votes clinton trump
## 1    Alabama  AL  South    4779736   135               9    34.4  62.1
## 2     Alaska  AK   West     710231    19               3    36.6  51.3
## 3    Arizona  AZ   West    6392017   232              11    45.1  48.7
## 4   Arkansas  AR  South    2915918    93               6    33.7  60.6
## 5 California  CA   West   37253956  1257              55    61.7  31.6
## 6   Colorado  CO   West    5029196    65               9    48.2  43.3
##   others
## 1    3.6
## 2   12.2
## 3    6.2
## 4    5.8
## 5    6.7
## 6    8.6

Plot electoral votes against population (in millions) on log2 axes, with a fitted regression line.

# Scatter plot of electoral votes against population (in millions), labelled
# with state abbreviations. Both axes use a log2 scale, and a linear fit
# without a confidence band is drawn on top.
ggplot(
  data = tab,
  mapping = aes(x = population/10^6, y = electoral_votes, label = abb)
) +
  geom_point() +
  geom_text_repel() +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2") +
  geom_smooth(method = "lm", se = FALSE)