Import Raw Data

library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the tab-separated voter file. Empty strings and the literal "NA" are
# read as missing values, and surrounding whitespace is trimmed from fields.
# NOTE(review): the log below reports 1025 rows with 100 columns where 78 were
# expected -- inspect problems(df.voter) before relying on those rows.
df.voter <- read_tsv(
  "~/Downloads/MultiPurpose_SBVMWD3_208-09-28.txt",
  na = c("", "NA"),
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   voter_id = col_integer(),
##   house_number = col_integer(),
##   image_id = col_integer()
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 1025 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual      file                                  expected   <int> <chr> <chr>      <chr>       <chr>                                 actual 1   254 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… file 2   556 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… row 3   970 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… col 4  1043 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… expected 5  1144 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…

## See problems(...) for more details.
#df_voter <- read.delim("Downloads/MultiPurpose_SBVMWD3_208-09-28.txt",sep = "\t", na = c("","NA"), colClasses = "character", stringsAsFactors = FALSE, skipNul = TRUE)

2018 Statewide Primary (June 5th, 2018)

Create dummy if voted

# Tabulate the raw participation codes recorded for this election.
table(df.voter$`01 06/05/2018 2018 statewide primary election 3784`)
## 
##     A     N     V 
##  3149 12971  1309
# Dummy: 1 when the code is "A" or "V", otherwise 0. `%in%` yields FALSE for
# NA, so voters with no recorded code are scored 0 as well.
df.voter$primary_2018 <- as.numeric(
  df.voter$`01 06/05/2018 2018 statewide primary election 3784` %in% c("A", "V")
)
table(df.voter$primary_2018)
## 
##     0     1 
## 14172  4458

Consolidated Election 2017

# Tabulate the raw participation codes recorded for this election.
table(df.voter$`02 11/07/2017 2017 consolidated election 2751`)
## 
##    A    N    V 
##  191 4054   18
# Dummy: 1 when the code is "A" or "V", otherwise 0. `%in%` yields FALSE for
# NA, so voters with no recorded code are scored 0 as well.
df.voter$consolidated_2017 <- as.numeric(
  df.voter$`02 11/07/2017 2017 consolidated election 2751` %in% c("A", "V")
)
table(df.voter$consolidated_2017)
## 
##     0     1 
## 18421   209

General Election 2016 (November 8th, 2016)

# Tabulate the raw participation codes recorded for this election.
table(df.voter$`10 11/08/2016 2016 presidential general election 2299`)
## 
##    A    N    V 
## 6537 5189 4043
# Dummy: 1 when the code is "A" or "V", otherwise 0. `%in%` yields FALSE for
# NA, so voters with no recorded code are scored 0 as well.
df.voter$general_2016 <- as.numeric(
  df.voter$`10 11/08/2016 2016 presidential general election 2299` %in%
    c("A", "V")
)
table(df.voter$general_2016)
## 
##     0     1 
##  8050 10580

Primary Election 2016 (June 7th, 2016)

# Tabulate the raw participation codes; in this election each code may carry
# a parenthesised suffix, e.g. "A(DEM)".
table(df.voter$`12 06/07/2016 2016 presidential primary election 1221`)
## 
##  A(AI) A(DEM) A(GRN) A(LIB) A(NPP) A(NXD)  A(PF) A(REP)      N  N(AI) 
##     70   1422      6     16    361    115      3    988   4877    142 
## N(DEM) N(GRN) N(LIB) N(NPP) N(NXD)  N(PF) N(REP)  V(AI) V(DEM) V(GRN) 
##   1185     13     27   1212     27     22   1126     44    888      4 
## V(LIB) V(NPP) V(NXD) V(OTH)  V(PF) V(REP) 
##      9    373     10      4      1    564
# Dummy: 0 when the code is one of the listed "N" variants or is missing,
# 1 for every other recorded code.
df.voter$primary_2016 <- as.numeric(
  !is.na(df.voter$`12 06/07/2016 2016 presidential primary election 1221`) &
    !(df.voter$`12 06/07/2016 2016 presidential primary election 1221` %in%
      c("N", "N(AI)", "N(DEM)", "N(GRN)", "N(LIB)", "N(NPP)", "N(NXD)",
        "N(PF)", "N(REP)"))
)
table(df.voter$primary_2016)
## 
##     0     1 
## 13752  4878

Consolidated Election 2015 (Nov 3rd, 2015)

# Tabulate the raw participation codes recorded for this election.
table(df.voter$`14 11/03/2015 2015 consolidated election 139`)
## 
##    A    N    V 
##  390 4679   61
# Dummy: 0 when the code is "N" or missing, 1 for any other recorded code.
df.voter$consolidated_2015 <- as.numeric(
  !is.na(df.voter$`14 11/03/2015 2015 consolidated election 139`) &
    df.voter$`14 11/03/2015 2015 consolidated election 139` != "N"
)
table(df.voter$consolidated_2015)
## 
##     0     1 
## 18179   451

Generate Universes

Universe_1: Highest Propensity

Criteria used to generate universe:

  • Voted in 2018 Primary
  • Voted in 2017 Consolidated
  • Voted in 2016 General
  • Voted in 2016 Primary
  • Voted in 2015 Consolidated
# Universe 1: voters flagged as having voted in all five elections.
# Comma-separated filter() conditions are combined with a logical AND.
universe_1 <- df.voter %>%
  filter(
    primary_2018 == 1,
    consolidated_2017 == 1,
    general_2016 == 1,
    primary_2016 == 1,
    consolidated_2015 == 1
  ) %>%
  # Keep only the identification, address, registration and contact columns.
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )

cat("There are", nrow(universe_1), "voters in Universe 1, who have voted in each of the above elections.")
## There are 88 voters in Universe 1, who have voted in each of the above elections.

Universe_2: High Propensity

Criteria used to generate universe:

Same as universe 1, except remove consolidated elections.

  • Voted in 2018 Primary
  • Voted in 2016 General
  • Voted in 2016 Primary
# Universe 2: voters flagged as having voted in the 2018 primary, the 2016
# general and the 2016 primary (consolidated elections are not required).
universe_2 <- df.voter %>%
  filter(primary_2018 == 1 & general_2016 == 1 & primary_2016 == 1) %>%
  # Keep only the identification, address, registration and contact columns.
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )

# The summary previously said "Universe 1" (copy-paste slip); it now names the
# universe that was actually counted.
cat("There are", nrow(universe_2), "voters in Universe 2, who have voted in each of the above elections.")
## There are 2764 voters in Universe 2, who have voted in each of the above elections.

Universe_3: Mid Propensity

Criteria used to generate universe:

Voted in at least 1 of the past elections below.

  • Voted in 2018 Primary
  • Voted in 2017 Consolidated
  • Voted in 2016 General
  • Voted in 2016 Primary
  • Voted in 2015 Consolidated
# Universe 3: voters flagged as having voted in at least one of the five
# elections (conditions are OR-ed together).
universe_3 <- df.voter %>%
  filter(
    primary_2018 == 1 | consolidated_2017 == 1 |
      general_2016 == 1 | primary_2016 == 1 |
      consolidated_2015 == 1
  ) %>%
  # Keep only the identification, address, registration and contact columns.
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )

# The summary previously said "Universe 1" (copy-paste slip); it now names the
# universe that was actually counted.
cat("There are", nrow(universe_3), "voters in Universe 3, who have voted in at least one of the above elections.")
## There are 11300 voters in Universe 3, who have voted in at least one of the above elections.

Export Universes

# Write each universe to its own CSV file, omitting the row-name column.
# NOTE(review): the output files use the prefix "SBVWD" while the input file
# uses "SBVMWD" -- confirm which spelling is intended.
write.csv(universe_1, file = "~/Documents/SBVWD_universe_1.csv", row.names = FALSE)
write.csv(universe_2, file = "~/Documents/SBVWD_universe_2.csv", row.names = FALSE)
write.csv(universe_3, file = "~/Documents/SBVWD_universe_3.csv", row.names = FALSE)