Import Raw Data

library(readr)
library(dplyr)

## Warning: package 'dplyr' was built under R version 3.5.1

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the tab-delimited voter file. Empty strings and "NA" are read as
# missing, and leading/trailing whitespace is trimmed from every field.
# NOTE(review): the recorded run reported 1025 parsing failures (rows with
# 100 columns where 78 were expected); inspect problems(df.voter).
df.voter <- read_tsv(
  file = "~/Downloads/MultiPurpose_SBVMWD3_208-09-28.txt",
  na = c("", "NA"),
  trim_ws = TRUE
)

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   voter_id = col_integer(),
##   house_number = col_integer(),
##   image_id = col_integer()
## )

## See spec(...) for full column specifications.

## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)

## Warning: 1025 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual      file                                  expected   <int> <chr> <chr>      <chr>       <chr>                                 actual 1   254 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… file 2   556 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… row 3   970 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… col 4  1043 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20… expected 5  1144 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.

#df_voter <- read.delim("Downloads/MultiPurpose_SBVMWD3_208-09-28.txt",sep = "\t", na = c("","NA"), colClasses = "character", stringsAsFactors = FALSE, skipNul = TRUE)

2018 Statewide Primary (June 5th, 2018)

Create dummy if voted

# Tabulate the raw participation codes recorded for the 2018 statewide primary.
table(df.voter[["01 06/05/2018 2018 statewide primary election 3784"]])

## 
##     A     N     V 
##  3149 12971  1309

# Dummy for the 2018 statewide primary: 1 when the recorded code is "A" or
# "V", 0 for every other code. %in% returns FALSE for missing values, so
# voters with no record are 0 as well.
df.voter$primary_2018 <- as.numeric(
  df.voter[["01 06/05/2018 2018 statewide primary election 3784"]] %in% c("A", "V")
)
table(df.voter$primary_2018)

## 
##     0     1 
## 14172  4458

Consolidated Election 2017

# Tabulate the raw participation codes recorded for the 2017 consolidated election.
table(df.voter[["02 11/07/2017 2017 consolidated election 2751"]])

## 
##    A    N    V 
##  191 4054   18

# Dummy for the 2017 consolidated election: 1 when the recorded code is "A"
# or "V", 0 for every other code. Missing values also become 0 because %in%
# never returns NA.
df.voter$consolidated_2017 <- as.numeric(
  df.voter[["02 11/07/2017 2017 consolidated election 2751"]] %in% c("A", "V")
)
table(df.voter$consolidated_2017)

## 
##     0     1 
## 18421   209

General Election 2016 (November 8th, 2016)

# Tabulate the raw participation codes recorded for the 2016 presidential general election.
table(df.voter[["10 11/08/2016 2016 presidential general election 2299"]])

## 
##    A    N    V 
## 6537 5189 4043

# Dummy for the 2016 presidential general election: 1 when the recorded code
# is "A" or "V", 0 for every other code. Missing values also become 0
# because %in% never returns NA.
df.voter$general_2016 <- as.numeric(
  df.voter[["10 11/08/2016 2016 presidential general election 2299"]] %in% c("A", "V")
)
table(df.voter$general_2016)

## 
##     0     1 
##  8050 10580

Primary Election 2016 (June 7th, 2016)

# Tabulate the raw participation codes (with party suffixes) recorded for the
# 2016 presidential primary.
table(df.voter[["12 06/07/2016 2016 presidential primary election 1221"]])

## 
##  A(AI) A(DEM) A(GRN) A(LIB) A(NPP) A(NXD)  A(PF) A(REP)      N  N(AI) 
##     70   1422      6     16    361    115      3    988   4877    142 
## N(DEM) N(GRN) N(LIB) N(NPP) N(NXD)  N(PF) N(REP)  V(AI) V(DEM) V(GRN) 
##   1185     13     27   1212     27     22   1126     44    888      4 
## V(LIB) V(NPP) V(NXD) V(OTH)  V(PF) V(REP) 
##      9    373     10      4      1    564

# Dummy for the 2016 presidential primary. Codes in this column carry a party
# suffix, e.g. "A(DEM)", "V(REP)", "N(NPP)". A voter is flagged 1 when the
# code starts with "A" or "V", matching the A/V rule used for the other
# elections, and 0 otherwise.
#
# This replaces a hard-coded list of nine "N..." codes: with that list, any
# non-voter code missing from it (for example a new party suffix) was
# silently counted as having voted. grepl() returns FALSE for missing values,
# so voters with no record are 0 without a separate NA fix-up.
df.voter$primary_2016 <- as.numeric(
  grepl("^[AV]", df.voter[["12 06/07/2016 2016 presidential primary election 1221"]])
)
table(df.voter$primary_2016)

## 
##     0     1 
## 13752  4878

Consolidated Election 2015 (Nov 3rd, 2015)

# Tabulate the raw participation codes recorded for the 2015 consolidated election.
table(df.voter[["14 11/03/2015 2015 consolidated election 139"]])

## 
##    A    N    V 
##  390 4679   61

# Dummy for the 2015 consolidated election: 1 when the recorded code is "A"
# or "V", 0 otherwise, the same rule as the 2017 and 2018 dummies.
#
# The earlier version flagged every value other than "N" as voted, so any
# unexpected code would have been counted as a vote. %in% returns FALSE for
# missing values, so voters with no record are 0 without a separate NA fix-up.
df.voter$consolidated_2015 <- as.numeric(
  df.voter[["14 11/03/2015 2015 consolidated election 139"]] %in% c("A", "V")
)
table(df.voter$consolidated_2015)

## 
##     0     1 
## 18179   451

Generate Universes

Universe_1: Highest Propensity

Criteria used to generate universe:

Voted in 2018 Primary
Voted in 2017 Consolidated
Voted in 2016 General
Voted in 2016 Primary
Voted in 2015 Consolidated

# Universe 1: voters flagged in all five elections. Conditions separated by
# commas in filter() are combined with AND. Only the contact and registration
# columns are kept.
universe_1 <- df.voter %>%
  filter(
    primary_2018 == 1,
    consolidated_2017 == 1,
    general_2016 == 1,
    primary_2016 == 1,
    consolidated_2015 == 1
  ) %>%
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )

# Report how many voters landed in Universe 1.
cat(paste("There are", nrow(universe_1), "voters in Universe 1, who have voted in each of the above elections."))

## There are 88 voters in Universe 1, who have voted in each of the above elections.

Universe_2: High Propensity

Criteria used to generate universe:

Same as universe 1, except remove consolidated elections.

Voted in 2018 Primary
Voted in 2016 General
Voted in 2016 Primary

# Universe 2: voters flagged in the 2018 primary, the 2016 general and the
# 2016 primary (consolidated elections are ignored). Conditions separated by
# commas in filter() are combined with AND.
universe_2 <- df.voter %>%
  filter(
    primary_2018 == 1,
    general_2016 == 1,
    primary_2016 == 1
  ) %>%
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )

# Report how many voters landed in Universe 2. The message used to say
# "Universe 1", a copy-paste slip from the previous section.
cat("There are", nrow(universe_2), "voters in Universe 2, who have voted in each of the above elections.")

## There are 2764 voters in Universe 2, who have voted in each of the above elections.

Universe_3: Mid Propensity

Criteria used to generate universe:

Voted in at least 1 of the past elections below.

Voted in 2018 Primary
Voted in 2017 Consolidated
Voted in 2016 General
Voted in 2016 Primary
Voted in 2015 Consolidated

# Universe 3: voters flagged in at least one of the five elections (the
# conditions are OR-ed). Only the contact and registration columns are kept.
universe_3 <- df.voter %>%
  filter(
    primary_2018 == 1 |
      consolidated_2017 == 1 |
      general_2016 == 1 |
      primary_2016 == 1 |
      consolidated_2015 == 1
  ) %>%
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )

# Report how many voters landed in Universe 3. The message used to say
# "Universe 1", a copy-paste slip from the first universe's section.
cat("There are", nrow(universe_3), "voters in Universe 3, who have voted in at least one of the above elections.")

## There are 11300 voters in Universe 3, who have voted in at least one of the above elections.

Export Universes

# Write each universe to its own CSV file, without a row-name column.
write.csv(
  universe_1,
  file = "~/Documents/SBVWD_universe_1.csv",
  row.names = FALSE
)
write.csv(
  universe_2,
  file = "~/Documents/SBVWD_universe_2.csv",
  row.names = FALSE
)
write.csv(
  universe_3,
  file = "~/Documents/SBVWD_universe_3.csv",
  row.names = FALSE
)

Generate Voter File for Anthony Jones - San Bernardino Valley Municipal Water District 3

Import Raw Data

2018 Statewide Primary (June 5th, 2018)

Consolidated Election 2017

General Election 2016 (November 8th, 2016)

Primary Election 2016 (June 7th, 2016)

Consolidated Election 2015 (Nov 3rd, 2015)

Generate Universes

Universe_1: Highest Propensity

Universe_2: High Propensity

Universe_3: Mid Propensity

Export Universes