Import Raw Data
library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the tab-separated voter file. Empty strings and "NA" are read as
# missing values, and surrounding whitespace is trimmed from every field.
# NOTE(review): readr reports 1025 parsing failures for this file (rows with
# 100 columns where 78 were expected) -- inspect them with problems(df.voter).
# NOTE(review): "208-09-28" in the file name looks like a truncated date --
# confirm the path before re-running.
df.voter <- read_tsv(
  file = "~/Downloads/MultiPurpose_SBVMWD3_208-09-28.txt",
  na = c("", "NA"),
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## voter_id = col_integer(),
## house_number = col_integer(),
## image_id = col_integer()
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 1025 parsing failures.
## # A tibble: 5 x 5
##     row col   expected   actual      file
##   <int> <chr> <chr>      <chr>       <chr>
## 1   254 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…
## 2   556 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…
## 3   970 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…
## 4  1043 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…
## 5  1144 <NA>  78 columns 100 columns '~/Downloads/MultiPurpose_SBVMWD3_20…
## ...
## See problems(...) for more details.
#df_voter <- read.delim("Downloads/MultiPurpose_SBVMWD3_208-09-28.txt",sep = "\t", na = c("","NA"), colClasses = "character", stringsAsFactors = FALSE, skipNul = TRUE)
2018 Statewide Primary (June 5th, 2018)
Create a dummy variable equal to 1 if the voter voted in the election and 0 otherwise
# Tabulate the raw ballot codes recorded for the 2018 statewide primary.
table(df.voter$`01 06/05/2018 2018 statewide primary election 3784`)
##
## A N V
## 3149 12971 1309
# primary_2018 is 1 when the recorded code is "A" or "V" and 0 otherwise.
# %in% returns FALSE for NA, so voters with no record for this election get 0
# directly; no up-front initialisation or NA back-fill is needed.
df.voter$primary_2018 <- ifelse(
  df.voter$`01 06/05/2018 2018 statewide primary election 3784` %in% c("A", "V"),
  1, 0
)
# Check the resulting 0/1 split.
table(df.voter$primary_2018)
##
## 0 1
## 14172 4458
Consolidated Election 2017
# Tabulate the raw ballot codes recorded for the 2017 consolidated election.
table(df.voter$`02 11/07/2017 2017 consolidated election 2751`)
##
## A N V
## 191 4054 18
# consolidated_2017 is 1 when the recorded code is "A" or "V" and 0 otherwise.
# %in% returns FALSE for NA, so voters with no record for this election get 0
# directly; no up-front initialisation or NA back-fill is needed.
df.voter$consolidated_2017 <- ifelse(
  df.voter$`02 11/07/2017 2017 consolidated election 2751` %in% c("A", "V"),
  1, 0
)
# Check the resulting 0/1 split.
table(df.voter$consolidated_2017)
##
## 0 1
## 18421 209
General Election 2016 (November 8th, 2016)
# Tabulate the raw ballot codes recorded for the 2016 presidential general.
table(df.voter$`10 11/08/2016 2016 presidential general election 2299`)
##
## A N V
## 6537 5189 4043
# general_2016 is 1 when the recorded code is "A" or "V" and 0 otherwise.
# %in% returns FALSE for NA, so voters with no record for this election get 0
# directly; no up-front initialisation or NA back-fill is needed.
df.voter$general_2016 <- ifelse(
  df.voter$`10 11/08/2016 2016 presidential general election 2299` %in% c("A", "V"),
  1, 0
)
# Check the resulting 0/1 split.
table(df.voter$general_2016)
##
## 0 1
## 8050 10580
Primary Election 2016 (June 7th, 2016)
# Tabulate the raw ballot codes recorded for the 2016 presidential primary.
# Here each code carries a party suffix in parentheses, e.g. "A(DEM)".
table(df.voter$`12 06/07/2016 2016 presidential primary election 1221`)
##
## A(AI) A(DEM) A(GRN) A(LIB) A(NPP) A(NXD) A(PF) A(REP) N N(AI)
## 70 1422 6 16 361 115 3 988 4877 142
## N(DEM) N(GRN) N(LIB) N(NPP) N(NXD) N(PF) N(REP) V(AI) V(DEM) V(GRN)
## 1185 13 27 1212 27 22 1126 44 888 4
## V(LIB) V(NPP) V(NXD) V(OTH) V(PF) V(REP)
## 9 373 10 4 1 564
# primary_2016 is 1 when the code starts with "A" or "V" and 0 otherwise,
# i.e. the same A/V rule used for the other elections, applied to the first
# letter so the party suffix is ignored. This replaces a hand-written list of
# every "N..." code, which would have counted any unlisted non-voter code as
# a vote. substr() of NA is NA and %in% returns FALSE for NA, so voters with
# no record get 0 without a separate NA back-fill.
df.voter$primary_2016 <- ifelse(
  substr(df.voter$`12 06/07/2016 2016 presidential primary election 1221`, 1, 1) %in% c("A", "V"),
  1, 0
)
# Check the resulting 0/1 split.
table(df.voter$primary_2016)
##
## 0 1
## 13752 4878
Consolidated Election 2015 (Nov 3rd, 2015)
# Tabulate the raw ballot codes recorded for the 2015 consolidated election.
table(df.voter$`14 11/03/2015 2015 consolidated election 139`)
##
## A N V
## 390 4679 61
# consolidated_2015 is 1 when the recorded code is "A" or "V" and 0 otherwise.
# This uses the same positive A/V rule as the 2017 and 2018 dummies rather
# than an inverted test on "N"; for the codes tabulated above (A, N, V) the
# result is unchanged. %in% returns FALSE for NA, so voters with no record
# get 0 directly and no initialisation or NA back-fill is needed.
df.voter$consolidated_2015 <- ifelse(
  df.voter$`14 11/03/2015 2015 consolidated election 139` %in% c("A", "V"),
  1, 0
)
# Check the resulting 0/1 split.
table(df.voter$consolidated_2015)
##
## 0 1
## 18179 451
Generate Universes
Universe_1: Highest Propensity
Criteria used to generate universe:
- Voted in 2018 Primary
- Voted in 2017 Consolidated
- Voted in 2016 General
- Voted in 2016 Primary
- Voted in 2015 Consolidated
# Universe 1: voters whose dummy is 1 for every one of the five elections.
# Comma-separated conditions inside filter() are combined with AND.
universe_1 <- df.voter %>%
  filter(
    primary_2018 == 1,
    consolidated_2017 == 1,
    general_2016 == 1,
    primary_2016 == 1,
    consolidated_2015 == 1
  ) %>%
  # Keep only the identification, address, and contact columns for export.
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )
# Report the size of the universe.
cat("There are", nrow(universe_1), "voters in Universe 1, who have voted in each of the above elections.")
## There are 88 voters in Universe 1, who have voted in each of the above elections.
Universe_2: High Propensity
Criteria used to generate universe:
Same as Universe 1, but without the two consolidated elections (2017 and 2015).
- Voted in 2018 Primary
- Voted in 2016 General
- Voted in 2016 Primary
# Universe 2: voters whose dummy is 1 for the 2018 primary, the 2016 general,
# and the 2016 primary (the consolidated elections are not required).
universe_2 <- df.voter %>%
  filter(primary_2018 == 1 & general_2016 == 1 & primary_2016 == 1) %>%
  # Keep only the identification, address, and contact columns for export.
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )
# Report the size of the universe. The message previously said "Universe 1"
# even though it counts universe_2.
cat("There are", nrow(universe_2), "voters in Universe 2, who have voted in each of the above elections.")
## There are 2764 voters in Universe 2, who have voted in each of the above elections.
Universe_3: Mid Propensity
Criteria used to generate universe:
Voted in at least 1 of the past elections below.
- Voted in 2018 Primary
- Voted in 2017 Consolidated
- Voted in 2016 General
- Voted in 2016 Primary
- Voted in 2015 Consolidated
# Universe 3: voters whose dummy is 1 for at least one of the five elections.
universe_3 <- df.voter %>%
  filter(
    primary_2018 == 1 | consolidated_2017 == 1 |
      general_2016 == 1 | primary_2016 == 1 |
      consolidated_2015 == 1
  ) %>%
  # Keep only the identification, address, and contact columns for export.
  select(
    voter_id, status, name_first, name_last, name_middle,
    house_number, street, type, apartment_number, city, state, zip,
    mail_street, mail_city, mail_state, mail_zip,
    precinct, precinct_name, party, reg_date,
    phone_1, email, PAV, birth_place, birth_date, gender
  )
# Report the size of the universe. The message previously said "Universe 1"
# even though it counts universe_3.
cat("There are", nrow(universe_3), "voters in Universe 3, who have voted in at least one of the above elections.")
## There are 11300 voters in Universe 3, who have voted in at least one of the above elections.
Export Universes
# Write each universe to its own CSV file, without the row-name column.
write.csv(
  x = universe_1,
  file = "~/Documents/SBVWD_universe_1.csv",
  row.names = FALSE
)
write.csv(
  x = universe_2,
  file = "~/Documents/SBVWD_universe_2.csv",
  row.names = FALSE
)
write.csv(
  x = universe_3,
  file = "~/Documents/SBVWD_universe_3.csv",
  row.names = FALSE
)