library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the registered-voter extract. The data is split across eight files
# (Part1 .. Part8); only Part1 is read below. Un-comment the matching line to
# load another part.
#
# A loop of the form `paste("CO", i, sep = ".") <- read.csv(...)` does not
# work, because the result of paste() is not a valid assignment target.
# A loop would have to go through assign() instead, e.g.:
# for (i in 1:8) {
#   assign(paste0("CO.", i),
#          read.csv(paste0("Registered_Voters_List_ Part", i, ".txt")))
# }
CO.1 <- read.csv(file = "Registered_Voters_List_ Part1.txt")
# CO.2 <- read.csv(file = "Registered_Voters_List_ Part2.txt")
# CO.3 <- read.csv(file = "Registered_Voters_List_ Part3.txt")
# CO.4 <- read.csv(file = "Registered_Voters_List_ Part4.txt")
# CO.5 <- read.csv(file = "Registered_Voters_List_ Part5.txt")
# CO.6 <- read.csv(file = "Registered_Voters_List_ Part6.txt")
# CO.7 <- read.csv(file = "Registered_Voters_List_ Part7.txt")
# CO.8 <- read.csv(file = "Registered_Voters_List_ Part8.txt")
# For the purpose of this markdown, only one .txt file was loaded.
# Gather whichever CO.<i> parts are currently loaded. mget() yields NULL for a
# part that has not been read in, so this also works when only some of the
# eight files were loaded (referencing a missing CO.<i> directly would fail
# with "object not found").
parts <- mget(paste0("CO.", 1:8), ifnotfound = list(NULL), inherits = TRUE)
parts <- Filter(Negate(is.null), parts)

# Show, for each loaded part, whether its column names match CO.1's exactly;
# the parts are stacked row-wise below, so their layouts need to agree.
vapply(
  parts,
  function(part) identical(colnames(part), colnames(CO.1)),
  logical(1)
)

# Stack all loaded parts into one data frame. unname() drops the list names so
# rbind() does not prefix the row names with "CO.<i>.".
CO.all <- do.call(rbind, unname(parts))

# List the available columns.
colnames(CO.1)
## [1] "VOTER_ID" "COUNTY_CODE"
## [3] "COUNTY" "LAST_NAME"
## [5] "FIRST_NAME" "MIDDLE_NAME"
## [7] "NAME_SUFFIX" "VOTER_NAME"
## [9] "STATUS_CODE" "PRECINCT_NAME"
## [11] "ADDRESS_LIBRARY_ID" "HOUSE_NUM"
## [13] "HOUSE_SUFFIX" "PRE_DIR"
## [15] "STREET_NAME" "STREET_TYPE"
## [17] "POST_DIR" "UNIT_TYPE"
## [19] "UNIT_NUM" "RESIDENTIAL_ADDRESS"
## [21] "RESIDENTIAL_CITY" "RESIDENTIAL_STATE"
## [23] "RESIDENTIAL_ZIP_CODE" "RESIDENTIAL_ZIP_PLUS"
## [25] "EFFECTIVE_DATE" "REGISTRATION_DATE"
## [27] "STATUS" "STATUS_REASON"
## [29] "BIRTH_YEAR" "GENDER"
## [31] "PRECINCT" "SPLIT"
## [33] "VOTER_STATUS_ID" "PARTY"
## [35] "PREFERENCE" "PARTY_AFFILIATION_DATE"
## [37] "PHONE_NUM" "MAIL_ADDR1"
## [39] "MAIL_ADDR2" "MAIL_ADDR3"
## [41] "MAILING_CITY" "MAILING_STATE"
## [43] "MAILING_ZIP_CODE" "MAILING_ZIP_PLUS"
## [45] "MAILING_COUNTRY" "SPL_ID"
## [47] "PERMANENT_MAIL_IN_VOTER" "CONGRESSIONAL"
## [49] "STATE_SENATE" "STATE_HOUSE"
## [51] "ID_REQUIRED"
# Add AGE, computed as 2019 minus BIRTH_YEAR (2019 is the reference year).
# BIRTH_YEAR goes through as.character() first: if read.csv() delivered the
# column as a factor, as.numeric() alone would return the internal level codes
# instead of the years. For a column already holding integer years the extra
# conversion leaves the values unchanged.
CO.1 <- mutate(CO.1, AGE = 2019 - as.numeric(as.character(BIRTH_YEAR)))
# Number of voters for every PARTY / AGE combination; count() stores the
# tally in column `n`.
age.party <- count(group_by(CO.1, PARTY), AGE)

# Rows of the tally belonging to the DEM and REP party codes.
DEM <- age.party %>% filter(PARTY == "DEM")
REP <- age.party %>% filter(PARTY == "REP")
# Bar charts of voter counts by age. stat = "identity" makes the bar height
# the pre-computed count `n` rather than a row count.

# DEM voters only.
ggplot(data = DEM, mapping = aes(x = AGE, y = n)) +
  geom_bar(stat = "identity")

# REP voters only.
ggplot(data = REP, mapping = aes(x = AGE, y = n)) +
  geom_bar(stat = "identity")

# Every party in one chart.
ggplot(data = age.party, mapping = aes(x = AGE, y = n)) +
  geom_bar(stat = "identity")
# Frequency of each registration STATUS value.
table(CO.1[["STATUS"]])
##
## Active Inactive
## 446999 53000
# Number of voters for every STATUS / AGE combination; count() stores the
# tally in column `n`.
age.status <- count(group_by(CO.1, STATUS), AGE)

# Rows of the tally for each of the two STATUS values.
Active <- age.status %>% filter(STATUS == "Active")
Inactive <- age.status %>% filter(STATUS == "Inactive")
# Bar charts of voter counts by age, one per registration status; bar height
# is the pre-computed count `n`.

# Active voters.
ggplot(data = Active, mapping = aes(x = AGE, y = n)) +
  geom_bar(stat = "identity")

# Inactive voters.
ggplot(data = Inactive, mapping = aes(x = AGE, y = n)) +
  geom_bar(stat = "identity")