Load the libraries and load the data

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
setwd("/Users/shoshigilg/Desktop/DATA 110/BOE")
votes <- read_csv("BOE_-_Precinct_Voter_Counts2.csv")
## Rows: 4470 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Year/Month
## dbl (10): Precinct, TOTAL, DEM, REP, GRN, LIB, WCP, BAR, UNA, OTHERS
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
votes$Precinct <- as.factor(votes$Precinct)
districts <- read_csv("District_Precinct.csv")
## Rows: 274 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): district, precinct
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
districts$district <- as.factor(districts$district)
districts$precinct <- as.factor(districts$precinct)
str(votes)
## spec_tbl_df [4,470 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Year/Month: chr [1:4470] "22-Jan" "22-Jan" "22-Jan" "22-Jan" ...
##  $ Precinct  : Factor w/ 270 levels "1001","1002",..: 95 90 147 228 172 179 42 154 35 78 ...
##  $ TOTAL     : num [1:4470] 1900 4363 2258 2277 3708 ...
##  $ DEM       : num [1:4470] 1074 2293 1294 1612 2289 ...
##  $ REP       : num [1:4470] 318 726 302 239 456 47 713 133 335 489 ...
##  $ GRN       : num [1:4470] 1 5 3 3 9 2 7 3 3 4 ...
##  $ LIB       : num [1:4470] 3 20 14 6 4 1 11 5 6 8 ...
##  $ WCP       : num [1:4470] 0 1 5 1 1 0 1 0 4 3 ...
##  $ BAR       : num [1:4470] 0 0 0 0 0 0 0 0 0 0 ...
##  $ UNA       : num [1:4470] 488 1288 613 405 899 ...
##  $ OTHERS    : num [1:4470] 16 30 27 11 50 6 44 18 16 33 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Year/Month` = col_character(),
##   ..   Precinct = col_double(),
##   ..   TOTAL = col_double(),
##   ..   DEM = col_double(),
##   ..   REP = col_double(),
##   ..   GRN = col_double(),
##   ..   LIB = col_double(),
##   ..   WCP = col_double(),
##   ..   BAR = col_double(),
##   ..   UNA = col_double(),
##   ..   OTHERS = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
str(districts)
## spec_tbl_df [274 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ district: Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ precinct: Factor w/ 272 levels "1001","1002",..: 1 2 3 4 5 6 7 8 9 11 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   district = col_double(),
##   ..   precinct = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Clean the data and select only certain variables

names(votes) <- tolower(names(votes))
names(votes) <- gsub("/", "_", names(votes))
votes2 <- votes %>%
  select("year_month", "precinct", "total", "dem", "rep")
head(votes2)
## # A tibble: 6 × 5
##   year_month precinct total   dem   rep
##   <chr>      <fct>    <dbl> <dbl> <dbl>
## 1 22-Jan     6013      1900  1074   318
## 2 22-Jan     6007      4363  2293   726
## 3 22-Jan     9003      2258  1294   302
## 4 22-Jan     13024     2277  1612   239
## 5 22-Jan     9031      3708  2289   456
## 6 22-Jan     9038       446   309    47

Adding ‘other’ as a category of vote

votes2$other <- votes2$total - votes2$dem - votes2$rep
votes2
## # A tibble: 4,470 × 6
##    year_month precinct total   dem   rep other
##    <chr>      <fct>    <dbl> <dbl> <dbl> <dbl>
##  1 22-Jan     6013      1900  1074   318   508
##  2 22-Jan     6007      4363  2293   726  1344
##  3 22-Jan     9003      2258  1294   302   662
##  4 22-Jan     13024     2277  1612   239   426
##  5 22-Jan     9031      3708  2289   456   963
##  6 22-Jan     9038       446   309    47    90
##  7 22-Jan     4020      4068  2236   713  1119
##  8 22-Jan     9010       864   491   133   240
##  9 22-Jan     4013      2186  1352   335   499
## 10 22-Jan     5017      3719  2431   489   799
## # … with 4,460 more rows

Remove ‘total’ from votes2 data frame

votes2 <- votes2 %>%
  select("year_month", "precinct", "dem", "rep", "other")
head(votes2)
## # A tibble: 6 × 5
##   year_month precinct   dem   rep other
##   <chr>      <fct>    <dbl> <dbl> <dbl>
## 1 22-Jan     6013      1074   318   508
## 2 22-Jan     6007      2293   726  1344
## 3 22-Jan     9003      1294   302   662
## 4 22-Jan     13024     1612   239   426
## 5 22-Jan     9031      2289   456   963
## 6 22-Jan     9038       309    47    90

Fix the dates

library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
votes3 <- votes2 %>%
  mutate(date = as.Date(as.yearmon(year_month, format = "%b-%y")))
head(votes3)
## # A tibble: 6 × 6
##   year_month precinct   dem   rep other date  
##   <chr>      <fct>    <dbl> <dbl> <dbl> <date>
## 1 22-Jan     6013      1074   318   508 NA    
## 2 22-Jan     6007      2293   726  1344 NA    
## 3 22-Jan     9003      1294   302   662 NA    
## 4 22-Jan     13024     1612   239   426 NA    
## 5 22-Jan     9031      2289   456   963 NA    
## 6 22-Jan     9038       309    47    90 NA

Convert from wide to long

votes_long <- votes3 %>%
  pivot_longer(
    cols = 3:5, 
    names_to = c("party"),
    values_to = "total_votes"
  )
head(votes_long)
## # A tibble: 6 × 5
##   year_month precinct date   party total_votes
##   <chr>      <fct>    <date> <chr>       <dbl>
## 1 22-Jan     6013     NA     dem          1074
## 2 22-Jan     6013     NA     rep           318
## 3 22-Jan     6013     NA     other         508
## 4 22-Jan     6007     NA     dem          2293
## 5 22-Jan     6007     NA     rep           726
## 6 22-Jan     6007     NA     other        1344

calculate total votes by district

votes_sum <- votes4 %>%
group_by(district, party) %>%
summarise(votes = sum(total_votes)) %>%
arrange(desc(votes)) ## votes_sum is a new variable created
## `summarise()` has grouped output by 'district'. You can override using the
## `.groups` argument.
votes_sum
## # A tibble: 15 × 3
## # Groups:   district [5]
##    district party   votes
##    <fct>    <chr>   <dbl>
##  1 3        dem   1583279
##  2 5        dem   1436627
##  3 4        dem   1270021
##  4 2        dem   1247568
##  5 1        dem   1168441
##  6 2        other  593177
##  7 3        other  566873
##  8 1        other  510283
##  9 5        other  498672
## 10 1        rep    421914
## 11 3        rep    402717
## 12 5        rep    395452
## 13 2        rep    373526
## 14 4        other  360770
## 15 4        rep    197545
# Barplot
p <- ggplot(data=votes_sum, aes(x=district, y=votes, fill=party)) +
  geom_bar(stat="identity", position=position_dodge())
print(p + scale_fill_manual(values=c("blue", "purple", "red")) + 
        ggtitle("All Votes Cast in Montgomery County Board of Education Elections \nBy District and Party \nDecember 2020 to May 2022") + 
         labs(y = "Votes Cast", x = "District"))

The Essay

For Project 1, I choose Montgomery Counties Board of Education (BOE) Precinct Voter Counts. This dataset is from data.montgomerycountymd.gov and contains political parties updated monthly, with the most recent update being June 14, 2022. Variables included the precincts with Districts 1-5, total vote counts, and eight different political parties. I had to create an additional CSV file to correlate precincts with districts and altered the original BOE dataset to remove the leading zeroes on the precincts. I organized the precinct votes into districts using command str. Naming the votes, I cleaned up the data by only selecting and piping certain variables such as year_month, precinct, total, democrat, and republican. Thus, creating votes2. Then I created an additional variable category for other parties. I removed the ‘total’ from the votes2 data frame and organized the year_month, precinct, dem, rep, and other. Fixing the dates with zoo and lubridate packages, mutating the date = as.Date(as.yearmon(year_month, format = %b-%y" to format the dates. Creating the new dataset votes3 consolidates dem, rep, and other into a column called ‘party,’ which contains the party values—converted from wide to long, organizing each party’s total votes named votes_long. Using left_join votes4 dataset brought votes_long, districts organized by precinct. Summarizing the votes and calculating total votes by district, votes_sum was a new variable created. 
I chose a grouped bar chart to represent all three parties dem for democrat, other for neither, and rep for republican in each of five Montgomery County BOE districts. Democrats are represented with blue, the other parties are purple, and the red is republican. The most interesting pattern is throughout all five districts, democrats are the majority, and Republicans are the minority, didn’t exceed the third parties or write-in. District 3, Bethesda and Potomac, has the highest vote count of all districts.
I could not figure out the legend title and labels to update. I wanted to replace abbreviations for parties with their full names ex. Democrat. When I tried to do so, R took away any other formatting. I had some technical issues that I could resolve, but overall, it was much cleaning to make the graph I wanted.