Comparing Candidates

Here are the packages we will use

library(tidyverse)

## -- Attaching packages ------------------------------------------------------ tidyverse 1.2.1 --

## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0

## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(ggthemes)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

Here is the data from the FEC.

# Load King's itemized receipts from the FEC export.
# NOTE(review): readr guesses the type of every column here; the warnings
# printed below show `load_date` failing to parse -- confirm whether that
# column matters before relying on it.
King_Oct_18 <- read_csv(file = "King_Oct_18.csv")

## Warning: Duplicated column names deduplicated: 'committee_name' =>
## 'committee_name_1' [10]

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   report_year = col_integer(),
##   image_number = col_double(),
##   file_number = col_integer(),
##   contributor_zip = col_integer(),
##   contribution_receipt_date = col_date(format = ""),
##   contribution_receipt_amount = col_double(),
##   contributor_aggregate_ytd = col_double(),
##   fec_election_year = col_integer(),
##   load_date = col_time(format = ""),
##   link_id = col_double(),
##   two_year_transaction_period = col_integer(),
##   sub_id = col_double()
## )

## See spec(...) for full column specifications.

## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)

## Warning: 1136 parsing failures.
## row # A tibble: 5 x 5 col     row col       expected   actual  file              expected   <int> <chr>     <chr>      <chr>   <chr>             actual 1    45 load_date valid date 34:38.5 'King_Oct_18.csv' file 2    46 load_date valid date 34:38.5 'King_Oct_18.csv' row 3    47 load_date valid date 34:39.5 'King_Oct_18.csv' col 4    48 load_date valid date 34:43.1 'King_Oct_18.csv' expected 5    49 load_date valid date 34:46.0 'King_Oct_18.csv'
## ... ................. ... ...................................................... ........ ...................................................... ...... ...................................................... .... ...................................................... ... ...................................................... ... ...................................................... ........ ......................................................
## See problems(...) for more details.

# Load Liuba's itemized receipts from the FEC export.
# NOTE(review): column types are guessed by readr; the warnings printed
# below show `load_date` failing to parse -- confirm whether that column
# matters before relying on it.
Liuba_Oct_18 <- read_csv(file = "Liuba_Oct_18.csv")

## Warning: Duplicated column names deduplicated: 'committee_name' =>
## 'committee_name_1' [10]

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   report_year = col_integer(),
##   image_number = col_double(),
##   file_number = col_integer(),
##   contributor_zip = col_integer(),
##   contribution_receipt_date = col_date(format = ""),
##   contribution_receipt_amount = col_double(),
##   contributor_aggregate_ytd = col_double(),
##   fec_election_year = col_integer(),
##   load_date = col_time(format = ""),
##   link_id = col_double(),
##   two_year_transaction_period = col_integer(),
##   sub_id = col_double()
## )
## See spec(...) for full column specifications.

## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)

## Warning: 789 parsing failures.
## row # A tibble: 5 x 5 col     row col       expected   actual  file               expected   <int> <chr>     <chr>      <chr>   <chr>              actual 1   116 load_date valid date 39:45.1 'Liuba_Oct_18.csv' file 2   117 load_date valid date 39:45.1 'Liuba_Oct_18.csv' row 3   118 load_date valid date 39:45.1 'Liuba_Oct_18.csv' col 4   119 load_date valid date 39:45.1 'Liuba_Oct_18.csv' expected 5   120 load_date valid date 39:45.1 'Liuba_Oct_18.csv'
## ... ................. ... ....................................................... ........ ....................................................... ...... ....................................................... .... ....................................................... ... ....................................................... ... ....................................................... ........ .......................................................
## See problems(...) for more details.

Let us combine the 2 data sets

# Stack both candidates' receipts into a single table (King's rows first).
Combined_Candidates <- rbind(
  King_Oct_18,
  Liuba_Oct_18
)

Convert Candidate_Last to a factor.

# Store the candidate's last name as a factor rather than a character column.
Combined_Candidates[["Candidate_Last"]] <-
  as.factor(Combined_Candidates[["Candidate_Last"]])

# Preview the first rows of the combined table.
Combined_Candidates %>% head()

## # A tibble: 6 x 77
##   committee_id committee_name Candidate_Last report_year report_type
##   <chr>        <chr>          <fct>                <int> <chr>      
## 1 C00373563    KING FOR CONG~ King                  2017 YE         
## 2 C00373563    KING FOR CONG~ King                  2017 YE         
## 3 C00373563    KING FOR CONG~ King                  2017 YE         
## 4 C00373563    KING FOR CONG~ King                  2017 YE         
## 5 C00373563    KING FOR CONG~ King                  2017 YE         
## 6 C00373563    KING FOR CONG~ King                  2017 YE         
## # ... with 72 more variables: image_number <dbl>, line_number <chr>,
## #   transaction_id <chr>, file_number <int>, committee_name_1 <chr>,
## #   entity_type <chr>, entity_type_desc <chr>, unused_contbr_id <chr>,
## #   contributor_prefix <chr>, contributor_name <chr>,
## #   contributor_first_name <chr>, contributor_middle_name <chr>,
## #   contributor_last_name <chr>, contributor_suffix <chr>,
## #   contributor_street_1 <chr>, contributor_street_2 <chr>,
## #   contributor_city <chr>, contributor_state <chr>,
## #   contributor_zip <int>, contributor_employer <chr>,
## #   contributor_occupation <chr>, contributor_id <chr>,
## #   receipt_type <chr>, receipt_type_desc <chr>, receipt_type_full <chr>,
## #   memo_code <chr>, memo_code_full <chr>,
## #   contribution_receipt_date <date>, contribution_receipt_amount <dbl>,
## #   contributor_aggregate_ytd <dbl>, candidate_id <chr>,
## #   candidate_name <chr>, candidate_first_name <chr>,
## #   candidate_last_name <chr>, candidate_middle_name <chr>,
## #   candidate_prefix <chr>, candidate_suffix <chr>,
## #   candidate_office <chr>, candidate_office_full <chr>,
## #   candidate_office_state <chr>, candidate_office_state_full <chr>,
## #   candidate_office_district <chr>, conduit_committee_id <chr>,
## #   conduit_committee_name <chr>, conduit_committee_street1 <chr>,
## #   conduit_committee_street2 <chr>, conduit_committee_city <chr>,
## #   conduit_committee_state <chr>, conduit_committee_zip <chr>,
## #   donor_committee_name <chr>,
## #   national_committee_nonfederal_account <chr>, election_type <chr>,
## #   election_type_full <chr>, fec_election_type_desc <chr>,
## #   fec_election_year <int>, amendment_indicator <chr>,
## #   amendment_indicator_desc <chr>, schedule_type_full <chr>,
## #   load_date <time>, original_sub_id <chr>,
## #   back_reference_transaction_id <chr>,
## #   back_reference_schedule_name <chr>, filing_form <chr>, link_id <dbl>,
## #   is_individual <chr>, memo_text <chr>,
## #   two_year_transaction_period <int>, schedule_type <chr>,
## #   increased_limit <chr>, sub_id <dbl>, pdf_url <chr>,
## #   line_number_label <chr>

To color the points by name they will need to be factors.

Let's summarize the contributions to each candidate by month.

# Monthly receipt totals for Liuba: each receipt date is floored to the
# first day of its month, then the amounts are summed within each month.
L_monthly <- Liuba_Oct_18 %>%
  group_by(month = floor_date(contribution_receipt_date, unit = "month")) %>%
  summarise(contribution_receipt_amount = sum(contribution_receipt_amount))

L_monthly

## # A tibble: 10 x 2
##    month      contribution_receipt_amount
##    <date>                           <dbl>
##  1 2017-10-01                       8605 
##  2 2017-11-01                      10375 
##  3 2017-12-01                      40647 
##  4 2018-01-01                      28317 
##  5 2018-02-01                      39962.
##  6 2018-03-01                      70358.
##  7 2018-04-01                      36276.
##  8 2018-05-01                      49149.
##  9 2018-06-01                     112122.
## 10 2018-09-01                      47500

We need to add the candidate’s name

# Append the candidate's name as an extra, unnamed element and convert the
# result back to a data frame; the new column gets an auto-generated name.
L_monthly <- L_monthly %>%
  append("Liuba") %>%
  as.data.frame()

L_monthly

##         month contribution_receipt_amount X.Liuba.
## 1  2017-10-01                     8605.00    Liuba
## 2  2017-11-01                    10375.00    Liuba
## 3  2017-12-01                    40647.00    Liuba
## 4  2018-01-01                    28317.00    Liuba
## 5  2018-02-01                    39962.50    Liuba
## 6  2018-03-01                    70357.68    Liuba
## 7  2018-04-01                    36276.19    Liuba
## 8  2018-05-01                    49149.25    Liuba
## 9  2018-06-01                   112121.81    Liuba
## 10 2018-09-01                    47500.00    Liuba

Let us give that new column a better name.

Note that R generated the column name `X.Liuba.`, with a leading "X." and a trailing "." around the name.

# rename() takes new_name = old_name.
L_monthly <- L_monthly %>%
  rename(name = X.Liuba.)

L_monthly

##         month contribution_receipt_amount  name
## 1  2017-10-01                     8605.00 Liuba
## 2  2017-11-01                    10375.00 Liuba
## 3  2017-12-01                    40647.00 Liuba
## 4  2018-01-01                    28317.00 Liuba
## 5  2018-02-01                    39962.50 Liuba
## 6  2018-03-01                    70357.68 Liuba
## 7  2018-04-01                    36276.19 Liuba
## 8  2018-05-01                    49149.25 Liuba
## 9  2018-06-01                   112121.81 Liuba
## 10 2018-09-01                    47500.00 Liuba

Now all the same for the other candidate

# Monthly receipt totals for King, tagged with the candidate's name.
# Each receipt date is floored to the first day of its month and the amounts
# are summed per month. The result is converted to a plain data frame, the
# same class L_monthly has.
K_monthly <- King_Oct_18 %>%
  group_by(month = floor_date(contribution_receipt_date, "month")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  as.data.frame()

# Add the name column directly instead of appending an unnamed element and
# then renaming the auto-generated `X.King.` column, which depended on how
# as.data.frame() mangles names.
K_monthly$name <- "King"

Now let's do the same, but weekly.

# Weekly receipt totals per candidate, tagged with the candidate's name.
# Each receipt date is floored to the start of its week and the amounts are
# summed per week. The name column is assigned directly rather than via
# append() + rename(), which depended on the auto-generated `X.<name>.`
# column name produced by as.data.frame().
L_weekly <- Liuba_Oct_18 %>%
  group_by(week = floor_date(contribution_receipt_date, "week")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  as.data.frame()
L_weekly$name <- "Liuba"

K_weekly <- King_Oct_18 %>%
  group_by(week = floor_date(contribution_receipt_date, "week")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  as.data.frame()
K_weekly$name <- "King"

Let us take a look at every single contribution

# Scatter of every individual contribution over time, coloured by candidate.
# - Columns are referenced by bare name inside aes(); `df$col` bypasses
#   ggplot2's data handling.
# - geom_jitter() already draws the points, so a separate geom_point() layer
#   would plot every contribution twice; the transparency is applied to the
#   jittered layer instead.
# - ylim(0, 5000) drops points outside that range (ggplot2 reports them as
#   removed rows).
Scattered_contributions <- ggplot(
  Combined_Candidates,
  aes(
    x = contribution_receipt_date,
    y = contribution_receipt_amount,
    color = Candidate_Last
  )
) +
  geom_jitter(alpha = 0.4) +
  ylim(0, 5000) +
  # theme_economist() is a complete theme: it must come before the theme()
  # tweaks, otherwise it overwrites them and the axis titles reappear.
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

Scattered_contributions

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 7 rows containing missing values (geom_point).

Now we will plot the monthly hauls.

# Stack the two monthly tables (Liuba's rows first) for the combined views.
Monthly_together <- rbind(L_monthly, K_monthly)

# Monthly receipts as one line per candidate. The line colours are fixed
# constants (not mapped aesthetics), so no colour scale is added: it would
# have nothing to act on.
Monthly_contributions_1 <- ggplot() +
  geom_line(
    data = L_monthly,
    aes(month, contribution_receipt_amount),
    color = "blue"
  ) +
  geom_line(
    data = K_monthly,
    aes(month, contribution_receipt_amount),
    color = "red"
  ) +
  scale_x_date(date_labels = "%b %y", date_breaks = "1 month") +
  ggtitle("Liuba v King Monthly Receipts") +
  labs(subtitle = "Democrat Blue, Republican Red") +
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
Monthly_contributions_1

head(Monthly_together, 5)

##        month contribution_receipt_amount  name
## 1 2017-10-01                      8605.0 Liuba
## 2 2017-11-01                     10375.0 Liuba
## 3 2017-12-01                     40647.0 Liuba
## 4 2018-01-01                     28317.0 Liuba
## 5 2018-02-01                     39962.5 Liuba

A little chaotic. Perhaps a bar graph would be better here.

# Monthly receipts as side-by-side bars, one bar per candidate and month.
# - Columns are referenced by bare name inside aes() rather than `df$col`.
# - The fill colours are set explicitly so they always agree with the
#   subtitle; a colour scale does not affect `fill`, and the default fill
#   palette depends on the ordering of `name`.
Monthly_contributions_Bar <- ggplot(
  data = Monthly_together,
  aes(x = month, y = contribution_receipt_amount, fill = name)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_date(date_labels = "%b %y", date_breaks = "1 month") +
  scale_fill_manual(values = c(Liuba = "blue", King = "red")) +
  ggtitle("Liuba v King Monthly Receipts") +
  labs(subtitle = "Democrat Blue, Republican Red") +
  theme_economist() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

Monthly_contributions_Bar

Next we will plot weekly hauls

# Weekly receipts as one line per candidate; line colours are fixed
# constants (blue for Liuba, red for King). The title and subtitle
# spellings are corrected.
Weekly_contributions_1 <- ggplot() +
  geom_line(
    data = L_weekly,
    aes(week, contribution_receipt_amount),
    color = "blue"
  ) +
  geom_line(
    data = K_weekly,
    aes(week, contribution_receipt_amount),
    color = "red"
  ) +
  scale_x_date(date_labels = "%b %y", date_breaks = "1 month") +
  ggtitle("Liuba v King Weekly Receipts") +
  labs(subtitle = "Democrat Blue, Republican Red") +
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
Weekly_contributions_1

And what about quarterly, since this is how the data is publicized?

K_weekly<- King_Oct_18 %>% group_by(week=floor_date(contribution_receipt_date, “week”)) %>% summarize(contribution_receipt_amount=sum(contribution_receipt_amount))

# Both candidates combined: the monthly totals are rolled up into
# three-month periods by flooring each month to the start of its period.
quarterly <- Monthly_together %>%
  group_by(quarter = floor_date(month, unit = "3 months")) %>%
  summarise(contribution_receipt_amount = sum(contribution_receipt_amount))

quarterly

## # A tibble: 7 x 2
##   quarter    contribution_receipt_amount
##   <date>                           <dbl>
## 1 2017-01-01                      67335 
## 2 2017-04-01                      55855 
## 3 2017-07-01                     105082 
## 4 2017-10-01                     158552 
## 5 2018-01-01                     340324.
## 6 2018-04-01                     427362.
## 7 2018-07-01                     118501.

# King's receipts summed per three-month period, computed from the
# individual receipt dates.
King_quarterly <- King_Oct_18 %>%
  group_by(
    quarter = floor_date(contribution_receipt_date, unit = "3 months")
  ) %>%
  summarise(contribution_receipt_amount = sum(contribution_receipt_amount))

head(King_quarterly, n = 5)

## # A tibble: 5 x 2
##   quarter    contribution_receipt_amount
##   <date>                           <dbl>
## 1 2017-01-01                       67335
## 2 2017-04-01                       55855
## 3 2017-07-01                      105082
## 4 2017-10-01                       98925
## 5 2018-01-01                      201687

Now add the candidate's name.

# Tag every row with the candidate's name (the single value is recycled).
King_quarterly$name <- "King"

King_quarterly %>% head()

## # A tibble: 6 x 3
##   quarter    contribution_receipt_amount name 
##   <date>                           <dbl> <chr>
## 1 2017-01-01                       67335 King 
## 2 2017-04-01                       55855 King 
## 3 2017-07-01                      105082 King 
## 4 2017-10-01                       98925 King 
## 5 2018-01-01                      201687 King 
## 6 2018-04-01                      229815 King

Now same for the other candidate

# Liuba's receipts summed per three-month period, computed from the
# individual receipt dates.
Liuba_quarterly <- Liuba_Oct_18 %>%
  group_by(
    quarter = floor_date(contribution_receipt_date, unit = "3 months")
  ) %>%
  summarise(contribution_receipt_amount = sum(contribution_receipt_amount))

#head(Liuba_quarterly)

# Tag every row with the candidate's name (the single value is recycled).
Liuba_quarterly$name <- "Liuba"

Liuba_quarterly %>% head()

## # A tibble: 4 x 3
##   quarter    contribution_receipt_amount name 
##   <date>                           <dbl> <chr>
## 1 2017-10-01                      59627  Liuba
## 2 2018-01-01                     138637. Liuba
## 3 2018-04-01                     197547. Liuba
## 4 2018-07-01                      47500  Liuba

Next we will stack the candidates' quarterly results.

# Stack both candidates' quarterly totals (King's rows first).
Candidates_Quarterly <- rbind(King_quarterly, Liuba_quarterly)

# Quarterly receipts as side-by-side bars, one bar per candidate and quarter.
# - The title now says "Quarterly"; it previously repeated the monthly title.
# - Columns are referenced by bare name inside aes() rather than `df$col`.
# - Fill colours are set explicitly so they agree with the subtitle; a
#   colour scale has no effect on `fill`.
Quarterly_contributions_Bar <- ggplot(
  data = Candidates_Quarterly,
  aes(x = quarter, y = contribution_receipt_amount, fill = name)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_date(date_labels = "%b %y", date_breaks = "1 month") +
  scale_fill_manual(values = c(Liuba = "blue", King = "red")) +
  ggtitle("Liuba v King Quarterly Receipts") +
  labs(subtitle = "Democrat Blue, Republican Red") +
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

Quarterly_contributions_Bar

Now let us see if there was a difference between the occupations of the people who gave to each candidate.

# Show the combined table for reference.
Combined_Candidates

# Number of itemized contributions per contributor occupation for King.
# The grouping column is named after the expression text
# (`King_Oct_18$contributor_occupation`); the plotting code further down
# refers to it by that name, so the expression is kept as-is.
King_Occupations <- count(King_Oct_18, King_Oct_18$contributor_occupation)

King_Occupations

## # A tibble: 236 x 2
##    `King_Oct_18$contributor_occupation`     n
##    <chr>                                <int>
##  1 ACCOUNT MANAGER                          1
##  2 ACCOUNTANT                               3
##  3 ADVANCED UROLOGY                         1
##  4 AGRONOMIAST                              3
##  5 ANALYIST                                 1
##  6 ANALYST                                  1
##  7 ARCHITECT                                1
##  8 ASSOCIATE                                1
##  9 ATTORNEY                                42
## 10 AUTHOR                                   4
## # ... with 226 more rows

Now let's select the top 10 most common occupations.

# Keep the 10 occupations with the highest counts. No ranking column is
# given, so top_n() picks one itself and reports it ("Selecting by n").
King_top_10_jobs <- top_n(King_Occupations, 10)

## Selecting by n

# Display the most frequent occupations among King's contributors.
King_top_10_jobs

## # A tibble: 10 x 2
##    `King_Oct_18$contributor_occupation`     n
##    <chr>                                <int>
##  1 ATTORNEY                                42
##  2 BROKER                                 111
##  3 CEO                                     45
##  4 CHAIRMAN                                12
##  5 FARMER                                  53
##  6 HOMEMAKER                               71
##  7 OWNER                                   21
##  8 PRESIDENT                               57
##  9 RETIRED                                467
## 10 SELF                                    26

# Number of itemized contributions per contributor occupation for Liuba.
# The grouping column keeps the expression text as its name
# (`Liuba_Oct_18$contributor_occupation`), which the plotting code relies on.
Liuba_Occupations <- count(Liuba_Oct_18, Liuba_Oct_18$contributor_occupation)

# Keep the 10 occupations with the highest counts.
Liuba_top_10_jobs <- top_n(Liuba_Occupations, 10)

## Selecting by n

Next we will plot them.

# Bar chart of the most common occupations among King's contributors.
# - Columns are referenced by name inside aes() (back-ticked because the
#   occupation column's name contains `$`) instead of via `df$col`.
# - The stray empty argument in element_text() is removed.
# - Bars use a constant fill, so no colour scale is added.
King_job_Bar <- ggplot(
  data = King_top_10_jobs,
  aes(x = `King_Oct_18$contributor_occupation`, y = n)
) +
  geom_bar(stat = "identity", fill = "red") +
  ggtitle("Most Common Occupations") +
  labs(subtitle = "King contributors ") +
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

King_job_Bar

# Bar chart of the most common occupations among Liuba's contributors.
# - Columns are referenced by name inside aes() (back-ticked because the
#   occupation column's name contains `$`) instead of via `df$col`.
# - The stray empty argument in element_text() is removed.
# - Bars use a constant fill, so no colour scale is added.
Liuba_job_Bar <- ggplot(
  data = Liuba_top_10_jobs,
  aes(x = `Liuba_Oct_18$contributor_occupation`, y = n)
) +
  geom_bar(stat = "identity", fill = "blue") +
  ggtitle("Most Common Occupations") +
  labs(subtitle = "Liuba contributors ") +
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

Liuba_job_Bar

# Place the two occupation charts side by side (Liuba left, King right).
A <- grid.arrange(
  Liuba_job_Bar,
  King_job_Bar,
  ncol = 2
)

## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]