library(tidyverse)
## -- Attaching packages ------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggthemes)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load King's itemized contribution records from the CSV export.
# NOTE(review): the guessed column spec parses load_date as a time and reports
# parsing failures for that column (see the warnings printed below); supply an
# explicit col_types if load_date is ever needed.
King_Oct_18 <- readr::read_csv(file = "King_Oct_18.csv")
## Warning: Duplicated column names deduplicated: 'committee_name' =>
## 'committee_name_1' [10]
## Parsed with column specification:
## cols(
## .default = col_character(),
## report_year = col_integer(),
## image_number = col_double(),
## file_number = col_integer(),
## contributor_zip = col_integer(),
## contribution_receipt_date = col_date(format = ""),
## contribution_receipt_amount = col_double(),
## contributor_aggregate_ytd = col_double(),
## fec_election_year = col_integer(),
## load_date = col_time(format = ""),
## link_id = col_double(),
## two_year_transaction_period = col_integer(),
## sub_id = col_double()
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 1136 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 45 load_date valid date 34:38.5 'King_Oct_18.csv' file 2 46 load_date valid date 34:38.5 'King_Oct_18.csv' row 3 47 load_date valid date 34:39.5 'King_Oct_18.csv' col 4 48 load_date valid date 34:43.1 'King_Oct_18.csv' expected 5 49 load_date valid date 34:46.0 'King_Oct_18.csv'
## ... ................. ... ...................................................... ........ ...................................................... ...... ...................................................... .... ...................................................... ... ...................................................... ... ...................................................... ........ ......................................................
## See problems(...) for more details.
# Load Liuba's itemized contribution records from the CSV export.
# NOTE(review): the guessed column spec parses load_date as a time and reports
# parsing failures for that column (see the warnings printed below); supply an
# explicit col_types if load_date is ever needed.
Liuba_Oct_18 <- readr::read_csv(file = "Liuba_Oct_18.csv")
## Warning: Duplicated column names deduplicated: 'committee_name' =>
## 'committee_name_1' [10]
## Parsed with column specification:
## cols(
## .default = col_character(),
## report_year = col_integer(),
## image_number = col_double(),
## file_number = col_integer(),
## contributor_zip = col_integer(),
## contribution_receipt_date = col_date(format = ""),
## contribution_receipt_amount = col_double(),
## contributor_aggregate_ytd = col_double(),
## fec_election_year = col_integer(),
## load_date = col_time(format = ""),
## link_id = col_double(),
## two_year_transaction_period = col_integer(),
## sub_id = col_double()
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 789 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 116 load_date valid date 39:45.1 'Liuba_Oct_18.csv' file 2 117 load_date valid date 39:45.1 'Liuba_Oct_18.csv' row 3 118 load_date valid date 39:45.1 'Liuba_Oct_18.csv' col 4 119 load_date valid date 39:45.1 'Liuba_Oct_18.csv' expected 5 120 load_date valid date 39:45.1 'Liuba_Oct_18.csv'
## ... ................. ... ....................................................... ........ ....................................................... ...... ....................................................... .... ....................................................... ... ....................................................... ... ....................................................... ........ .......................................................
## See problems(...) for more details.
# Stack both candidates' records into one table (King's rows first).
Combined_Candidates <- King_Oct_18 %>% rbind(Liuba_Oct_18)
Convert Candidate_Last to a factor
# Store the candidate label as a factor, then preview the first rows.
Combined_Candidates[["Candidate_Last"]] <- as.factor(
  Combined_Candidates[["Candidate_Last"]]
)
Combined_Candidates %>% head()
## # A tibble: 6 x 77
## committee_id committee_name Candidate_Last report_year report_type
## <chr> <chr> <fct> <int> <chr>
## 1 C00373563 KING FOR CONG~ King 2017 YE
## 2 C00373563 KING FOR CONG~ King 2017 YE
## 3 C00373563 KING FOR CONG~ King 2017 YE
## 4 C00373563 KING FOR CONG~ King 2017 YE
## 5 C00373563 KING FOR CONG~ King 2017 YE
## 6 C00373563 KING FOR CONG~ King 2017 YE
## # ... with 72 more variables: image_number <dbl>, line_number <chr>,
## # transaction_id <chr>, file_number <int>, committee_name_1 <chr>,
## # entity_type <chr>, entity_type_desc <chr>, unused_contbr_id <chr>,
## # contributor_prefix <chr>, contributor_name <chr>,
## # contributor_first_name <chr>, contributor_middle_name <chr>,
## # contributor_last_name <chr>, contributor_suffix <chr>,
## # contributor_street_1 <chr>, contributor_street_2 <chr>,
## # contributor_city <chr>, contributor_state <chr>,
## # contributor_zip <int>, contributor_employer <chr>,
## # contributor_occupation <chr>, contributor_id <chr>,
## # receipt_type <chr>, receipt_type_desc <chr>, receipt_type_full <chr>,
## # memo_code <chr>, memo_code_full <chr>,
## # contribution_receipt_date <date>, contribution_receipt_amount <dbl>,
## # contributor_aggregate_ytd <dbl>, candidate_id <chr>,
## # candidate_name <chr>, candidate_first_name <chr>,
## # candidate_last_name <chr>, candidate_middle_name <chr>,
## # candidate_prefix <chr>, candidate_suffix <chr>,
## # candidate_office <chr>, candidate_office_full <chr>,
## # candidate_office_state <chr>, candidate_office_state_full <chr>,
## # candidate_office_district <chr>, conduit_committee_id <chr>,
## # conduit_committee_name <chr>, conduit_committee_street1 <chr>,
## # conduit_committee_street2 <chr>, conduit_committee_city <chr>,
## # conduit_committee_state <chr>, conduit_committee_zip <chr>,
## # donor_committee_name <chr>,
## # national_committee_nonfederal_account <chr>, election_type <chr>,
## # election_type_full <chr>, fec_election_type_desc <chr>,
## # fec_election_year <int>, amendment_indicator <chr>,
## # amendment_indicator_desc <chr>, schedule_type_full <chr>,
## # load_date <time>, original_sub_id <chr>,
## # back_reference_transaction_id <chr>,
## # back_reference_schedule_name <chr>, filing_form <chr>, link_id <dbl>,
## # is_individual <chr>, memo_text <chr>,
## # two_year_transaction_period <int>, schedule_type <chr>,
## # increased_limit <chr>, sub_id <dbl>, pdf_url <chr>,
## # line_number_label <chr>
# Total Liuba's receipts per calendar month (each receipt date is floored to
# the first day of its month), then print the monthly totals.
L_monthly <- summarize(
  group_by(
    Liuba_Oct_18,
    month = floor_date(contribution_receipt_date, "month")
  ),
  contribution_receipt_amount = sum(contribution_receipt_amount)
)
L_monthly
## # A tibble: 10 x 2
## month contribution_receipt_amount
## <date> <dbl>
## 1 2017-10-01 8605
## 2 2017-11-01 10375
## 3 2017-12-01 40647
## 4 2018-01-01 28317
## 5 2018-02-01 39962.
## 6 2018-03-01 70358.
## 7 2018-04-01 36276.
## 8 2018-05-01 49149.
## 9 2018-06-01 112122.
## 10 2018-09-01 47500
# Tag every monthly total with the candidate's name so the two candidates can
# be stacked and told apart later. The column is created directly under its
# final name; appending an unnamed value and renaming the auto-generated
# `X.Liuba.` column afterwards depends on how as.data.frame() invents names.
# The result is converted to a plain data frame, as before.
L_monthly <- L_monthly %>%
  mutate(name = "Liuba") %>%
  as.data.frame()
L_monthly
## month contribution_receipt_amount name
## 1 2017-10-01 8605.00 Liuba
## 2 2017-11-01 10375.00 Liuba
## 3 2017-12-01 40647.00 Liuba
## 4 2018-01-01 28317.00 Liuba
## 5 2018-02-01 39962.50 Liuba
## 6 2018-03-01 70357.68 Liuba
## 7 2018-04-01 36276.19 Liuba
## 8 2018-05-01 49149.25 Liuba
## 9 2018-06-01 112121.81 Liuba
## 10 2018-09-01 47500.00 Liuba
# Total King's receipts per calendar month and tag each row with the
# candidate's name. The result is a plain data frame with the columns
# month, contribution_receipt_amount and name.
K_monthly <- King_Oct_18 %>%
  group_by(month = floor_date(contribution_receipt_date, "month")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  # Create the label column by name instead of appending an unnamed value
  # and renaming the auto-generated `X.King.` column.
  mutate(name = "King") %>%
  as.data.frame()
# Total Liuba's receipts per week (each receipt date is floored to the start
# of its week) and tag each row with the candidate's name. The result is a
# plain data frame with the columns week, contribution_receipt_amount and name.
L_weekly <- Liuba_Oct_18 %>%
  group_by(week = floor_date(contribution_receipt_date, "week")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  # Create the label column by name instead of appending an unnamed value
  # and renaming the auto-generated `X.Liuba.` column.
  mutate(name = "Liuba") %>%
  as.data.frame()
# Total King's receipts per week (each receipt date is floored to the start
# of its week) and tag each row with the candidate's name. The result is a
# plain data frame with the columns week, contribution_receipt_amount and name.
K_weekly <- King_Oct_18 %>%
  group_by(week = floor_date(contribution_receipt_date, "week")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  # Create the label column by name instead of appending an unnamed value
  # and renaming the auto-generated `X.King.` column.
  mutate(name = "King") %>%
  as.data.frame()
# Scatter of individual contributions over time, coloured by candidate.
Scattered_contributions <- ggplot(
  Combined_Candidates,
  # Refer to columns by bare name; `data$column` inside aes() bypasses the
  # plot's own data.
  aes(
    x = contribution_receipt_date,
    y = contribution_receipt_amount,
    color = Candidate_Last
  )
) +
  # A single jittered, semi-transparent layer. Adding geom_point() as well
  # would draw every contribution twice and hide the transparency.
  geom_jitter(alpha = 0.4) +
  # Only amounts from 0 to 5000 are shown; rows outside the limits are
  # removed with a warning.
  ylim(0, 5000) +
  theme_economist() +
  # These tweaks must follow theme_economist(): adding a complete theme
  # replaces any theme() settings made before it.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
Scattered_contributions
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
# Stack the two candidates' monthly totals (Liuba's rows first).
Monthly_together <- L_monthly %>% rbind(K_monthly)
# Monthly receipts as two lines, one per candidate. The colours are fixed
# (Liuba blue, King red) and explained by the subtitle rather than a legend.
Monthly_contributions_1 <- ggplot() +
  geom_line(
    data = L_monthly,
    mapping = aes(x = month, y = contribution_receipt_amount),
    color = "blue"
  ) +
  geom_line(
    data = K_monthly,
    mapping = aes(x = month, y = contribution_receipt_amount),
    color = "red"
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  labs(
    title = "Liuba v King Monthly Reciepts",
    subtitle = " Democrate Blue , Republican Red,"
  ) +
  theme_economist() +
  # Axis titles are hidden and the month labels are rotated 45 degrees.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  ) +
  scale_colour_economist()
Monthly_contributions_1
# Preview the stacked monthly table.
head(Monthly_together, n = 5)
## month contribution_receipt_amount name
## 1 2017-10-01 8605.0 Liuba
## 2 2017-11-01 10375.0 Liuba
## 3 2017-12-01 40647.0 Liuba
## 4 2018-01-01 28317.0 Liuba
## 5 2018-02-01 39962.5 Liuba
# Monthly receipts as side-by-side bars, one bar per candidate and month.
Monthly_contributions_Bar <- ggplot(
  data = Monthly_together,
  # Refer to columns by bare name; `data$column` inside aes() bypasses the
  # plot's own data.
  aes(x = month, y = contribution_receipt_amount, fill = name)
) +
  # geom_col() is geom_bar(stat = "identity"): bar height is the value itself.
  geom_col(position = "dodge") +
  scale_x_date(date_labels = "%b %y", date_breaks = "1 month") +
  ggtitle("Liuba v King Monthly Reciepts") +
  labs(subtitle = " Democrate Blue , Republican Red,") +
  theme_economist() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  # The bars are mapped to `fill`, so a colour scale has no effect on them.
  # Set the fill explicitly so the bars match the colours named in the
  # subtitle and used by the line charts.
  scale_fill_manual(values = c(King = "red", Liuba = "blue"))
Monthly_contributions_Bar
# Weekly receipts as two lines, one per candidate. The colours are fixed
# (Liuba blue, King red) and explained by the subtitle rather than a legend.
Weekly_contributions_1 <- ggplot() +
  geom_line(
    data = L_weekly,
    mapping = aes(x = week, y = contribution_receipt_amount),
    color = "blue"
  ) +
  geom_line(
    data = K_weekly,
    mapping = aes(x = week, y = contribution_receipt_amount),
    color = "red"
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  labs(
    title = "Liuba v King Weekly Reciepts",
    subtitle = " Democrate Blue , Republican Red,"
  ) +
  theme_economist() +
  # Axis titles are hidden and the date labels are rotated 45 degrees.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
Weekly_contributions_1
K_weekly<- King_Oct_18 %>% group_by(week=floor_date(contribution_receipt_date, "week")) %>% summarize(contribution_receipt_amount=sum(contribution_receipt_amount))
# Combined receipts of both candidates per quarter: each month is floored to
# the start of its 3-month period and the monthly totals are summed.
# The grouping column is referenced by bare name; `Monthly_together$month`
# inside group_by() would bypass the data being piped in.
quarterly <- Monthly_together %>%
  group_by(quarter = floor_date(month, "3 months")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount))
quarterly
## # A tibble: 7 x 2
## quarter contribution_receipt_amount
## <date> <dbl>
## 1 2017-01-01 67335
## 2 2017-04-01 55855
## 3 2017-07-01 105082
## 4 2017-10-01 158552
## 5 2018-01-01 340324.
## 6 2018-04-01 427362.
## 7 2018-07-01 118501.
# Total King's receipts per quarter (each receipt date is floored to the
# start of its 3-month period), then preview the first five quarters.
# The date column is referenced by bare name; `King_Oct_18$...` inside
# group_by() would bypass the data being piped in.
King_quarterly <- King_Oct_18 %>%
  group_by(quarter = floor_date(contribution_receipt_date, "3 months")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount))
head(King_quarterly, 5)
## # A tibble: 5 x 2
## quarter contribution_receipt_amount
## <date> <dbl>
## 1 2017-01-01 67335
## 2 2017-04-01 55855
## 3 2017-07-01 105082
## 4 2017-10-01 98925
## 5 2018-01-01 201687
# Label every quarterly total with the candidate's name, then preview.
King_quarterly <- mutate(King_quarterly, name = "King")
King_quarterly %>% head()
## # A tibble: 6 x 3
## quarter contribution_receipt_amount name
## <date> <dbl> <chr>
## 1 2017-01-01 67335 King
## 2 2017-04-01 55855 King
## 3 2017-07-01 105082 King
## 4 2017-10-01 98925 King
## 5 2018-01-01 201687 King
## 6 2018-04-01 229815 King
# Total Liuba's receipts per quarter (each receipt date is floored to the
# start of its 3-month period), label every row with the candidate's name,
# then preview the result.
# The date column is referenced by bare name; `Liuba_Oct_18$...` inside
# group_by() would bypass the data being piped in.
Liuba_quarterly <- Liuba_Oct_18 %>%
  group_by(quarter = floor_date(contribution_receipt_date, "3 months")) %>%
  summarize(contribution_receipt_amount = sum(contribution_receipt_amount)) %>%
  mutate(name = "Liuba")
head(Liuba_quarterly)
## # A tibble: 4 x 3
## quarter contribution_receipt_amount name
## <date> <dbl> <chr>
## 1 2017-10-01 59627 Liuba
## 2 2018-01-01 138637. Liuba
## 3 2018-04-01 197547. Liuba
## 4 2018-07-01 47500 Liuba
# Stack both candidates' quarterly totals (King's rows first).
Candidates_Quarterly <- rbind(King_quarterly, Liuba_quarterly)
# Quarterly receipts as side-by-side bars, one bar per candidate and quarter.
Quarterly_contributions_Bar <- ggplot(
  data = Candidates_Quarterly,
  # Refer to columns by bare name; `data$column` inside aes() bypasses the
  # plot's own data.
  aes(x = quarter, y = contribution_receipt_amount, fill = name)
) +
  # geom_col() is geom_bar(stat = "identity"): bar height is the value itself.
  geom_col(position = "dodge") +
  # The data has one bar group per quarter, so label every third month
  # instead of every month.
  scale_x_date(date_labels = "%b %y", date_breaks = "3 months") +
  # The title names the quarterly aggregation shown (it used to say "Monthly").
  ggtitle("Liuba v King Quarterly Reciepts") +
  labs(subtitle = " Democrate Blue , Republican Red,") +
  theme_economist() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  ) +
  # The bars are mapped to `fill`, so a colour scale has no effect on them.
  # Set the fill explicitly so the bars match the colours named in the
  # subtitle.
  scale_fill_manual(values = c(King = "red", Liuba = "blue"))
Quarterly_contributions_Bar
Combined_Candidates
# Count King's contributions per contributor occupation.
# NOTE: the grouping expression is deliberately kept as
# `King_Oct_18$contributor_occupation`: it becomes the literal name of the
# first result column, and the occupation chart refers to it by that name.
King_Occupations <- count(King_Oct_18, King_Oct_18$contributor_occupation)
King_Occupations
## # A tibble: 236 x 2
## `King_Oct_18$contributor_occupation` n
## <chr> <int>
## 1 ACCOUNT MANAGER 1
## 2 ACCOUNTANT 3
## 3 ADVANCED UROLOGY 1
## 4 AGRONOMIAST 3
## 5 ANALYIST 1
## 6 ANALYST 1
## 7 ARCHITECT 1
## 8 ASSOCIATE 1
## 9 ATTORNEY 42
## 10 AUTHOR 4
## # ... with 226 more rows
# Keep the occupations with the ten largest counts. No ranking column is
# given, so top_n() selects by the last column, n (see the message below).
King_top_10_jobs <- top_n(King_Occupations, 10)
## Selecting by n
King_top_10_jobs
## # A tibble: 10 x 2
## `King_Oct_18$contributor_occupation` n
## <chr> <int>
## 1 ATTORNEY 42
## 2 BROKER 111
## 3 CEO 45
## 4 CHAIRMAN 12
## 5 FARMER 53
## 6 HOMEMAKER 71
## 7 OWNER 21
## 8 PRESIDENT 57
## 9 RETIRED 467
## 10 SELF 26
# Count Liuba's contributions per contributor occupation.
# NOTE: the grouping expression is deliberately kept as
# `Liuba_Oct_18$contributor_occupation`: it becomes the literal name of the
# first result column, and the occupation chart refers to it by that name.
Liuba_Occupations <- count(Liuba_Oct_18, Liuba_Oct_18$contributor_occupation)
# Keep the occupations with the ten largest counts. No ranking column is
# given, so top_n() selects by the last column, n.
Liuba_top_10_jobs <- top_n(Liuba_Occupations, 10)
## Selecting by n
# Bar chart of the occupations kept in King_top_10_jobs: one red bar per
# occupation, with the number of contributions as its height.
King_job_Bar <- ggplot(
  data = King_top_10_jobs,
  # Columns are referenced by bare name. The backticks are required because
  # the occupation column is literally named
  # `King_Oct_18$contributor_occupation`.
  aes(x = `King_Oct_18$contributor_occupation`, y = n)
) +
  # geom_col() is geom_bar(stat = "identity"): bar height is the value itself.
  geom_col(fill = "red") +
  ggtitle("Most Common Occupations") +
  labs(subtitle = "King contributors ") +
  theme_economist() +
  # Axis titles are hidden and the occupation labels are rotated 45 degrees.
  # (A stray empty argument in element_text() has been removed.)
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
King_job_Bar
# Bar chart of the occupations kept in Liuba_top_10_jobs: one blue bar per
# occupation, with the number of contributions as its height.
Liuba_job_Bar <- ggplot(
  data = Liuba_top_10_jobs,
  # Columns are referenced by bare name. The backticks are required because
  # the occupation column is literally named
  # `Liuba_Oct_18$contributor_occupation`.
  aes(x = `Liuba_Oct_18$contributor_occupation`, y = n)
) +
  # geom_col() is geom_bar(stat = "identity"): bar height is the value itself.
  geom_col(fill = "blue") +
  ggtitle("Most Common Occupations") +
  labs(subtitle = "Liuba contributors ") +
  theme_economist() +
  # Axis titles are hidden and the occupation labels are rotated 45 degrees.
  # (A stray empty argument in element_text() has been removed.)
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
Liuba_job_Bar
# Place the two occupation charts side by side (Liuba left, King right).
# The value returned by grid.arrange() is kept in A and printed.
A <- grid.arrange(grobs = list(Liuba_job_Bar, King_job_Bar), ncol = 2)
A
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]