# load data
if (!require("stringr")) install.packages('stringr')
## Loading required package: stringr
if (!require("data.table")) install.packages('data.table')
## Loading required package: data.table
if (!require("dplyr")) install.packages('dplyr')
## Loading required package: dplyr
## -------------------------------------------------------------------------
## data.table + dplyr code now lives in dtplyr.
## Please library(dtplyr)!
## -------------------------------------------------------------------------
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
if(!require("ggplot2")) install.packages('ggplot2')
## Loading required package: ggplot2
library(dplyr)
library(data.table)
library(stringr)
library(ggplot2)
You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.
In this project we are going to use the Federal Election Commission data set to research the campaign contributions and expenditures for the 2016 election versus the 2012 election.
We are going to analyze the data of four major-party candidates, two Democrats and two Republicans. We would like to see how the Democrats and Republicans spent in both election cycles.
Further, we would like to analyze how the Political Action Committees (PACs) spent their money in 2012 and 2016, and to investigate how much the PACs spent for or against particular candidates.
We would also like to investigate how the campaigns spent their money in different states, and to present the state-wise expenditure as a heat map (choropleth), for example using plot_ly.
What are the cases, and how many are there?
The current data set has a very large number of observations (the Clinton contributions file alone has 3,506,081 rows). After data munging, the included elements will be Contributions, Expenditures, and PAC Expenditures.
Describe the method of data collection.
The data is available on the public website of the Federal Election Commission. Since this is an open data source, we will download the data from the site for our research.
What type of study is this (observational/experiment)?
This is an observational study.
If you collected the data, state self-collected. If not, provide a citation/link.
The source of the data for this research is the Federal Election Commission website.
What is the response variable, and what type is it (numerical/categorical)?
Response variables will be numerical.
Average Contributions.
Monthly Expenditures.
State wise expenditures.
PAC Expenditures.
On further analysis of the data, we will come up with a few more variables.
What is the explanatory variable, and what type is it (numerical/categorical)?
Explanatory variables will be categorical.
PAC expenditure: whether it was positive or negative (for or against a candidate).
Candidates Campaign.
On further analysis of the data, we will come up with a few more variables.
Provide summary statistics relevant to your research question. For example, if you're comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.
# For now this only takes a brief look at contributions; the expenses are yet
# to be analysed.
# Load the HRC contributions file.
# The header line names 18 fields while the data rows hold 19 columns, so the
# header cannot be used as column names. Skip it explicitly instead of relying
# on fread() to discard it with a warning; columns keep the default names
# V1..V19 (V4 = contributor name, V10 = receipt amount, V11 = receipt date).
hrccontributions <- fread("HRC_Cont.csv", header = FALSE, skip = 1)
## Warning in fread("HRC_Cont.csv"): Starting data input
## on line 2 and discarding line 1 because it has too
## few or too many items to be column names or data:
## cmte_id,cand_id,cand_nm,contbr_nm,contbr_city,contbr_st,contbr_zip,contbr_employer,contbr_occupation,contb_receipt_amt,contb_receipt_dt,receipt_desc,memo_cd,memo_text,form_tp,file_num,tran_id,election_tp
##
Read 8.0% of 3506081 rows
Read 19.4% of 3506081 rows
Read 27.1% of 3506081 rows
Read 35.7% of 3506081 rows
Read 45.3% of 3506081 rows
Read 57.9% of 3506081 rows
Read 72.2% of 3506081 rows
Read 85.9% of 3506081 rows
Read 90.1% of 3506081 rows
Read 3506081 rows and 19 (of 19) columns from 0.621 GB file in 00:00:14
# Turn the table returned by fread() into a plain data.frame.
hrccontributions <- data.frame(hrccontributions)
# Mean contribution amount (column V10) for HRC.
mean(hrccontributions[["V10"]])
## [1] 147.0197
summary(hrccontributions[["V10"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -20000 15 25 147 100 12780000
# Load the DJT contributions file.
# As with the HRC file, the header line names 18 fields and cannot serve as
# column names for the data rows, so skip it explicitly rather than letting
# fread() discard it with a warning; columns keep the default names V1, V2, ...
djtcontributions <- fread("DJT_Cont.csv", header = FALSE, skip = 1)
## Warning in fread("DJT_Cont.csv"): Starting data input
## on line 2 and discarding line 1 because it has too
## few or too many items to be column names or data:
## cmte_id,cand_id,cand_nm,contbr_nm,contbr_city,contbr_st,contbr_zip,contbr_employer,contbr_occupation,contb_receipt_amt,contb_receipt_dt,receipt_desc,memo_cd,memo_text,form_tp,file_num,tran_id,election_tp
# Turn the table returned by fread() into a plain data.frame.
djtcontributions <- data.frame(djtcontributions)
# Mean contribution amount (column V10) for DJT.
mean(djtcontributions[["V10"]])
## [1] 158.8418
summary(djtcontributions[["V10"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -84240.0 28.0 64.0 158.8 160.0 86940.0
# HRC Campaign.
# Keep the contributions whose amount (V10) ranks within the 100 largest.
# NOTE: despite its name, `top_fifty` is built with a rank cut-off of 100, and
# only the 20 largest rows are printed below.
top_fifty <- hrccontributions %>%
  filter(rank(desc(V10)) <= 100)
# Drop the unitemized Hillary Victory Fund rows (V4 is the contributor name)
# so that only individually named contributors remain.
top_fifty <- top_fifty %>%
  filter(V4 != "HILLARY VICTORY FUND - UNITEMIZED")
# The 20 largest remaining contributions to the HRC campaign, biggest first.
# Uses dplyr::arrange (already attached) rather than plyr, which this script
# never installs or loads.
top_fifty %>%
  arrange(desc(V10)) %>%
  head(n = 20) %>%
  knitr::kable()
V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
C00575795 | P00003392 | Clinton, Hillary Rodham | GOCKE, THOMAS | JUPITER | FL | 334691584 | SELF-EMPLOYED | PHYSICIAN | 20000 | 29-JUN-15 | REFUNDED ON 6/30/15 | SA17A | 1024052 | C330478 | P2016 | NA | ||
C00575795 | P00003392 | Clinton, Hillary Rodham | YOUNG, SAMUEL J. | TEHACHAPI | CA | 935618652 | SELF-EMPLOYED | REAL ESTATE INVESTOR | 10000 | 22-JUN-16 | X | *BEST EFFORTS UPDATE | SA17A | 1109498 | C6027078 | P2016 | NA | |
C00575795 | P00003392 | Clinton, Hillary Rodham | YOUNG, SAMUEL J. | TEHACHAPI | CA | 935618652 | INFORMATION REQUESTED | INFORMATION REQUESTED | 7300 | 22-JUN-16 | SA17A | 1099613 | C6027078 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | HILLARY ACTION FUND - UNITEMIZED | NEW YORK | NY | 101855256 | 5620 | 29-SEP-16 | X | * | SA18 | 1137625 | CZ11554379 | NA | ||||
C00575795 | P00003392 | Clinton, Hillary Rodham | PROPPER, GREG | LOS ANGELES | CA | 900691429 | PROPPER DALEY | PHILANTHROPIC CONSULTING | 5400 | 28-JUN-16 | $2,760 REFUNDED ON 7/12/2016 | SA17A | 1099613 | C6267712 | G2016 | NA | ||
C00575795 | P00003392 | Clinton, Hillary Rodham | PICKER, MICHAEL | SACRAMENTO | CA | 958413111 | STATE OF CALIFORNIA | COMMISSIONER | 5400 | 22-NOV-15 | SA17A | 1081052 | C1586465 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | AUSTIN, ALAN | ATHERTON | CA | 940275458 | N/A | RETIRED | 5400 | 02-AUG-16 | SA17A | 1126762 | C8219422 | G2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | FOX, ALAN | STUDIO CITY | CA | 916042407 | ACF PROPERTY MANAGEMENT | PRESIDENT | 5400 | 22-MAR-16 | SA17A | 1081062 | C3547182 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | BEAUBIEN, JAMES | SANTA MONICA | CA | 904023024 | LATHAM & WATKINS LLP | ATTORNEY | 5400 | 28-APR-15 | SA17A | 1024052 | C189756 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | CARROLL, DANIEL ASHTON | SAN FRANCISCO | CA | 941151125 | TPG CAPITAL | INVESTMENT MANAGER | 5400 | 11-MAR-16 | SA17A | 1081062 | C3328130 | G2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | HOLZMAN, WINNIE | BURBANK | CA | 915054005 | SELF-EMPLOYED | WRITER | 5400 | 11-JUN-15 | SA17A | 1024052 | C222358 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | ITKIN, MARK | LOS ANGELES | CA | 900691128 | WILLIAM MORRIS ENDEAVOR | TALENT AGENT | 5400 | 29-APR-15 | SA17A | 1024052 | C83505 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | FENG, JIONG | CLAREMONT | CA | 917116500 | W. CALIFORNIA ART ACADEMY | TEACHER | 5400 | 04-MAY-16 | SA17A | 1091720 | C4820919 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | WALLACE, RICHARD | SHERMAN OAKS | CA | 914032915 | WARNER BROS TV | WARNER BROTHERS TV | 5400 | 29-APR-15 | SA17A | 1024052 | C83819 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | HANNA, MONA Z. | VILLA PARK | CA | 928615322 | MICHELMAN & ROBINSON, LLP | ATTORNEY | 5400 | 27-JUN-16 | $2700 REFUNDED ON 7/8/2016 | SA17A | 1099613 | C6216307 | G2016 | NA | ||
C00575795 | P00003392 | Clinton, Hillary Rodham | BARSZCZ, MICHAEL | WINTER PARK | FL | 327893347 | SELF-EMPLOYED | ATTORNEY | 5400 | 27-MAY-15 | SA17A | 1024052 | C167847 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | GHAZVINI, MEHRAN | TALLAHASSEE | FL | 323081547 | PREMIER HEALTH CLINIC & REHAB OF TALLA | DOCTOR OF CHIRPORACTIC | 5400 | 26-MAY-15 | SA17A | 1024052 | C167272 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | FINK KOHL, BINA | WESTON | FL | 333262726 | SELF-EMPLOYED | PUBLIC RELATIONS | 5400 | 31-MAY-15 | SA17A | 1024052 | C183235 | P2016 | NA | |||
C00575795 | P00003392 | Clinton, Hillary Rodham | SCHRAGIE, GOLDBLATT | MIAMI BEACH | FL | 331403429 | PROMED MANAGEMENT, INC. | PRESIDENT | 5400 | 09-APR-16 | X | *BEST EFFORTS UPDATE | SA17A | 1091720 | C4101594 | P2016 | NA | |
C00575795 | P00003392 | Clinton, Hillary Rodham | TOLL, ROBERT | MIAMI BEACH | FL | 331404226 | TOLL BROTHERS, INC | EXECUTIVE CHAIRMAN | 5400 | 17-JUN-15 | X | *BEST EFFORTS UPDATE | SA17A | 1081046 | C255920 | P2016 | NA |
# DJT Campaign.
# Keep the contributions whose amount (V10) ranks within the 100 largest.
# NOTE: despite its name, `top_fifty_djt` is built with a rank cut-off of 100,
# and only the 20 largest rows are printed below.
top_fifty_djt <- djtcontributions %>%
  filter(rank(desc(V10)) <= 100)
# The 20 largest contributions to the DJT campaign, biggest first.
# Uses dplyr::arrange (already attached) rather than plyr, which this script
# never installs or loads.
top_fifty_djt %>%
  arrange(desc(V10)) %>%
  head(n = 20) %>%
  knitr::kable()
V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
C00580100 | P80001571 | Trump, Donald J. | BOCH, ERNIE | NORWOOD | MA | 02062 | BOCH AUTOMOTIVE GROUP | EXECUTIVE | 86936.80 | 28-AUG-15 | SA17A | 1036338 | SA17A.7395 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | FERRERO, LOUIS P MR. | CANTON | GA | 30115 | INFORMATION REQUESTED | INFORMATION REQUESTED | 12500.00 | 21-JUL-16 | SA17A | 1104813 | SA17A.1625219 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | CONSERVATIVE ACTION FUND | ALEXANDRIA | VA | 22314 | 10030.24 | 04-DEC-15 | X | SA17A | 1047287 | SA17A.250744 | P2016 | NA | ||||
C00580100 | P80001571 | Trump, Donald J. | COBB, ROBERT | BIRMINGHAM | AL | 35209 | COBB THEATERS | OWNER | 10000.00 | 02-NOV-16 | SA17A | 1133930 | SA17A.2681043 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | DOBSKI, ROBERT | BLOOMINGTON | IL | 61704 | INFORMATION REQUESTED | INFORMATION REQUESTED | 10000.00 | 03-NOV-16 | SA17A | 1133930 | SA17A.2681307 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | GORMAN, L.D. MR. | HAZARD | KY | 41702 | INFORMATION REQUESTED | INFORMATION REQUESTED | 10000.00 | 21-JUL-16 | SA17A | 1104813 | SA17A.1625209 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | ROVT, ALEXANDER MR. | BROOKLYN | NY | 11234 | INFORMATION REQUESTED | INFORMATION REQUESTED | 10000.00 | 08-NOV-16 | SA17A | 1133930 | SA17A.2823364 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | GIGANTE, PETER | BELLINGHAM | WA | 98225 | SELF-EMPLOYED | INTERNATIONAL TRADE | 10000.00 | 04-AUG-16 | SA17A | 1135871 | SA17A.2178108 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | TANZER, LEONARD J MR. | SCARSDALE | NY | 10583 | PATIENT CARE ASSOCIATES | PRESIDENT | 7300.00 | 07-NOV-16 | SA17A | 1133930 | SA17A.2823366 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | NORTHCUTT, JOHN D MR. III | FAIRHOPE | AL | 36532 | INFORMATION REQUESTED | INFORMATION REQUESTED | 5400.00 | 21-JUL-16 | SA17A | 1104813 | SA17A.1625049 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | HARBERT, NORMAN C MR. | SOTTSDALE | AZ | 85262 | RETIRED | RETIRED | 5400.00 | 09-SEP-16 | SA17A | 1135946 | SA17A.2427439 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | DAY, TIMOTHY T MR. | PHOENIX | AZ | 85018 | INFORMATION REQUESTED | INFORMATION REQUESTED | 5400.00 | 28-JUL-16 | SA17A | 1104813 | SA17A.1625478 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | STERN, MARC MR. | MALIBU | CA | 90265 | THE TCW GROUP, INC | CHAIRMAN | 5400.00 | 12-JUL-16 | SA17A | 1104813 | SA17A.1625121 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | LITTLEFAIR, ANDREW | NEW PORT BEACH | CA | 92663 | CLEAN ENERGY FUELD | PRESIDENT | 5400.00 | 20-OCT-16 | SA17A | 1133930 | SA17A.2678905 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | SCHUMANN, ROBERT | MANHATTAN BEACH | CA | 90266 | REAL ESTATE WEST INC. | BROKER | 5400.00 | 03-NOV-16 | SA17A | 1133930 | SA17A.2681427 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | LISENKO, ROSE H MS. | SAN DIEGO | CA | 92107 | INFORMATION REQUESTED | INFORMATION REQUESTED | 5400.00 | 24-AUG-16 | SA17A | 1135871 | SA17A.2179772 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | NEE, ALICE MS. | NEWPORT COAST | CA | 92657 | INFORMATION REQUESTED | INFORMATION REQUESTED | 5400.00 | 24-AUG-16 | SA17A | 1135871 | SA17A.2180747 | G2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | MIZEL, CAROL MS. | DENVER | CO | 80237 | HOMEMAKER | HOMEMAKER | 5400.00 | 11-SEP-15 | SA17A | 1036338 | SA17A.95497 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | MIZEL, LARRY A MR. | DENVER | CO | 80237 | M.D.C HOLDINGS | INVESTMENT | 5400.00 | 11-SEP-15 | SA17A | 1036338 | SA17A.95501 | P2016 | NA | |||
C00580100 | P80001571 | Trump, Donald J. | TRAVIS, CHARLOTTE MS. | BRIGHTON | CO | 80601 | INFORMATION REQUESTED | INFORMATION REQUESTED | 5400.00 | 04-AUG-16 | SA17A | 1135871 | SA17A.2182781 | G2016 | NA |
# Reduce the receipt date in V11 (e.g. "29-JUN-15") to its month abbreviation
# by stripping every digit and dash in a single pass over the column.
hrccontributions$V11 <- str_replace_all(
  hrccontributions$V11, "[[:digit:]\\-]", ""
)
# Plot HRC contribution amounts (V10) by month (V11), one line per month.
# Columns are referenced by bare name inside aes() so ggplot2 looks them up in
# the `data` argument; the colour legend is therefore titled "V10".
# NOTE(review): no aggregation is done here, so the y axis shows individual
# receipt amounts even though the label reads "Total Contributions" - confirm
# whether monthly sums were intended.
ggplot(hrccontributions, aes(x = V11, y = V10, group = V11, colour = V10)) +
  geom_line() +
  ylab("Total Contributions") +
  xlab("Month")
# Reduce the receipt date in V11 (e.g. "28-AUG-15") to its month abbreviation
# by stripping every digit and dash in a single pass over the column.
djtcontributions$V11 <- str_replace_all(
  djtcontributions$V11, "[[:digit:]\\-]", ""
)
# Plot DJT contribution amounts (V10) by month (V11), one line per month.
# Columns are referenced by bare name inside aes() so ggplot2 looks them up in
# the `data` argument; the colour legend is therefore titled "V10".
# NOTE(review): no aggregation is done here, so the y axis shows individual
# receipt amounts even though the label reads "Total Contributions" - confirm
# whether monthly sums were intended.
ggplot(djtcontributions, aes(x = V11, y = V10, group = V11, colour = V10)) +
  geom_line() +
  ylab("Total Contributions") +
  xlab("Month")
Reference: Research analysis inspired by the Kaggle 2016 Election Analysis.