# Load the FEC independent-expenditure records; the first row of the file
# supplies the column names.
fec <- read.csv(file = "fec_independent_expenditures.csv", header = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.1
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyr)
# Reload the raw records (same file as the first statement of the script) so
# that `fec` holds the unmodified data before the filtering below.
fec <- read.csv(file = "fec_independent_expenditures.csv", header = TRUE)
library(dplyr)
library(tidyverse)
library(tidyr)
# Total opposition spending per candidate.
#
# Keeps rows flagged "O" (oppose) for office "P" with report_year of 2013 or
# later, drops candidate P80002801 and rows with a blank or missing
# candidate_id, then sums expenditure_amount within each candidate_id.
# Missing amounts are ignored by the sum (na.rm = TRUE), so no separate
# NA-removal step is needed afterwards.
opposition <- fec %>%
  filter(
    support_oppose_indicator == "O",
    # Compare with a number, not the string "2013": comparing a numeric
    # column with a string makes R compare both sides as character strings.
    report_year >= 2013,
    candidate_office == "P",
    # `%in%` never yields NA, so drop missing ids explicitly to keep the
    # behaviour of the former `!=` tests (which discarded NA rows).
    !is.na(candidate_id),
    !candidate_id %in% c("P80002801", "")
  ) %>%
  group_by(candidate_id) %>%
  summarise(expenditure_amount = sum(expenditure_amount, na.rm = TRUE))
view(opposition)
# Total supportive spending per candidate.
#
# Keeps rows flagged "S" (support) for office "P" with report_year of 2013 or
# later, drops the candidate ids listed below and rows with a blank or
# missing candidate_id, then sums expenditure_amount within each
# candidate_id. Missing amounts are ignored by the sum (na.rm = TRUE).
support <- fec %>%
  filter(
    support_oppose_indicator == "S",
    # Compare with a number, not the string "2013": comparing a numeric
    # column with a string makes R compare both sides as character strings.
    report_year >= 2013,
    candidate_office == "P",
    # `%in%` never yields NA, so drop missing ids explicitly to keep the
    # behaviour of the former `!=` tests (which discarded NA rows).
    !is.na(candidate_id),
    !candidate_id %in% c(
      "P00547984",
      "P20002721",
      "P60007671",
      "P60007895",
      "P60009354",
      "P60019239",
      "P60021102",
      "P60022118",
      "P60023215",
      "P80003353",
      ""
    )
  ) %>%
  group_by(candidate_id) %>%
  summarise(expenditure_amount = sum(expenditure_amount, na.rm = TRUE))
view(support)
# Combine the per-candidate support and opposition totals into one table
# with columns candidate_id, opp_amt, sup_amt (this overwrites the raw `fec`).
#
# The tables are matched on candidate_id rather than glued side by side:
# cbind() pairs rows purely by position, so any difference in the set or
# order of candidates between `support` and `opposition` would silently
# attach one candidate's support total to another's opposition total.
# inner_join() keeps only candidates present in both tables, in the row
# order of `support`.
fec <- support %>%
  rename(sup_amt = expenditure_amount) %>%
  inner_join(
    rename(opposition, opp_amt = expenditure_amount),
    by = "candidate_id"
  ) %>%
  select(candidate_id, opp_amt, sup_amt) %>%
  as.data.frame() # keep the plain data.frame class that cbind() produced
str(fec)
## 'data.frame': 19 obs. of 3 variables:
## $ candidate_id: chr "P00003392" "P20002671" "P20003281" "P40003576" ...
## $ opp_amt : num 7.75e+07 5.84e+01 5.01e+03 1.79e+04 6.43e+06 ...
## $ sup_amt : num 39339667 3397835 1998276 9122842 16675731 ...
# Explanatory (x) and response (y) vectors used by the regression code below.
x <- fec$sup_amt
y <- fec$opp_amt

# Scatter plot of opposition vs. supportive spending with one least-squares
# line through all candidates.
# The colour mapping lives on geom_point() only: placed in the global aes(),
# a discrete colour variable also defines the groups for geom_smooth(), which
# would then try to fit a separate line per candidate (one point each)
# instead of a single overall line.
ggplot(fec, aes(x = sup_amt, y = opp_amt)) +
  geom_point(aes(color = candidate_id)) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
For this part, the numeric explanatory variable is the Supportive Expenditure Amount, stored as “sup_amt”, and the numeric response variable is the Opposition Expenditure Amount, stored as “opp_amt”.
# Least-squares estimates computed by hand from the centred data:
#   slope     = sum((x - xbar) * (y - ybar)) / sum((x - xbar)^2)
#   intercept = ybar - slope * xbar
x_dev <- x - mean(x)
y_dev <- y - mean(y)
x2 <- sum(x_dev * y_dev) # cross-product of deviations
x3 <- sum(x_dev^2)       # sum of squared x deviations
beta_1 <- x2 / x3
beta_0 <- mean(y) - beta_1 * mean(x)
beta_1
## [1] 0.9537725
beta_0
## [1] -4679743
# Fit the same simple linear regression with lm() and print its coefficients
# to check the hand-computed intercept and slope.
mod <- lm(y ~ x)
coef(mod) # Verifying using lm()
## (Intercept) x
## -4.679743e+06 9.537725e-01
# Two-sided t-test of H0: slope = 0, computed by hand from the fitted model.
n <- nrow(fec)
beta_1 <- coef(mod)[2]
ss_res <- sum(residuals(mod)^2)  # residual sum of squares
ms_res <- ss_res / (n - 2)       # residual mean square
se_b1 <- sqrt(ms_res) / sqrt(sum((x - mean(x))^2))  # standard error of slope
# Standardised slope (estimate minus the null value 0, over its standard
# error); it is compared against a t distribution with n - 2 degrees of
# freedom.
ref <- (beta_1 - 0) / se_b1
df <- n - 2 # degrees of freedom of that t distribution
df
## [1] 17
# The test statistic; numerically the same quantity as `ref` above.
t_stat <- beta_1 / se_b1
t_stat
## x
## 6.777593
# Two-sided (non-directional) p-value: twice the upper-tail probability of
# |t_stat|.
p_value <- 2 * pt(abs(t_stat), df = n - 2, lower.tail = FALSE)
p_value
## x
## 3.226803e-06
# Verifying using summary()
summary(mod)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -75146430 -2864893 1345945 3672020 44668373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.680e+06 6.098e+06 -0.767 0.453
## x 9.538e-01 1.407e-01 6.778 3.23e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23450000 on 17 degrees of freedom
## Multiple R-squared: 0.7299, Adjusted R-squared: 0.714
## F-statistic: 45.94 on 1 and 17 DF, p-value: 3.227e-06
We reject the null hypothesis that the slope is zero: the p-value of 3.226803e-06 is below the significance level of 0.05. There is convincing evidence that the amount of Supportive Expenditures for a given candidate is linearly associated with the amount of Opposition Expenditures against that candidate in the run-up to the 2016 presidential election. Because these are observational data, the test establishes an association rather than a causal effect.