# Distribution of the 0/1 education indicator.
summary(assn[["binary_edu"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.471   1.000   1.000
# Distribution of the 0/1 trust indicator.
summary(assn[["binary_trust"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.4768  1.0000  1.0000
# Raw counts: education in rows, trust in columns.
table(assn[["binary_edu"]], assn[["binary_trust"]])
##    
##        0    1
##   0 1358 1095
##   1 1068 1116
# Both binaries are split roughly evenly. Those without a college education appear somewhat less likely to trust the government (1095 of 2453, about 44.6%) than those with a college degree (1116 of 2184, about 51.1%)


# Two-way table of education (rows) by trust (columns); reused by the
# CrossTable() call and the mosaic plot further down.
contingencyTable <- table(assn$binary_edu, assn$binary_trust)

# View() opens a data viewer, which only makes sense in an interactive
# session. Guarding it lets the script run unattended (Rscript, knitting)
# without trying to launch a viewer.
if (interactive()) {
  View(contingencyTable)
}
#same interpretation as above


# Cross-tabulate education against trust and capture CrossTable()'s printed
# report as a character vector (one element per printed line).
#
# `simulate.p.value = TRUE` was removed from this call. It switched the
# chi-square test to a Monte Carlo test with 2000 replicates, whose p-value
# can never fall below 1/2001 (about 0.0005), reports d.f. as NA, and changes
# from run to run unless a seed is set. Every expected cell count here is
# above 1000, so the ordinary asymptotic chi-square test is appropriate and
# gives an exact, reproducible p-value with d.f. = 1.
# NOTE: any captured output pasted below this call was produced with the
# simulated test; re-run to refresh it.
crosstab_output <- capture.output(
  CrossTable(
    contingencyTable,
    fisher = FALSE, # I had to turn this off to run the code for some reason
    chisq = TRUE,
    expected = TRUE,
    sresid = TRUE,
    format = "SPSS"
  )
)

crosstab_output
##  [1] ""                                                            
##  [2] "   Cell Contents"                                            
##  [3] "|-------------------------|"                                 
##  [4] "|                   Count |"                                 
##  [5] "|         Expected Values |"                                 
##  [6] "| Chi-square contribution |"                                 
##  [7] "|             Row Percent |"                                 
##  [8] "|          Column Percent |"                                 
##  [9] "|           Total Percent |"                                 
## [10] "|            Std Residual |"                                 
## [11] "|-------------------------|"                                 
## [12] ""                                                            
## [13] "Total Observations in Table:  4637 "                         
## [14] ""                                                            
## [15] "             |  "                                            
## [16] "             |        0  |        1  | Row Total | "         
## [17] "-------------|-----------|-----------|-----------|"          
## [18] "           0 |     1358  |     1095  |     2453  | "         
## [19] "             | 1283.368  | 1169.632  |           | "         
## [20] "             |    4.340  |    4.762  |           | "         
## [21] "             |   55.361% |   44.639% |   52.901% | "         
## [22] "             |   55.977% |   49.525% |           | "         
## [23] "             |   29.286% |   23.614% |           | "         
## [24] "             |    2.083  |   -2.182  |           | "         
## [25] "-------------|-----------|-----------|-----------|"          
## [26] "           1 |     1068  |     1116  |     2184  | "         
## [27] "             | 1142.632  | 1041.368  |           | "         
## [28] "             |    4.875  |    5.349  |           | "         
## [29] "             |   48.901% |   51.099% |   47.099% | "         
## [30] "             |   44.023% |   50.475% |           | "         
## [31] "             |   23.032% |   24.067% |           | "         
## [32] "             |   -2.208  |    2.313  |           | "         
## [33] "-------------|-----------|-----------|-----------|"          
## [34] "Column Total |     2426  |     2211  |     4637  | "         
## [35] "             |   52.318% |   47.682% |           | "         
## [36] "-------------|-----------|-----------|-----------|"          
## [37] ""                                                            
## [38] " "                                                           
## [39] "Statistics for All Table Factors"                            
## [40] ""                                                            
## [41] ""                                                            
## [42] "Pearson's Chi-squared test with simulated p-value"           
## [43] "\t (based on 2000 replicates) "                              
## [44] "------------------------------------------------------------"
## [45] "Chi^2 =  19.32548     d.f. =  NA     p =  0.0004997501 "     
## [46] ""                                                            
## [47] "Pearson's Chi-squared test with simulated p-value"           
## [48] "\t (based on 2000 replicates) "                              
## [49] "------------------------------------------------------------"
## [50] "Chi^2 =  19.32548     d.f. =  NA     p =  0.0004997501 "     
## [51] ""                                                            
## [52] " "                                                           
## [53] "       Minimum expected frequency: 1041.368 "                
## [54] ""
# I was concerned that the larger number of 0's for education was throwing things off, but the row percentages still show that those without a college degree are somewhat less likely to trust the government (44.6% vs. 51.1% for those with a degree)
# Draw a residual-shaded mosaic plot of education (x axis) against trust in
# government (y axis) on the current graphics device.
#
# tbl: two-way contingency table to plot. Defaults to the global
#   `contingencyTable`, so existing zero-argument calls behave as before;
#   passing a table explicitly removes the hidden dependency on that global.
#
# Called for its side effect (the plot); returns whatever mosaicplot() returns.
createMosaicPlot <- function(tbl = contingencyTable) {
  mosaicplot(
    tbl,
    main = "Mosaic Plot of Education vs Trust in Government",
    xlab = "Education Level",
    ylab = "Trust in Government",
    # `color = TRUE` was dropped: mosaicplot() documents `color` as used only
    # when `shade` is FALSE, so it had no effect alongside shade = TRUE.
    shade = TRUE,
    las = 1
  )
}

# dev.cur() returns 1 (the null device) when no graphics device is open;
# in that case open one before plotting.
if (dev.cur() == 1) {
  dev.new()
}

# Render the education-by-trust mosaic plot.
createMosaicPlot()

# Interesting that this one shows what we found above but also demonstrates that those with a college degree tend to have higher faith in government than those without college degrees
# Linear probability model: regress the 0/1 trust indicator on the 0/1
# education indicator.
model <- lm(formula = binary_trust ~ binary_edu, data = assn)

summary(model)
## 
## Call:
## lm(formula = binary_trust ~ binary_edu, data = assn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5110 -0.4464 -0.4464  0.4890  0.5536 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.44639    0.01007  44.348  < 2e-16 ***
## binary_edu   0.06460    0.01467   4.404 1.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4985 on 4635 degrees of freedom
## Multiple R-squared:  0.004168,   Adjusted R-squared:  0.003953 
## F-statistic:  19.4 on 1 and 4635 DF,  p-value: 1.085e-05
# Overall, among those without a college degree, about 44.6% have faith in the government (the intercept).
# For those with college degrees, that share is 6.5 percentage points higher, so it is closer to 51% that have faith in the government
# The t-value (4.40) and p-value (1.09e-05) indicate that this difference is statistically significant

# Reverse-code the education binary: 1 becomes 0, anything else becomes 1.
assn[["inverse_edu"]] <- ifelse(assn[["binary_edu"]] == 1, 0, 1)

# Refit the linear probability model using the reverse-coded predictor.
model2 <- lm(formula = binary_trust ~ inverse_edu, data = assn)

summary(model2)
## 
## Call:
## lm(formula = binary_trust ~ inverse_edu, data = assn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5110 -0.4464 -0.4464  0.4890  0.5536 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.51099    0.01067  47.901  < 2e-16 ***
## inverse_edu -0.06460    0.01467  -4.404 1.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4985 on 4635 degrees of freedom
## Multiple R-squared:  0.004168,   Adjusted R-squared:  0.003953 
## F-statistic:  19.4 on 1 and 4635 DF,  p-value: 1.085e-05
# As expected, reversing the education binary flips the sign of the coefficient compared to the first model;
# this one shows that about 51% of those with a college degree have faith in government, and that this share is about 6.5 percentage points lower for those without college degrees
# Logistic regression of the trust indicator on the education indicator.
model3 <- glm(
  formula = binary_trust ~ binary_edu,
  family = binomial,
  data = assn
)
summary(model3)
## 
## Call:
## glm(formula = binary_trust ~ binary_edu, family = binomial, data = assn)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.21526    0.04062  -5.300 1.16e-07 ***
## binary_edu   0.25922    0.05901   4.393 1.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6418.3  on 4636  degrees of freedom
## Residual deviance: 6398.9  on 4635  degrees of freedom
## AIC: 6402.9
## 
## Number of Fisher Scoring iterations: 3
# Exponentiate the logit coefficients to read them as odds / odds ratios.
exp(stats::coef(model3))
## (Intercept)  binary_edu 
##   0.8063328   1.2959212
# Basically, individuals with a college education have higher log-odds of trust (by 0.259) compared to those without a college education.
# The odds ratio of about 1.296 means those with a college degree have 29.6% higher odds of trusting the government than those without (this is a difference in odds, not a 29.6% higher rate of trust)
# The p-value indicates a high degree of statistical significance


# Same logistic regression, using the reverse-coded education indicator.
model4 <- glm(
  formula = binary_trust ~ inverse_edu,
  family = binomial,
  data = assn
)
summary(model4)
## 
## Call:
## glm(formula = binary_trust ~ inverse_edu, family = binomial, 
##     data = assn)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.04396    0.04281   1.027    0.304    
## inverse_edu -0.25922    0.05901  -4.393 1.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6418.3  on 4636  degrees of freedom
## Residual deviance: 6398.9  on 4635  degrees of freedom
## AIC: 6402.9
## 
## Number of Fisher Scoring iterations: 3
# Exponentiate the logit coefficients to read them as odds / odds ratios.
exp(stats::coef(model4))
## (Intercept) inverse_edu 
##   1.0449438   0.7716519
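# Whereas the previous model showed about 30% higher odds of trusting the government for those with college degrees, the inverse model gives the reciprocal odds ratio (0.772 = 1/1.296): those without college degrees have about 23% lower odds of trusting the government. The two percentages differ because odds ratios are reciprocals of each other, not mirror-image percentages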