library(tibble)
library(ggplot2)
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
# Print the raw workbook once to inspect its column names and types.
readxl::read_excel(path = "TexasCountyPoverty.xlsx")
## # A tibble: 7 × 4
##   `Label (Grouping)`   Population for whom …¹ pct_decimal county_vote_turnout …²
##   <chr>                                 <dbl>       <dbl>                  <dbl>
## 1 Bexar County, Texas…                  0.157       0.157                  0.442
## 2 Dallas County, Texa…                  0.142       0.142                  0.442
## 3 El Paso County, Tex…                  0.213       0.213                  0.329
## 4 Harris County, Texa…                  0.165       0.165                  0.435
## 5 Hidalgo County, Tex…                  0.276       0.276                  0.344
## 6 Tarrant County, Tex…                  0.106       0.106                  0.470
## 7 Webb County, Texas!…                  0.201       0.201                  0.312
## # ℹ abbreviated names: ¹​`Population for whom poverty status is determined`,
## #   ²​`county_vote_turnout ?`
# Load the county-level workbook and keep the two columns used in the analysis.
TexasCountyPoverty <- read_excel("TexasCountyPoverty.xlsx")

# x: column `Population for whom poverty status is determined` (used as the
#    poverty level in the write-up -- TODO confirm it is a rate, not a count).
# y: column `county_vote_turnout ?` (county voter turnout).
x <- TexasCountyPoverty[["Population for whom poverty status is determined"]]
y <- TexasCountyPoverty[["county_vote_turnout ?"]]
texas_data <- data.frame(x = x, y = y)

# Scatter plot of y against x with a fitted least-squares line in red.
ggplot(texas_data, aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm", color = "red")
## `geom_smooth()` using formula = 'y ~ x'

# Simple linear regression with x as the response and y as the predictor.
# NOTE(review): the scatter plot above draws y against x (y ~ x), whereas this
# model regresses x on y -- confirm which direction the analysis intends.
poverty_model <- lm(formula = x ~ y, data = texas_data)

# Coefficient table, residual standard error, R-squared and overall F-test.
summary(poverty_model)
## 
## Call:
## lm(formula = x ~ y, data = texas_data)
## 
## Residuals:
##         1         2         3         4         5         6         7 
##  0.008651 -0.005788 -0.014070  0.012367  0.058902 -0.022055 -0.038008 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.45838    0.08601   5.329  0.00312 **
## y           -0.70223    0.21450  -3.274  0.02211 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03423 on 5 degrees of freedom
## Multiple R-squared:  0.6819, Adjusted R-squared:  0.6183 
## F-statistic: 10.72 on 1 and 5 DF,  p-value: 0.02211

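The relationship between poverty and turnout is statistically significant at the 95% confidence level (p = 0.022), with about 68% of the variation in x (poverty level) explained by y (voter turnout). The slope is negative: each one-unit increase in y is associated with a decrease of about 0.70 in x, so counties with higher turnout tend to have lower poverty. With only seven counties, this describes an association rather than a proven causal effect.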

# Linearity check: Rainbow test on the regression fitted earlier in the script.
# `x`, `y`, `texas_data` and `poverty_model` already exist in the session with
# exactly these values, so the fitted model is reused here instead of being
# rebuilt from the spreadsheet a second time (the duplicated set-up could
# silently drift out of sync with the original definitions).
# A large p-value means the linear specification is not rejected.
raintest(poverty_model)
## 
##  Rainbow test
## 
## data:  poverty_model
## Rain = 0.28727, df1 = 4, df2 = 1, p-value = 0.8645
# Rainbow test: p = 0.8645 > 0.05, so linearity is not rejected; there is no evidence of a non-linear relationship.
# Independence check: Durbin-Watson test for lag-1 autocorrelation in the
# residuals. By default car obtains the p-value by bootstrap simulation, so it
# changes slightly from run to run unless a seed is set beforehand.
car::durbinWatsonTest(poverty_model)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.02132838      1.698045   0.846
##  Alternative hypothesis: rho != 0
# Durbin-Watson: p = 0.846 > 0.05, so there is no evidence of autocorrelation; the errors appear independent.
# Constant-variance check: studentized Breusch-Pagan test on the fitted model
# (null hypothesis: the residual variance is constant).
lmtest::bptest(poverty_model)
## 
##  studentized Breusch-Pagan test
## 
## data:  poverty_model
## BP = 2.1205, df = 1, p-value = 0.1453
# Breusch-Pagan: p = 0.1453 > 0.05, so the null hypothesis of constant variance (homoskedasticity) cannot be rejected.
# Normality check (visual): diagnostic plot number 2 for the fitted model,
# the normal Q-Q plot of the residuals.
plot(poverty_model, which = 2)

# Normality check (formal): Shapiro-Wilk test on the model residuals
# (null hypothesis: the residuals are normally distributed).
stats::shapiro.test(poverty_model$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  poverty_model$residuals
## W = 0.93508, p-value = 0.5949
# Shapiro-Wilk: p = 0.5949 > 0.05, so normality of the residuals cannot be rejected.

There are only two variables (a single predictor and the response), so multicollinearity is not an issue.

# Descriptive statistics (count, range, mean, SE, CI, variance, ...) for the
# county voter-turnout column.
pastecs::stat.desc(TexasCountyPoverty[["county_vote_turnout ?"]])
##      nbr.val     nbr.null       nbr.na          min          max        range 
##  7.000000000  0.000000000  0.000000000  0.312400000  0.470400000  0.158000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  2.775000000  0.435400000  0.396428571  0.024626087  0.060257864  0.004245109 
##      std.dev     coef.var 
##  0.065154501  0.164353697