library(tibble)
library(ggplot2)
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Display the raw workbook so its column names and values can be inspected.
# The result is printed only; it is not stored in a variable here.
readxl::read_excel(path = "TexasCountyPoverty.xlsx")
## # A tibble: 7 × 4
## `Label (Grouping)` Population for whom …¹ pct_decimal county_vote_turnout …²
## <chr> <dbl> <dbl> <dbl>
## 1 Bexar County, Texas… 0.157 0.157 44.2
## 2 Dallas County, Texa… 0.142 0.142 44.2
## 3 El Paso County, Tex… 0.213 0.213 32.9
## 4 Harris County, Texa… 0.165 0.165 43.5
## 5 Hidalgo County, Tex… 0.276 0.276 34.4
## 6 Tarrant County, Tex… 0.106 0.106 47.0
## 7 Webb County, Texas!… 0.201 0.201 31.2
## # ℹ abbreviated names: ¹`Population for whom poverty status is determined`,
## # ²`county_vote_turnout ?`
# Load the county-level workbook once. (The original re-read the same file
# again just before fitting, which only repeated the I/O.)
TexasCountyPoverty <- read_excel("TexasCountyPoverty.xlsx")

# Fail early if the workbook layout changes: `$` on a tibble returns NULL with
# only a warning for a missing column, which would surface later as an
# obscure model-fitting error.
stopifnot(
  all(
    c(
      "Population for whom poverty status is determined",
      "county_vote_turnout ?"
    ) %in% names(TexasCountyPoverty)
  )
)

# x: the poverty column (values print as decimals such as 0.157).
# y: the county turnout column (values print on a scale such as 44.2).
x <- TexasCountyPoverty$`Population for whom poverty status is determined`
y <- TexasCountyPoverty$`county_vote_turnout ?`
texas_data <- data.frame(x = x, y = y)

# Simple linear regression with a single predictor.
# NOTE(review): as written, the poverty column (x) is the response and
# turnout (y) is the predictor. If the goal is to explain turnout by poverty,
# the formula should be y ~ x -- confirm the intended direction before
# interpreting coefficients.
poverty_model <- lm(x ~ y, data = texas_data)
# Linearity check: rainbow test from lmtest (null hypothesis: the linear
# specification is adequate).
lmtest::raintest(poverty_model)
##
## Rainbow test
##
## data: poverty_model
## Rain = 0.28727, df1 = 4, df2 = 1, p-value = 0.8645
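# Rainbow test: p = 0.8645 > 0.05, so we fail to reject the null hypothesis of
# linearity; there is no evidence of a nonlinear relationship (with only 7
# counties the test has little power).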
# Independence check: Durbin-Watson test for first-order autocorrelation in
# the residuals (two-sided alternative, rho != 0).
# NOTE(review): car computes this p-value by simulation by default, so it can
# differ slightly between runs unless a seed is set -- confirm.
car::durbinWatsonTest(poverty_model)
## lag Autocorrelation D-W Statistic p-value
## 1 0.02132838 1.698045 0.898
## Alternative hypothesis: rho != 0
# Durbin-Watson p = 0.898 > 0.05: no evidence of autocorrelation, so the
# errors can be treated as independent.
# Constant-variance check: studentized Breusch-Pagan test from lmtest (null
# hypothesis: the errors are homoskedastic).
lmtest::bptest(poverty_model)
##
## studentized Breusch-Pagan test
##
## data: poverty_model
## BP = 2.1205, df = 1, p-value = 0.1453
# Breusch-Pagan p = 0.1453 > 0.05: the null hypothesis of homoskedasticity
# (constant error variance) cannot be rejected.
# Normality check, part 1: draw only the normal Q-Q plot of the residuals
# (diagnostic plot number 2 for an lm fit).
plot(poverty_model, which = 2)

# Normality check, part 2: Shapiro-Wilk test on the model residuals (null
# hypothesis: the residuals come from a normal distribution).
stats::shapiro.test(poverty_model$residuals)
##
## Shapiro-Wilk normality test
##
## data: poverty_model$residuals
## W = 0.93508, p-value = 0.5949
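# Shapiro-Wilk p = 0.5949 > 0.05: we fail to reject normality, so the
# residuals are consistent with a normal distribution.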
# There are only two variables in the model (one predictor and one response),
# so multicollinearity is not an issue.