library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Read the district-level data set from the working directory.
dist_data <- read.csv("district_data.csv")

# Count the missing values in each of the three variables used by the
# models further down.
sum(is.na(dist_data[["DA0GR21N"]]))
## [1] 126
sum(is.na(dist_data[["DPETALLC"]]))
## [1] 0
sum(is.na(dist_data[["DPFRAALLT"]]))
## [1] 5

# Keep only the rows that are complete on all three model variables.
dist_clean <- dist_data %>%
  drop_na(DA0GR21N, DPETALLC, DPFRAALLT)
nrow(dist_clean)
## [1] 1080
# Baseline model: ordinary least squares fit of DA0GR21N on DPETALLC and
# DPFRAALLT, using the complete-case rows in dist_clean.
model <- lm(DA0GR21N ~ DPETALLC + DPFRAALLT, data = dist_clean)
summary(model)
##
## Call:
## lm(formula = DA0GR21N ~ DPETALLC + DPFRAALLT, data = dist_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3128.47 -15.09 -6.88 4.46 1093.88
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.512e+00 5.203e+00 1.828 0.0678 .
## DPETALLC 7.974e-02 3.171e-03 25.151 < 2e-16 ***
## DPFRAALLT -1.101e-06 2.403e-07 -4.581 5.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 159.7 on 1077 degrees of freedom
## Multiple R-squared: 0.9665, Adjusted R-squared: 0.9664
## F-statistic: 1.553e+04 on 2 and 1077 DF, p-value: < 2.2e-16

# Residuals-vs-fitted plot for the baseline model. The fitted() and
# residuals() extractors are used instead of reaching into the lm object,
# and the axes are labelled explicitly so the figure is readable on its own
# (the defaults would be the deparsed R expressions).
plot(
  fitted(model), residuals(model),
  xlab = "Fitted values",
  ylab = "Residuals",
  main = "Residuals vs fitted: DA0GR21N ~ DPETALLC + DPFRAALLT"
)
# Horizontal reference line at a residual of zero.
abline(h = 0, col = "red")

library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Durbin-Watson test on the baseline model's residuals. The alternative
# hypothesis reported below is positive autocorrelation.
# NOTE(review): the statistic depends on the row order of dist_clean; confirm
# that the rows have a meaningful ordering before interpreting this result.
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 1.8606, p-value = 0.0106
## alternative hypothesis: true autocorrelation is greater than 0

# Studentized Breusch-Pagan test for heteroskedasticity in the baseline
# model. lmtest is already attached earlier in the script, so it is not
# loaded a second time here.
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 123.02, df = 2, p-value < 2.2e-16
# Refit the same specification with a log-transformed response.
model_log <- lm(
  log(DA0GR21N) ~ DPETALLC + DPFRAALLT,
  data = dist_clean
)
summary(model_log)
##
## Call:
## lm(formula = log(DA0GR21N) ~ DPETALLC + DPFRAALLT, data = dist_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3505 -0.7478 0.0662 0.8708 2.7207
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.051e+00 3.812e-02 106.265 < 2e-16 ***
## DPETALLC 1.146e-04 2.323e-05 4.934 9.32e-07 ***
## DPFRAALLT -2.881e-09 1.761e-09 -1.637 0.102
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.17 on 1077 degrees of freedom
## Multiple R-squared: 0.4273, Adjusted R-squared: 0.4262
## F-statistic: 401.7 on 2 and 1077 DF, p-value: < 2.2e-16

# Normal Q-Q plot of the log-model residuals, with a blue reference line.
qqnorm(resid(model_log))
qqline(resid(model_log), col = "blue")

library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
# Variance inflation factors for the predictors of the log-response model.
# The namespace is spelled out so the call does not depend on attach order.
# NOTE(review): both values are far above the usual rule-of-thumb cutoffs
# (about 5-10), which suggests DPETALLC and DPFRAALLT are strongly collinear;
# consider whether both belong in the model.
car::vif(model_log)
## DPETALLC DPFRAALLT
## 73.14313 73.14313