library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(zoo)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Load the district data set from the Excel workbook.
# The workbook is read through an explicit path instead of calling setwd(), so
# running this script no longer changes the session's working directory as a
# side effect.
# NOTE(review): the folder below is machine-specific; adjust it (or keep the
# workbook next to the script) when running on another computer.
data_dir <- path.expand("~/Desktop/Monday Class")
district <- read_excel(file.path(data_dir, "district.xls"))
# Fit a linear regression of DA0912DR21R on the two predictors DA0AT21R and
# DA0CT21R, using the columns of `district`, then print the coefficient table
# and fit statistics.
DistrictModel <- lm(
  DA0912DR21R ~ DA0AT21R + DA0CT21R,
  data = district
)
summary(DistrictModel)
##
## Call:
## lm(formula = DA0912DR21R ~ DA0AT21R + DA0CT21R, data = district)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.6637 -0.9424 -0.2303 0.6698 28.0421
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.626183 2.043604 29.666 <2e-16 ***
## DA0AT21R -0.624078 0.021918 -28.473 <2e-16 ***
## DA0CT21R -0.004277 0.002269 -1.885 0.0597 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.002 on 1078 degrees of freedom
## (126 observations deleted due to missingness)
## Multiple R-squared: 0.462, Adjusted R-squared: 0.461
## F-statistic: 462.9 on 2 and 1078 DF, p-value: < 2.2e-16
# Scatterplot of the outcome (DA0912DR21R) against the predictor DA0AT21R,
# used as a visual check of the linearity assumption.
ggplot(data = district, mapping = aes(x = DA0AT21R, y = DA0912DR21R)) +
  geom_point()
## Warning: Removed 112 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Linearity check: Rainbow test (lmtest) applied to the fitted model.
# A small p-value is evidence against a linear functional form.
raintest(DistrictModel)
##
## Rainbow test
##
## data: DistrictModel
## Rain = 1.8528, df1 = 541, df2 = 537, p-value = 6.364e-13
# Independence of errors: Durbin-Watson test (car) for first-order
# autocorrelation in the residuals. The outer parentheses store the result in
# `dwtdistrict` and display it in a single step.
# NOTE(review): car appears to simulate this p-value by default, so it may
# differ slightly between runs unless a seed is set — confirm.
(dwtdistrict <- durbinWatsonTest(DistrictModel))
## lag Autocorrelation D-W Statistic p-value
## 1 0.05241542 1.894533 0.108
## Alternative hypothesis: rho != 0
# Homoscedasticity, visual check: scale-location plot of the fitted model
# (panel 3 of plot.lm).
plot(DistrictModel, which = 3)

# Homoscedasticity, formal check: studentized Breusch-Pagan test (lmtest).
# The outer parentheses store the result in `bptestdistrict` and display it.
(bptestdistrict <- bptest(DistrictModel))
##
## studentized Breusch-Pagan test
##
## data: DistrictModel
## BP = 170.14, df = 2, p-value < 2.2e-16
# Normality of residuals, visual check: normal Q-Q plot of the fitted model
# (panel 2 of plot.lm).
plot(DistrictModel, which = 2)

# Normality of residuals, formal check: Shapiro-Wilk test on the model
# residuals. The argument expression is kept as written because it is echoed
# in the test's "data:" label.
shapiro.test(DistrictModel$residuals)
##
## Shapiro-Wilk normality test
##
## data: DistrictModel$residuals
## W = 0.72197, p-value < 2.2e-16
# Multicollinearity check: variance inflation factor (car) for each predictor
# in the fitted model. Values close to 1 indicate little correlation between
# the predictors.
vif(DistrictModel)
## DA0AT21R DA0CT21R
## 1.094539 1.094539
Homework 7
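Linearity: The scatterplot does not show a linear relationship, and the Rainbow test is significant (Rain = 1.85, p < 0.001), so this assumption is violated.
Independence of errors: The Durbin-Watson test is not significant (D-W = 1.89, p = 0.108), so there is no evidence of autocorrelation and this assumption is met.
Homoscedasticity: This assumption is violated. The line in the graph is not straight; it is wavy, and the Breusch-Pagan test is significant (BP = 170.14, p < 0.001).
Normality: The residuals are not normally distributed (Shapiro-Wilk W = 0.72, p < 0.001), so this assumption is violated.
Multicollinearity: The VIF values (about 1.09 for both predictors) show that the variables are not highly correlated, so this assumption is met.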