library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the district data, drop incomplete rows, and fit the baseline model.
#
# The data folder is stored in a variable and combined with file.path()
# instead of calling setwd(), so reading the file does not change the
# session's working directory as a side effect. readxl is already attached
# at the top of the file, so it is not attached a second time here.
data_dir <- "C:/Users/KaeRo/Desktop/R Studio/Reseach Data Selection"
district <- read_excel(file.path(data_dir, "district.xls"))

# NOTE(review): drop_na() with no column arguments removes every row that has
# an NA in ANY column of the sheet, not only in the five model variables --
# confirm that this is the intended sample.
Cleaned_district <- district %>%
  drop_na()

# Linear regression of DPSTTOSA on DPSAMIFP, DPETWHIP, DPETALLC and DZRVLOCP.
model1 <- lm(
  DPSTTOSA ~ DPSAMIFP + DPETWHIP + DPETALLC + DZRVLOCP,
  data = Cleaned_district
)

Check the following assumptions: a) Linearity (plot and raintest)

# Diagnostic plot 1 of the fitted model, used for the visual linearity check.
plot(model1, which = 1)

# Rainbow test on the same model.
raintest(model1)
## 
##  Rainbow test
## 
## data:  model1
## Rain = 0.87665, df1 = 162, df2 = 156, p-value = 0.7966

Does it meet Linearity: Somewhat. The line itself isn't perfectly straight, but a good portion of it is linear, and the p-value in the rainbow test is high (0.7966), so the test gives no evidence against linearity.

b) Independence of errors (Durbin-Watson)
d) Normality of residuals (QQ plot, Shapiro test)
# Diagnostic plot 2 of the fitted model (the QQ plot named in the heading).
plot(model1, which = 2)

# Shapiro-Wilk normality test on the model residuals.
shapiro.test(model1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  model1$residuals
## W = 0.99459, p-value = 0.3117

Does it meet Normality of residuals: Yes. Most of the points fall on the line in the QQ plot, and the p-value is above .05, so the residuals are normal.
e) No multicollinearity (VIF, cor)
# Variance inflation factor for each predictor in model1.
# NOTE(review): vif() is not exported by any package attached in the visible
# code (tidyverse, readxl, pastecs, lmtest, MASS); presumably car::vif --
# confirm the providing package is loaded elsewhere.
vif(model1)
## DPSAMIFP DPETWHIP DPETALLC DZRVLOCP 
## 5.261047 5.237581 1.285218 1.149458