library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
# Load the district-level workbook; the relative path is resolved against the
# current working directory.
district <- read_excel(path = "district.xls")
# Model 1: linear regression of DA0912DR21R on DDA00A001S22R, PROPWLTH and
# DDE00A001S22R. PROPWLTH is non-numeric, so lm() expands it into one
# indicator term per bracket label (see the coefficient table below).
# NOTE(review): the meaning of these column codes is not visible in this
# script -- confirm against the data dictionary for district.xls.
model1 <- lm(
  formula = DA0912DR21R ~ DDA00A001S22R + PROPWLTH + DDE00A001S22R,
  data = district
)

# Coefficient table, residual summary and fit statistics for model 1.
summary(model1)
## 
## Call:
## lm(formula = DA0912DR21R ~ DDA00A001S22R + PROPWLTH + DDE00A001S22R, 
##     data = district)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.722 -0.973 -0.110  0.660 39.477 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       9.146392   0.752444  12.156  < 2e-16 ***
## DDA00A001S22R                    -0.084529   0.017220  -4.909 1.06e-06 ***
## PROPWLTH$1,892,208 and over       0.313910   0.582796   0.539    0.590    
## PROPWLTH$164,606 to < $234,712   -0.032790   0.500695  -0.065    0.948    
## PROPWLTH$234,712 to < $298,152   -0.254902   0.462105  -0.552    0.581    
## PROPWLTH$298,152 to < $340,843   -0.111611   0.504490  -0.221    0.825    
## PROPWLTH$340,843 to < $359,962    0.090908   0.605503   0.150    0.881    
## PROPWLTH$359,962 to < $411,857   -0.448565   0.457677  -0.980    0.327    
## PROPWLTH$411,857 to < $427,868    0.082275   0.629944   0.131    0.896    
## PROPWLTH$427,868 to < $456,750   -0.090155   0.539586  -0.167    0.867    
## PROPWLTH$456,750 to < $479,670   -0.094413   0.574538  -0.164    0.870    
## PROPWLTH$479,670 to < $526,224   -0.174903   0.521662  -0.335    0.737    
## PROPWLTH$526,224 to < $539,089    0.104998   0.872696   0.120    0.904    
## PROPWLTH$539,089 to < $573,876   -0.396455   0.624508  -0.635    0.526    
## PROPWLTH$573,876 to < $602,435    0.091625   0.644454   0.142    0.887    
## PROPWLTH$602,435 to < $674,066   -0.132866   0.560522  -0.237    0.813    
## PROPWLTH$674,066 to < $710,768   -0.196519   0.597997  -0.329    0.743    
## PROPWLTH$710,768 to < $836,577   -0.197682   0.527288  -0.375    0.708    
## PROPWLTH$836,577 to < $949,515   -0.006754   0.568720  -0.012    0.991    
## PROPWLTH$949,515 to < $1,039,446  0.310709   0.725872   0.428    0.669    
## PROPWLTHNon-taxing entities       2.190254   0.438502   4.995 6.87e-07 ***
## PROPWLTHUnder $164,606            0.159319   0.587300   0.271    0.786    
## DDE00A001S22R                    -0.024671   0.017281  -1.428    0.154    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.014 on 1071 degrees of freedom
##   (113 observations deleted due to missingness)
## Multiple R-squared:  0.1937, Adjusted R-squared:  0.1772 
## F-statistic:  11.7 on 22 and 1071 DF,  p-value: < 2.2e-16
# Model 2: regress DA0912DR21R on DDA00A001S22R and DDE00A001S22R only
# (no PROPWLTH term).
model2 <- lm(
  formula = DA0912DR21R ~ DDA00A001S22R + DDE00A001S22R,
  data = district
)

# Coefficient table, residual summary and fit statistics for model 2.
summary(model2)
## 
## Call:
## lm(formula = DA0912DR21R ~ DDA00A001S22R + DDE00A001S22R, data = district)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.145 -1.038 -0.263  0.504 40.876 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.99672    0.65146  15.345  < 2e-16 ***
## DDA00A001S22R -0.09809    0.01715  -5.721 1.37e-08 ***
## DDE00A001S22R -0.02006    0.01743  -1.151     0.25    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.075 on 1091 degrees of freedom
##   (113 observations deleted due to missingness)
## Multiple R-squared:  0.1451, Adjusted R-squared:  0.1435 
## F-statistic: 92.58 on 2 and 1091 DF,  p-value: < 2.2e-16
# Model 3: regress DA0912DR21R on DDA00A001S22R and DPFEAINSK.
model3 <- lm(
  formula = DA0912DR21R ~ DDA00A001S22R + DPFEAINSK,
  data = district
)

# Coefficient table, residual summary and fit statistics for model 3.
summary(model3)
## 
## Call:
## lm(formula = DA0912DR21R ~ DDA00A001S22R + DPFEAINSK, data = district)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.226 -1.047 -0.280  0.569 39.828 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    8.552e+00  7.790e-01   10.98  < 2e-16 ***
## DDA00A001S22R -1.131e-01  8.513e-03  -13.29  < 2e-16 ***
## DPFEAINSK      1.660e-04  5.338e-05    3.11  0.00192 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.059 on 1090 degrees of freedom
##   (114 observations deleted due to missingness)
## Multiple R-squared:  0.1532, Adjusted R-squared:  0.1517 
## F-statistic: 98.61 on 2 and 1090 DF,  p-value: < 2.2e-16
# Pairwise Pearson correlations among the three model-3 variables. Each call
# drops rows where either of its two columns is missing (use = "complete.obs"),
# so the three correlations may be computed on slightly different row sets.
with(district, cor(DDA00A001S22R, DPFEAINSK, use = "complete.obs"))
## [1] -0.01725519
with(district, cor(DDA00A001S22R, DA0912DR21R, use = "complete.obs"))
## [1] -0.3795395
with(district, cor(DA0912DR21R, DPFEAINSK, use = "complete.obs"))
## [1] 0.1270866
## [1] 0.1270866