library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
# Load the 2020 city-level file and convert its text columns to factors by
# rebuilding the data frame from its underlying list of columns.
# NOTE(review): the path is machine-specific; confirm before running elsewhere.
cities <- as.data.frame(
  unclass(read.csv("C:/Users/canda/DEM Dissertation Data/citiesDEC2020.csv")),
  stringsAsFactors = TRUE
)

# Inspect the column types after the conversion.
str(cities)
## 'data.frame':    265 obs. of  47 variables:
##  $ statefp20: int  48 48 48 48 48 48 48 48 48 48 ...
##  $ placefp20: int  1000 1240 1576 1696 1852 1924 2212 2272 3000 3216 ...
##  $ cityname : Factor w/ 265 levels "","Abilene","Addison",..: 2 3 4 5 6 7 8 9 10 11 ...
##  $ Proximity: int  0 1 0 0 0 0 1 0 0 0 ...
##  $ Miles    : num  NA 3 NA NA NA NA 13.7 NA NA NA ...
##  $ FC.name  : Factor w/ 68 levels "","1867 district/Highlands",..: 1 3 1 1 1 1 54 1 1 1 ...
##  $ w_20     : int  70391 8001 2302 1288 2120 53330 439 14435 104026 5147 ...
##  $ b_20     : int  14221 2893 39 385 131 11286 53 1318 16021 240 ...
##  $ h_20     : int  33634 3807 17005 13984 15352 13197 17646 10209 65302 7698 ...
##  $ a_20     : int  3674 1535 36 232 133 24113 22 452 9056 104 ...
##  $ o_20     : int  3262 425 111 110 155 2701 38 684 5988 298 ...
##  $ t_20     : int  125182 16661 19493 15999 17891 104627 18198 27098 200393 13487 ...
##  $ pw_20    : num  56.23 48.02 11.81 8.05 11.85 ...
##  $ pb_20    : num  11.36 17.364 0.2 2.406 0.732 ...
##  $ ph_20    : Factor w/ 262 levels "","10.16769409",..: 100 78 234 235 230 13 258 153 137 200 ...
##  $ pa_20    : num  2.935 9.213 0.185 1.45 0.743 ...
##  $ po_20    : num  2.606 2.551 0.569 0.688 0.866 ...
##  $ dwb_20   : num  28.7 25 35.3 32.8 38.2 ...
##  $ dwh_20   : num  28.88 24.3 37.84 4.46 28.84 ...
##  $ dwa_20   : num  25.31 9.14 57.85 21.95 10.37 ...
##  $ dbw_20   : num  28.7 25 35.3 32.8 38.2 ...
##  $ dbh_20   : num  21.1 22.5 48.2 33.1 12 ...
##  $ dba_20   : num  39 18.2 62 47.4 37.9 ...
##  $ dhw_20   : num  28.88 24.3 37.84 4.46 28.84 ...
##  $ dhb_20   : num  21.1 22.5 48.2 33.1 12 ...
##  $ dha_20   : num  44.5 19.2 40.6 20.4 29 ...
##  $ daw_20   : num  25.31 9.14 57.85 21.95 10.37 ...
##  $ dab_20   : num  39 18.2 62 47.4 37.9 ...
##  $ dah_20   : num  44.5 19.2 40.6 20.4 29 ...
##  $ pbooDEC20: num  4.74 4.08 0.26 3.13 0.65 6.5 0.13 2.73 3.56 1.08 ...
##  $ pboo_20  : num  3.9 1.4 0 3.1 0 7.4 0 1.6 3.6 4.1 ...
##  $ lifeexp  : num  NA NA NA NA NA NA NA NA 69.6 NA ...
##  $ lifeexp_f: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lifeexp_m: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ zipcode  : int  NA NA NA NA NA NA NA NA 79107 NA ...
##  $ X        : logi  NA NA NA NA NA NA ...
##  $ X.1      : logi  NA NA NA NA NA NA ...
##  $ X.2      : logi  NA NA NA NA NA NA ...
##  $ X.3      : logi  NA NA NA NA NA NA ...
##  $ X.4      : logi  NA NA NA NA NA NA ...
##  $ X.5      : logi  NA NA NA NA NA NA ...
##  $ X.6      : logi  NA NA NA NA NA NA ...
##  $ X.7      : logi  NA NA NA NA NA NA ...
##  $ X.8      : logi  NA NA NA NA NA NA ...
##  $ X.9      : logi  NA NA NA NA NA NA ...
##  $ X.10     : logi  NA NA NA NA NA NA ...
##  $ X.11     : int  NA NA NA NA NA NA NA NA NA NA ...

#The list of 261 cities from the Diversity and Disparities project available for year 2020 was used to conduct a binomial regression analysis. Of these cities, 100 were identified as being in proximity to a freedom colony and 161 were not.

# Treat the 0/1 proximity flag as categorical, then tabulate it (NAs included).
cities[["Proximity"]] <- as.factor(cities[["Proximity"]])
summary(cities[["Proximity"]])
##    0    1 NA's 
##  161  100    4
# Preview the first six rows of the city data.
head(cities, n = 6L)
##   statefp20 placefp20 cityname Proximity Miles FC.name  w_20  b_20  h_20  a_20
## 1        48      1000  Abilene         0    NA         70391 14221 33634  3674
## 2        48      1240  Addison         1     3   Alpha  8001  2893  3807  1535
## 3        48      1576    Alamo         0    NA          2302    39 17005    36
## 4        48      1696   Aldine         0    NA          1288   385 13984   232
## 5        48      1852    Alice         0    NA          2120   131 15352   133
## 6        48      1924    Allen         0    NA         53330 11286 13197 24113
##   o_20   t_20     pw_20      pb_20       ph_20      pa_20     po_20   dwb_20
## 1 3262 125182 56.230927 11.3602591 26.86808014  2.9349267 2.6058059 28.71591
## 2  425  16661 48.022327 17.3639031 22.84976959  9.2131329 2.5508673 24.97171
## 3  111  19493 11.809367  0.2000718 87.23644257  0.1846817 0.5694352 35.25474
## 4  110  15999  8.050503  2.4064004 87.40546417  1.4500906 0.6875430 32.83879
## 5  155  17891 11.849533  0.7322117 85.80850983  0.7433906 0.8663574 38.17082
## 6 2701 104627 50.971546 10.7868910 12.61337852 23.0466328 2.5815516 13.79678
##      dwh_20    dwa_20   dbw_20   dbh_20   dba_20    dhw_20   dhb_20   dha_20
## 1 28.876587 25.311729 28.71591 21.11862 39.04976 28.876587 21.11862 44.49091
## 2 24.299740  9.137611 24.97171 22.51835 18.21161 24.299740 22.51835 19.16062
## 3 37.839760 57.845837 35.25474 48.17920 61.96581 37.839760 48.17920 40.63903
## 4  4.459177 21.953308 32.83879 33.14220 47.37349  4.459177 33.14220 20.41890
## 5 28.835796 10.373103 38.17082 12.03613 37.92114 28.835796 12.03613 29.04787
## 6 13.915831 27.770782 13.79678 10.68244 32.44231 13.915831 10.68244 36.19292
##      daw_20   dab_20   dah_20 pbooDEC20 pboo_20 lifeexp lifeexp_f lifeexp_m
## 1 25.311729 39.04976 44.49091      4.74     3.9      NA        NA        NA
## 2  9.137611 18.21161 19.16062      4.08     1.4      NA        NA        NA
## 3 57.845837 61.96581 40.63903      0.26     0.0      NA        NA        NA
## 4 21.953308 47.37349 20.41890      3.13     3.1      NA        NA        NA
## 5 10.373103 37.92114 29.04787      0.65     0.0      NA        NA        NA
## 6 27.770782 32.44231 36.19292      6.50     7.4      NA        NA        NA
##   zipcode  X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8 X.9 X.10 X.11
## 1      NA NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA
## 2      NA NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA
## 3      NA NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA
## 4      NA NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA
## 5      NA NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA
## 6      NA NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA
# Five-number summary (plus mean and NA count) of percent Black homeowners.
summary(cities[["pbooDEC20"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   2.170   4.910   8.541  12.050  67.120       4
# Five-number summary (plus mean and NA count) of percent Black population.
summary(cities[["pb_20"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##  0.02595  3.76571  8.84484 12.23104 17.32191 71.26547        4
# Five-number summary (plus mean and NA count) of total city population.
summary(cities[["t_20"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   10070   15819   24486   75682   57526 2304580       4
# Five-number summary (plus mean and NA count) of the segregation D index.
summary(cities[["dbw_20"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   16.23   24.07   25.12   33.08   69.14       4

Racial residential segregation and proximity to freedom colonies

# Logistic regression of freedom-colony proximity on the residential
# segregation D index (dbw_20); the fit is printed, not stored.
glm(Proximity ~ dbw_20,
    data = cities,
    family = "binomial")
## 
## Call:  glm(formula = Proximity ~ dbw_20, family = "binomial", data = cities)
## 
## Coefficients:
## (Intercept)       dbw_20  
##    -1.06267      0.02303  
## 
## Degrees of Freedom: 260 Total (i.e. Null);  259 Residual
##   (4 observations deleted due to missingness)
## Null Deviance:       347.4 
## Residual Deviance: 342.8     AIC: 346.8
# Same logistic model with an explicit logit link, stored so that it can be
# summarised with coefficient standard errors and z tests.
model <- glm(Proximity ~ dbw_20,
             family = binomial(link = "logit"),
             data = cities)

summary(model)
## 
## Call:
## glm(formula = Proximity ~ dbw_20, family = binomial(link = "logit"), 
##     data = cities)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.06267    0.30592  -3.474 0.000513 ***
## dbw_20       0.02303    0.01078   2.137 0.032632 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 347.43  on 260  degrees of freedom
## Residual deviance: 342.77  on 259  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 346.77
## 
## Number of Fisher Scoring iterations: 4
# Type II likelihood-ratio test for the terms in `model`.
Anova(model, type = "II", test.statistic = "LR")
## Analysis of Deviance Table (Type II tests)
## 
## Response: Proximity
##        LR Chisq Df Pr(>Chisq)  
## dbw_20   4.6616  1    0.03084 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter of proximity status against the D index, coloured by the D index.
ggplot(data = cities,
       mapping = aes(x = dbw_20, y = Proximity, colour = dbw_20)) +
  geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).

#The results showed that cities in proximity to freedom colonies have, on average, a residential segregation index 3.297 points higher than cities that are not (Table 2). The difference was statistically significant (p = 0.031).

# Difference in mean D index between cities near and not near a freedom colony.
fit <- lm(formula = dbw_20 ~ Proximity, data = cities)
summary(fit)
## 
## Call:
## lm(formula = dbw_20 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.009  -9.263  -0.545   8.248  45.285 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  23.8538     0.9395  25.389   <2e-16 ***
## Proximity1    3.2967     1.5178   2.172   0.0308 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.92 on 259 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.01789,    Adjusted R-squared:  0.0141 
## F-statistic: 4.718 on 1 and 259 DF,  p-value: 0.03076

Proximity to freedom colonies and homeownership

#A strong positive association between percent Black population and Black homeownership (r = 0.959)

# Pearson correlation between percent Black population and percent Black
# homeowners. Rows with a missing value in either column are dropped so that
# cor() returns an estimate instead of NA; cor.test() already does this.
cor(cities$pb_20, cities$pbooDEC20, use = "complete.obs")
## [1] 0.95879
cor.test(cities$pb_20, cities$pbooDEC20)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$pb_20 and cities$pbooDEC20
## t = 54.31, df = 259, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9476974 0.9675692
## sample estimates:
##     cor 
## 0.95879
# Scatterplot of percent Black homeowners against percent Black population.
with(
  cities,
  plot(
    x = pb_20, y = pbooDEC20, pch = 20,
    xlab = "Percent Black Population",
    ylab = "Percent Black Homeowners",
    main = "Cities by Percent Black Population and Black Homeownership in 2020"
  )
)

#The correlation between segregation and Black homeownership yielded a very weak positive association with a coefficient of 0.0364, which was not statistically significant (p = 0.558)

# Pearson correlation between the segregation D index and percent Black
# homeowners. Rows with a missing value in either column are dropped so that
# cor() returns an estimate instead of NA; cor.test() already does this.
cor(cities$dbw_20, cities$pbooDEC20, use = "complete.obs")
## [1] 0.03639314
cor.test(cities$dbw_20, cities$pbooDEC20)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$dbw_20 and cities$pbooDEC20
## t = 0.58608, df = 259, p-value = 0.5583
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08540425  0.15711885
## sample estimates:
##        cor 
## 0.03639314
# Scatterplot of percent Black homeowners against the segregation D index.
with(
  cities,
  plot(
    x = dbw_20, y = pbooDEC20, pch = 20,
    xlab = "D index",
    ylab = "Percent Black Homeowners",
    main = "Cities by Residential Segregation and Black Homeownership in 2020"
  )
)

# Pearson correlation between total city population and percent Black
# homeowners. Rows with a missing value in either column are dropped so that
# cor() returns an estimate instead of NA; cor.test() already does this.
cor(cities$t_20, cities$pbooDEC20, use = "complete.obs")
## [1] 0.0409314
cor.test(cities$t_20, cities$pbooDEC20)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$t_20 and cities$pbooDEC20
## t = 0.65928, df = 259, p-value = 0.5103
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08089062  0.16154852
## sample estimates:
##       cor 
## 0.0409314
# Scatterplot of percent Black homeowners against total city population.
with(
  cities,
  plot(
    x = t_20, y = pbooDEC20, pch = 20,
    xlab = "City Population",
    ylab = "Percent Black Homeowners",
    main = "Cities by City Total Population and Black Homeownership in 2020"
  )
)

#Utilizing all 261 Texas cities in the data for 2020, correlation tests produced somewhat expected results. The Pearson’s correlation produced a coefficient of 0.0409 for city population and Black homeownership revealing a very weak positive association (Figure 5). However, there were outliers discovered in the data. The city with the largest population is Houston with 2,304,580 and the city with the smallest population is Rockport with 10,070. The median city population is 24,486. The three largest cities in the data are Houston, San Antonio, and Dallas. Although they are significantly larger than other cities, they remained in the data because there are multiple freedom colonies within their city boundaries. The predictor variable of city population was rescaled through a natural log transformation to reduce the impact of the magnitude of the largest city sizes. The Pearson’s correlation then produced a coefficient of 0.0867, which is greater but remains a weak association.

#log total population due to outliers

# Pearson correlation between logged city population and percent Black
# homeowners. Rows with a missing value in either column are dropped so that
# cor() returns an estimate instead of NA; cor.test() already does this.
cor(log(cities$t_20), cities$pbooDEC20, use = "complete.obs")
## [1] 0.08671897
cor.test(log(cities$t_20), cities$pbooDEC20)
## 
##  Pearson's product-moment correlation
## 
## data:  log(cities$t_20) and cities$pbooDEC20
## t = 1.4009, df = 259, p-value = 0.1624
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0350703  0.2059702
## sample estimates:
##        cor 
## 0.08671897
# Distribution of total city population on the raw scale.
hist(cities$t_20)

# Same distribution after taking the natural log of (population + 1).
hist(log(cities$t_20+1))

# Logged distribution again, with finer binning (50 breaks requested).
hist(log(cities$t_20+1), breaks=50)

# Regress percent Black homeowners on logged city population. The columns are
# referenced with `$` inside the formula, so the slope is labelled
# "log(cities$t_20)" in the fitted object.
tpop <- lm(formula = cities$pbooDEC20 ~ log(cities$t_20))
summary(tpop)
## 
## Call:
## lm(formula = cities$pbooDEC20 ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.149  -6.552  -4.080   3.091  58.104 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -0.8246     6.7179  -0.123    0.902
## log(cities$t_20)   0.8999     0.6424   1.401    0.162
## 
## Residual standard error: 10.59 on 259 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.00752,    Adjusted R-squared:  0.003688 
## F-statistic: 1.962 on 1 and 259 DF,  p-value: 0.1624
# Slope on logged population divided by 100 (presumably to express the change
# in the outcome per 1% change in population -- confirm intended reading).
coef(tpop)["log(cities$t_20)"] / 100
## log(cities$t_20) 
##      0.008998776
     # Prints the natural log of t_20 for every row of `cities`; rows with a
     # missing t_20 print as NA.
     log(cities$t_20) 
##   [1] 11.737524  9.720826  9.877811  9.680281  9.792053 11.558157  9.809067
##   [8] 10.207215 12.208036  9.509482  9.874522  9.734832 12.884781 11.387067
##  [15]  9.461644 13.776619  9.500694 10.228646  9.801510 11.335006 11.655137
##  [22] 10.818337  9.522886  9.752781  9.258559 10.045595 10.107244 10.171375
##  [29]  9.789759  9.250330  9.437556  9.762442 12.137462  9.844905 10.022115
##  [36] 11.338334  9.622980  9.300090 10.771449  9.604812 10.345735 11.801362
##  [43] 10.802591 11.259258  9.725497 10.729591 10.382079  9.735010 10.353033
##  [50] 10.089967  9.268987 11.699496 10.168042 11.407076 10.220704 10.668560
##  [57] 10.509714 10.027208 12.669376 10.130982  9.802008 14.081238 10.448570
##  [64] 10.453717 10.105571 11.848462 10.935693  9.944965  9.728956  9.581973
##  [71] 10.614131 10.244592 11.515353  9.421411 13.428104  9.911406 11.019154
##  [78]  9.246865 10.491024  9.795791 11.237910  9.543593 10.062839  9.329012
##  [85] 10.250440 13.730949  9.401209  9.294222  9.277625 10.105857 10.626509
##  [92] 12.208614  9.732462  9.763881  9.281730 10.891075 12.413160  9.688746
##  [99] 11.115071  9.668967  9.301825 12.186380 10.832319        NA 10.245800
## [106]  9.760483 10.737982 10.407198 11.182044  9.493336  9.613937  9.681968
## [113]  9.544238  9.674011 10.020782  9.406565 14.650409  9.728837 10.735113
## [120] 10.606907 10.224737 12.455601        NA  9.546598        NA  9.993968
## [127] 10.731515 10.097326  9.501217 11.938814 10.142583 10.729788  9.329633
## [134]  9.799792 10.466640 10.246261  9.862092 10.628012  9.285912        NA
## [141] 12.449822 11.647386 10.988711  9.353748  9.445571 11.624664 10.746196
## [148]  9.666562  9.573524 11.310050 12.457380 10.438313  9.514437 11.865060
## [155] 12.182333  9.521641 11.192748 10.060149  9.539716  9.696340 11.919110
## [162] 11.794519 10.466668  9.603733 11.359518 10.516346 11.215314  9.683277
## [169]  9.952897 10.378074  9.844586 11.412033 11.155064 11.647701  9.869103
## [176]  9.827902  9.669662  9.733114 10.105448 11.931307 11.742671 10.033726
## [183]  9.466222 11.085077 11.286213  9.912794 12.561976  9.273127 10.933803
## [190]  9.922456  9.355047  9.524567  9.742556 10.314736  9.233666  9.562545
## [197]  9.512886 11.690812  9.361085  9.636719  9.428913  9.224539  9.217316
## [204] 10.763229  9.355393 10.552735 11.690804 11.043482  9.511037 10.207400
## [211] 10.081215 11.511855 14.176414 10.121056  9.221874 10.471468 11.120668
## [218]  9.452109  9.255123 10.645473  9.519148  9.822603 10.289872  9.301277
## [225] 10.683844  9.913636  9.344696 10.443076  9.689861 10.350255 11.043865
## [232]  9.779397  9.723942  9.947361 11.617520  9.676650  9.270683  9.696894
## [239] 11.315364  9.767954 10.496621 10.857036 10.704008 11.647771 10.469284
## [246]  9.420682  9.524275 11.571147  9.889389 10.137690  9.630169  9.218110
## [253] 11.090324 11.838525 10.071118 10.624736 10.337022  9.433404  9.546813
## [260] 10.600627 10.414513  9.612801  9.812961 11.535821 10.959992
# Scatterplot of percent Black homeowners against logged city population.
with(
  cities,
  plot(
    x = log(t_20), y = pbooDEC20, pch = 20,
    xlab = "logged City Population",
    ylab = "Percent Black Homeowners",
    main = "Cities by City Total Population and Black Homeownership in 2020"
  )
)

Linear Regression ——————————————————————————————————-

#model 1 #A multiple regression was performed starting with proximity to freedom colonies (Table 4). This variable proved to yield statistically significant results. Proximity to freedom colonies provided an increase in Black homeownership by 7.935 percentage points.

# Model 1: percent Black homeowners regressed on proximity alone.
cit <- lm(formula = pbooDEC20 ~ Proximity, data = cities)
summary(cit)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.307  -5.061  -2.421   2.319  53.683 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.5011     0.7799   7.054 1.58e-11 ***
## Proximity1    7.9354     1.2599   6.298 1.28e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.895 on 259 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1328, Adjusted R-squared:  0.1295 
## F-statistic: 39.67 on 1 and 259 DF,  p-value: 1.283e-09

#model 2 - add in total population logged #When the logged variable of city population was introduced in Model 2, its coefficient was small and positive (0.409) but not statistically significant. Proximity remained a significant factor.

# Model 2: adds logged total city population to Model 1.
cit2log <- lm(formula = pbooDEC20 ~ Proximity + log(t_20), data = cities)
summary(cit2log)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity + log(t_20), data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.209  -4.919  -2.322   2.125  53.538 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2897     6.2955   0.205    0.838    
## Proximity1    7.8224     1.2723   6.148 2.97e-09 ***
## log(t_20)     0.4088     0.6064   0.674    0.501    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.906 on 258 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1343, Adjusted R-squared:  0.1276 
## F-statistic: 20.02 on 2 and 258 DF,  p-value: 8.269e-09

#As established, percent of Black population is highly correlated with Black homeownership. As expected, an initial regression with this variable yielded a significant result, but only a small increase in homeownership. Due to this high correlation, proximity lost its significance when percent of Black population was introduced to the model. Therefore, this variable was omitted from the regression.

# Simple regression of percent Black population on pboo_20.
# NOTE(review): this uses pboo_20, whereas the other models in this file use
# pbooDEC20 for Black homeownership -- confirm which measure is intended.
citpb <- lm(formula = pb_20 ~ pboo_20, data = cities)
summary(citpb)
## 
## Call:
## lm(formula = pb_20 ~ pboo_20, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.2843  -2.6805  -0.6989   2.4274  12.8243 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.57565    0.32616   10.96   <2e-16 ***
## pboo_20      0.98774    0.02291   43.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.153 on 259 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.8777, Adjusted R-squared:  0.8772 
## F-statistic:  1859 on 1 and 259 DF,  p-value: < 2.2e-16

#model 3 add in segregation #In Model 3, the DI measure of residential segregation for each city was added to the regression. A one-point increase in residential segregation was associated with a 0.029-point decrease in homeownership. This result was not statistically significant but provides a different outcome than the previous analysis comparing cities near and not near freedom colonies

# Model 3: adds the segregation D index to Model 2.
cit3 <- lm(
  formula = pbooDEC20 ~ Proximity + log(t_20) + dbw_20,
  data = cities
)
summary(cit3)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity + log(t_20) + dbw_20, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.148  -4.798  -2.341   2.459  53.318 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.60463    6.44086   0.094    0.925    
## Proximity1   7.88156    1.27921   6.161 2.77e-09 ***
## log(t_20)    0.54287    0.65979   0.823    0.411    
## dbw_20      -0.02919    0.05618  -0.520    0.604    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.92 on 257 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1353, Adjusted R-squared:  0.1252 
## F-statistic:  13.4 on 3 and 257 DF,  p-value: 3.752e-08

#Separated cities by total population into big and small —————————————————————————————

# Read the pre-split small-city and big-city files.
# NOTE(review): machine-specific paths -- confirm before running elsewhere.
smallcit <- read.csv(
  file = "C:/Users/canda/DEM Dissertation Data/cities_small DEC2020.csv"
)
bigcit <- read.csv(
  file = "C:/Users/canda/DEM Dissertation Data/cities_big DEC2020.csv"
)

# Share of small cities flagged as near a freedom colony (mean of the flag).
summary(smallcit[["Proximity"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.3462  1.0000  1.0000
# Share of big cities flagged as near a freedom colony (mean of the flag).
summary(bigcit[["Proximity"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.4198  1.0000  1.0000
# Store the proximity flag as a numeric vector in both subsets.
smallcit[["Proximity"]] <- as.numeric(smallcit[["Proximity"]])
bigcit[["Proximity"]] <- as.numeric(bigcit[["Proximity"]])
class(smallcit[["Proximity"]])
## [1] "numeric"
# Confirm the storage class of the big-city proximity flag.
class(bigcit[["Proximity"]])
## [1] "numeric"
# Small cities: total population on the raw scale.
hist(smallcit$t_20)

# Small cities: natural log of (population + 1).
hist(log(smallcit$t_20+1))

# Small cities: logged population with finer binning (50 breaks requested).
hist(log(smallcit$t_20+1), breaks=50)

# Big cities: total population on the raw scale.
hist(bigcit$t_20)

# Big cities: natural log of (population + 1).
hist(log(bigcit$t_20+1))

# Big cities: logged population with finer binning (50 breaks requested).
hist(log(bigcit$t_20+1), breaks=50)

#In separate regression analyses, cities were separated into big and small, using the median of total population. Small cities, which have a total population below the median of 24,486 (n=130), continued to show that proximity to freedom colonies was a significant factor in the increase of homeownership. A one-point increase in residential segregation was associated with a 0.038-point decrease in homeownership and again was not statistically significant. A regression using big cities (n=131) showed similar results; however, the increase in homeownership associated with proximity was smaller.

# Small cities: percent Black homeowners regressed on proximity.
fit8s <- lm(formula = pbooDEC20 ~ Proximity, data = smallcit)
summary(fit8s)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = smallcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.329  -3.877  -1.692   2.833  41.641 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.3020     0.8631   4.984 1.97e-06 ***
## Proximity     8.1571     1.4670   5.561 1.50e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.957 on 128 degrees of freedom
## Multiple R-squared:  0.1946, Adjusted R-squared:  0.1883 
## F-statistic: 30.92 on 1 and 128 DF,  p-value: 1.5e-07
# Big cities: percent Black homeowners regressed on proximity.
fit8b <- lm(formula = pbooDEC20 ~ Proximity, data = bigcit)
summary(fit8b)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = bigcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.626  -6.392  -2.932   2.976  52.884 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.842      1.314   5.208 7.33e-07 ***
## Proximity      7.394      2.027   3.647 0.000384 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.45 on 129 degrees of freedom
## Multiple R-squared:  0.09347,    Adjusted R-squared:  0.08644 
## F-statistic:  13.3 on 1 and 129 DF,  p-value: 0.0003837
# Small cities: proximity model with the segregation D index added.
fit11s <- lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = smallcit)
summary(fit11s)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = smallcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.555  -3.802  -1.618   2.964  41.454 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.15627    1.59207   3.239  0.00153 ** 
## Proximity    8.14220    1.47054   5.537 1.69e-07 ***
## dbw_20      -0.03789    0.05928  -0.639  0.52387    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.976 on 127 degrees of freedom
## Multiple R-squared:  0.1971, Adjusted R-squared:  0.1845 
## F-statistic: 15.59 on 2 and 127 DF,  p-value: 8.798e-07
# Big cities: proximity model with the segregation D index added.
fit11b <- lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = bigcit)
summary(fit11b)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = bigcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.569  -6.261  -2.917   3.093  52.637 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.46917    2.62970   2.840 0.005245 ** 
## Proximity    7.54063    2.10322   3.585 0.000477 ***
## dbw_20      -0.02476    0.08988  -0.276 0.783348    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.49 on 128 degrees of freedom
## Multiple R-squared:  0.094,  Adjusted R-squared:  0.07985 
## F-statistic: 6.641 on 2 and 128 DF,  p-value: 0.001803

#When the cities were grouped by population size using the median, there was a difference in mean of percent Black homeownership by 2.82 points

library(dplyr)
# Split cities at the median total population and report, per size group, the
# number of cities and the means of homeownership, percent Black, Black
# population count and the D index.
# The threshold is computed from the data rather than hard-coded, and na.rm is
# passed through an anonymous function because forwarding extra arguments via
# across(...) is deprecated as of dplyr 1.1.0.
# Rows with a missing t_20 fall into an <NA> group.
cities %>%
  mutate(smalltown = ifelse(test = t_20 < median(t_20, na.rm = TRUE),
                            yes = "small",
                            no = "big")) %>%
  group_by(smalltown) %>%
  summarise(n = n(),
            across(.cols = c(pbooDEC20, pb_20, b_20, dbw_20),
                   .fns = function(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 × 6
##   smalltown     n pbooDEC20 pb_20   b_20 dbw_20
##   <chr>     <int>     <dbl> <dbl>  <dbl>  <dbl>
## 1 big         131      9.95  14.2 20064.   27.8
## 2 small       130      7.13  10.3  1682.   22.4
## 3 <NA>          4    NaN    NaN     NaN   NaN
# Same size-group means, restricted to cities near a freedom colony
# (Proximity == "1"). The size split still uses the median over all cities
# because it is computed before the filter. The threshold is taken from the
# data rather than hard-coded, and na.rm is passed through an anonymous
# function instead of the deprecated across(...) argument forwarding.
cities %>%
  mutate(smalltown = ifelse(test = t_20 < median(t_20, na.rm = TRUE),
                            yes = "small",
                            no = "big")) %>%
  group_by(smalltown) %>%
  filter(Proximity == "1") %>%
  summarise(n = n(),
            across(.cols = c(pbooDEC20, pb_20, b_20, dbw_20),
                   .fns = function(x) mean(x, na.rm = TRUE)))
## # A tibble: 2 × 6
##   smalltown     n pbooDEC20 pb_20   b_20 dbw_20
##   <chr>     <int>     <dbl> <dbl>  <dbl>  <dbl>
## 1 big          55      14.2  19.6 35797.   31.2
## 2 small        45      12.5  17.0  2730.   22.2
# Same size-group means, restricted to cities not near a freedom colony
# (Proximity == "0"). The size split still uses the median over all cities
# because it is computed before the filter. The threshold is taken from the
# data rather than hard-coded, and na.rm is passed through an anonymous
# function instead of the deprecated across(...) argument forwarding.
cities %>%
  mutate(smalltown = ifelse(test = t_20 < median(t_20, na.rm = TRUE),
                            yes = "small",
                            no = "big")) %>%
  group_by(smalltown) %>%
  filter(Proximity == "0") %>%
  summarise(n = n(),
            across(.cols = c(pbooDEC20, pb_20, b_20, dbw_20),
                   .fns = function(x) mean(x, na.rm = TRUE)))
## # A tibble: 2 × 6
##   smalltown     n pbooDEC20 pb_20  b_20 dbw_20
##   <chr>     <int>     <dbl> <dbl> <dbl>  <dbl>
## 1 big          76      6.84 10.2  8678.   25.3
## 2 small        85      4.30  6.71 1127.   22.5
# Size-group means again, this time splitting at the mean total population
# instead of the median. The threshold is computed from the data rather than
# typed in as a rounded constant, and na.rm is passed through an anonymous
# function instead of the deprecated across(...) argument forwarding.
# Rows with a missing t_20 fall into an <NA> group.
cities %>%
  mutate(smalltown = ifelse(test = t_20 < mean(t_20, na.rm = TRUE),
                            yes = "small",
                            no = "big")) %>%
  group_by(smalltown) %>%
  summarise(n = n(),
            across(.cols = c(pbooDEC20, pb_20, b_20, dbw_20),
                   .fns = function(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 × 6
##   smalltown     n pbooDEC20 pb_20   b_20 dbw_20
##   <chr>     <int>     <dbl> <dbl>  <dbl>  <dbl>
## 1 big          53      8.01  12.8 39839.   32.5
## 2 small       208      8.68  12.1  3536.   23.2
## 3 <NA>          4    NaN    NaN     NaN   NaN

Proximity to freedom colonies as the mediator

Causal mediation analysis ——————————————————————————————–

#linear regression model path c

library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
# Total effect (path c): percent Black homeowners regressed on the D index.
fit.totaleffect <- lm(formula = pbooDEC20 ~ dbw_20, data = cities)
summary(fit.totaleffect)
## 
## Call:
## lm(formula = pbooDEC20 ~ dbw_20, data = cities)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.787 -6.303 -3.709  3.457 58.702 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.73402    1.52652   5.066 7.71e-07 ***
## dbw_20       0.03215    0.05485   0.586    0.558    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.62 on 259 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.001324,   Adjusted R-squared:  -0.002531 
## F-statistic: 0.3435 on 1 and 259 DF,  p-value: 0.5583

#homeownership and percent black significant because they are highly correlated

# Percent Black population regressed on percent Black homeowners.
fit.totaleffect2 <- lm(formula = pb_20 ~ pbooDEC20, data = cities)
summary(fit.totaleffect2)
## 
## Call:
## lm(formula = pb_20 ~ pbooDEC20, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0786  -2.3097  -0.7025   1.6565  16.8774 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.07937    0.26834   11.48   <2e-16 ***
## pbooDEC20    1.07144    0.01973   54.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.374 on 259 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.9193, Adjusted R-squared:  0.919 
## F-statistic:  2950 on 1 and 259 DF,  p-value: < 2.2e-16

#Proximity as the mediator

# Declare the proximity flag as an ordered factor before fitting the SEM.
cities[["Proximity"]] <- as.ordered(cities[["Proximity"]])
class(cities[["Proximity"]])
## [1] "ordered" "factor"
# Re-tabulate the proximity flag (level counts plus NAs).
summary(cities[["Proximity"]])
##    0    1 NA's 
##  161  100    4
# lavaan model syntax for the mediation model:
# segregation (dbw_20) -> proximity -> Black homeownership (pbooDEC20).
# Because pbooDEC20 is regressed on both dbw_20 and Proximity, the coefficient
# labelled `c` is the direct effect; the indirect (a*b) and total effects are
# defined from the labelled paths.
specmod <- "
#Direct effect (path c')
pbooDEC20 ~ c*dbw_20

#Path a
Proximity ~ a*dbw_20

#Path b
pbooDEC20 ~ b*Proximity

#Indirect effect (a*b)
ab := a*b

#Total effect (direct + indirect)
total := c + (a*b)
"
library(lavaan)
fitmodel<- sem(specmod, data=cities)
summary(fitmodel,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 17 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                         6
## 
##                                                   Used       Total
##   Number of observations                           261         265
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                 0.000       0.000
##   Degrees of freedom                                 0           0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                47.826      47.826
##   Degrees of freedom                                 1           1
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000       1.000
##   Tucker-Lewis Index (TLI)                       1.000       1.000
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000       0.000
##   90 Percent confidence interval - lower         0.000       0.000
##   90 Percent confidence interval - upper         0.000       0.000
##   P-value H_0: RMSEA <= 0.050                       NA          NA
##   P-value H_0: RMSEA >= 0.080                       NA          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000       0.000
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   pbooDEC20 ~                                         
##     dbw_20     (c)   -0.043    0.053   -0.817    0.414
##   Proximity ~                                         
##     dbw_20     (a)    0.014    0.007    2.168    0.030
##   pbooDEC20 ~                                         
##     Proximity  (b)    5.319    0.769    6.916    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pbooDEC20         7.734    1.894    4.084    0.000
##    .Proximity         0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity|t1      0.655    0.182    3.600    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pbooDEC20        83.613    7.889   10.599    0.000
##    .Proximity         1.000                           
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity         1.000                           
## 
## R-Square:
##                    Estimate
##     pbooDEC20         0.254
##     Proximity         0.028
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.075    0.036    2.096    0.036

#Segregation as the mediator

# Remove the ordering from Proximity so that it enters the next model as a
# plain (unordered) factor predictor rather than as an ordinal outcome.
# NOTE(review): the lavaan tutorial recommends coding a binary exogenous
# covariate as a numeric 0/1 dummy - confirm that passing an unordered factor
# is intended here.
cities[["Proximity"]] <- factor(cities[["Proximity"]], ordered = FALSE)
class(cities[["Proximity"]])
## [1] "factor"
summary(cities[["Proximity"]])
##    0    1 NA's 
##  161  100    4
# Mediation model 2: Proximity -> dbw_20 -> pbooDEC20, with dbw_20 as the
# mediator.
#
# Because the mediator is part of the model, the coefficient labelled `c` is
# the direct effect of Proximity on pbooDEC20 (often written c'), not a total
# effect. Following the lavaan mediation tutorial, the total effect
# corresponds to c + a*b.
# The label `c` itself is kept so the printed parameter table is unchanged.
specmod2 <- "
# Direct effect of Proximity on pbooDEC20 net of the mediator (label c)
pbooDEC20 ~ c*Proximity

# Path a: predictor to mediator
dbw_20 ~ a*Proximity

# Path b: mediator to outcome
pbooDEC20 ~ b*dbw_20

# Indirect effect (a*b)
ab := a*b
"
# lavaan is already attached earlier in this section, so it is not re-attached
# here.
fitmodel2 <- sem(specmod2, data = cities)
summary(fitmodel2, fit.measures = TRUE, rsquare = TRUE)
## lavaan 0.6.15 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##                                                   Used       Total
##   Number of observations                           261         265
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                41.953
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1983.724
##   Loglikelihood unrestricted model (H1)      -1983.724
##                                                       
##   Akaike (AIC)                                3977.447
##   Bayesian (BIC)                              3995.270
##   Sample-size adjusted Bayesian (SABIC)       3979.418
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   pbooDEC20 ~                                         
##     Proximity  (c)    7.972    1.266    6.295    0.000
##   dbw_20 ~                                            
##     Proximity  (a)    3.297    1.512    2.180    0.029
##   pbooDEC20 ~                                         
##     dbw_20     (b)   -0.011    0.051   -0.216    0.829
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pbooDEC20        97.151    8.504   11.424    0.000
##    .dbw_20          141.024   12.345   11.424    0.000
## 
## R-Square:
##                    Estimate
##     pbooDEC20         0.133
##     dbw_20            0.018
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab               -0.037    0.170   -0.215    0.830