Black Homeownership in Historical Context

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(dplyr)
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
cities <- read.csv('C:/Users/canda/DEM Dissertation Data/citiesALLYRS.csv', na.strings=c("NA"))
cities <- cities %>% mutate_if(is.character, as.numeric)
## Warning: There were 117 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cityname = .Primitive("as.double")(cityname)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 116 remaining warnings.
cities$Proximity<-as.factor(ifelse(cities$Proximity==1, "In Proximity", "Not in Prox"))
stargazer(cities, header=FALSE, type='text', title="Descriptive Statistics",digits=1)
## 
## Descriptive Statistics
## =================================================
## Statistic  N    Mean   St. Dev.   Min      Max   
## -------------------------------------------------
## statefp20 272   48.0      0.0      48      48    
## placefp20 272 40,326.9 22,986.4  1,000   80,356  
## Miles     114   6.5       5.5     0.0     21.0   
## pboo_20   261   8.5      10.6     0.0     67.1   
## pboo_10   241   8.7      11.1     0.05    65.6   
## pboo_00   258   7.2       9.3     0.0     61.0   
## w_20      261 26,559.9 59,120.8   140    545,989 
## b_20      261 10,907.9 41,209.5    3     531,293 
## h_20      261 31,562.0 102,121.1  749   1,013,423
## a_20      261 5,215.7  16,081.4    4     181,536 
## o_20      261 1,436.2   3,254.3    29    32,339  
## t_20      261 75,681.7 207,755.3 10,070 2,304,580
## pw_20     261   42.1     21.3     1.2     86.7   
## pb_20     261   12.2     11.9     0.03    71.3   
## ph_20     261   38.1     25.0     5.6     98.5   
## pa_20     261   5.3       6.4     0.03    40.1   
## po_20     261   2.2       1.0     0.2      6.9   
## dwb_20    261   25.1     12.0     0.0     69.1   
## dwh_20    261   20.9     12.2     0.0     63.4   
## dwa_20    261   21.8     11.7     0.0     61.6   
## dbw_20    261   25.1     12.0     0.0     69.1   
## dbh_20    261   20.0     11.2     0.0     58.7   
## dba_20    261   25.7     13.4     0.0     74.2   
## dhw_20    261   20.9     12.2     0.0     63.4   
## dhb_20    261   20.0     11.2     0.0     58.7   
## dha_20    261   28.3     15.0     0.0     70.0   
## daw_20    261   21.8     11.7     0.0     61.6   
## dab_20    261   25.7     13.4     0.0     74.2   
## dah_20    261   28.3     15.0     0.0     70.0   
## w_10      241 28,100.9 58,830.2   116    537,901 
## b_10      241 9,745.9  39,365.9    3     495,792 
## h_10      241 28,865.3 96,765.6   540    919,668 
## a_10      241 3,491.9  11,185.4    4     135,131 
## o_10      241  509.4    1,166.5    2     10,959  
## t_10      241 70,713.3 194,540.9 10,127 2,099,451
## pw_10     241   48.9     23.3     0.9     92.9   
## pb_10     241   11.4     12.1     0.03    69.4   
## ph_10     241   34.9     25.4     4.0     98.7   
## pa_10     241   4.0       5.7     0.03    43.3   
## po_10     241   0.8       0.4     0.02     3.2   
## dwb_10    241   27.1     14.1     0.0     82.2   
## dwh_10    241   22.5     13.6     0.0     61.1   
## dwa_10    241   20.9     10.7     0.0     62.5   
## dbw_10    241   27.1     14.1     0.0     82.2   
## dbh_10    241   21.9     12.1     0.0     64.4   
## dba_10    241   27.5     15.8     0.0     83.3   
## dhw_10    241   22.5     13.6     0.0     61.1   
## dhb_10    241   21.9     12.1     0.0     64.4   
## dha_10    241   29.4     15.9     0.0     71.7   
## daw_10    241   20.9     10.7     0.0     62.5   
## dab_10    241   27.5     15.8     0.0     83.3   
## dah_10    241   29.4     15.9     0.0     71.7   
## w_00      205 31,761.8 66,126.9   169    601,851 
## b_00      205 9,421.2  42,032.3    4     495,338 
## h_00      205 24,319.5 83,258.7   399    730,865 
## a_00      205 2,511.1   9,120.7    4     111,379 
## o_00      205  600.0    1,513.7    4     14,198  
## t_00      205 68,613.6 190,232.9 10,302 1,953,631
## pw_00     205   56.7     23.5     1.5     94.8   
## pb_00     205   10.3     11.1     0.03    57.3   
## ph_00     205   29.3     25.7     2.9     97.9   
## pa_00     205   2.8       3.8     0.04    27.4   
## po_00     205   0.9       0.4     0.04     3.1   
## dwb_00    205   30.7     16.1     0.0     75.6   
## dwh_00    205   25.0     14.6     0.0     65.3   
## dwa_00    205   20.3     10.9     0.0     54.2   
## dbw_00    205   30.7     16.1     0.0     75.6   
## dbh_00    205   24.2     14.2     0.0     72.1   
## dba_00    205   30.6     17.6     0.0     77.9   
## dhw_00    205   25.0     14.6     0.0     65.3   
## dhb_00    205   24.2     14.2     0.0     72.1   
## dha_00    205   30.2     16.2     0.0     73.4   
## daw_00    205   20.3     10.9     0.0     54.2   
## dab_00    205   30.6     17.6     0.0     77.9   
## dah_00    205   30.2     16.2     0.0     73.4   
## w_90      182 34,347.7 73,188.7   841    662,642 
## b_90      182 8,693.2  40,405.8    0     447,144 
## h_90      182 17,633.1 60,672.7   252    520,282 
## a_90      182 1,344.7   5,261.6    2     64,126  
## t_90      182 62,297.8 167,007.8 10,023 1,630,553
## o_90      182  279.0     711.6     13     6,158  
## pw_90     182   65.0     22.8     4.1     97.3   
## pb_90     182   9.7      10.6     0.0     60.6   
## ph_90     182   23.3     24.8     1.1     95.5   
## pa_90     182   1.6       2.1     0.01    15.8   
## po_90     182   0.4       0.2     0.1      1.8   
## dwb_90    182   34.0     18.7     0.0     80.8   
## dwh_90    182   25.2     14.2     0.0     61.8   
## dwa_90    182   24.5     13.1     0.0     81.0   
## dbw_90    182   34.0     18.7     0.0     80.8   
## dbh_90    182   27.6     16.2     0.0     80.5   
## dba_90    182   36.0     20.8     0.0     80.6   
## dhw_90    182   25.2     14.2     0.0     61.8   
## dhb_90    182   27.6     16.2     0.0     80.5   
## dha_90    182   33.2     17.5     0.0     70.7   
## daw_90    182   24.5     13.1     0.0     81.0   
## dab_90    182   36.0     20.8     0.0     80.6   
## dah_90    182   33.2     17.5     0.0     70.7   
## t_80      157 59,889.3 164,434.4 10,197 1,595,138
## w_80      157 36,419.9 86,063.4  1,242   834,061 
## b_80      157 8,577.7  41,362.4    3     436,392 
## h_80      157 13,903.6 47,448.1   121    421,954 
## a_80      157  572.3    2,619.8    1     31,431  
## o_80      157  415.7    1,240.8    18    11,923  
## pw_80     157   70.4     22.4     5.8     97.6   
## pb_80     157   9.0      10.2     0.01    40.1   
## ph_80     157   19.4     23.6     0.9     93.0   
## pa_80     157   0.7       0.7     0.01     4.3   
## po_80     157   0.6       0.4     0.1      3.3   
## dwb_80    157   42.6     22.6     0.0     93.2   
## dwh_80    157   25.9     15.9     0.0     67.1   
## dwa_80    157   23.3     12.7     0.0     68.9   
## dbw_80    157   42.6     22.6     0.0     93.2   
## dbh_80    157   34.2     20.3     0.0     87.1   
## dba_80    157   43.1     25.2     0.0     89.1   
## dhw_80    157   25.9     15.9     0.0     67.1   
## dhb_80    157   34.2     20.3     0.0     87.1   
## dha_80    157   33.6     19.4     0.0     83.8   
## daw_80    157   23.3     12.7     0.0     68.9   
## dab_80    157   43.1     25.2     0.0     89.1   
## dah_80    157   33.6     19.4     0.0     83.8   
## -------------------------------------------------
stargazer(cities[c("pboo_20","t_20","dbw_20")], header=FALSE, type='text', 
          title="Descriptive Statistics 2020", digits=2,
          covariate.labels=c("Black Homeownership","City Population","B-W Dissimilarity")
          )
## 
## Descriptive Statistics 2020
## =============================================================
## Statistic            N    Mean     St. Dev.   Min      Max   
## -------------------------------------------------------------
## Black Homeownership 261   8.54      10.61     0.00    67.12  
## City Population     261 75,681.66 207,755.30 10,070 2,304,580
## B-W Dissimilarity   261   25.12     12.01     0.00    69.14  
## -------------------------------------------------------------
stargazer(cities[c("pboo_10","t_10","dbw_10")], header=FALSE, type='text', 
          title="Descriptive Statistics 2010", digits=2,
          covariate.labels=c("Black Homeownership","City Population","B-W Dissimilarity")
          )
## 
## Descriptive Statistics 2010
## =============================================================
## Statistic            N    Mean     St. Dev.   Min      Max   
## -------------------------------------------------------------
## Black Homeownership 241   8.65      11.07     0.05    65.63  
## City Population     241 70,713.28 194,540.90 10,127 2,099,451
## B-W Dissimilarity   241   27.13     14.08     0.00    82.22  
## -------------------------------------------------------------
stargazer(cities[c("pboo_00","t_00","dbw_00")], header=FALSE, type='text', 
          title="Descriptive Statistics 2000", digits=2,
          covariate.labels=c("Black Homeownership","City Population","B-W Dissimilarity")
          )
## 
## Descriptive Statistics 2000
## =============================================================
## Statistic            N    Mean     St. Dev.   Min      Max   
## -------------------------------------------------------------
## Black Homeownership 258   7.18       9.34     0.00    61.00  
## City Population     205 68,613.65 190,232.90 10,302 1,953,631
## B-W Dissimilarity   205   30.71     16.06     0.00    75.64  
## -------------------------------------------------------------
sapply(cities, function(x) sum(is.na(x)))
## statefp20 placefp20  cityname Proximity     Miles   FC.name   pboo_20   pboo_10 
##         0         0       272         0       158       272        11        31 
##   pboo_00   pboo_90   pboo_80      w_20      b_20      h_20      a_20      o_20 
##        14       272       272        11        11        11        11        11 
##      t_20     pw_20     pb_20     ph_20     pa_20     po_20    dwb_20    dwh_20 
##        11        11        11        11        11        11        11        11 
##    dwa_20    dbw_20    dbh_20    dba_20    dhw_20    dhb_20    dha_20    daw_20 
##        11        11        11        11        11        11        11        11 
##    dab_20    dah_20      w_10      b_10      h_10      a_10      o_10      t_10 
##        11        11        31        31        31        31        31        31 
##     pw_10     pb_10     ph_10     pa_10     po_10    dwb_10    dwh_10    dwa_10 
##        31        31        31        31        31        31        31        31 
##    dbw_10    dbh_10    dba_10    dhw_10    dhb_10    dha_10    daw_10    dab_10 
##        31        31        31        31        31        31        31        31 
##    dah_10      w_00      b_00      h_00      a_00      o_00      t_00     pw_00 
##        31        67        67        67        67        67        67        67 
##     pb_00     ph_00     pa_00     po_00    dwb_00    dwh_00    dwa_00    dbw_00 
##        67        67        67        67        67        67        67        67 
##    dbh_00    dba_00    dhw_00    dhb_00    dha_00    daw_00    dab_00    dah_00 
##        67        67        67        67        67        67        67        67 
##      w_90      b_90      h_90      a_90      t_90      o_90     pw_90     pb_90 
##        90        90        90        90        90        90        90        90 
##     ph_90     pa_90     po_90    dwb_90    dwh_90    dwa_90    dbw_90    dbh_90 
##        90        90        90        90        90        90        90        90 
##    dba_90    dhw_90    dhb_90    dha_90    daw_90    dab_90    dah_90      t_80 
##        90        90        90        90        90        90        90       115 
##      w_80      b_80      h_80      a_80      o_80     pw_80     pb_80     ph_80 
##       115       115       115       115       115       115       115       115 
##     pa_80     po_80    dwb_80    dwh_80    dwa_80    dbw_80    dbh_80    dba_80 
##       115       115       115       115       115       115       115       115 
##    dhw_80    dhb_80    dha_80    daw_80    dab_80    dah_80 
##       115       115       115       115       115       115
#Cities In Proximity = 1, Cities Not in Proximity = 0

cities$Proximity <- factor(cities$Proximity, ordered = FALSE )

# Make "Not in Proximity" the reference category
cities$Proximity <- relevel(cities$Proximity, ref = "Not in Prox")
table(cities$Proximity)
## 
##  Not in Prox In Proximity 
##          164          108
Count of missing values of Percent Black Homeownership for cities 2020 (pboo_20) = 11, 2010 (pboo_10) = 31, 2000 (pboo_00) = 14

Racial residential segregation and proximity to freedom colonies

#The correlation between segregation and Black home ownership yielded a weak positive association in all years with a coefficient of 0.0363 in 2020, 0.0815 in 2010, and 0.2355 in 2000.

cor.test(cities$dbw_20,cities$pboo_20)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$dbw_20 and cities$pboo_20
## t = 0.58608, df = 259, p-value = 0.5583
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08540425  0.15711885
## sample estimates:
##        cor 
## 0.03639314
plot(cities$dbw_20,cities$pboo_20,pch=20,
     xlab='D index',ylab='Percent Black Homeowners',
     main='Cities by Residential Segregation and Black Homeownership in 2020')

cor.test(cities$dbw_10,cities$pboo_10)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$dbw_10 and cities$pboo_10
## t = 1.2643, df = 239, p-value = 0.2074
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04532483  0.20575565
## sample estimates:
##        cor 
## 0.08150847
plot(cities$dbw_10,cities$pboo_10,pch=20,
     xlab='D index',ylab='Percent Black Homeowners',
     main='Cities by Residential Segregation and Black Homeownership in 2010')

cor.test(cities$dbw_00,cities$pboo_00)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$dbw_00 and cities$pboo_00
## t = 3.4189, df = 199, p-value = 0.0007626
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1004203 0.3621316
## sample estimates:
##       cor 
## 0.2355415
plot(cities$dbw_00,cities$pboo_00,pch=20,
     xlab='D index',ylab='Percent Black Homeowners',
     main='Cities by Residential Segregation and Black Homeownership in 2000')

ggplot(cities, aes(dbw_20, Proximity, col = dbw_20)) +
  geom_point()
## Warning: Removed 11 rows containing missing values (`geom_point()`).

ggplot(cities, aes(dbw_10, Proximity, col = dbw_10)) +
  geom_point()
## Warning: Removed 31 rows containing missing values (`geom_point()`).

ggplot(cities, aes(dbw_00, Proximity, col = dbw_00)) +
  geom_point()
## Warning: Removed 67 rows containing missing values (`geom_point()`).

#The list of 261 cities from the Diversity and Disparities project available for year 2020 was used to conduct a binomial regression analysis. Of these cities, 100 were identified as being in proximity to a freedom colony and 16 were not.

#The results showed that proximity to freedom colonies INCREASED residential segregation by 3.21 points in 2020. The results were statistically significant in 2020 and 2000, but not in 2010.

fit20=lm(dbw_20 ~ Proximity,cities)
summary(fit20)
## 
## Call:
## lm(formula = dbw_20 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.943  -9.284  -0.561   8.314  45.264 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            23.8749     0.9429  25.322   <2e-16 ***
## ProximityIn Proximity   3.2095     1.5157   2.118   0.0352 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.93 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.01702,    Adjusted R-squared:  0.01322 
## F-statistic: 4.484 on 1 and 259 DF,  p-value: 0.03517
fit10=lm(dbw_10 ~ Proximity,cities)
summary(fit10)
## 
## Call:
## lm(formula = dbw_10 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.653 -10.562  -1.782   8.095  56.137 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             26.079      1.175  22.198   <2e-16 ***
## ProximityIn Proximity    2.575      1.842   1.397    0.164    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.05 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.008104,   Adjusted R-squared:  0.003954 
## F-statistic: 1.953 on 1 and 239 DF,  p-value: 0.1636
fit00=lm(dbw_00 ~ Proximity,cities)
summary(fit00)
## 
## Call:
## lm(formula = dbw_00 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.126 -11.375  -0.984  10.532  41.766 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             28.647      1.427   20.08   <2e-16 ***
## ProximityIn Proximity    5.221      2.270    2.30   0.0225 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.89 on 203 degrees of freedom
##   (67 observations deleted due to missingness)
## Multiple R-squared:  0.0254, Adjusted R-squared:  0.0206 
## F-statistic: 5.291 on 1 and 203 DF,  p-value: 0.02246

#Alternatively, I used Proximity as the dependent (outcome) variable. The results showed that residential segregation INCREASED proximity by 0.0224 points in 2020. The results were statistically significant. The results were statistically significant in 2020 and 2000, but not in 2010.

model20 = glm(Proximity ~ dbw_20,
            data = cities,
            family = binomial(link="logit"))
model10 = glm(Proximity ~ dbw_10,
            data = cities,
            family = binomial(link="logit"))
model00 = glm(Proximity ~ dbw_00,
            data = cities,
            family = binomial(link="logit"))

summary(model20)
## 
## Call:
## glm(formula = Proximity ~ dbw_20, family = binomial(link = "logit"), 
##     data = cities)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.03052    0.30481  -3.381 0.000723 ***
## dbw_20       0.02243    0.01075   2.085 0.037057 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 348.37  on 260  degrees of freedom
## Residual deviance: 343.93  on 259  degrees of freedom
##   (11 observations deleted due to missingness)
## AIC: 347.93
## 
## Number of Fisher Scoring iterations: 4
summary(model10)
## 
## Call:
## glm(formula = Proximity ~ dbw_10, family = binomial(link = "logit"), 
##     data = cities)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.73383    0.28936  -2.536   0.0112 *
## dbw_10       0.01302    0.00936   1.391   0.1642  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 325.64  on 240  degrees of freedom
## Residual deviance: 323.70  on 239  degrees of freedom
##   (31 observations deleted due to missingness)
## AIC: 327.7
## 
## Number of Fisher Scoring iterations: 4
summary(model00)
## 
## Call:
## glm(formula = Proximity ~ dbw_00, family = binomial(link = "logit"), 
##     data = cities)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.067050   0.322461  -3.309 0.000936 ***
## dbw_00       0.020547   0.009106   2.256 0.024050 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.10  on 204  degrees of freedom
## Residual deviance: 269.88  on 203  degrees of freedom
##   (67 observations deleted due to missingness)
## AIC: 273.88
## 
## Number of Fisher Scoring iterations: 4
Anova(model20,
      type="II",
      test="LR")
## Analysis of Deviance Table (Type II tests)
## 
## Response: Proximity
##        LR Chisq Df Pr(>Chisq)  
## dbw_20   4.4355  1     0.0352 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(model10,
      type="II",
      test="LR")
## Analysis of Deviance Table (Type II tests)
## 
## Response: Proximity
##        LR Chisq Df Pr(>Chisq)
## dbw_10   1.9474  1     0.1629
Anova(model00,
      type="II",
      test="LR")
## Analysis of Deviance Table (Type II tests)
## 
## Response: Proximity
##        LR Chisq Df Pr(>Chisq)  
## dbw_00   5.2201  1    0.02233 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Proximity to freedom colonies and homeownership

Testing correlations

#Testing the correlation of proximity to a freedom colony and percent Black homeownership. #In proximity = 1, Not in proximity = 0. Using point biserial correlation. Treat as numeric within dataframe to get Pearsons R.

cor.test(as.numeric(cities$Proximity), cities$pboo_20)
## 
##  Pearson's product-moment correlation
## 
## data:  as.numeric(cities$Proximity) and cities$pboo_20
## t = 6.2183, df = 259, p-value = 2.004e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2499329 0.4616335
## sample estimates:
##       cor 
## 0.3604154
plot(cities$Proximity,cities$pboo_20,pch=20, 
     xlab='Proximity',ylab='Percent Black Homeowners',
     main='Cities by Proximity and Black Homeownership in 2020')

cor.test(as.numeric(cities$Proximity), cities$pboo_10)
## 
##  Pearson's product-moment correlation
## 
## data:  as.numeric(cities$Proximity) and cities$pboo_10
## t = 6.0327, df = 239, p-value = 6.091e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2485787 0.4683765
## sample estimates:
##       cor 
## 0.3635261
plot(cities$Proximity,cities$pboo_10,pch=20,
     xlab='Proximity',ylab='Percent Black Homeowners',
     main='Cities by Proximity and Black Homeownership in 2010')

cor.test(as.numeric(cities$Proximity), cities$pboo_00)
## 
##  Pearson's product-moment correlation
## 
## data:  as.numeric(cities$Proximity) and cities$pboo_00
## t = 6.617, df = 256, p-value = 2.132e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2727777 0.4818092
## sample estimates:
##       cor 
## 0.3821715
plot(cities$Proximity,cities$pboo_00,pch=20,
     xlab='Proximity',ylab='Percent Black Homeowners',
     main='Cities by Proximity and Black Homeownership in 2000')

Result - all 3 years show a weak positive association between proximity and percent Black homeownership.

#Plot the correlation between Proximity and homeownership using a different visual

ggplot(cities, aes(pboo_20, Proximity, col = pboo_20)) +
  geom_point()
## Warning: Removed 11 rows containing missing values (`geom_point()`).

ggplot(cities, aes(pboo_10, Proximity, col = pboo_10)) +
  geom_point()
## Warning: Removed 31 rows containing missing values (`geom_point()`).

ggplot(cities, aes(pboo_00, Proximity, col = pboo_00)) +
  geom_point()
## Warning: Removed 14 rows containing missing values (`geom_point()`).

#A strong positive association for the percent Black population and homeownership

cor.test(cities$pb_20,cities$pboo_20)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$pb_20 and cities$pboo_20
## t = 54.31, df = 259, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9476974 0.9675692
## sample estimates:
##     cor 
## 0.95879
plot(cities$pb_20,cities$pboo_20,pch=20,
     xlab='Percent Black Population',ylab='Percent Black Homeowners',
     main='Cities by Percent Black Population and Black Homeownership in 2020')

cor.test(cities$pb_10,cities$pboo_10)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$pb_10 and cities$pboo_10
## t = 59.286, df = 239, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9584755 0.9748110
## sample estimates:
##      cor 
## 0.967642
plot(cities$pb_10,cities$pboo_10,pch=20,
     xlab='Percent Black Population',ylab='Percent Black Homeowners',
     main='Cities by Percent Black Population and Black Homeownership in 2010')

cor.test(cities$pb_00,cities$pboo_00)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$pb_00 and cities$pboo_00
## t = 48.393, df = 199, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9475417 0.9696095
## sample estimates:
##       cor 
## 0.9600416
plot(cities$pb_00,cities$pboo_00,pch=20,
     xlab='Percent Black Population',ylab='Percent Black Homeowners',
     main='Cities by Percent Black Population and Black Homeownership in 2000')

#Weak association for the total population of the city and homeownership

cor.test(cities$t_20,cities$pboo_20)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$t_20 and cities$pboo_20
## t = 0.65928, df = 259, p-value = 0.5103
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08089062  0.16154852
## sample estimates:
##       cor 
## 0.0409314
plot(cities$t_20,cities$pboo_20,pch=20,
     xlab='City Population',ylab='Percent Black Homeowners',
     main='Cities by City Total Population and Black Homeownership in 2020')

cor.test(cities$t_10,cities$pboo_10)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$t_10 and cities$pboo_10
## t = 0.93395, df = 239, p-value = 0.3513
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06657168  0.18525682
## sample estimates:
##        cor 
## 0.06030206
plot(cities$t_10,cities$pboo_10,pch=20,
     xlab='City Population',ylab='Percent Black Homeowners',
     main='Cities by City Total Population and Black Homeownership in 2010')

cor.test(cities$t_00,cities$pboo_00)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$t_00 and cities$pboo_00
## t = 1.6142, df = 199, p-value = 0.1081
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02510209  0.24817753
## sample estimates:
##       cor 
## 0.1136876
plot(cities$t_00,cities$pboo_00,pch=20,
     xlab='City Population',ylab='Percent Black Homeowners',
     main='Cities by City Total Population and Black Homeownership in 2000')

#Utilizing all 261 Texas cities in the data for 2020, correlation tests produced somewhat expected results. The Pearson’s correlation produced a coefficient of 0.0409 for city population and Black homeownership revealing a very weak positive association (Figure 5). However, there were outliers discovered in the data. The city with the largest population is Houston with 2,304,580 and the city with the smallest population is Rockport with 10,070. The median city population is 24,486. The three largest cities in the data are Houston, San Antonio, and Dallas. Although they are significantly larger than other cities, they remained in the data because there are multiple freedom colonies within their city boundaries. The predictor variable of city population was rescaled through a natural log transformation to reduce the impact of the magnitude of the largest city sizes. The Pearson’s correlation then produced a coefficient of 0.0867, which is greater but remains a weak association.

#log total population due to outliers

cor.test(log(cities$t_20),cities$pboo_20)
## 
##  Pearson's product-moment correlation
## 
## data:  log(cities$t_20) and cities$pboo_20
## t = 1.4009, df = 259, p-value = 0.1624
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0350703  0.2059702
## sample estimates:
##        cor 
## 0.08671897
cor.test(log(cities$t_10),cities$pboo_10)
## 
##  Pearson's product-moment correlation
## 
## data:  log(cities$t_10) and cities$pboo_10
## t = 1.4047, df = 239, p-value = 0.1614
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03629194  0.21440427
## sample estimates:
##       cor 
## 0.0904895
cor.test(log(cities$t_00),cities$pboo_00)
## 
##  Pearson's product-moment correlation
## 
## data:  log(cities$t_00) and cities$pboo_00
## t = 1.5598, df = 199, p-value = 0.1204
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02893074  0.24457866
## sample estimates:
##      cor 
## 0.109904

#Log Total population

tpop20<- lm(cities$pboo_20 ~ log(cities$t_20))
summary(tpop20)
## 
## Call:
## lm(formula = cities$pboo_20 ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.149  -6.552  -4.080   3.091  58.104 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -0.8246     6.7179  -0.123    0.902
## log(cities$t_20)   0.8999     0.6424   1.401    0.162
## 
## Residual standard error: 10.59 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.00752,    Adjusted R-squared:  0.003688 
## F-statistic: 1.962 on 1 and 259 DF,  p-value: 0.1624
coef(tpop20)["log(cities$t_20)"]/100
## log(cities$t_20) 
##      0.008998776
plot(log(cities$t_20),cities$pboo_20,pch=20,
     xlab='logged City Population',ylab='Percent Black Homeowners',
     main='Cities by City Total Population and Black Homeownership in 2020')

tpop10<- lm(cities$pboo_10 ~ log(cities$t_10))
summary(tpop10)
## 
## Call:
## lm(formula = cities$pboo_10 ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.370  -6.767  -4.232   3.534  56.823 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)        -1.652      7.370  -0.224    0.823
## log(cities$t_10)    0.996      0.709   1.405    0.161
## 
## Residual standard error: 11.05 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.008188,   Adjusted R-squared:  0.004039 
## F-statistic: 1.973 on 1 and 239 DF,  p-value: 0.1614
coef(tpop10)["log(cities$t_10)"]/100
## log(cities$t_10) 
##      0.009959837
plot(log(cities$t_10),cities$pboo_10,pch=20,
     xlab='logged City Population',ylab='Percent Black Homeowners',
     main='Cities by City Total Population and Black Homeownership in 2010')

tpop00<- lm(cities$pboo_00 ~ log(cities$t_00))
summary(tpop00)
## 
## Call:
## lm(formula = cities$pboo_00 ~ log(cities$t_00))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.571 -6.250 -3.593  2.774 54.104 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -3.4148     7.2497  -0.471    0.638
## log(cities$t_00)   1.0890     0.6981   1.560    0.120
## 
## Residual standard error: 9.705 on 199 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.01208,    Adjusted R-squared:  0.007114 
## F-statistic: 2.433 on 1 and 199 DF,  p-value: 0.1204
coef(tpop00)["log(cities$t_00)"]/100
## log(cities$t_00) 
##       0.01088954
plot(log(cities$t_00),cities$pboo_00,pch=20,
     xlab='logged City Population',ylab='Percent Black Homeowners',
     main='Cities by City Total Population and Black Homeownership in 2000')

hist(cities$t_20)

hist(log(cities$t_20+1))

hist(log(cities$t_20+1), breaks=50)

hist(cities$t_10)

hist(log(cities$t_10+1))

hist(log(cities$t_10+1), breaks=50)

hist(cities$t_00)

hist(log(cities$t_00+1))

hist(log(cities$t_00+1), breaks=50)

## testing correlation of independent variables

#logged city population and percent blCK

tpop20B<- lm(cities$pb_20 ~ log(cities$t_20))
summary(tpop20B)
## 
## Call:
## lm(formula = cities$pb_20 ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.238  -8.232  -3.575   5.236  58.169 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -4.8470     7.4597   -0.65   0.5164  
## log(cities$t_20)   1.6408     0.7133    2.30   0.0222 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.76 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.02002,    Adjusted R-squared:  0.01624 
## F-statistic: 5.292 on 1 and 259 DF,  p-value: 0.02222
coef(tpop20B)["log(cities$t_20)"]/100
## log(cities$t_20) 
##       0.01640827
plot(log(cities$t_20),cities$pb_20,pch=20,
     xlab='logged City Population',ylab='Percent Black population',
     main='Cities by City Total Population and Black population in 2020')

tpop10B<- lm(cities$pb_10 ~ log(cities$t_10))
summary(tpop10B)
## 
## Call:
## lm(formula = cities$pb_10 ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.061  -7.954  -4.176   4.202  57.713 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -3.2387     8.0274  -0.403   0.6870  
## log(cities$t_10)   1.4167     0.7723   1.834   0.0678 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.03 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.01389,    Adjusted R-squared:  0.009759 
## F-statistic: 3.365 on 1 and 239 DF,  p-value: 0.06783
coef(tpop10B)["log(cities$t_10)"]/100
## log(cities$t_10) 
##       0.01416692
plot(log(cities$t_10),cities$pb_10,pch=20,
     xlab='logged City Population',ylab='Percent Black population',
     main='Cities by City Total Population and Black population in 2010')

tpop00B<- lm(cities$pb_00 ~ log(cities$t_00))
summary(tpop00B)
## 
## Call:
## lm(formula = cities$pb_00 ~ log(cities$t_00))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.692  -7.589  -4.004   4.610  48.198 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -4.7546     8.1419  -0.584   0.5599  
## log(cities$t_00)   1.4587     0.7854   1.857   0.0647 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.01 on 203 degrees of freedom
##   (67 observations deleted due to missingness)
## Multiple R-squared:  0.01671,    Adjusted R-squared:  0.01187 
## F-statistic:  3.45 on 1 and 203 DF,  p-value: 0.06471
coef(tpop00B)["log(cities$t_00)"]/100
## log(cities$t_00) 
##       0.01458666
plot(log(cities$t_00),cities$pb_00,pch=20,
     xlab='logged City Population',ylab='Percent Black population',
     main='Cities by City Total Population and Black population in 2000')

#logged city population and dissimilarity index/segregation

tpop20Bw<- lm(cities$dbw_20 ~ log(cities$t_20))
summary(tpop20Bw)
## 
## Call:
## lm(formula = cities$dbw_20 ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.301  -8.218  -1.514   7.249  47.509 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -24.0174     6.9900  -3.436 0.000688 ***
## log(cities$t_20)   4.7207     0.6684   7.063  1.5e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1615, Adjusted R-squared:  0.1583 
## F-statistic: 49.88 on 1 and 259 DF,  p-value: 1.501e-11
coef(tpop20Bw)["log(cities$t_20)"]/100
## log(cities$t_20) 
##       0.04720739
plot(log(cities$t_20),cities$dbw_20,pch=20,
     xlab='logged City Population',ylab='Segregation',
     main='Cities by City Total Population and B-W segregation in 2020')

tpop10Bw<- lm(cities$dbw_10 ~ log(cities$t_10))
summary(tpop10Bw)
## 
## Call:
## lm(formula = cities$dbw_10 ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.222 -10.573  -1.836   9.156  59.638 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -22.1664     8.8508  -2.504   0.0129 *  
## log(cities$t_10)   4.7643     0.8515   5.595 6.01e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.26 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1158, Adjusted R-squared:  0.1121 
## F-statistic: 31.31 on 1 and 239 DF,  p-value: 6.011e-08
coef(tpop10Bw)["log(cities$t_10)"]/100
## log(cities$t_10) 
##       0.04764328
plot(log(cities$t_10),cities$dbw_10,pch=20,
     xlab='logged City Population',ylab='segregation',
     main='Cities by City Total Population and B-W segregation in 2010')

tpop00Bw<- lm(cities$dbw_00 ~ log(cities$t_00))
summary(tpop00Bw)
## 
## Call:
## lm(formula = cities$dbw_00 ~ log(cities$t_00))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.857 -12.739  -0.844  11.567  48.645 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -17.128     11.416  -1.500    0.135    
## log(cities$t_00)    4.635      1.101   4.209 3.85e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.44 on 203 degrees of freedom
##   (67 observations deleted due to missingness)
## Multiple R-squared:  0.08027,    Adjusted R-squared:  0.07574 
## F-statistic: 17.72 on 1 and 203 DF,  p-value: 3.848e-05
coef(tpop00Bw)["log(cities$t_00)"]/100
## log(cities$t_00) 
##       0.04635165
plot(log(cities$t_00),cities$dbw_00,pch=20,
     xlab='logged City Population',ylab='Percent Black segregation',
     main='Cities by City Total Population and B-W segregation in 2000')

#logged city population and proximity

tpop20Bx<- lm(as.numeric(cities$Proximity) ~ log(cities$t_20))
summary(tpop20Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5772 -0.3825 -0.3305  0.5882  0.6855 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       0.73130    0.30756   2.378   0.0181 *
## log(cities$t_20)  0.06300    0.02941   2.142   0.0331 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4847 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.01741,    Adjusted R-squared:  0.01361 
## F-statistic: 4.588 on 1 and 259 DF,  p-value: 0.03312
coef(tpop20Bx)["log(cities$t_20)"]/100
## log(cities$t_20) 
##     0.0006299569
plot(log(cities$t_20),(as.numeric(cities$Proximity)),pch=20,
     xlab='logged City Population',ylab='proximity',
     main='Cities by City Total Population and Proximity in 2020')

tpop10Bx<- lm(as.numeric(cities$Proximity) ~ log(cities$t_10))
summary(tpop10Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5596 -0.3987 -0.3609  0.5829  0.6498 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       0.88552    0.32738   2.705  0.00733 **
## log(cities$t_10)  0.05037    0.03150   1.599  0.11109   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4906 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.01059,    Adjusted R-squared:  0.006448 
## F-statistic: 2.558 on 1 and 239 DF,  p-value: 0.1111
coef(tpop10Bx)["log(cities$t_10)"]/100
## log(cities$t_10) 
##     0.0005036838
plot(log(cities$t_10),(as.numeric(cities$Proximity)),pch=20,
     xlab='logged City Population',ylab='Proximity',
     main='Cities by City Total Population and Proximity in 2010')

tpop00Bx<- lm(as.numeric(cities$Proximity) ~ log(cities$t_00))
summary(tpop00Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_00))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6193 -0.3839 -0.3292  0.5884  0.6869 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       0.60308    0.35903   1.680   0.0945 .
## log(cities$t_00)  0.07674    0.03463   2.216   0.0278 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4854 on 203 degrees of freedom
##   (67 observations deleted due to missingness)
## Multiple R-squared:  0.02362,    Adjusted R-squared:  0.01881 
## F-statistic:  4.91 on 1 and 203 DF,  p-value: 0.02781
coef(tpop00Bx)["log(cities$t_00)"]/100
## log(cities$t_00) 
##     0.0007674211
plot(log(cities$t_00),(as.numeric(cities$Proximity)),pch=20,
     xlab='logged City Population',ylab='Proximity',
     main='Cities by City Total Population and Proximity in 2000')

cor.test(cities$pb_10,as.numeric(cities$Proximity))
## 
##  Pearson's product-moment correlation
## 
## data:  cities$pb_10 and as.numeric(cities$Proximity)
## t = 6.9313, df = 239, p-value = 3.853e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2981591 0.5091555
## sample estimates:
##       cor 
## 0.4091113
#cor.test(as.numeric(cities$Proximity), cities$pboo_10)

plot(cities$pb_10,as.numeric(cities$Proximity),pch=20,
     xlab='Percent Black Population',ylab='proximity',
     main='Cities by Percent Black Population and proximity in 2010')

cor.test(cities$pb_00,cities$dbw_00)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$pb_00 and cities$dbw_00
## t = 4.5033, df = 203, p-value = 1.127e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1714197 0.4210225
## sample estimates:
##       cor 
## 0.3013753
plot(cities$pb_00,cities$dbw_00,pch=20,
     xlab='Percent Black Population',ylab='segregation',
     main='Cities by Percent Black Population and segregation in 2000')

##Logistic Regression

#The models indicate that there is a positive relationship between the percent of Black homeownership and whether the city is within proximity to a freedom colony. Cities with high percent of Black homeowners are more likely to be within proximity to a freedom colony for years 2000, 2010, and 2020.

#model 1 #A multiple regression was performed starting with proximity to freedom colonies (Table 4). This variable proved to yield statistically significant results. Proximity to freedom colonies provided an increase in Black homeownership by 7.935 percentage points.

cit<- lm(pboo_20~Proximity,data=cities)

summary(cit)
## 
## Call:
## lm(formula = pboo_20 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.213  -5.090  -2.430   2.377  53.777 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.5103     0.7836   7.032 1.81e-11 ***
## ProximityIn Proximity   7.8331     1.2597   6.218 2.00e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.912 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1299, Adjusted R-squared:  0.1265 
## F-statistic: 38.67 on 1 and 259 DF,  p-value: 2.004e-09

#model 2 - add in total popualtion logged #When the logged variable of city population was introduced in Model 2, there was a slight increase in homeownership although population was not statistically significant. Proximity remained a significant factor.

cit2log <- lm(pboo_20~Proximity+log(t_20),data=cities)

summary(cit2log)
## 
## Call:
## lm(formula = pboo_20 ~ Proximity + log(t_20), data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.121  -4.947  -2.335   2.183  53.629 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.2494     6.3059   0.198    0.843    
## ProximityIn Proximity   7.7188     1.2721   6.068  4.6e-09 ***
## log(t_20)               0.4136     0.6074   0.681    0.496    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.922 on 258 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1315, Adjusted R-squared:  0.1247 
## F-statistic: 19.53 on 2 and 258 DF,  p-value: 1.27e-08

#As established, percent of Black population is highly correlated with Black homeownership. As expected, an initial regression with this variable yielded a significant result, but only a small increase in homeownership. Due to this high correlation, proximity lost its significance when percent of Black population was introduced to the model. Therefore, this variable was omitted from the regression.

citpb20=lm(pboo_20~pb_20,cities)
citpb10=lm(pboo_10~pb_10,cities)
citpb00=lm(pboo_00~pb_00,cities)
summary(citpb20)
## 
## Call:
## lm(formula = pboo_20 ~ pb_20, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.0288  -1.1835   0.5156   1.5324  12.6742 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.9526     0.2688  -7.264 4.42e-12 ***
## pb_20         0.8580     0.0158  54.310  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.019 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.9193, Adjusted R-squared:  0.919 
## F-statistic:  2950 on 1 and 259 DF,  p-value: < 2.2e-16
summary(citpb10)
## 
## Call:
## lm(formula = pboo_10 ~ pb_10, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.7624  -1.2648   0.3254   1.4259  13.1581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.46306    0.24821  -5.894 1.27e-08 ***
## pb_10        0.88586    0.01494  59.286  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.799 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.9363, Adjusted R-squared:  0.9361 
## F-statistic:  3515 on 1 and 239 DF,  p-value: < 2.2e-16
summary(citpb00)
## 
## Call:
## lm(formula = pboo_00 ~ pb_00, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.8641  -1.0172   0.3435   0.9166  17.2871 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.90972    0.26432  -3.442 0.000704 ***
## pb_00        0.83977    0.01735  48.393  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.733 on 199 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.9217, Adjusted R-squared:  0.9213 
## F-statistic:  2342 on 1 and 199 DF,  p-value: < 2.2e-16

#model 3 add in segregation #In Model 3, the DI measure of residential segregation for each city was added to the regression. Residential segregation caused a decrease in homeownership by -0.029. This result was not statistically significant but provides a different outcome than the previous analysis comparing cities near and not near freedom colonies

cit3 <- glm(pboo_20~Proximity+log(t_20)+dbw_20,data=cities)

summary(cit3)
## 
## Call:
## glm(formula = pboo_20 ~ Proximity + log(t_20) + dbw_20, data = cities)
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.60219    6.45204   0.093    0.926    
## ProximityIn Proximity  7.77216    1.27865   6.078 4.36e-09 ***
## log(t_20)              0.54030    0.66102   0.817    0.414    
## dbw_20                -0.02755    0.05626  -0.490    0.625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 98.74338)
## 
##     Null deviance: 29245  on 260  degrees of freedom
## Residual deviance: 25377  on 257  degrees of freedom
##   (11 observations deleted due to missingness)
## AIC: 1945.3
## 
## Number of Fisher Scoring iterations: 2

#models for 2010

cit10<- lm(pboo_10~Proximity,data=cities)
summary(cit10)
## 
## Call:
## lm(formula = pboo_10 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.352  -5.028  -2.318   2.112  52.128 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.328      0.864   6.167 2.95e-09 ***
## ProximityIn Proximity    8.174      1.355   6.033 6.09e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.33 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1322, Adjusted R-squared:  0.1285 
## F-statistic: 36.39 on 1 and 239 DF,  p-value: 6.091e-09
cit2log10 <- lm(pboo_10~Proximity+log(t_10),data=cities)
summary(cit2log10)
## 
## Call:
## lm(formula = pboo_10 ~ Proximity + log(t_10), data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.504  -4.772  -2.362   1.961  52.110 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -0.7308     6.8991  -0.106    0.916    
## ProximityIn Proximity   8.0498     1.3628   5.907  1.2e-08 ***
## log(t_10)               0.5905     0.6671   0.885    0.377    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.34 on 238 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.135,  Adjusted R-squared:  0.1277 
## F-statistic: 18.57 on 2 and 238 DF,  p-value: 3.199e-08
cit3_10 <- lm(pboo_10~Proximity+log(t_10)+dbw_10,data=cities)
summary(cit3_10)
## 
## Call:
## lm(formula = pboo_10 ~ Proximity + log(t_10) + dbw_10, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.912  -5.022  -2.373   1.779  52.130 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.12234    6.99816  -0.017    0.986    
## ProximityIn Proximity  8.00578    1.36716   5.856 1.57e-08 ***
## log(t_10)              0.46087    0.70883   0.650    0.516    
## dbw_10                 0.02768    0.05057   0.547    0.585    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.35 on 237 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1361, Adjusted R-squared:  0.1252 
## F-statistic: 12.44 on 3 and 237 DF,  p-value: 1.381e-07

#models for 2000

cit00<- lm(pboo_00~Proximity,data=cities)
summary(cit00)
## 
## Call:
## lm(formula = pboo_00 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.661  -4.213  -2.050   1.673  49.339 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             4.3498     0.6879   6.323 1.13e-09 ***
## ProximityIn Proximity   7.3115     1.1049   6.617 2.13e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.647 on 256 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.1461, Adjusted R-squared:  0.1427 
## F-statistic: 43.79 on 1 and 256 DF,  p-value: 2.132e-10
cit2log00 <- lm(pboo_00~Proximity+log(t_00),data=cities)
summary(cit2log00)
## 
## Call:
## lm(formula = pboo_00 ~ Proximity + log(t_00), data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.561  -4.419  -2.302   1.716  49.190 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -0.3797     6.7931  -0.056    0.955    
## ProximityIn Proximity   7.2792     1.3254   5.492 1.21e-07 ***
## log(t_00)               0.5186     0.6602   0.786    0.433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.064 on 198 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.1427, Adjusted R-squared:  0.134 
## F-statistic: 16.48 on 2 and 198 DF,  p-value: 2.406e-07
cit3_00 <- lm(pboo_00~Proximity+log(t_00)+dbw_00,data=cities)
summary(cit3_00)
## 
## Call:
## lm(formula = pboo_00 ~ Proximity + log(t_00) + dbw_00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.848  -4.473  -2.406   1.576  49.312 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1.22467    6.74083   0.182   0.8560    
## ProximityIn Proximity  6.78083    1.32471   5.119 7.29e-07 ***
## log(t_00)              0.07294    0.67694   0.108   0.9143    
## dbw_00                 0.10385    0.04240   2.449   0.0152 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.951 on 197 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.168,  Adjusted R-squared:  0.1553 
## F-statistic: 13.26 on 3 and 197 DF,  p-value: 6.389e-08
m1a <- lm(pboo_20~Proximity,data=cities)
m2a <- lm(pboo_20~Proximity+log(t_20),data=cities)
m3a <- lm(pboo_20~Proximity+log(t_20)+dbw_20,data=cities)
m4a <- glm(formula = Proximity ~ dbw_20, family = "binomial", data = cities)

m1b <- lm(pboo_10~Proximity,data=cities)
m2b <- lm(pboo_10~Proximity+log(t_10),data=cities)
m3b <- lm(pboo_10~Proximity+log(t_10)+dbw_10,data=cities)
m4b <- glm(formula =Proximity ~ dbw_10, family = "binomial", data = cities)

m1c <- lm(pboo_00~Proximity,data=cities)
m2c <- lm(pboo_00~Proximity+log(t_00),data=cities)
m3c <- lm(pboo_00~Proximity+log(t_00)+dbw_00,data=cities)
m4c <- glm(formula = Proximity ~ dbw_00, family = "binomial", data = cities)


stargazer(m1a, m2a, m3a, m4a, m1b, m2b, m3b, m4b, m1c, m2c, m3c, m4c, type="text", title="Regression Results",   
          align=TRUE, dep.var.labels=c("Percent Black Homeownership 2000", "Proximity", "Percent Black Homeownership 2010","Proximity", "Percent Black Homeownership 2020", "Proximity"),
          covariate.labels=c("Proximity","City Population 2000","Dissimilarity Index 2000", "City Population 2010", "Dissimilarity Index 2010", "City Population 2020","Dissimilarity Index 2020"), out="main.htm") 
## 
## Regression Results
## ==============================================================================================================================================================================================================================================================================
##                                                                                                                                           Dependent variable:                                                                                                                 
##                          -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##                                             Percent Black Homeownership 2000                            Proximity                           Percent Black Homeownership 2010                            Proximity                           Percent Black Homeownership 2020                            Proximity       
##                                                            OLS                                          logistic                                           OLS                                          logistic                                           OLS                                          logistic        
##                                    (1)                     (2)                     (3)              (4)              (5)                     (6)                     (7)              (8)              (9)                    (10)                    (11)             (12)   
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Proximity                       7.833***                7.719***                7.772***                          8.174***                8.050***                8.006***                          7.311***                7.279***                6.781***                  
##                                  (1.260)                 (1.272)                 (1.279)                           (1.355)                 (1.363)                 (1.367)                           (1.105)                 (1.325)                 (1.325)                  
##                                                                                                                                                                                                                                                                               
## City Population 2000                                      0.414                   0.540                                                                                                                                                                                       
##                                                          (0.607)                 (0.661)                                                                                                                                                                                      
##                                                                                                                                                                                                                                                                               
## Dissimilarity Index 2000                                                         -0.028           0.022**                                                                                                                                                                     
##                                                                                  (0.056)          (0.011)                                                                                                                                                                     
##                                                                                                                                                                                                                                                                               
## City Population 2010                                                                                                                        0.591                   0.461                                                                                                     
##                                                                                                                                            (0.667)                 (0.709)                                                                                                    
##                                                                                                                                                                                                                                                                               
## Dissimilarity Index 2010                                                                                                                                            0.028            0.013                                                                                    
##                                                                                                                                                                    (0.051)          (0.009)                                                                                   
##                                                                                                                                                                                                                                                                               
## City Population 2020                                                                                                                                                                                                          0.519                   0.073                   
##                                                                                                                                                                                                                              (0.660)                 (0.677)                  
##                                                                                                                                                                                                                                                                               
## Dissimilarity Index 2020                                                                                                                                                                                                                             0.104**          0.021** 
##                                                                                                                                                                                                                                                      (0.042)          (0.009) 
##                                                                                                                                                                                                                                                                               
## Constant                        5.510***                  1.249                   0.602          -1.031***        5.328***                 -0.731                  -0.122          -0.734**         4.350***                 -0.380                   1.225          -1.067***
##                                  (0.784)                 (6.306)                 (6.452)          (0.305)          (0.864)                 (6.899)                 (6.998)          (0.289)          (0.688)                 (6.793)                 (6.741)          (0.322) 
##                                                                                                                                                                                                                                                                               
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                       261                     261                     261              261              241                     241                     241              241              258                     201                     201              205   
## R2                                0.130                   0.131                   0.132                             0.132                   0.135                   0.136                             0.146                   0.143                   0.168                   
## Adjusted R2                       0.127                   0.125                   0.122                             0.129                   0.128                   0.125                             0.143                   0.134                   0.155                   
## Log Likelihood                                                                                   -171.967                                                                          -161.849                                                                          -134.942 
## Akaike Inf. Crit.                                                                                 347.934                                                                           327.698                                                                           273.883 
## Residual Std. Error         9.912 (df = 259)        9.922 (df = 258)        9.937 (df = 257)                  10.332 (df = 239)       10.337 (df = 238)       10.352 (df = 237)                 8.647 (df = 256)        9.064 (df = 198)        8.951 (df = 197)              
## F Statistic              38.667*** (df = 1; 259) 19.525*** (df = 2; 258) 13.058*** (df = 3; 257)           36.394*** (df = 1; 239) 18.572*** (df = 2; 238) 12.445*** (df = 3; 237)           43.785*** (df = 1; 256) 16.476*** (df = 2; 198) 13.261*** (df = 3; 197)          
## ==============================================================================================================================================================================================================================================================================
## Note:                                                                                                                                                                                                                                              *p<0.1; **p<0.05; ***p<0.01

Separated cities by total population into big and small

smallcit <- read.csv('C:/Users/canda/DEM Dissertation Data/cities_small DEC2020.csv')
bigcit <- read.csv('C:/Users/canda/DEM Dissertation Data/cities_big DEC2020.csv')

summary(smallcit$Proximity)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.3462  1.0000  1.0000
summary(bigcit$Proximity)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.4198  1.0000  1.0000
smallcit$Proximity <- as.numeric(smallcit$Proximity)
bigcit$Proximity <- as.numeric(bigcit$Proximity)
class(smallcit$Proximity)
## [1] "numeric"
class(bigcit$Proximity)
## [1] "numeric"
hist(smallcit$t_20)

hist(log(smallcit$t_20+1))

hist(log(smallcit$t_20+1), breaks=50)

hist(bigcit$t_20)

hist(log(bigcit$t_20+1))

hist(log(bigcit$t_20+1), breaks=50)

#In separate regression analyses, cities were separated into big and small, using the median of total population. Small cities which have a total population of 24,486 or less (n=130) continued to show that proximity to freedom colonies was a significant factor in the increase of homeownership. Residential segregation produced a decrease in homeownership of -0.038 and again was not statistically significant. A regression using big cities (n=131) showed similar results, however the increase in homeownership due to proximity was smaller

fit8s<-lm(pbooDEC20~Proximity, data = smallcit)
summary(fit8s)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = smallcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.329  -3.877  -1.692   2.833  41.641 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.3020     0.8631   4.984 1.97e-06 ***
## Proximity     8.1571     1.4670   5.561 1.50e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.957 on 128 degrees of freedom
## Multiple R-squared:  0.1946, Adjusted R-squared:  0.1883 
## F-statistic: 30.92 on 1 and 128 DF,  p-value: 1.5e-07
fit8b<-lm(pbooDEC20~Proximity, data = bigcit)
summary(fit8b)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = bigcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.626  -6.392  -2.932   2.976  52.884 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.842      1.314   5.208 7.33e-07 ***
## Proximity      7.394      2.027   3.647 0.000384 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.45 on 129 degrees of freedom
## Multiple R-squared:  0.09347,    Adjusted R-squared:  0.08644 
## F-statistic:  13.3 on 1 and 129 DF,  p-value: 0.0003837
fit11s<-lm(pbooDEC20~Proximity+dbw_20, data = smallcit)
summary(fit11s)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = smallcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.555  -3.802  -1.618   2.964  41.454 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.15627    1.59207   3.239  0.00153 ** 
## Proximity    8.14220    1.47054   5.537 1.69e-07 ***
## dbw_20      -0.03789    0.05928  -0.639  0.52387    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.976 on 127 degrees of freedom
## Multiple R-squared:  0.1971, Adjusted R-squared:  0.1845 
## F-statistic: 15.59 on 2 and 127 DF,  p-value: 8.798e-07
fit11b<-lm(pbooDEC20~Proximity+dbw_20, data = bigcit)
summary(fit11b)
## 
## Call:
## lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = bigcit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.569  -6.261  -2.917   3.093  52.637 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.46917    2.62970   2.840 0.005245 ** 
## Proximity    7.54063    2.10322   3.585 0.000477 ***
## dbw_20      -0.02476    0.08988  -0.276 0.783348    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.49 on 128 degrees of freedom
## Multiple R-squared:  0.094,  Adjusted R-squared:  0.07985 
## F-statistic: 6.641 on 2 and 128 DF,  p-value: 0.001803

#When the cities were grouped by population size using the median, there was a difference in mean of percent Black homeownership by 2.82 points

cities%>%
  mutate(smalltown = ifelse(test = t_20 < 24486,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  summarise(n = n(),
            across(.cols = c(pboo_20, pb_20, b_20, dbw_20),
                   .fns = mean,
                   na.rm = TRUE))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(...)`.
## ℹ In group 1: `smalltown = "big"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
## # A tibble: 3 × 6
##   smalltown     n pboo_20 pb_20   b_20 dbw_20
##   <chr>     <int>   <dbl> <dbl>  <dbl>  <dbl>
## 1 big         131    9.95  14.2 20064.   27.8
## 2 small       130    7.13  10.3  1682.   22.4
## 3 <NA>         11  NaN    NaN     NaN   NaN

#using median for 2010

cities%>%
  mutate(smalltown = ifelse(test = t_10 < 23497,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  summarise(n = n(),
            across(.cols = c(pboo_10, pb_10, b_10, dbw_10),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 3 × 6
##   smalltown     n pboo_10  pb_10   b_10 dbw_10
##   <chr>     <int>   <dbl>  <dbl>  <dbl>  <dbl>
## 1 big         121   10.1   13.4  17999.   29.7
## 2 small       120    7.18   9.46  1424.   24.6
## 3 <NA>         31  NaN    NaN      NaN   NaN

#using median for 2000

cities%>%
  mutate(smalltown = ifelse(test = t_00 < 23935,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  summarise(n = n(),
            across(.cols = c(pboo_00, pb_00, b_00, dbw_00),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 3 × 6
##   smalltown     n pboo_00  pb_00   b_00 dbw_00
##   <chr>     <int>   <dbl>  <dbl>  <dbl>  <dbl>
## 1 big         103    9.45  12.3  17518.   32.7
## 2 small       102    6.15   8.30  1245.   28.7
## 3 <NA>         67    4.86 NaN      NaN   NaN
cities%>%
  mutate(smalltown = ifelse(test = t_20 < 24486,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  filter(Proximity == "1")%>%
  summarise(n = n(),
            across(.cols = c(pboo_20, pb_20, b_20, dbw_20),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_20 <dbl>, pb_20 <dbl>,
## #   b_20 <dbl>, dbw_20 <dbl>
cities%>%
  mutate(smalltown = ifelse(test = t_20 < 24486,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  filter(Proximity == "0")%>%
  summarise(n = n(),
            across(.cols = c(pboo_20, pb_20, b_20, dbw_20),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_20 <dbl>, pb_20 <dbl>,
## #   b_20 <dbl>, dbw_20 <dbl>
cities%>%
  mutate(smalltown = ifelse(test = t_10 < 23497,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  filter(Proximity == "1")%>%
  summarise(n = n(),
            across(.cols = c(pboo_10, pb_10, b_10, dbw_10),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_10 <dbl>, pb_10 <dbl>,
## #   b_10 <dbl>, dbw_10 <dbl>
 cities%>%
  mutate(smalltown = ifelse(test = t_10 < 23497,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  filter(Proximity == "0")%>%
  summarise(n = n(),
            across(.cols = c(pboo_10, pb_10, b_10, dbw_10),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_10 <dbl>, pb_10 <dbl>,
## #   b_10 <dbl>, dbw_10 <dbl>
cities%>%
  mutate(smalltown = ifelse(test = t_00 < 23935,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  filter(Proximity == "1")%>%
  summarise(n = n(),
            across(.cols = c(pboo_00, pb_00, b_00, dbw_00),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_00 <dbl>, pb_00 <dbl>,
## #   b_00 <dbl>, dbw_00 <dbl>
cities%>%
  mutate(smalltown = ifelse(test = t_00 < 23935,
                           yes =  "small",
                           no =  "big") )%>%
  group_by(smalltown) %>%
  filter(Proximity == "0")%>%
  summarise(n = n(),
            across(.cols = c(pboo_00, pb_00, b_00, dbw_00),
                   .fns = mean,
                   na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_00 <dbl>, pb_00 <dbl>,
## #   b_00 <dbl>, dbw_00 <dbl>

Proximity to freedom colonies as the mediator

Causal mediation analysis

#linear regression model path c

library(lavaan)

fit.totaleffect20=lm(pboo_20~dbw_20,cities)
summary(fit.totaleffect20)
## 
## Call:
## lm(formula = pboo_20 ~ dbw_20, data = cities)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.787 -6.303 -3.709  3.457 58.702 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.73402    1.52652   5.066 7.71e-07 ***
## dbw_20       0.03215    0.05485   0.586    0.558    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.62 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.001324,   Adjusted R-squared:  -0.002531 
## F-statistic: 0.3435 on 1 and 259 DF,  p-value: 0.5583
fit.totaleffect10=lm(pboo_10~dbw_10,cities)
summary(fit.totaleffect10)
## 
## Call:
## lm(formula = pboo_10 ~ dbw_10, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.133  -6.681  -4.104   3.138  57.176 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.91381    1.54839   4.465 1.23e-05 ***
## dbw_10       0.06408    0.05069   1.264    0.207    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.05 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.006644,   Adjusted R-squared:  0.002487 
## F-statistic: 1.598 on 1 and 239 DF,  p-value: 0.2074
fit.totaleffect00=lm(pboo_00~dbw_00,cities)
summary(fit.totaleffect00)
## 
## Call:
## lm(formula = pboo_00 ~ dbw_00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.250  -5.673  -2.842   2.320  53.447 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3670     1.4704   2.290 0.023074 *  
## dbw_00        0.1453     0.0425   3.419 0.000763 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.49 on 199 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.05548,    Adjusted R-squared:  0.05073 
## F-statistic: 11.69 on 1 and 199 DF,  p-value: 0.0007626

#homeownership and percent black significant because they are highly correlated

fit.totaleffect2_20=lm(pb_20~pboo_20,cities)
summary(fit.totaleffect2_20)
## 
## Call:
## lm(formula = pb_20 ~ pboo_20, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0786  -2.3097  -0.7025   1.6565  16.8774 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.07937    0.26834   11.48   <2e-16 ***
## pboo_20      1.07144    0.01973   54.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.374 on 259 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.9193, Adjusted R-squared:  0.919 
## F-statistic:  2950 on 1 and 259 DF,  p-value: < 2.2e-16
fit.totaleffect2_10=lm(pb_10~pboo_10,cities)
summary(fit.totaleffect2_10)
## 
## Call:
## lm(formula = pb_10 ~ pboo_10, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0698  -1.9509  -0.6102   1.4097  15.1085 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.27341    0.25014   9.089   <2e-16 ***
## pboo_10      1.05697    0.01783  59.286   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.057 on 239 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.9363, Adjusted R-squared:  0.9361 
## F-statistic:  3515 on 1 and 239 DF,  p-value: < 2.2e-16
fit.totaleffect2_00=lm(pb_00~pboo_00,cities)
summary(fit.totaleffect2_00)
## 
## Call:
## lm(formula = pb_00 ~ pboo_00, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.2517  -1.6284  -0.6059   1.2296  19.8110 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.81477    0.28319   6.408 1.04e-09 ***
## pboo_00      1.09754    0.02268  48.393  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.124 on 199 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.9217, Adjusted R-squared:  0.9213 
## F-statistic:  2342 on 1 and 199 DF,  p-value: < 2.2e-16

#Proximity as the mediator

cities$Proximity <- as.ordered(cities$Proximity)
class(cities$Proximity)
## [1] "ordered" "factor"
summary(cities$Proximity)
##  Not in Prox In Proximity 
##          164          108
specmod20 <- "
#Path c
pboo_20 ~ c*dbw_20

#Path a
Proximity ~ a*dbw_20

#Path b
pboo_20 ~ b*Proximity

#Indirect effect (a*b)
ab :=a*b
"
specmod10 <- "
#Path c
pboo_10 ~ c*dbw_10

#Path a
Proximity ~ a*dbw_10

#Path b
pboo_10 ~ b*Proximity

#Indirect effect (a*b)
ab :=a*b
"
specmod00 <- "
#Path c
pboo_00 ~ c*dbw_00

#Path a
Proximity ~ a*dbw_00

#Path b
pboo_00 ~ b*Proximity

#Indirect effect (a*b)
ab :=a*b
"
fitmodel20<- sem(specmod20, data=cities)
fitmodel10<- sem(specmod10, data=cities)
fitmodel00<- sem(specmod00, data=cities)
summary(fitmodel20,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 16 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                         6
## 
##                                                   Used       Total
##   Number of observations                           261         272
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                 0.000       0.000
##   Degrees of freedom                                 0           0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                46.356      46.356
##   Degrees of freedom                                 1           1
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000       1.000
##   Tucker-Lewis Index (TLI)                       1.000       1.000
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000       0.000
##   90 Percent confidence interval - lower         0.000       0.000
##   90 Percent confidence interval - upper         0.000       0.000
##   P-value H_0: RMSEA <= 0.050                       NA          NA
##   P-value H_0: RMSEA >= 0.080                       NA          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000       0.000
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   pboo_20 ~                                           
##     dbw_20     (c)   -0.040    0.053   -0.770    0.441
##   Proximity ~                                         
##     dbw_20     (a)    0.014    0.007    2.112    0.035
##   pboo_20 ~                                           
##     Proximity  (b)    5.275    0.775    6.809    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pboo_20           7.734    1.894    4.084    0.000
##    .Proximity         0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity|t1      0.636    0.182    3.492    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pboo_20          84.081    7.929   10.605    0.000
##    .Proximity         1.000                           
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity         1.000                           
## 
## R-Square:
##                    Estimate
##     pboo_20           0.250
##     Proximity         0.027
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.073    0.036    2.043    0.041
summary(fitmodel10,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 19 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                         6
## 
##                                                   Used       Total
##   Number of observations                           241         272
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                 0.000       0.000
##   Degrees of freedom                                 0           0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                47.298      47.298
##   Degrees of freedom                                 1           1
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000       1.000
##   Tucker-Lewis Index (TLI)                       1.000       1.000
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000       0.000
##   90 Percent confidence interval - lower         0.000       0.000
##   90 Percent confidence interval - upper         0.000       0.000
##   P-value H_0: RMSEA <= 0.050                       NA          NA
##   P-value H_0: RMSEA >= 0.080                       NA          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000       0.000
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   pboo_10 ~                                           
##     dbw_10     (c)    0.019    0.047    0.404    0.686
##   Proximity ~                                         
##     dbw_10     (a)    0.008    0.006    1.404    0.160
##   pboo_10 ~                                           
##     Proximity  (b)    5.568    0.810    6.877    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pboo_10           6.914    2.065    3.348    0.001
##    .Proximity         0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity|t1      0.457    0.178    2.573    0.010
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pboo_10          90.177    8.831   10.212    0.000
##    .Proximity         1.000                           
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity         1.000                           
## 
## R-Square:
##                    Estimate
##     pboo_10           0.261
##     Proximity         0.013
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.045    0.032    1.386    0.166
summary(fitmodel00,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 19 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                         6
## 
##                                                   Used       Total
##   Number of observations                           201         272
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                 0.000       0.000
##   Degrees of freedom                                 0           0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                45.724      45.724
##   Degrees of freedom                                 1           1
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000       1.000
##   Tucker-Lewis Index (TLI)                       1.000       1.000
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000       0.000
##   90 Percent confidence interval - lower         0.000       0.000
##   90 Percent confidence interval - upper         0.000       0.000
##   P-value H_0: RMSEA <= 0.050                       NA          NA
##   P-value H_0: RMSEA >= 0.080                       NA          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000       0.000
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   pboo_00 ~                                           
##     dbw_00     (c)    0.080    0.042    1.920    0.055
##   Proximity ~                                         
##     dbw_00     (a)    0.016    0.006    2.634    0.008
##   pboo_00 ~                                           
##     Proximity  (b)    4.128    0.610    6.762    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pboo_00           3.367    1.852    1.818    0.069
##    .Proximity         0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity|t1      0.763    0.209    3.644    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .pboo_00          72.116    6.280   11.484    0.000
##    .Proximity         1.000                           
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     Proximity         1.000                           
## 
## R-Square:
##                    Estimate
##     pboo_00           0.236
##     Proximity         0.058
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.065    0.026    2.457    0.014