Black Homeownership in Historical Context

# Pin a CRAN mirror so install.packages() can run without an interactive
# mirror prompt (e.g. when knitting).
r <- getOption("repos")
r["CRAN"] <- "http://cran.us.r-project.org"
options(repos = r)

# Install tidyverse only when it is missing; reinstalling on every run is slow
# and makes the script fail when offline.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into 'C:/Users/canda/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'tidyverse' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\canda\AppData\Local\Temp\RtmpqubneP\downloaded_packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(stats)
library(car)
## Loading required package: carData
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.3.2
## This is lavaan 0.6-17
## lavaan is FREE software! Please report any bugs.
# Load the city-level data set; the literal string "NA" marks missing values.
cities <- read.csv(
  "C:/Users/canda/DEM Dissertation Data/citiesALLYRS.csv",
  na.strings = c("NA")
)

# Coerce every character column to numeric. Values that do not parse as numbers
# become NA (with a coercion warning), so text identifier columns such as
# `cityname` end up entirely NA after this step.
# across(where()) replaces the superseded mutate_if().
cities <- cities %>%
  mutate(across(where(is.character), as.numeric))
## Warning: There were 25 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cityname = .Primitive("as.double")(cityname)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 24 remaining warnings.
# Label the proximity indicator: 1 -> "In Proximity", any other non-missing
# value -> "Not in Prox" (NA stays NA).
cities$Proximity <- factor(
  ifelse(cities$Proximity == 1, "In Proximity", "Not in Prox")
)

# Summary statistics for the data frame, printed as text and also written to
# Descript.htm.
stargazer(
  cities,
  type = "text",
  title = "Descriptive Statistics",
  digits = 2,
  header = FALSE,
  out = "Descript.htm"
)
## 
## Descriptive Statistics
## ===================================================
## Statistic  N    Mean     St. Dev.   Min      Max   
## ---------------------------------------------------
## Miles     114   6.50       5.53     0.00    21.00  
## gini_20   262   0.42       0.05     0.29    0.54   
## gini_10   264   0.42       0.06     0.26    0.57   
## pboo_20   262   8.42      10.61     0.00    67.12  
## pboo_10   240   8.58      11.03     0.05    65.63  
## pboo_00   256   7.07       9.08     0.00    61.00  
## pboo_90   257   6.45       8.59     0.00    60.64  
## pboo_80   241   5.32       7.07     0.00    33.94  
## chg_t20   235 10,355.00 24,403.75  -3,487  205,129 
## chg_pb20  235   1.08       2.36    -8.59    15.95  
## t_20      261 75,666.40 207,759.00 10,070 2,304,580
## pb_20     261   12.15     11.86     0.03    71.27  
## dbw_20    261   25.38     12.07     0.00    69.14  
## chg_t10   204 12,128.63 26,631.64  -9,504  206,512 
## chg_pb10  204   1.36       3.65    -8.32    23.16  
## t_10      240 70,884.63 194,929.20 10,127 2,099,451
## pb_10     240   11.39     12.10     0.03    69.35  
## dbw_10    240   27.19     14.07     0.00    82.22  
## chg_t00   174 13,749.87 36,706.30  -2,072  323,078 
## chg_pb00  174   1.23       3.85    -5.08    25.02  
## t_00      204 68,784.74 190,685.10 10,302 1,953,631
## pb_00     204   10.19     10.99     0.03    57.26  
## dbw_00    204   30.84     15.98     0.00    75.64  
## chg_t90   153 9,878.79  22,303.25  -4,247  150,053 
## chg_pb90  153   1.49       4.02    -5.08    24.12  
## t_90      180 62,714.99 167,886.90 10,023 1,630,553
## pb_90     180   9.46      10.45     0.00    60.56  
## dbw_90    180   34.40     18.49     0.00    80.76  
## t_80      154 60,676.74 165,936.10 10,197 1,595,138
## pb_80     154   8.62       9.85     0.01    40.12  
## dbw_80    154   43.36     22.13     0.00    93.17  
## ---------------------------------------------------
# Summary statistics for the 2020 analysis variables only, with readable row
# labels (given in the same order as the selected columns).
stargazer(
  cities[c("pboo_20", "t_20", "pb_20", "chg_t20", "chg_pb20")],
  type = "text",
  title = "Descriptive Statistics 2020",
  covariate.labels = c(
    "Black Homeownership",
    "City Population",
    "Percent Black Population",
    "Change in City Population",
    "Change in Percent Black Population"
  ),
  digits = 2,
  header = FALSE,
  out = "Descriptive Homeowner 2020.htm"
)
## 
## Descriptive Statistics 2020
## ============================================================================
## Statistic                           N    Mean     St. Dev.   Min      Max   
## ----------------------------------------------------------------------------
## Black Homeownership                262   8.42      10.61     0.00    67.12  
## City Population                    261 75,666.40 207,759.00 10,070 2,304,580
## Percent Black Population           261   12.15     11.86     0.03    71.27  
## Change in City Population          235 10,355.00 24,403.75  -3,487  205,129 
## Change in Percent Black Population 235   1.08       2.36    -8.59    15.95  
## ----------------------------------------------------------------------------
# Count of cities in each proximity category.
table(cities[["Proximity"]])
## 
## In Proximity  Not in Prox 
##          107          162
# Install ggcorrplot only when it is not already available, instead of
# reinstalling it on every run.
if (!requireNamespace("ggcorrplot", quietly = TRUE)) {
  install.packages("ggcorrplot")
}
## Installing package into 'C:/Users/canda/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'ggcorrplot' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\canda\AppData\Local\Temp\RtmpqubneP\downloaded_packages
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.2
library(ggplot2)

# Convert every non-double column to numeric so cor() accepts the whole frame.
# A factor is replaced by its integer level codes; for Proximity the levels
# sort as "In Proximity" = 1, "Not in Prox" = 2.
cities <- cities %>%
  mutate(
    across(
      where(function(col) {
        is.character(col) || is.integer(col) || is.factor(col)
      }),
      as.numeric
    )
  )

# Log 2020 population on a copy used only for the correlation plots.
# The regressions further down call log() on t_20 themselves, so overwriting
# cities$t_20 here would make them fit log(log(population)).
cities_cor <- cities
cities_cor$t_20 <- log(cities_cor$t_20)

# Pairwise Pearson correlations across all columns, drawn as a lower-triangle
# heat map. Note that `cor_matrix` holds the plot object, not the matrix.
cor_matrix <- cor(
  cities_cor,
  use = "pairwise.complete.obs",
  method = "pearson"
) %>%
  ggcorrplot(show.diag = FALSE, type = "lower", lab = TRUE, lab_size = 2)

print(cor_matrix)

# `df` keeps the logged t_20, as before.
df <- data.frame(cities_cor)

# Same plot restricted to the 2020 analysis variables.
sel20_vars <- c("pboo_20", "t_20", "pb_20", "chg_t20", "chg_pb20")
df_select20 <- df[sel20_vars]

cor_matrix20 <- cor(
  df_select20,
  use = "pairwise.complete.obs",
  method = "pearson"
) %>%
  ggcorrplot(show.diag = FALSE, type = "lower", lab = TRUE, lab_size = 4)

print(cor_matrix20)

#Regress percent Black homeownership on logged total population. Significant only in 1980 (p < 0.05); marginal in 1990 (p < 0.10); not significant in 2000, 2010, or 2020.

# 2020: OLS of percent Black homeownership on logged city population.
tpop20 <- lm(formula = cities$pboo_20 ~ log(cities$t_20))
summary(object = tpop20)
## 
## Call:
## lm(formula = cities$pboo_20 ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.270  -6.448  -4.056   3.111  58.063 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -17.554     16.467  -1.066    0.287
## log(cities$t_20)   11.125      7.038   1.581    0.115
## 
## Residual standard error: 10.58 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.009556,   Adjusted R-squared:  0.005732 
## F-statistic: 2.499 on 1 and 259 DF,  p-value: 0.1152
# Slope / 100: change in the response per 1% increase in the variable inside
# log() (level-log interpretation).
coef(object = tpop20)["log(cities$t_20)"] / 100
## log(cities$t_20) 
##         0.111247
# 2010: OLS of percent Black homeownership on logged city population.
tpop10 <- lm(formula = cities$pboo_10 ~ log(cities$t_10))
summary(object = tpop10)
## 
## Call:
## lm(formula = cities$pboo_10 ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.303  -6.725  -4.234   3.273  56.897 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -1.7654     7.3455  -0.240    0.810
## log(cities$t_10)   0.9997     0.7066   1.415    0.158
## 
## Residual standard error: 11.01 on 238 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.00834,    Adjusted R-squared:  0.004173 
## F-statistic: 2.002 on 1 and 238 DF,  p-value: 0.1584
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop10)["log(cities$t_10)"] / 100
## log(cities$t_10) 
##       0.00999737
# 2000: OLS of percent Black homeownership on logged city population.
tpop00 <- lm(formula = cities$pboo_00 ~ log(cities$t_00))
summary(object = tpop00)
## 
## Call:
## lm(formula = cities$pboo_00 ~ log(cities$t_00))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.365 -6.078 -3.391  2.804 54.268 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -3.4256     7.0160  -0.488    0.626
## log(cities$t_00)   1.0728     0.6756   1.588    0.114
## 
## Residual standard error: 9.392 on 198 degrees of freedom
##   (69 observations deleted due to missingness)
## Multiple R-squared:  0.01257,    Adjusted R-squared:  0.007586 
## F-statistic: 2.521 on 1 and 198 DF,  p-value: 0.1139
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop00)["log(cities$t_00)"] / 100
## log(cities$t_00) 
##       0.01072754
# 1990: OLS of percent Black homeownership on logged city population.
tpop90 <- lm(formula = cities$pboo_90 ~ log(cities$t_90))
summary(object = tpop90)
## 
## Call:
## lm(formula = cities$pboo_90 ~ log(cities$t_90))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.420 -6.262 -3.160  4.192 54.227 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -5.5214     7.3197  -0.754   0.4517  
## log(cities$t_90)   1.2766     0.7115   1.794   0.0745 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.218 on 177 degrees of freedom
##   (90 observations deleted due to missingness)
## Multiple R-squared:  0.01787,    Adjusted R-squared:  0.01232 
## F-statistic:  3.22 on 1 and 177 DF,  p-value: 0.07446
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop90)["log(cities$t_90)"] / 100
## log(cities$t_90) 
##       0.01276642
# 1980: OLS of percent Black homeownership on logged city population.
tpop80 <- lm(formula = cities$pboo_80 ~ log(cities$t_80))
summary(object = tpop80)
## 
## Call:
## lm(formula = cities$pboo_80 ~ log(cities$t_80))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.515 -5.458 -2.950  4.121 28.452 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -7.3996     6.5234  -1.134   0.2584  
## log(cities$t_80)   1.3760     0.6371   2.160   0.0324 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.697 on 152 degrees of freedom
##   (115 observations deleted due to missingness)
## Multiple R-squared:  0.02978,    Adjusted R-squared:  0.02339 
## F-statistic: 4.665 on 1 and 152 DF,  p-value: 0.03236
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop80)["log(cities$t_80)"] / 100
## log(cities$t_80) 
##       0.01376022

#Regress percent Black population on logged total population. Significant at the 5% level in 2020 and 1980; marginal (p < 0.10) in 2010, 2000, and 1990.

# 2020: OLS of percent Black population on logged city population.
tpop20B <- lm(formula = cities$pb_20 ~ log(cities$t_20))
summary(object = tpop20B)
## 
## Call:
## lm(formula = cities$pb_20 ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.354  -8.050  -3.541   5.225  58.074 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -33.034     18.265  -1.809   0.0717 .
## log(cities$t_20)   19.325      7.806   2.476   0.0139 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.74 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.02312,    Adjusted R-squared:  0.01935 
## F-statistic: 6.129 on 1 and 259 DF,  p-value: 0.01394
# Slope / 100: change in the response per 1% increase in the variable inside
# log() (level-log interpretation).
coef(object = tpop20B)["log(cities$t_20)"] / 100
## log(cities$t_20) 
##        0.1932485
# 2010: OLS of percent Black population on logged city population.
tpop10B <- lm(formula = cities$pb_10 ~ log(cities$t_10))
summary(object = tpop10B)
## 
## Call:
## lm(formula = cities$pb_10 ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.032  -7.937  -4.269   4.124  57.746 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -3.2881     8.0375  -0.409   0.6828  
## log(cities$t_10)   1.4183     0.7732   1.834   0.0679 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.05 on 238 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.01394,    Adjusted R-squared:  0.009798 
## F-statistic: 3.365 on 1 and 238 DF,  p-value: 0.06785
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop10B)["log(cities$t_10)"] / 100
## log(cities$t_10) 
##       0.01418331
# 2000: OLS of percent Black population on logged city population.
tpop00B <- lm(formula = cities$pb_00 ~ log(cities$t_00))
summary(object = tpop00B)
## 
## Call:
## lm(formula = cities$pb_00 ~ log(cities$t_00))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.563  -7.499  -4.037   4.522  48.296 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -4.7408     8.0813  -0.587   0.5581  
## log(cities$t_00)   1.4469     0.7795   1.856   0.0649 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.93 on 202 degrees of freedom
##   (65 observations deleted due to missingness)
## Multiple R-squared:  0.01677,    Adjusted R-squared:  0.0119 
## F-statistic: 3.445 on 1 and 202 DF,  p-value: 0.06489
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop00B)["log(cities$t_00)"] / 100
## log(cities$t_00) 
##       0.01446904
# 1990: OLS of percent Black population on logged city population.
tpop90B <- lm(formula = cities$pb_90 ~ log(cities$t_90))
summary(object = tpop90B)
## 
## Call:
## lm(formula = cities$pb_90 ~ log(cities$t_90))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.606  -7.087  -4.198   3.658  52.431 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -5.7914     8.2396  -0.703   0.4831  
## log(cities$t_90)   1.4886     0.8007   1.859   0.0647 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.38 on 178 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.01905,    Adjusted R-squared:  0.01354 
## F-statistic: 3.457 on 1 and 178 DF,  p-value: 0.06465
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop90B)["log(cities$t_90)"] / 100
## log(cities$t_90) 
##       0.01488605
# 1980: OLS of percent Black population on logged city population.
tpop80B <- lm(formula = cities$pb_80 ~ log(cities$t_80))
summary(object = tpop80B)
## 
## Call:
## lm(formula = cities$pb_80 ~ log(cities$t_80))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.915  -6.934  -4.312   5.507  29.895 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -11.0056     8.2193  -1.339   0.1826  
## log(cities$t_80)   1.9256     0.8027   2.399   0.0177 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.698 on 152 degrees of freedom
##   (115 observations deleted due to missingness)
## Multiple R-squared:  0.03648,    Adjusted R-squared:  0.03014 
## F-statistic: 5.754 on 1 and 152 DF,  p-value: 0.01766
# Slope / 100: change in the response per 1% increase in population.
coef(object = tpop80B)["log(cities$t_80)"] / 100
## log(cities$t_80) 
##       0.01925604

#Regress proximity on logged total population. Some significance for all years except 2010.

#change proximity back to factor variable

# Rebuild the labelled factor from the numeric codes: 1 -> "In Proximity",
# any other non-missing value -> "Not in Prox".
# NOTE(review): assumes Proximity currently holds numeric codes in which 1
# means "In Proximity" -- confirm if the earlier conversion steps change.
cities$Proximity <- factor(
  ifelse(cities$Proximity == 1, "In Proximity", "Not in Prox")
)

# Inspect column types after the conversions.
str(object = cities)
## 'data.frame':    269 obs. of  34 variables:
##  $ cityname : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Proximity: Factor w/ 2 levels "In Proximity",..: 2 1 2 2 2 2 1 2 2 2 ...
##  $ Miles    : num  NA 3 NA NA NA NA 13.7 NA NA NA ...
##  $ FC.name  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gini_20  : num  0.45 0.395 0.461 0.423 0.471 0.39 0.456 0.417 0.469 0.44 ...
##  $ gini_10  : num  0.45 0.475 0.42 0.411 0.456 0.358 0.424 0.436 0.457 0.523 ...
##  $ pboo_20  : num  4.74 4.08 0.26 3.13 0.65 6.5 0.13 2.73 3.56 1.08 ...
##  $ pboo_10  : num  4.73 2.53 0.19 3.31 0.57 7.01 0.19 1.49 3.69 1.53 ...
##  $ pboo_00  : num  4.68 1.73 0 4.42 0.48 3.58 0 1.15 3.76 2.69 ...
##  $ pboo_90  : num  5.34 6.51 0 2.63 0.72 2.68 0 0.9 4.06 2.46 ...
##  $ pboo_80  : num  4.36 4.23 0 0.21 0.97 2.14 0 1.03 3.77 2.13 ...
##  $ chg_t20  : num  8119 3605 1140 130 -1213 ...
##  $ chg_pb20 : num  1.34343 5.30048 0.00392 -0.40411 0.14071 ...
##  $ t_20     : num  11.74 9.72 9.88 9.68 9.79 ...
##  $ pb_20    : num  11.36 17.364 0.2 2.406 0.732 ...
##  $ dbw_20   : num  28.7 25 35.3 32.8 38.2 ...
##  $ chg_t10  : num  1133 -1110 3593 1890 94 ...
##  $ chg_pb10 : num  0.925 2.265 0.081 -2.991 -0.15 ...
##  $ t_10     : num  117063 13056 18353 15869 19104 ...
##  $ pb_10    : num  10.017 12.063 0.196 2.811 0.591 ...
##  $ dbw_10   : num  34.8 21.4 55.3 36 27.6 ...
##  $ chg_t00  : num  9276 NA NA 2846 -778 ...
##  $ chg_pb00 : num  2.2753 NA NA 2.1368 -0.0416 ...
##  $ t_00     : num  115930 14166 14760 13979 19010 ...
##  $ pb_00    : num  9.092 9.798 0.115 5.802 0.742 ...
##  $ dbw_00   : num  37.7 38.8 23.4 28.7 27.4 ...
##  $ chg_t90  : num  8339 NA NA -1490 -1173 ...
##  $ chg_pb90 : num  0.2325 NA NA 1.5337 -0.0707 ...
##  $ t_90     : num  106654 NA NA 11133 19788 ...
##  $ pb_90    : num  6.816 NA NA 3.665 0.783 ...
##  $ dbw_90   : num  38.1 NA NA 24.7 41.2 ...
##  $ t_80     : num  98315 NA NA 12623 20961 ...
##  $ pb_80    : num  6.584 NA NA 2.131 0.854 ...
##  $ dbw_80   : num  48.2 NA NA 29.9 52.6 ...

Make “Not in Proximity” the reference category

# Put "Not in Prox" first so it is the baseline level in model contrasts.
cities$Proximity <- relevel(x = cities$Proximity, ref = "Not in Prox")

# Confirm the new level order and the counts per category.
table(cities[["Proximity"]])
## 
##  Not in Prox In Proximity 
##          162          107
# 2020: OLS with the proximity factor's integer level codes as the response
# and logged city population as the predictor.
tpop20Bx <- lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_20))
summary(object = tpop20Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_20))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5487 -0.3861 -0.3336  0.5907  0.6832 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.07095    0.75527  -0.094   0.9252  
## log(cities$t_20)  0.62357    0.32278   1.932   0.0545 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4854 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.01421,    Adjusted R-squared:  0.0104 
## F-statistic: 3.732 on 1 and 259 DF,  p-value: 0.05447
# Slope / 100: change in the coded response per 1% increase in the variable
# inside log().
coef(object = tpop20Bx)["log(cities$t_20)"] / 100
## log(cities$t_20) 
##      0.006235658
# 2010: proximity level codes regressed on logged city population.
tpop10Bx <- lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_10))
summary(object = tpop10Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_10))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5452 -0.4012 -0.3668  0.5838  0.6422 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       0.94206    0.32796   2.872  0.00444 **
## log(cities$t_10)  0.04507    0.03155   1.428  0.15448   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4915 on 238 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.0085, Adjusted R-squared:  0.004334 
## F-statistic:  2.04 on 1 and 238 DF,  p-value: 0.1545
# Slope / 100: change in the coded response per 1% increase in population.
coef(object = tpop10Bx)["log(cities$t_10)"] / 100
## log(cities$t_10) 
##     0.0004506704
# 2000: proximity level codes regressed on logged city population.
tpop00Bx <- lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_00))
summary(object = tpop00Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_00))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6068 -0.3868 -0.3355  0.5879  0.6796 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       0.65621    0.35988   1.823   0.0697 .
## log(cities$t_00)  0.07179    0.03471   2.068   0.0399 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4866 on 202 degrees of freedom
##   (65 observations deleted due to missingness)
## Multiple R-squared:  0.02073,    Adjusted R-squared:  0.01588 
## F-statistic: 4.276 on 1 and 202 DF,  p-value: 0.03992
# Slope / 100: change in the coded response per 1% increase in population.
coef(object = tpop00Bx)["log(cities$t_00)"] / 100
## log(cities$t_00) 
##     0.0007178517
# 1990: proximity level codes regressed on logged city population.
tpop90Bx <- lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_90))
summary(object = tpop90Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_90))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6849 -0.3635 -0.3050  0.5875  0.7163 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       0.34578    0.38110   0.907  0.36547   
## log(cities$t_90)  0.10181    0.03703   2.749  0.00659 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4801 on 178 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.04073,    Adjusted R-squared:  0.03534 
## F-statistic: 7.559 on 1 and 178 DF,  p-value: 0.006589
# Slope / 100: change in the coded response per 1% increase in population.
coef(object = tpop90Bx)["log(cities$t_90)"] / 100
## log(cities$t_90) 
##      0.001018135
# 1980: proximity level codes regressed on logged city population.
tpop80Bx <- lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_80))
summary(object = tpop80Bx)
## 
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_80))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6419 -0.3694 -0.3163  0.5917  0.6982 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       0.46039    0.40908   1.125   0.2622  
## log(cities$t_80)  0.09117    0.03995   2.282   0.0239 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4827 on 152 degrees of freedom
##   (115 observations deleted due to missingness)
## Multiple R-squared:  0.03312,    Adjusted R-squared:  0.02676 
## F-statistic: 5.207 on 1 and 152 DF,  p-value: 0.02389
# Slope / 100: change in the coded response per 1% increase in population.
coef(object = tpop80Bx)["log(cities$t_80)"] / 100
## log(cities$t_80) 
##     0.0009116513

#Run regression with percent Black population on Proximity. Significance

#Run regression with percent Black population on logged city population. Significance

# 2020: proximity level codes regressed on percent Black population.
proxpb20 <- lm(formula = as.numeric(Proximity) ~ pb_20, data = cities)

# 2020: log of t_20 regressed on percent Black population.
logtpb20 <- lm(formula = log(t_20) ~ pb_20, data = cities)

summary(object = proxpb20)
## 
## Call:
## lm(formula = as.numeric(Proximity) ~ pb_20, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0530 -0.3211 -0.2202  0.4807  0.8001 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.195302   0.040063  29.836  < 2e-16 ***
## pb_20       0.015777   0.002362   6.678 1.47e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4516 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1469, Adjusted R-squared:  0.1436 
## F-statistic:  44.6 on 1 and 259 DF,  p-value: 1.466e-10
# Coefficient table and fit statistics for the log(t_20) ~ pb_20 model.
summary(object = logtpb20)
## 
## Call:
## lm(formula = log(t_20) ~ pb_20, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12198 -0.07040 -0.02304  0.04690  0.33338 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.3235144  0.0081942 283.556   <2e-16 ***
## pb_20       0.0011962  0.0004832   2.476   0.0139 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09237 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.02312,    Adjusted R-squared:  0.01935 
## F-statistic: 6.129 on 1 and 259 DF,  p-value: 0.01394

#Run regression with Proximity on percent black population. Highly Significant all years.

# 2020: percent Black population by proximity category (baseline is the first
# factor level).
pbx20 <- lm(formula = pb_20 ~ Proximity, data = cities)
summary(object = pbx20)
## 
## Call:
## lm(formula = pb_20 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.565  -6.880  -2.417   3.975  53.409 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             8.5452     0.8673   9.852  < 2e-16 ***
## ProximityIn Proximity   9.3114     1.3942   6.678 1.47e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.97 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1469, Adjusted R-squared:  0.1436 
## F-statistic:  44.6 on 1 and 259 DF,  p-value: 1.466e-10
# 2010: percent Black population by proximity category.
pbx10 <- lm(formula = pb_10 ~ Proximity, data = cities)
summary(object = pbx10)
## 
## Call:
## lm(formula = pb_10 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.785  -6.725  -2.739   3.358  52.413 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             7.5541     0.9408   8.030 4.49e-14 ***
## ProximityIn Proximity   9.3850     1.4722   6.375 9.42e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.21 on 238 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.1458, Adjusted R-squared:  0.1423 
## F-statistic: 40.64 on 1 and 238 DF,  p-value: 9.424e-10
# 2000: percent Black population by proximity category.
pbx00 <- lm(formula = pb_00 ~ Proximity, data = cities)
summary(object = pbx00)
## 
## Call:
## lm(formula = pb_00 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.176  -5.906  -2.490   2.612  41.624 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             6.6092     0.9095   7.267 7.87e-12 ***
## ProximityIn Proximity   9.0227     1.4434   6.251 2.38e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.09 on 202 degrees of freedom
##   (65 observations deleted due to missingness)
## Multiple R-squared:  0.1621, Adjusted R-squared:  0.1579 
## F-statistic: 39.08 on 1 and 202 DF,  p-value: 2.379e-09
# 1990: percent Black population by proximity category.
pbx90 <- lm(formula = pb_90 ~ Proximity, data = cities)
summary(object = pbx90)
## 
## Call:
## lm(formula = pb_90 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.264  -5.885  -2.536   4.423  45.938 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             6.1778     0.9182   6.728 2.27e-10 ***
## ProximityIn Proximity   8.4397     1.4724   5.732 4.17e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.63 on 178 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.1558, Adjusted R-squared:  0.1511 
## F-statistic: 32.86 on 1 and 178 DF,  p-value: 4.171e-08
# 1980: percent Black population by proximity category.
pbx80 <- lm(formula = pb_80 ~ Proximity, data = cities)
summary(object = pbx80)
## 
## Call:
## lm(formula = pb_80 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.815  -5.591  -2.976   4.212  34.316 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.7997     0.9508   6.100 8.41e-09 ***
## ProximityIn Proximity   7.2424     1.5232   4.755 4.57e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.218 on 152 degrees of freedom
##   (115 observations deleted due to missingness)
## Multiple R-squared:  0.1295, Adjusted R-squared:  0.1238 
## F-statistic: 22.61 on 1 and 152 DF,  p-value: 4.572e-06

##Logistic Regression

#The models indicate that there is a positive relationship between the percent of Black homeownership and whether the city is within proximity to a freedom colony. Cities with high percent of Black homeowners are more likely to be within proximity to a freedom colony for years 2000, 2010, and 2020.

#model 1 #A multiple regression was performed starting with proximity to freedom colonies (Table 4). This variable proved to yield statistically significant results. Proximity to freedom colonies was associated with an increase in Black homeownership of 7.318 percentage points.

# Model 1 (2020): percent Black homeownership on proximity category alone.
model1_20 <- lm(formula = pboo_20 ~ Proximity, data = cities)
summary(object = model1_20)
## 
## Call:
## lm(formula = pboo_20 ~ Proximity, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.791  -5.199  -2.581   2.164  54.199 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.6037     0.7887   7.105 1.16e-11 ***
## ProximityIn Proximity   7.3177     1.2703   5.760 2.36e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.01 on 260 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.1132, Adjusted R-squared:  0.1098 
## F-statistic: 33.18 on 1 and 260 DF,  p-value: 2.363e-08

#model 2 - add in total population logged #When the logged variable of city population was introduced in Model 2, there was a slight increase in homeownership although population was not statistically significant. Proximity remained a significant factor.

# Model 2 (2020): adds the log of t_20 to the proximity-only specification.
model2_20 <- lm(formula = pboo_20 ~ Proximity + log(t_20), data = cities)
summary(object = model2_20)
## 
## Call:
## lm(formula = pboo_20 ~ Proximity + log(t_20), data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.924  -4.983  -2.568   2.154  53.931 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -9.916     15.652  -0.634    0.527    
## ProximityIn Proximity    7.131      1.283   5.560 6.74e-08 ***
## log(t_20)                6.678      6.711   0.995    0.321    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.02 on 258 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1155, Adjusted R-squared:  0.1087 
## F-statistic: 16.85 on 2 and 258 DF,  p-value: 1.327e-07

#As established, percent of Black population is highly correlated with Black homeownership. As expected, an initial regression with this variable yielded a significant result, but only a small increase in homeownership. Due to this high correlation, proximity lost its significance when percent of Black population was introduced to the model. Therefore, this variable was omitted from the regression.

# Percent Black homeownership regressed on percent Black population, fitted
# separately for each census year from 2020 back to 1980.
citpb20 <- lm(formula = pboo_20 ~ pb_20, data = cities)
citpb10 <- lm(formula = pboo_10 ~ pb_10, data = cities)
citpb00 <- lm(formula = pboo_00 ~ pb_00, data = cities)
citpb90 <- lm(formula = pboo_90 ~ pb_90, data = cities)
citpb80 <- lm(formula = pboo_80 ~ pb_80, data = cities)
summary(object = citpb20)
## 
## Call:
## lm(formula = pboo_20 ~ pb_20, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.0194  -1.2180   0.5173   1.5513  12.6654 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.9730     0.2680  -7.362 2.41e-12 ***
## pb_20         0.8585     0.0158  54.322  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.021 on 259 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9193, Adjusted R-squared:  0.919 
## F-statistic:  2951 on 1 and 259 DF,  p-value: < 2.2e-16
# 2010 fit: coefficient table and fit statistics.
summary(object = citpb10)
## 
## Call:
## lm(formula = pboo_10 ~ pb_10, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.6954  -1.2473   0.3532   1.4362  13.3214 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.4811     0.2406  -6.155 3.17e-09 ***
## pb_10         0.8834     0.0145  60.941  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.713 on 238 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.9398, Adjusted R-squared:  0.9395 
## F-statistic:  3714 on 1 and 238 DF,  p-value: < 2.2e-16
# 2000 fit: coefficient table and fit statistics.
summary(object = citpb00)
## 
## Call:
## lm(formula = pboo_00 ~ pb_00, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.5147  -0.8487   0.2933   0.8873  14.6523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.83415    0.23633   -3.53 0.000518 ***
## pb_00        0.82406    0.01565   52.64  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.441 on 198 degrees of freedom
##   (69 observations deleted due to missingness)
## Multiple R-squared:  0.9333, Adjusted R-squared:  0.933 
## F-statistic:  2771 on 1 and 198 DF,  p-value: < 2.2e-16
# 1990 fit: coefficient table and fit statistics.
summary(object = citpb90)
## 
## Call:
## lm(formula = pboo_90 ~ pb_90, data = cities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.9046  -0.7423   0.2832   0.7728   8.9028 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.66915    0.20261  -3.303  0.00116 ** 
## pb_90        0.86542    0.01436  60.287  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.004 on 177 degrees of freedom
##   (90 observations deleted due to missingness)
## Multiple R-squared:  0.9536, Adjusted R-squared:  0.9533 
## F-statistic:  3634 on 1 and 177 DF,  p-value: < 2.2e-16
# 1980 fit: coefficient table and fit statistics.
summary(object = citpb80)
## 
## Call:
## lm(formula = pboo_80 ~ pb_80, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9313 -0.4940  0.0426  0.3423  5.6163 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.10014    0.13794  -0.726    0.469    
## pb_80        0.78014    0.01056  73.888   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.286 on 152 degrees of freedom
##   (115 observations deleted due to missingness)
## Multiple R-squared:  0.9729, Adjusted R-squared:  0.9727 
## F-statistic:  5459 on 1 and 152 DF,  p-value: < 2.2e-16