library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(dplyr)
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
cities <- read.csv('C:/Users/canda/DEM Dissertation Data/citiesALLYRS.csv', na.strings=c("NA"))
cities <- cities %>% mutate_if(is.character, as.numeric)
## Warning: There were 117 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cityname = .Primitive("as.double")(cityname)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 116 remaining warnings.
cities$Proximity<-as.factor(ifelse(cities$Proximity==1, "In Proximity", "Not in Prox"))
stargazer(cities, header=FALSE, type='text', title="Descriptive Statistics",digits=1)
##
## Descriptive Statistics
## =================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------
## statefp20 272 48.0 0.0 48 48
## placefp20 272 40,326.9 22,986.4 1,000 80,356
## Miles 114 6.5 5.5 0.0 21.0
## pboo_20 261 8.5 10.6 0.0 67.1
## pboo_10 241 8.7 11.1 0.05 65.6
## pboo_00 258 7.2 9.3 0.0 61.0
## w_20 261 26,559.9 59,120.8 140 545,989
## b_20 261 10,907.9 41,209.5 3 531,293
## h_20 261 31,562.0 102,121.1 749 1,013,423
## a_20 261 5,215.7 16,081.4 4 181,536
## o_20 261 1,436.2 3,254.3 29 32,339
## t_20 261 75,681.7 207,755.3 10,070 2,304,580
## pw_20 261 42.1 21.3 1.2 86.7
## pb_20 261 12.2 11.9 0.03 71.3
## ph_20 261 38.1 25.0 5.6 98.5
## pa_20 261 5.3 6.4 0.03 40.1
## po_20 261 2.2 1.0 0.2 6.9
## dwb_20 261 25.1 12.0 0.0 69.1
## dwh_20 261 20.9 12.2 0.0 63.4
## dwa_20 261 21.8 11.7 0.0 61.6
## dbw_20 261 25.1 12.0 0.0 69.1
## dbh_20 261 20.0 11.2 0.0 58.7
## dba_20 261 25.7 13.4 0.0 74.2
## dhw_20 261 20.9 12.2 0.0 63.4
## dhb_20 261 20.0 11.2 0.0 58.7
## dha_20 261 28.3 15.0 0.0 70.0
## daw_20 261 21.8 11.7 0.0 61.6
## dab_20 261 25.7 13.4 0.0 74.2
## dah_20 261 28.3 15.0 0.0 70.0
## w_10 241 28,100.9 58,830.2 116 537,901
## b_10 241 9,745.9 39,365.9 3 495,792
## h_10 241 28,865.3 96,765.6 540 919,668
## a_10 241 3,491.9 11,185.4 4 135,131
## o_10 241 509.4 1,166.5 2 10,959
## t_10 241 70,713.3 194,540.9 10,127 2,099,451
## pw_10 241 48.9 23.3 0.9 92.9
## pb_10 241 11.4 12.1 0.03 69.4
## ph_10 241 34.9 25.4 4.0 98.7
## pa_10 241 4.0 5.7 0.03 43.3
## po_10 241 0.8 0.4 0.02 3.2
## dwb_10 241 27.1 14.1 0.0 82.2
## dwh_10 241 22.5 13.6 0.0 61.1
## dwa_10 241 20.9 10.7 0.0 62.5
## dbw_10 241 27.1 14.1 0.0 82.2
## dbh_10 241 21.9 12.1 0.0 64.4
## dba_10 241 27.5 15.8 0.0 83.3
## dhw_10 241 22.5 13.6 0.0 61.1
## dhb_10 241 21.9 12.1 0.0 64.4
## dha_10 241 29.4 15.9 0.0 71.7
## daw_10 241 20.9 10.7 0.0 62.5
## dab_10 241 27.5 15.8 0.0 83.3
## dah_10 241 29.4 15.9 0.0 71.7
## w_00 205 31,761.8 66,126.9 169 601,851
## b_00 205 9,421.2 42,032.3 4 495,338
## h_00 205 24,319.5 83,258.7 399 730,865
## a_00 205 2,511.1 9,120.7 4 111,379
## o_00 205 600.0 1,513.7 4 14,198
## t_00 205 68,613.6 190,232.9 10,302 1,953,631
## pw_00 205 56.7 23.5 1.5 94.8
## pb_00 205 10.3 11.1 0.03 57.3
## ph_00 205 29.3 25.7 2.9 97.9
## pa_00 205 2.8 3.8 0.04 27.4
## po_00 205 0.9 0.4 0.04 3.1
## dwb_00 205 30.7 16.1 0.0 75.6
## dwh_00 205 25.0 14.6 0.0 65.3
## dwa_00 205 20.3 10.9 0.0 54.2
## dbw_00 205 30.7 16.1 0.0 75.6
## dbh_00 205 24.2 14.2 0.0 72.1
## dba_00 205 30.6 17.6 0.0 77.9
## dhw_00 205 25.0 14.6 0.0 65.3
## dhb_00 205 24.2 14.2 0.0 72.1
## dha_00 205 30.2 16.2 0.0 73.4
## daw_00 205 20.3 10.9 0.0 54.2
## dab_00 205 30.6 17.6 0.0 77.9
## dah_00 205 30.2 16.2 0.0 73.4
## w_90 182 34,347.7 73,188.7 841 662,642
## b_90 182 8,693.2 40,405.8 0 447,144
## h_90 182 17,633.1 60,672.7 252 520,282
## a_90 182 1,344.7 5,261.6 2 64,126
## t_90 182 62,297.8 167,007.8 10,023 1,630,553
## o_90 182 279.0 711.6 13 6,158
## pw_90 182 65.0 22.8 4.1 97.3
## pb_90 182 9.7 10.6 0.0 60.6
## ph_90 182 23.3 24.8 1.1 95.5
## pa_90 182 1.6 2.1 0.01 15.8
## po_90 182 0.4 0.2 0.1 1.8
## dwb_90 182 34.0 18.7 0.0 80.8
## dwh_90 182 25.2 14.2 0.0 61.8
## dwa_90 182 24.5 13.1 0.0 81.0
## dbw_90 182 34.0 18.7 0.0 80.8
## dbh_90 182 27.6 16.2 0.0 80.5
## dba_90 182 36.0 20.8 0.0 80.6
## dhw_90 182 25.2 14.2 0.0 61.8
## dhb_90 182 27.6 16.2 0.0 80.5
## dha_90 182 33.2 17.5 0.0 70.7
## daw_90 182 24.5 13.1 0.0 81.0
## dab_90 182 36.0 20.8 0.0 80.6
## dah_90 182 33.2 17.5 0.0 70.7
## t_80 157 59,889.3 164,434.4 10,197 1,595,138
## w_80 157 36,419.9 86,063.4 1,242 834,061
## b_80 157 8,577.7 41,362.4 3 436,392
## h_80 157 13,903.6 47,448.1 121 421,954
## a_80 157 572.3 2,619.8 1 31,431
## o_80 157 415.7 1,240.8 18 11,923
## pw_80 157 70.4 22.4 5.8 97.6
## pb_80 157 9.0 10.2 0.01 40.1
## ph_80 157 19.4 23.6 0.9 93.0
## pa_80 157 0.7 0.7 0.01 4.3
## po_80 157 0.6 0.4 0.1 3.3
## dwb_80 157 42.6 22.6 0.0 93.2
## dwh_80 157 25.9 15.9 0.0 67.1
## dwa_80 157 23.3 12.7 0.0 68.9
## dbw_80 157 42.6 22.6 0.0 93.2
## dbh_80 157 34.2 20.3 0.0 87.1
## dba_80 157 43.1 25.2 0.0 89.1
## dhw_80 157 25.9 15.9 0.0 67.1
## dhb_80 157 34.2 20.3 0.0 87.1
## dha_80 157 33.6 19.4 0.0 83.8
## daw_80 157 23.3 12.7 0.0 68.9
## dab_80 157 43.1 25.2 0.0 89.1
## dah_80 157 33.6 19.4 0.0 83.8
## -------------------------------------------------
stargazer(cities[c("pboo_20","t_20","dbw_20")], header=FALSE, type='text',
title="Descriptive Statistics 2020", digits=2,
covariate.labels=c("Black Homeownership","City Population","B-W Dissimilarity")
)
##
## Descriptive Statistics 2020
## =============================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------------------
## Black Homeownership 261 8.54 10.61 0.00 67.12
## City Population 261 75,681.66 207,755.30 10,070 2,304,580
## B-W Dissimilarity 261 25.12 12.01 0.00 69.14
## -------------------------------------------------------------
stargazer(cities[c("pboo_10","t_10","dbw_10")], header=FALSE, type='text',
title="Descriptive Statistics 2010", digits=2,
covariate.labels=c("Black Homeownership","City Population","B-W Dissimilarity")
)
##
## Descriptive Statistics 2010
## =============================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------------------
## Black Homeownership 241 8.65 11.07 0.05 65.63
## City Population 241 70,713.28 194,540.90 10,127 2,099,451
## B-W Dissimilarity 241 27.13 14.08 0.00 82.22
## -------------------------------------------------------------
stargazer(cities[c("pboo_00","t_00","dbw_00")], header=FALSE, type='text',
title="Descriptive Statistics 2000", digits=2,
covariate.labels=c("Black Homeownership","City Population","B-W Dissimilarity")
)
##
## Descriptive Statistics 2000
## =============================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------------------
## Black Homeownership 258 7.18 9.34 0.00 61.00
## City Population 205 68,613.65 190,232.90 10,302 1,953,631
## B-W Dissimilarity 205 30.71 16.06 0.00 75.64
## -------------------------------------------------------------
sapply(cities, function(x) sum(is.na(x)))
## statefp20 placefp20 cityname Proximity Miles FC.name pboo_20 pboo_10
## 0 0 272 0 158 272 11 31
## pboo_00 pboo_90 pboo_80 w_20 b_20 h_20 a_20 o_20
## 14 272 272 11 11 11 11 11
## t_20 pw_20 pb_20 ph_20 pa_20 po_20 dwb_20 dwh_20
## 11 11 11 11 11 11 11 11
## dwa_20 dbw_20 dbh_20 dba_20 dhw_20 dhb_20 dha_20 daw_20
## 11 11 11 11 11 11 11 11
## dab_20 dah_20 w_10 b_10 h_10 a_10 o_10 t_10
## 11 11 31 31 31 31 31 31
## pw_10 pb_10 ph_10 pa_10 po_10 dwb_10 dwh_10 dwa_10
## 31 31 31 31 31 31 31 31
## dbw_10 dbh_10 dba_10 dhw_10 dhb_10 dha_10 daw_10 dab_10
## 31 31 31 31 31 31 31 31
## dah_10 w_00 b_00 h_00 a_00 o_00 t_00 pw_00
## 31 67 67 67 67 67 67 67
## pb_00 ph_00 pa_00 po_00 dwb_00 dwh_00 dwa_00 dbw_00
## 67 67 67 67 67 67 67 67
## dbh_00 dba_00 dhw_00 dhb_00 dha_00 daw_00 dab_00 dah_00
## 67 67 67 67 67 67 67 67
## w_90 b_90 h_90 a_90 t_90 o_90 pw_90 pb_90
## 90 90 90 90 90 90 90 90
## ph_90 pa_90 po_90 dwb_90 dwh_90 dwa_90 dbw_90 dbh_90
## 90 90 90 90 90 90 90 90
## dba_90 dhw_90 dhb_90 dha_90 daw_90 dab_90 dah_90 t_80
## 90 90 90 90 90 90 90 115
## w_80 b_80 h_80 a_80 o_80 pw_80 pb_80 ph_80
## 115 115 115 115 115 115 115 115
## pa_80 po_80 dwb_80 dwh_80 dwa_80 dbw_80 dbh_80 dba_80
## 115 115 115 115 115 115 115 115
## dhw_80 dhb_80 dha_80 daw_80 dab_80 dah_80
## 115 115 115 115 115 115
#Cities In Proximity = 1, Cities Not in Proximity = 0
cities$Proximity <- factor(cities$Proximity, ordered = FALSE )
# Make "Not in Proximity" the reference category
cities$Proximity <- relevel(cities$Proximity, ref = "Not in Prox")
table(cities$Proximity)
##
## Not in Prox In Proximity
## 164 108
#The correlation between segregation and Black home ownership yielded a weak positive association in all years with a coefficient of 0.0363 in 2020, 0.0815 in 2010, and 0.2355 in 2000.
cor.test(cities$dbw_20,cities$pboo_20)
##
## Pearson's product-moment correlation
##
## data: cities$dbw_20 and cities$pboo_20
## t = 0.58608, df = 259, p-value = 0.5583
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08540425 0.15711885
## sample estimates:
## cor
## 0.03639314
plot(cities$dbw_20,cities$pboo_20,pch=20,
xlab='D index',ylab='Percent Black Homeowners',
main='Cities by Residential Segregation and Black Homeownership in 2020')
cor.test(cities$dbw_10,cities$pboo_10)
##
## Pearson's product-moment correlation
##
## data: cities$dbw_10 and cities$pboo_10
## t = 1.2643, df = 239, p-value = 0.2074
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.04532483 0.20575565
## sample estimates:
## cor
## 0.08150847
plot(cities$dbw_10,cities$pboo_10,pch=20,
xlab='D index',ylab='Percent Black Homeowners',
main='Cities by Residential Segregation and Black Homeownership in 2010')
cor.test(cities$dbw_00,cities$pboo_00)
##
## Pearson's product-moment correlation
##
## data: cities$dbw_00 and cities$pboo_00
## t = 3.4189, df = 199, p-value = 0.0007626
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1004203 0.3621316
## sample estimates:
## cor
## 0.2355415
plot(cities$dbw_00,cities$pboo_00,pch=20,
xlab='D index',ylab='Percent Black Homeowners',
main='Cities by Residential Segregation and Black Homeownership in 2000')
ggplot(cities, aes(dbw_20, Proximity, col = dbw_20)) +
geom_point()
## Warning: Removed 11 rows containing missing values (`geom_point()`).
ggplot(cities, aes(dbw_10, Proximity, col = dbw_10)) +
geom_point()
## Warning: Removed 31 rows containing missing values (`geom_point()`).
ggplot(cities, aes(dbw_00, Proximity, col = dbw_00)) +
geom_point()
## Warning: Removed 67 rows containing missing values (`geom_point()`).
#The list of 261 cities from the Diversity and Disparities project available for year 2020 was used to conduct a binomial regression analysis. Of these cities, 100 were identified as being in proximity to a freedom colony and 16 were not.
#The results showed that proximity to freedom colonies INCREASED residential segregation by 3.21 points in 2020. The results were statistically significant in 2020 and 2000, but not in 2010.
fit20=lm(dbw_20 ~ Proximity,cities)
summary(fit20)
##
## Call:
## lm(formula = dbw_20 ~ Proximity, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.943 -9.284 -0.561 8.314 45.264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.8749 0.9429 25.322 <2e-16 ***
## ProximityIn Proximity 3.2095 1.5157 2.118 0.0352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.93 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.01702, Adjusted R-squared: 0.01322
## F-statistic: 4.484 on 1 and 259 DF, p-value: 0.03517
fit10=lm(dbw_10 ~ Proximity,cities)
summary(fit10)
##
## Call:
## lm(formula = dbw_10 ~ Proximity, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.653 -10.562 -1.782 8.095 56.137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.079 1.175 22.198 <2e-16 ***
## ProximityIn Proximity 2.575 1.842 1.397 0.164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.05 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.008104, Adjusted R-squared: 0.003954
## F-statistic: 1.953 on 1 and 239 DF, p-value: 0.1636
fit00=lm(dbw_00 ~ Proximity,cities)
summary(fit00)
##
## Call:
## lm(formula = dbw_00 ~ Proximity, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.126 -11.375 -0.984 10.532 41.766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.647 1.427 20.08 <2e-16 ***
## ProximityIn Proximity 5.221 2.270 2.30 0.0225 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.89 on 203 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.0254, Adjusted R-squared: 0.0206
## F-statistic: 5.291 on 1 and 203 DF, p-value: 0.02246
#Alternatively, I used Proximity as the dependent (outcome) variable. The results showed that residential segregation INCREASED proximity by 0.0224 points in 2020. The results were statistically significant. The results were statistically significant in 2020 and 2000, but not in 2010.
model20 = glm(Proximity ~ dbw_20,
data = cities,
family = binomial(link="logit"))
model10 = glm(Proximity ~ dbw_10,
data = cities,
family = binomial(link="logit"))
model00 = glm(Proximity ~ dbw_00,
data = cities,
family = binomial(link="logit"))
summary(model20)
##
## Call:
## glm(formula = Proximity ~ dbw_20, family = binomial(link = "logit"),
## data = cities)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.03052 0.30481 -3.381 0.000723 ***
## dbw_20 0.02243 0.01075 2.085 0.037057 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.37 on 260 degrees of freedom
## Residual deviance: 343.93 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 347.93
##
## Number of Fisher Scoring iterations: 4
summary(model10)
##
## Call:
## glm(formula = Proximity ~ dbw_10, family = binomial(link = "logit"),
## data = cities)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.73383 0.28936 -2.536 0.0112 *
## dbw_10 0.01302 0.00936 1.391 0.1642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 325.64 on 240 degrees of freedom
## Residual deviance: 323.70 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 327.7
##
## Number of Fisher Scoring iterations: 4
summary(model00)
##
## Call:
## glm(formula = Proximity ~ dbw_00, family = binomial(link = "logit"),
## data = cities)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.067050 0.322461 -3.309 0.000936 ***
## dbw_00 0.020547 0.009106 2.256 0.024050 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 275.10 on 204 degrees of freedom
## Residual deviance: 269.88 on 203 degrees of freedom
## (67 observations deleted due to missingness)
## AIC: 273.88
##
## Number of Fisher Scoring iterations: 4
Anova(model20,
type="II",
test="LR")
## Analysis of Deviance Table (Type II tests)
##
## Response: Proximity
## LR Chisq Df Pr(>Chisq)
## dbw_20 4.4355 1 0.0352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(model10,
type="II",
test="LR")
## Analysis of Deviance Table (Type II tests)
##
## Response: Proximity
## LR Chisq Df Pr(>Chisq)
## dbw_10 1.9474 1 0.1629
Anova(model00,
type="II",
test="LR")
## Analysis of Deviance Table (Type II tests)
##
## Response: Proximity
## LR Chisq Df Pr(>Chisq)
## dbw_00 5.2201 1 0.02233 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Testing the correlation of proximity to a freedom colony and percent Black homeownership. #In proximity = 1, Not in proximity = 0. Using point biserial correlation. Treat as numeric within dataframe to get Pearsons R.
cor.test(as.numeric(cities$Proximity), cities$pboo_20)
##
## Pearson's product-moment correlation
##
## data: as.numeric(cities$Proximity) and cities$pboo_20
## t = 6.2183, df = 259, p-value = 2.004e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2499329 0.4616335
## sample estimates:
## cor
## 0.3604154
plot(cities$Proximity,cities$pboo_20,pch=20,
xlab='Proximity',ylab='Percent Black Homeowners',
main='Cities by Proximity and Black Homeownership in 2020')
cor.test(as.numeric(cities$Proximity), cities$pboo_10)
##
## Pearson's product-moment correlation
##
## data: as.numeric(cities$Proximity) and cities$pboo_10
## t = 6.0327, df = 239, p-value = 6.091e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2485787 0.4683765
## sample estimates:
## cor
## 0.3635261
plot(cities$Proximity,cities$pboo_10,pch=20,
xlab='Proximity',ylab='Percent Black Homeowners',
main='Cities by Proximity and Black Homeownership in 2010')
cor.test(as.numeric(cities$Proximity), cities$pboo_00)
##
## Pearson's product-moment correlation
##
## data: as.numeric(cities$Proximity) and cities$pboo_00
## t = 6.617, df = 256, p-value = 2.132e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2727777 0.4818092
## sample estimates:
## cor
## 0.3821715
plot(cities$Proximity,cities$pboo_00,pch=20,
xlab='Proximity',ylab='Percent Black Homeowners',
main='Cities by Proximity and Black Homeownership in 2000')
#Plot the correlation between Proximity and homeownership using a different visual
ggplot(cities, aes(pboo_20, Proximity, col = pboo_20)) +
geom_point()
## Warning: Removed 11 rows containing missing values (`geom_point()`).
ggplot(cities, aes(pboo_10, Proximity, col = pboo_10)) +
geom_point()
## Warning: Removed 31 rows containing missing values (`geom_point()`).
ggplot(cities, aes(pboo_00, Proximity, col = pboo_00)) +
geom_point()
## Warning: Removed 14 rows containing missing values (`geom_point()`).
#A strong positive association for the percent Black population and homeownership
cor.test(cities$pb_20,cities$pboo_20)
##
## Pearson's product-moment correlation
##
## data: cities$pb_20 and cities$pboo_20
## t = 54.31, df = 259, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9476974 0.9675692
## sample estimates:
## cor
## 0.95879
plot(cities$pb_20,cities$pboo_20,pch=20,
xlab='Percent Black Population',ylab='Percent Black Homeowners',
main='Cities by Percent Black Population and Black Homeownership in 2020')
cor.test(cities$pb_10,cities$pboo_10)
##
## Pearson's product-moment correlation
##
## data: cities$pb_10 and cities$pboo_10
## t = 59.286, df = 239, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9584755 0.9748110
## sample estimates:
## cor
## 0.967642
plot(cities$pb_10,cities$pboo_10,pch=20,
xlab='Percent Black Population',ylab='Percent Black Homeowners',
main='Cities by Percent Black Population and Black Homeownership in 2010')
cor.test(cities$pb_00,cities$pboo_00)
##
## Pearson's product-moment correlation
##
## data: cities$pb_00 and cities$pboo_00
## t = 48.393, df = 199, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9475417 0.9696095
## sample estimates:
## cor
## 0.9600416
plot(cities$pb_00,cities$pboo_00,pch=20,
xlab='Percent Black Population',ylab='Percent Black Homeowners',
main='Cities by Percent Black Population and Black Homeownership in 2000')
#Weak association for the total population of the city and homeownership
cor.test(cities$t_20,cities$pboo_20)
##
## Pearson's product-moment correlation
##
## data: cities$t_20 and cities$pboo_20
## t = 0.65928, df = 259, p-value = 0.5103
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08089062 0.16154852
## sample estimates:
## cor
## 0.0409314
plot(cities$t_20,cities$pboo_20,pch=20,
xlab='City Population',ylab='Percent Black Homeowners',
main='Cities by City Total Population and Black Homeownership in 2020')
cor.test(cities$t_10,cities$pboo_10)
##
## Pearson's product-moment correlation
##
## data: cities$t_10 and cities$pboo_10
## t = 0.93395, df = 239, p-value = 0.3513
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06657168 0.18525682
## sample estimates:
## cor
## 0.06030206
plot(cities$t_10,cities$pboo_10,pch=20,
xlab='City Population',ylab='Percent Black Homeowners',
main='Cities by City Total Population and Black Homeownership in 2010')
cor.test(cities$t_00,cities$pboo_00)
##
## Pearson's product-moment correlation
##
## data: cities$t_00 and cities$pboo_00
## t = 1.6142, df = 199, p-value = 0.1081
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02510209 0.24817753
## sample estimates:
## cor
## 0.1136876
plot(cities$t_00,cities$pboo_00,pch=20,
xlab='City Population',ylab='Percent Black Homeowners',
main='Cities by City Total Population and Black Homeownership in 2000')
#Utilizing all 261 Texas cities in the data for 2020, correlation tests produced somewhat expected results. The Pearson’s correlation produced a coefficient of 0.0409 for city population and Black homeownership revealing a very weak positive association (Figure 5). However, there were outliers discovered in the data. The city with the largest population is Houston with 2,304,580 and the city with the smallest population is Rockport with 10,070. The median city population is 24,486. The three largest cities in the data are Houston, San Antonio, and Dallas. Although they are significantly larger than other cities, they remained in the data because there are multiple freedom colonies within their city boundaries. The predictor variable of city population was rescaled through a natural log transformation to reduce the impact of the magnitude of the largest city sizes. The Pearson’s correlation then produced a coefficient of 0.0867, which is greater but remains a weak association.
#log total population due to outliers
cor.test(log(cities$t_20),cities$pboo_20)
##
## Pearson's product-moment correlation
##
## data: log(cities$t_20) and cities$pboo_20
## t = 1.4009, df = 259, p-value = 0.1624
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0350703 0.2059702
## sample estimates:
## cor
## 0.08671897
cor.test(log(cities$t_10),cities$pboo_10)
##
## Pearson's product-moment correlation
##
## data: log(cities$t_10) and cities$pboo_10
## t = 1.4047, df = 239, p-value = 0.1614
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03629194 0.21440427
## sample estimates:
## cor
## 0.0904895
cor.test(log(cities$t_00),cities$pboo_00)
##
## Pearson's product-moment correlation
##
## data: log(cities$t_00) and cities$pboo_00
## t = 1.5598, df = 199, p-value = 0.1204
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02893074 0.24457866
## sample estimates:
## cor
## 0.109904
#Log Total population
tpop20<- lm(cities$pboo_20 ~ log(cities$t_20))
summary(tpop20)
##
## Call:
## lm(formula = cities$pboo_20 ~ log(cities$t_20))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.149 -6.552 -4.080 3.091 58.104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8246 6.7179 -0.123 0.902
## log(cities$t_20) 0.8999 0.6424 1.401 0.162
##
## Residual standard error: 10.59 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.00752, Adjusted R-squared: 0.003688
## F-statistic: 1.962 on 1 and 259 DF, p-value: 0.1624
coef(tpop20)["log(cities$t_20)"]/100
## log(cities$t_20)
## 0.008998776
plot(log(cities$t_20),cities$pboo_20,pch=20,
xlab='logged City Population',ylab='Percent Black Homeowners',
main='Cities by City Total Population and Black Homeownership in 2020')
tpop10<- lm(cities$pboo_10 ~ log(cities$t_10))
summary(tpop10)
##
## Call:
## lm(formula = cities$pboo_10 ~ log(cities$t_10))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.370 -6.767 -4.232 3.534 56.823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.652 7.370 -0.224 0.823
## log(cities$t_10) 0.996 0.709 1.405 0.161
##
## Residual standard error: 11.05 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.008188, Adjusted R-squared: 0.004039
## F-statistic: 1.973 on 1 and 239 DF, p-value: 0.1614
coef(tpop10)["log(cities$t_10)"]/100
## log(cities$t_10)
## 0.009959837
plot(log(cities$t_10),cities$pboo_10,pch=20,
xlab='logged City Population',ylab='Percent Black Homeowners',
main='Cities by City Total Population and Black Homeownership in 2010')
tpop00<- lm(cities$pboo_00 ~ log(cities$t_00))
summary(tpop00)
##
## Call:
## lm(formula = cities$pboo_00 ~ log(cities$t_00))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.571 -6.250 -3.593 2.774 54.104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.4148 7.2497 -0.471 0.638
## log(cities$t_00) 1.0890 0.6981 1.560 0.120
##
## Residual standard error: 9.705 on 199 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.01208, Adjusted R-squared: 0.007114
## F-statistic: 2.433 on 1 and 199 DF, p-value: 0.1204
coef(tpop00)["log(cities$t_00)"]/100
## log(cities$t_00)
## 0.01088954
plot(log(cities$t_00),cities$pboo_00,pch=20,
xlab='logged City Population',ylab='Percent Black Homeowners',
main='Cities by City Total Population and Black Homeownership in 2000')
hist(cities$t_20)
hist(log(cities$t_20+1))
hist(log(cities$t_20+1), breaks=50)
hist(cities$t_10)
hist(log(cities$t_10+1))
hist(log(cities$t_10+1), breaks=50)
hist(cities$t_00)
hist(log(cities$t_00+1))
hist(log(cities$t_00+1), breaks=50)
## testing correlation of independent variables
#logged city population and percent blCK
tpop20B<- lm(cities$pb_20 ~ log(cities$t_20))
summary(tpop20B)
##
## Call:
## lm(formula = cities$pb_20 ~ log(cities$t_20))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.238 -8.232 -3.575 5.236 58.169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.8470 7.4597 -0.65 0.5164
## log(cities$t_20) 1.6408 0.7133 2.30 0.0222 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.76 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.02002, Adjusted R-squared: 0.01624
## F-statistic: 5.292 on 1 and 259 DF, p-value: 0.02222
coef(tpop20B)["log(cities$t_20)"]/100
## log(cities$t_20)
## 0.01640827
plot(log(cities$t_20),cities$pb_20,pch=20,
xlab='logged City Population',ylab='Percent Black population',
main='Cities by City Total Population and Black population in 2020')
tpop10B<- lm(cities$pb_10 ~ log(cities$t_10))
summary(tpop10B)
##
## Call:
## lm(formula = cities$pb_10 ~ log(cities$t_10))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.061 -7.954 -4.176 4.202 57.713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.2387 8.0274 -0.403 0.6870
## log(cities$t_10) 1.4167 0.7723 1.834 0.0678 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.03 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.01389, Adjusted R-squared: 0.009759
## F-statistic: 3.365 on 1 and 239 DF, p-value: 0.06783
coef(tpop10B)["log(cities$t_10)"]/100
## log(cities$t_10)
## 0.01416692
plot(log(cities$t_10),cities$pb_10,pch=20,
xlab='logged City Population',ylab='Percent Black population',
main='Cities by City Total Population and Black population in 2010')
tpop00B<- lm(cities$pb_00 ~ log(cities$t_00))
summary(tpop00B)
##
## Call:
## lm(formula = cities$pb_00 ~ log(cities$t_00))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.692 -7.589 -4.004 4.610 48.198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.7546 8.1419 -0.584 0.5599
## log(cities$t_00) 1.4587 0.7854 1.857 0.0647 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.01 on 203 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.01671, Adjusted R-squared: 0.01187
## F-statistic: 3.45 on 1 and 203 DF, p-value: 0.06471
coef(tpop00B)["log(cities$t_00)"]/100
## log(cities$t_00)
## 0.01458666
plot(log(cities$t_00),cities$pb_00,pch=20,
xlab='logged City Population',ylab='Percent Black population',
main='Cities by City Total Population and Black population in 2000')
#logged city population and dissimilarity index/segregation
tpop20Bw<- lm(cities$dbw_20 ~ log(cities$t_20))
summary(tpop20Bw)
##
## Call:
## lm(formula = cities$dbw_20 ~ log(cities$t_20))
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.301 -8.218 -1.514 7.249 47.509
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -24.0174 6.9900 -3.436 0.000688 ***
## log(cities$t_20) 4.7207 0.6684 7.063 1.5e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.02 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1615, Adjusted R-squared: 0.1583
## F-statistic: 49.88 on 1 and 259 DF, p-value: 1.501e-11
coef(tpop20Bw)["log(cities$t_20)"]/100
## log(cities$t_20)
## 0.04720739
plot(log(cities$t_20),cities$dbw_20,pch=20,
xlab='logged City Population',ylab='Segregation',
main='Cities by City Total Population and B-W segregation in 2020')
tpop10Bw<- lm(cities$dbw_10 ~ log(cities$t_10))
summary(tpop10Bw)
##
## Call:
## lm(formula = cities$dbw_10 ~ log(cities$t_10))
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.222 -10.573 -1.836 9.156 59.638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.1664 8.8508 -2.504 0.0129 *
## log(cities$t_10) 4.7643 0.8515 5.595 6.01e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.26 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.1158, Adjusted R-squared: 0.1121
## F-statistic: 31.31 on 1 and 239 DF, p-value: 6.011e-08
coef(tpop10Bw)["log(cities$t_10)"]/100
## log(cities$t_10)
## 0.04764328
plot(log(cities$t_10),cities$dbw_10,pch=20,
xlab='logged City Population',ylab='segregation',
main='Cities by City Total Population and B-W segregation in 2010')
tpop00Bw<- lm(cities$dbw_00 ~ log(cities$t_00))
summary(tpop00Bw)
##
## Call:
## lm(formula = cities$dbw_00 ~ log(cities$t_00))
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.857 -12.739 -0.844 11.567 48.645
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.128 11.416 -1.500 0.135
## log(cities$t_00) 4.635 1.101 4.209 3.85e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.44 on 203 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.08027, Adjusted R-squared: 0.07574
## F-statistic: 17.72 on 1 and 203 DF, p-value: 3.848e-05
coef(tpop00Bw)["log(cities$t_00)"]/100
## log(cities$t_00)
## 0.04635165
plot(log(cities$t_00),cities$dbw_00,pch=20,
xlab='logged City Population',ylab='Percent Black segregation',
main='Cities by City Total Population and B-W segregation in 2000')
#logged city population and proximity
tpop20Bx<- lm(as.numeric(cities$Proximity) ~ log(cities$t_20))
summary(tpop20Bx)
##
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_20))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5772 -0.3825 -0.3305 0.5882 0.6855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.73130 0.30756 2.378 0.0181 *
## log(cities$t_20) 0.06300 0.02941 2.142 0.0331 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4847 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.01741, Adjusted R-squared: 0.01361
## F-statistic: 4.588 on 1 and 259 DF, p-value: 0.03312
coef(tpop20Bx)["log(cities$t_20)"]/100
## log(cities$t_20)
## 0.0006299569
plot(log(cities$t_20),(as.numeric(cities$Proximity)),pch=20,
xlab='logged City Population',ylab='proximity',
main='Cities by City Total Population and Proximity in 2020')
tpop10Bx<- lm(as.numeric(cities$Proximity) ~ log(cities$t_10))
summary(tpop10Bx)
##
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_10))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5596 -0.3987 -0.3609 0.5829 0.6498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.88552 0.32738 2.705 0.00733 **
## log(cities$t_10) 0.05037 0.03150 1.599 0.11109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4906 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.01059, Adjusted R-squared: 0.006448
## F-statistic: 2.558 on 1 and 239 DF, p-value: 0.1111
coef(tpop10Bx)["log(cities$t_10)"]/100
## log(cities$t_10)
## 0.0005036838
plot(log(cities$t_10),(as.numeric(cities$Proximity)),pch=20,
xlab='logged City Population',ylab='Proximity',
main='Cities by City Total Population and Proximity in 2010')
tpop00Bx<- lm(as.numeric(cities$Proximity) ~ log(cities$t_00))
summary(tpop00Bx)
##
## Call:
## lm(formula = as.numeric(cities$Proximity) ~ log(cities$t_00))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6193 -0.3839 -0.3292 0.5884 0.6869
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.60308 0.35903 1.680 0.0945 .
## log(cities$t_00) 0.07674 0.03463 2.216 0.0278 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4854 on 203 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.02362, Adjusted R-squared: 0.01881
## F-statistic: 4.91 on 1 and 203 DF, p-value: 0.02781
coef(tpop00Bx)["log(cities$t_00)"]/100
## log(cities$t_00)
## 0.0007674211
plot(log(cities$t_00),(as.numeric(cities$Proximity)),pch=20,
xlab='logged City Population',ylab='Proximity',
main='Cities by City Total Population and Proximity in 2000')
cor.test(cities$pb_10,as.numeric(cities$Proximity))
##
## Pearson's product-moment correlation
##
## data: cities$pb_10 and as.numeric(cities$Proximity)
## t = 6.9313, df = 239, p-value = 3.853e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2981591 0.5091555
## sample estimates:
## cor
## 0.4091113
#cor.test(as.numeric(cities$Proximity), cities$pboo_10)
plot(cities$pb_10,as.numeric(cities$Proximity),pch=20,
xlab='Percent Black Population',ylab='proximity',
main='Cities by Percent Black Population and proximity in 2010')
cor.test(cities$pb_00,cities$dbw_00)
##
## Pearson's product-moment correlation
##
## data: cities$pb_00 and cities$dbw_00
## t = 4.5033, df = 203, p-value = 1.127e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1714197 0.4210225
## sample estimates:
## cor
## 0.3013753
plot(cities$pb_00,cities$dbw_00,pch=20,
xlab='Percent Black Population',ylab='segregation',
main='Cities by Percent Black Population and segregation in 2000')
##Logistic Regression
#The models indicate that there is a positive relationship between the percent of Black homeownership and whether the city is within proximity to a freedom colony. Cities with high percent of Black homeowners are more likely to be within proximity to a freedom colony for years 2000, 2010, and 2020.
#model 1 #A multiple regression was performed starting with proximity to freedom colonies (Table 4). This variable proved to yield statistically significant results. Proximity to freedom colonies provided an increase in Black homeownership by 7.935 percentage points.
cit<- lm(pboo_20~Proximity,data=cities)
summary(cit)
##
## Call:
## lm(formula = pboo_20 ~ Proximity, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.213 -5.090 -2.430 2.377 53.777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5103 0.7836 7.032 1.81e-11 ***
## ProximityIn Proximity 7.8331 1.2597 6.218 2.00e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.912 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1299, Adjusted R-squared: 0.1265
## F-statistic: 38.67 on 1 and 259 DF, p-value: 2.004e-09
#model 2 - add in total popualtion logged #When the logged variable of city population was introduced in Model 2, there was a slight increase in homeownership although population was not statistically significant. Proximity remained a significant factor.
cit2log <- lm(pboo_20~Proximity+log(t_20),data=cities)
summary(cit2log)
##
## Call:
## lm(formula = pboo_20 ~ Proximity + log(t_20), data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.121 -4.947 -2.335 2.183 53.629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2494 6.3059 0.198 0.843
## ProximityIn Proximity 7.7188 1.2721 6.068 4.6e-09 ***
## log(t_20) 0.4136 0.6074 0.681 0.496
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.922 on 258 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1315, Adjusted R-squared: 0.1247
## F-statistic: 19.53 on 2 and 258 DF, p-value: 1.27e-08
#As established, percent of Black population is highly correlated with Black homeownership. As expected, an initial regression with this variable yielded a significant result, but only a small increase in homeownership. Due to this high correlation, proximity lost its significance when percent of Black population was introduced to the model. Therefore, this variable was omitted from the regression.
citpb20=lm(pboo_20~pb_20,cities)
citpb10=lm(pboo_10~pb_10,cities)
citpb00=lm(pboo_00~pb_00,cities)
summary(citpb20)
##
## Call:
## lm(formula = pboo_20 ~ pb_20, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.0288 -1.1835 0.5156 1.5324 12.6742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.9526 0.2688 -7.264 4.42e-12 ***
## pb_20 0.8580 0.0158 54.310 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.019 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.9193, Adjusted R-squared: 0.919
## F-statistic: 2950 on 1 and 259 DF, p-value: < 2.2e-16
summary(citpb10)
##
## Call:
## lm(formula = pboo_10 ~ pb_10, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.7624 -1.2648 0.3254 1.4259 13.1581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.46306 0.24821 -5.894 1.27e-08 ***
## pb_10 0.88586 0.01494 59.286 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.799 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.9363, Adjusted R-squared: 0.9361
## F-statistic: 3515 on 1 and 239 DF, p-value: < 2.2e-16
summary(citpb00)
##
## Call:
## lm(formula = pboo_00 ~ pb_00, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.8641 -1.0172 0.3435 0.9166 17.2871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.90972 0.26432 -3.442 0.000704 ***
## pb_00 0.83977 0.01735 48.393 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.733 on 199 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.9217, Adjusted R-squared: 0.9213
## F-statistic: 2342 on 1 and 199 DF, p-value: < 2.2e-16
#model 3 add in segregation #In Model 3, the DI measure of residential segregation for each city was added to the regression. Residential segregation caused a decrease in homeownership by -0.029. This result was not statistically significant but provides a different outcome than the previous analysis comparing cities near and not near freedom colonies
cit3 <- glm(pboo_20~Proximity+log(t_20)+dbw_20,data=cities)
summary(cit3)
##
## Call:
## glm(formula = pboo_20 ~ Proximity + log(t_20) + dbw_20, data = cities)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.60219 6.45204 0.093 0.926
## ProximityIn Proximity 7.77216 1.27865 6.078 4.36e-09 ***
## log(t_20) 0.54030 0.66102 0.817 0.414
## dbw_20 -0.02755 0.05626 -0.490 0.625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 98.74338)
##
## Null deviance: 29245 on 260 degrees of freedom
## Residual deviance: 25377 on 257 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 1945.3
##
## Number of Fisher Scoring iterations: 2
#models for 2010
cit10<- lm(pboo_10~Proximity,data=cities)
summary(cit10)
##
## Call:
## lm(formula = pboo_10 ~ Proximity, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.352 -5.028 -2.318 2.112 52.128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.328 0.864 6.167 2.95e-09 ***
## ProximityIn Proximity 8.174 1.355 6.033 6.09e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.33 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.1322, Adjusted R-squared: 0.1285
## F-statistic: 36.39 on 1 and 239 DF, p-value: 6.091e-09
cit2log10 <- lm(pboo_10~Proximity+log(t_10),data=cities)
summary(cit2log10)
##
## Call:
## lm(formula = pboo_10 ~ Proximity + log(t_10), data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.504 -4.772 -2.362 1.961 52.110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.7308 6.8991 -0.106 0.916
## ProximityIn Proximity 8.0498 1.3628 5.907 1.2e-08 ***
## log(t_10) 0.5905 0.6671 0.885 0.377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.34 on 238 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.135, Adjusted R-squared: 0.1277
## F-statistic: 18.57 on 2 and 238 DF, p-value: 3.199e-08
cit3_10 <- lm(pboo_10~Proximity+log(t_10)+dbw_10,data=cities)
summary(cit3_10)
##
## Call:
## lm(formula = pboo_10 ~ Proximity + log(t_10) + dbw_10, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.912 -5.022 -2.373 1.779 52.130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.12234 6.99816 -0.017 0.986
## ProximityIn Proximity 8.00578 1.36716 5.856 1.57e-08 ***
## log(t_10) 0.46087 0.70883 0.650 0.516
## dbw_10 0.02768 0.05057 0.547 0.585
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.35 on 237 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.1361, Adjusted R-squared: 0.1252
## F-statistic: 12.44 on 3 and 237 DF, p-value: 1.381e-07
#models for 2000
cit00<- lm(pboo_00~Proximity,data=cities)
summary(cit00)
##
## Call:
## lm(formula = pboo_00 ~ Proximity, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.661 -4.213 -2.050 1.673 49.339
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3498 0.6879 6.323 1.13e-09 ***
## ProximityIn Proximity 7.3115 1.1049 6.617 2.13e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.647 on 256 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.1461, Adjusted R-squared: 0.1427
## F-statistic: 43.79 on 1 and 256 DF, p-value: 2.132e-10
cit2log00 <- lm(pboo_00~Proximity+log(t_00),data=cities)
summary(cit2log00)
##
## Call:
## lm(formula = pboo_00 ~ Proximity + log(t_00), data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.561 -4.419 -2.302 1.716 49.190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3797 6.7931 -0.056 0.955
## ProximityIn Proximity 7.2792 1.3254 5.492 1.21e-07 ***
## log(t_00) 0.5186 0.6602 0.786 0.433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.064 on 198 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.1427, Adjusted R-squared: 0.134
## F-statistic: 16.48 on 2 and 198 DF, p-value: 2.406e-07
cit3_00 <- lm(pboo_00~Proximity+log(t_00)+dbw_00,data=cities)
summary(cit3_00)
##
## Call:
## lm(formula = pboo_00 ~ Proximity + log(t_00) + dbw_00, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.848 -4.473 -2.406 1.576 49.312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.22467 6.74083 0.182 0.8560
## ProximityIn Proximity 6.78083 1.32471 5.119 7.29e-07 ***
## log(t_00) 0.07294 0.67694 0.108 0.9143
## dbw_00 0.10385 0.04240 2.449 0.0152 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.951 on 197 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.168, Adjusted R-squared: 0.1553
## F-statistic: 13.26 on 3 and 197 DF, p-value: 6.389e-08
m1a <- lm(pboo_20~Proximity,data=cities)
m2a <- lm(pboo_20~Proximity+log(t_20),data=cities)
m3a <- lm(pboo_20~Proximity+log(t_20)+dbw_20,data=cities)
m4a <- glm(formula = Proximity ~ dbw_20, family = "binomial", data = cities)
m1b <- lm(pboo_10~Proximity,data=cities)
m2b <- lm(pboo_10~Proximity+log(t_10),data=cities)
m3b <- lm(pboo_10~Proximity+log(t_10)+dbw_10,data=cities)
m4b <- glm(formula =Proximity ~ dbw_10, family = "binomial", data = cities)
m1c <- lm(pboo_00~Proximity,data=cities)
m2c <- lm(pboo_00~Proximity+log(t_00),data=cities)
m3c <- lm(pboo_00~Proximity+log(t_00)+dbw_00,data=cities)
m4c <- glm(formula = Proximity ~ dbw_00, family = "binomial", data = cities)
stargazer(m1a, m2a, m3a, m4a, m1b, m2b, m3b, m4b, m1c, m2c, m3c, m4c, type="text", title="Regression Results",
align=TRUE, dep.var.labels=c("Percent Black Homeownership 2000", "Proximity", "Percent Black Homeownership 2010","Proximity", "Percent Black Homeownership 2020", "Proximity"),
covariate.labels=c("Proximity","City Population 2000","Dissimilarity Index 2000", "City Population 2010", "Dissimilarity Index 2010", "City Population 2020","Dissimilarity Index 2020"), out="main.htm")
##
## Regression Results
## ==============================================================================================================================================================================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Percent Black Homeownership 2000 Proximity Percent Black Homeownership 2010 Proximity Percent Black Homeownership 2020 Proximity
## OLS logistic OLS logistic OLS logistic
## (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11) (12)
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Proximity 7.833*** 7.719*** 7.772*** 8.174*** 8.050*** 8.006*** 7.311*** 7.279*** 6.781***
## (1.260) (1.272) (1.279) (1.355) (1.363) (1.367) (1.105) (1.325) (1.325)
##
## City Population 2000 0.414 0.540
## (0.607) (0.661)
##
## Dissimilarity Index 2000 -0.028 0.022**
## (0.056) (0.011)
##
## City Population 2010 0.591 0.461
## (0.667) (0.709)
##
## Dissimilarity Index 2010 0.028 0.013
## (0.051) (0.009)
##
## City Population 2020 0.519 0.073
## (0.660) (0.677)
##
## Dissimilarity Index 2020 0.104** 0.021**
## (0.042) (0.009)
##
## Constant 5.510*** 1.249 0.602 -1.031*** 5.328*** -0.731 -0.122 -0.734** 4.350*** -0.380 1.225 -1.067***
## (0.784) (6.306) (6.452) (0.305) (0.864) (6.899) (6.998) (0.289) (0.688) (6.793) (6.741) (0.322)
##
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations 261 261 261 261 241 241 241 241 258 201 201 205
## R2 0.130 0.131 0.132 0.132 0.135 0.136 0.146 0.143 0.168
## Adjusted R2 0.127 0.125 0.122 0.129 0.128 0.125 0.143 0.134 0.155
## Log Likelihood -171.967 -161.849 -134.942
## Akaike Inf. Crit. 347.934 327.698 273.883
## Residual Std. Error 9.912 (df = 259) 9.922 (df = 258) 9.937 (df = 257) 10.332 (df = 239) 10.337 (df = 238) 10.352 (df = 237) 8.647 (df = 256) 9.064 (df = 198) 8.951 (df = 197)
## F Statistic 38.667*** (df = 1; 259) 19.525*** (df = 2; 258) 13.058*** (df = 3; 257) 36.394*** (df = 1; 239) 18.572*** (df = 2; 238) 12.445*** (df = 3; 237) 43.785*** (df = 1; 256) 16.476*** (df = 2; 198) 13.261*** (df = 3; 197)
## ==============================================================================================================================================================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
smallcit <- read.csv('C:/Users/canda/DEM Dissertation Data/cities_small DEC2020.csv')
bigcit <- read.csv('C:/Users/canda/DEM Dissertation Data/cities_big DEC2020.csv')
summary(smallcit$Proximity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.3462 1.0000 1.0000
summary(bigcit$Proximity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4198 1.0000 1.0000
smallcit$Proximity <- as.numeric(smallcit$Proximity)
bigcit$Proximity <- as.numeric(bigcit$Proximity)
class(smallcit$Proximity)
## [1] "numeric"
class(bigcit$Proximity)
## [1] "numeric"
hist(smallcit$t_20)
hist(log(smallcit$t_20+1))
hist(log(smallcit$t_20+1), breaks=50)
hist(bigcit$t_20)
hist(log(bigcit$t_20+1))
hist(log(bigcit$t_20+1), breaks=50)
#In separate regression analyses, cities were separated into big and small, using the median of total population. Small cities which have a total population of 24,486 or less (n=130) continued to show that proximity to freedom colonies was a significant factor in the increase of homeownership. Residential segregation produced a decrease in homeownership of -0.038 and again was not statistically significant. A regression using big cities (n=131) showed similar results, however the increase in homeownership due to proximity was smaller
fit8s<-lm(pbooDEC20~Proximity, data = smallcit)
summary(fit8s)
##
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = smallcit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.329 -3.877 -1.692 2.833 41.641
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3020 0.8631 4.984 1.97e-06 ***
## Proximity 8.1571 1.4670 5.561 1.50e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.957 on 128 degrees of freedom
## Multiple R-squared: 0.1946, Adjusted R-squared: 0.1883
## F-statistic: 30.92 on 1 and 128 DF, p-value: 1.5e-07
fit8b<-lm(pbooDEC20~Proximity, data = bigcit)
summary(fit8b)
##
## Call:
## lm(formula = pbooDEC20 ~ Proximity, data = bigcit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.626 -6.392 -2.932 2.976 52.884
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.842 1.314 5.208 7.33e-07 ***
## Proximity 7.394 2.027 3.647 0.000384 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.45 on 129 degrees of freedom
## Multiple R-squared: 0.09347, Adjusted R-squared: 0.08644
## F-statistic: 13.3 on 1 and 129 DF, p-value: 0.0003837
fit11s<-lm(pbooDEC20~Proximity+dbw_20, data = smallcit)
summary(fit11s)
##
## Call:
## lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = smallcit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.555 -3.802 -1.618 2.964 41.454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.15627 1.59207 3.239 0.00153 **
## Proximity 8.14220 1.47054 5.537 1.69e-07 ***
## dbw_20 -0.03789 0.05928 -0.639 0.52387
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.976 on 127 degrees of freedom
## Multiple R-squared: 0.1971, Adjusted R-squared: 0.1845
## F-statistic: 15.59 on 2 and 127 DF, p-value: 8.798e-07
fit11b<-lm(pbooDEC20~Proximity+dbw_20, data = bigcit)
summary(fit11b)
##
## Call:
## lm(formula = pbooDEC20 ~ Proximity + dbw_20, data = bigcit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.569 -6.261 -2.917 3.093 52.637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.46917 2.62970 2.840 0.005245 **
## Proximity 7.54063 2.10322 3.585 0.000477 ***
## dbw_20 -0.02476 0.08988 -0.276 0.783348
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.49 on 128 degrees of freedom
## Multiple R-squared: 0.094, Adjusted R-squared: 0.07985
## F-statistic: 6.641 on 2 and 128 DF, p-value: 0.001803
#When the cities were grouped by population size using the median, there was a difference in mean of percent Black homeownership by 2.82 points
cities%>%
mutate(smalltown = ifelse(test = t_20 < 24486,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
summarise(n = n(),
across(.cols = c(pboo_20, pb_20, b_20, dbw_20),
.fns = mean,
na.rm = TRUE))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(...)`.
## ℹ In group 1: `smalltown = "big"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
## # A tibble: 3 × 6
## smalltown n pboo_20 pb_20 b_20 dbw_20
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 big 131 9.95 14.2 20064. 27.8
## 2 small 130 7.13 10.3 1682. 22.4
## 3 <NA> 11 NaN NaN NaN NaN
#using median for 2010
cities%>%
mutate(smalltown = ifelse(test = t_10 < 23497,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
summarise(n = n(),
across(.cols = c(pboo_10, pb_10, b_10, dbw_10),
.fns = mean,
na.rm = TRUE))
## # A tibble: 3 × 6
## smalltown n pboo_10 pb_10 b_10 dbw_10
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 big 121 10.1 13.4 17999. 29.7
## 2 small 120 7.18 9.46 1424. 24.6
## 3 <NA> 31 NaN NaN NaN NaN
#using median for 2000
cities%>%
mutate(smalltown = ifelse(test = t_00 < 23935,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
summarise(n = n(),
across(.cols = c(pboo_00, pb_00, b_00, dbw_00),
.fns = mean,
na.rm = TRUE))
## # A tibble: 3 × 6
## smalltown n pboo_00 pb_00 b_00 dbw_00
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 big 103 9.45 12.3 17518. 32.7
## 2 small 102 6.15 8.30 1245. 28.7
## 3 <NA> 67 4.86 NaN NaN NaN
cities%>%
mutate(smalltown = ifelse(test = t_20 < 24486,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
filter(Proximity == "1")%>%
summarise(n = n(),
across(.cols = c(pboo_20, pb_20, b_20, dbw_20),
.fns = mean,
na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_20 <dbl>, pb_20 <dbl>,
## # b_20 <dbl>, dbw_20 <dbl>
cities%>%
mutate(smalltown = ifelse(test = t_20 < 24486,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
filter(Proximity == "0")%>%
summarise(n = n(),
across(.cols = c(pboo_20, pb_20, b_20, dbw_20),
.fns = mean,
na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_20 <dbl>, pb_20 <dbl>,
## # b_20 <dbl>, dbw_20 <dbl>
cities%>%
mutate(smalltown = ifelse(test = t_10 < 23497,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
filter(Proximity == "1")%>%
summarise(n = n(),
across(.cols = c(pboo_10, pb_10, b_10, dbw_10),
.fns = mean,
na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_10 <dbl>, pb_10 <dbl>,
## # b_10 <dbl>, dbw_10 <dbl>
cities%>%
mutate(smalltown = ifelse(test = t_10 < 23497,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
filter(Proximity == "0")%>%
summarise(n = n(),
across(.cols = c(pboo_10, pb_10, b_10, dbw_10),
.fns = mean,
na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_10 <dbl>, pb_10 <dbl>,
## # b_10 <dbl>, dbw_10 <dbl>
cities%>%
mutate(smalltown = ifelse(test = t_00 < 23935,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
filter(Proximity == "1")%>%
summarise(n = n(),
across(.cols = c(pboo_00, pb_00, b_00, dbw_00),
.fns = mean,
na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_00 <dbl>, pb_00 <dbl>,
## # b_00 <dbl>, dbw_00 <dbl>
cities%>%
mutate(smalltown = ifelse(test = t_00 < 23935,
yes = "small",
no = "big") )%>%
group_by(smalltown) %>%
filter(Proximity == "0")%>%
summarise(n = n(),
across(.cols = c(pboo_00, pb_00, b_00, dbw_00),
.fns = mean,
na.rm = TRUE))
## # A tibble: 0 × 6
## # ℹ 6 variables: smalltown <chr>, n <int>, pboo_00 <dbl>, pb_00 <dbl>,
## # b_00 <dbl>, dbw_00 <dbl>
#linear regression model path c
library(lavaan)
fit.totaleffect20=lm(pboo_20~dbw_20,cities)
summary(fit.totaleffect20)
##
## Call:
## lm(formula = pboo_20 ~ dbw_20, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.787 -6.303 -3.709 3.457 58.702
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.73402 1.52652 5.066 7.71e-07 ***
## dbw_20 0.03215 0.05485 0.586 0.558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.62 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.001324, Adjusted R-squared: -0.002531
## F-statistic: 0.3435 on 1 and 259 DF, p-value: 0.5583
fit.totaleffect10=lm(pboo_10~dbw_10,cities)
summary(fit.totaleffect10)
##
## Call:
## lm(formula = pboo_10 ~ dbw_10, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.133 -6.681 -4.104 3.138 57.176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.91381 1.54839 4.465 1.23e-05 ***
## dbw_10 0.06408 0.05069 1.264 0.207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.05 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.006644, Adjusted R-squared: 0.002487
## F-statistic: 1.598 on 1 and 239 DF, p-value: 0.2074
fit.totaleffect00=lm(pboo_00~dbw_00,cities)
summary(fit.totaleffect00)
##
## Call:
## lm(formula = pboo_00 ~ dbw_00, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.250 -5.673 -2.842 2.320 53.447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3670 1.4704 2.290 0.023074 *
## dbw_00 0.1453 0.0425 3.419 0.000763 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.49 on 199 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.05548, Adjusted R-squared: 0.05073
## F-statistic: 11.69 on 1 and 199 DF, p-value: 0.0007626
#homeownership and percent black significant because they are highly correlated
fit.totaleffect2_20=lm(pb_20~pboo_20,cities)
summary(fit.totaleffect2_20)
##
## Call:
## lm(formula = pb_20 ~ pboo_20, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0786 -2.3097 -0.7025 1.6565 16.8774
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.07937 0.26834 11.48 <2e-16 ***
## pboo_20 1.07144 0.01973 54.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.374 on 259 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.9193, Adjusted R-squared: 0.919
## F-statistic: 2950 on 1 and 259 DF, p-value: < 2.2e-16
fit.totaleffect2_10=lm(pb_10~pboo_10,cities)
summary(fit.totaleffect2_10)
##
## Call:
## lm(formula = pb_10 ~ pboo_10, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0698 -1.9509 -0.6102 1.4097 15.1085
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.27341 0.25014 9.089 <2e-16 ***
## pboo_10 1.05697 0.01783 59.286 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.057 on 239 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.9363, Adjusted R-squared: 0.9361
## F-statistic: 3515 on 1 and 239 DF, p-value: < 2.2e-16
fit.totaleffect2_00=lm(pb_00~pboo_00,cities)
summary(fit.totaleffect2_00)
##
## Call:
## lm(formula = pb_00 ~ pboo_00, data = cities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.2517 -1.6284 -0.6059 1.2296 19.8110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.81477 0.28319 6.408 1.04e-09 ***
## pboo_00 1.09754 0.02268 48.393 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.124 on 199 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.9217, Adjusted R-squared: 0.9213
## F-statistic: 2342 on 1 and 199 DF, p-value: < 2.2e-16
#Proximity as the mediator
cities$Proximity <- as.ordered(cities$Proximity)
class(cities$Proximity)
## [1] "ordered" "factor"
summary(cities$Proximity)
## Not in Prox In Proximity
## 164 108
specmod20 <- "
#Path c
pboo_20 ~ c*dbw_20
#Path a
Proximity ~ a*dbw_20
#Path b
pboo_20 ~ b*Proximity
#Indirect effect (a*b)
ab :=a*b
"
specmod10 <- "
#Path c
pboo_10 ~ c*dbw_10
#Path a
Proximity ~ a*dbw_10
#Path b
pboo_10 ~ b*Proximity
#Indirect effect (a*b)
ab :=a*b
"
specmod00 <- "
#Path c
pboo_00 ~ c*dbw_00
#Path a
Proximity ~ a*dbw_00
#Path b
pboo_00 ~ b*Proximity
#Indirect effect (a*b)
ab :=a*b
"
fitmodel20<- sem(specmod20, data=cities)
fitmodel10<- sem(specmod10, data=cities)
fitmodel00<- sem(specmod00, data=cities)
summary(fitmodel20,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 16 iterations
##
## Estimator DWLS
## Optimization method NLMINB
## Number of model parameters 6
##
## Used Total
## Number of observations 261 272
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 0.000 0.000
## Degrees of freedom 0 0
##
## Model Test Baseline Model:
##
## Test statistic 46.356 46.356
## Degrees of freedom 1 1
## P-value 0.000 0.000
## Scaling correction factor 1.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 1.000 1.000
##
## Robust Comparative Fit Index (CFI) NA
## Robust Tucker-Lewis Index (TLI) NA
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.000 0.000
## P-value H_0: RMSEA <= 0.050 NA NA
## P-value H_0: RMSEA >= 0.080 NA NA
##
## Robust RMSEA NA
## 90 Percent confidence interval - lower NA
## 90 Percent confidence interval - upper NA
## P-value H_0: Robust RMSEA <= 0.050 NA
## P-value H_0: Robust RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000 0.000
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Unstructured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pboo_20 ~
## dbw_20 (c) -0.040 0.053 -0.770 0.441
## Proximity ~
## dbw_20 (a) 0.014 0.007 2.112 0.035
## pboo_20 ~
## Proximity (b) 5.275 0.775 6.809 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .pboo_20 7.734 1.894 4.084 0.000
## .Proximity 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## Proximity|t1 0.636 0.182 3.492 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pboo_20 84.081 7.929 10.605 0.000
## .Proximity 1.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## Proximity 1.000
##
## R-Square:
## Estimate
## pboo_20 0.250
## Proximity 0.027
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## ab 0.073 0.036 2.043 0.041
summary(fitmodel10,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 19 iterations
##
## Estimator DWLS
## Optimization method NLMINB
## Number of model parameters 6
##
## Used Total
## Number of observations 241 272
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 0.000 0.000
## Degrees of freedom 0 0
##
## Model Test Baseline Model:
##
## Test statistic 47.298 47.298
## Degrees of freedom 1 1
## P-value 0.000 0.000
## Scaling correction factor 1.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 1.000 1.000
##
## Robust Comparative Fit Index (CFI) NA
## Robust Tucker-Lewis Index (TLI) NA
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.000 0.000
## P-value H_0: RMSEA <= 0.050 NA NA
## P-value H_0: RMSEA >= 0.080 NA NA
##
## Robust RMSEA NA
## 90 Percent confidence interval - lower NA
## 90 Percent confidence interval - upper NA
## P-value H_0: Robust RMSEA <= 0.050 NA
## P-value H_0: Robust RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000 0.000
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Unstructured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pboo_10 ~
## dbw_10 (c) 0.019 0.047 0.404 0.686
## Proximity ~
## dbw_10 (a) 0.008 0.006 1.404 0.160
## pboo_10 ~
## Proximity (b) 5.568 0.810 6.877 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .pboo_10 6.914 2.065 3.348 0.001
## .Proximity 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## Proximity|t1 0.457 0.178 2.573 0.010
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pboo_10 90.177 8.831 10.212 0.000
## .Proximity 1.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## Proximity 1.000
##
## R-Square:
## Estimate
## pboo_10 0.261
## Proximity 0.013
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## ab 0.045 0.032 1.386 0.166
summary(fitmodel00,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6.15 ended normally after 19 iterations
##
## Estimator DWLS
## Optimization method NLMINB
## Number of model parameters 6
##
## Used Total
## Number of observations 201 272
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 0.000 0.000
## Degrees of freedom 0 0
##
## Model Test Baseline Model:
##
## Test statistic 45.724 45.724
## Degrees of freedom 1 1
## P-value 0.000 0.000
## Scaling correction factor 1.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 1.000 1.000
##
## Robust Comparative Fit Index (CFI) NA
## Robust Tucker-Lewis Index (TLI) NA
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.000 0.000
## P-value H_0: RMSEA <= 0.050 NA NA
## P-value H_0: RMSEA >= 0.080 NA NA
##
## Robust RMSEA NA
## 90 Percent confidence interval - lower NA
## 90 Percent confidence interval - upper NA
## P-value H_0: Robust RMSEA <= 0.050 NA
## P-value H_0: Robust RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000 0.000
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Unstructured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pboo_00 ~
## dbw_00 (c) 0.080 0.042 1.920 0.055
## Proximity ~
## dbw_00 (a) 0.016 0.006 2.634 0.008
## pboo_00 ~
## Proximity (b) 4.128 0.610 6.762 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .pboo_00 3.367 1.852 1.818 0.069
## .Proximity 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## Proximity|t1 0.763 0.209 3.644 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pboo_00 72.116 6.280 11.484 0.000
## .Proximity 1.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## Proximity 1.000
##
## R-Square:
## Estimate
## pboo_00 0.236
## Proximity 0.058
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## ab 0.065 0.026 2.457 0.014