An Analysis on San Diego County and San Diego City: an in-depth look into San Diego County’s and City’s racial/ethnic demographics and median household income, and segregation in San Diego City
Name: SHERIDAMAE GUDEZ School: UC Davis Year: Junior/3rd-Year Class: CRD 150- Quantitative Methods in Community Research
Final presentation can be found here: https://storymaps.arcgis.com/stories/b9bf0c289a4b475cb58e362ca531b52b
Library
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidycensus)
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
options(tigris_class = "sf")
library(tmap)
library(rmapshaper)
## Registered S3 method overwritten by 'geojsonlint':
## method from
## print.location dplyr
library(sp)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## Loading required package: rpart
## Registered S3 method overwritten by 'spatstat':
## method from
## print.boxx cli
##
## spatstat 1.63-0 (nickname: 'Space camouflage')
## For an introduction to spatstat, type 'beginner'
##
## Attaching package: 'spatstat'
## The following object is masked from 'package:data.table':
##
## shift
## The following object is masked from 'package:colorspace':
##
## coords
# Register the Census API key for this session. The key is read from the
# CENSUS_API_KEY environment variable (e.g. set in ~/.Renviron) rather than
# being written into the document, so the secret is not published with the
# analysis.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  stop(
    "Set the CENSUS_API_KEY environment variable before running this analysis.",
    call. = FALSE
  )
}
census_api_key(Sys.getenv("CENSUS_API_KEY"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
Getting San Diego County Census Data
# Download 2018 ACS 5-year tract estimates (with geometry) for San Diego
# County, reshape them to one row per tract, and derive group counts/shares.
San_Diego <- get_acs(
  geography = "tract",
  variables = c(
    tpop = "B01003_001",
    tpopr = "B03002_001",
    nhwhite = "B03002_003",
    nhblk = "B03002_004",
    nhasn = "B03002_006",
    hisp = "B03002_012",
    medincome = "B19013_001"
  ),
  year = 2018,
  survey = "acs5",
  state = "CA",
  county = "San Diego",
  geometry = TRUE
) %>%
  # Keep the estimates only, then give every variable its own column.
  select(-moe) %>%
  spread(key = variable, value = estimate) %>%
  # Derived counts: everyone outside the four named groups, and non-white.
  mutate(
    oth = tpopr - (nhwhite + nhblk + nhasn + hisp),
    nonwhite = tpopr - nhwhite
  ) %>%
  # Shares of each group, all using tpopr as the denominator.
  mutate(
    pnhwhite = nhwhite / tpopr,
    pnhasn = nhasn / tpopr,
    pnhblk = nhblk / tpopr,
    phisp = hisp / tpopr,
    poth = oth / tpopr,
    pnonwhite = nonwhite / tpopr
  ) %>%
  select(
    GEOID, tpop, tpopr, medincome,
    pnhwhite, pnhasn, pnhblk, phisp,
    nhwhite, nhasn, nhblk, hisp,
    nonwhite, pnonwhite, oth, poth
  ) %>%
  # Discard tracts whose total population (tpop) is zero.
  filter(tpop != 0)
## Getting data from the 2014-2018 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
Getting San Diego City Tracts Data
# Boundaries of all California places for 2018 (cb = TRUE requests the
# cartographic boundary file).
pl <- places(
  state = "CA",
  year = 2018,
  cb = TRUE
)
##
|
| | 0%
|
|= | 1%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================= | 24%
|
|================== | 25%
|
|=================== | 27%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 35%
|
|========================== | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 71%
|
|================================================== | 71%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|======================================================================| 100%
# Core-based statistical area boundaries for 2018 (cartographic boundary file).
cb <- core_based_statistical_areas(
  year = 2018,
  cb = TRUE
)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
# Statistical area(s) whose name contains "San Diego".
SD <- cb %>%
  filter(grepl("San Diego", NAME))

# The place whose name is exactly "San Diego", used as the city boundary.
SD.city <- pl %>%
  filter(NAME == "San Diego")

# Clip the county tracts to the city boundary, removing sliver polygons.
SD.city.tracts <- ms_clip(
  target = San_Diego,
  clip = SD.city,
  remove_slivers = TRUE
)
Dealing with Missing Data
# Tabulate missing values per variable in the county tract data.
aggr(San_Diego) %>%
  summary()
##
## Missings per variable:
## Variable Count
## GEOID 0
## tpop 0
## tpopr 0
## medincome 5
## pnhwhite 0
## pnhasn 0
## pnhblk 0
## phisp 0
## nhwhite 0
## nhasn 0
## nhblk 0
## hisp 0
## nonwhite 0
## pnonwhite 0
## oth 0
## poth 0
## geometry 0
##
## Missings in combinations of variables:
## Combinations Count Percent
## 0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0 621 99.201278
## 0:0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0 5 0.798722
# Tabulate missing values per variable in the city tract data.
aggr(SD.city.tracts) %>%
  summary()
##
## Missings per variable:
## Variable Count
## GEOID 0
## tpop 0
## tpopr 0
## medincome 5
## pnhwhite 0
## pnhasn 0
## pnhblk 0
## phisp 0
## nhwhite 0
## nhasn 0
## nhblk 0
## hisp 0
## nonwhite 0
## pnonwhite 0
## oth 0
## poth 0
## geometry 0
##
## Missings in combinations of variables:
## Combinations Count Percent
## 0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0 291 98.310811
## 0:0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0 5 1.689189
# Drop every tract with a missing median household income so that later
# calculations can run without receiving an error message. Removing these
# tracts is acceptable in this circumstance because the census tracts with
# missing values include the San Diego Airport, a military base hospital,
# parks, and a harbor.
San_Diego <- San_Diego %>%
  drop_na(medincome)
SD.city.tracts <- SD.city.tracts %>%
  drop_na(medincome)

# Re-check missingness in the county data after the drop.
aggr(San_Diego) %>%
  summary()
##
## Missings per variable:
## Variable Count
## GEOID 0
## tpop 0
## tpopr 0
## medincome 0
## pnhwhite 0
## pnhasn 0
## pnhblk 0
## phisp 0
## nhwhite 0
## nhasn 0
## nhblk 0
## hisp 0
## nonwhite 0
## pnonwhite 0
## oth 0
## poth 0
## geometry 0
##
## Missings in combinations of variables:
## Combinations Count Percent
## 0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0 621 100
# Re-check missingness in the city data after the drop.
aggr(SD.city.tracts) %>%
  summary()
##
## Missings per variable:
## Variable Count
## GEOID 0
## tpop 0
## tpopr 0
## medincome 0
## pnhwhite 0
## pnhasn 0
## pnhblk 0
## phisp 0
## nhwhite 0
## nhasn 0
## nhblk 0
## hisp 0
## nonwhite 0
## pnonwhite 0
## oth 0
## poth 0
## geometry 0
##
## Missings in combinations of variables:
## Combinations Count Percent
## 0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0 291 100
# Running the summary code again shows that there is no longer any missing data, so we can proceed with analyzing the dataset.
Graphs for San Diego County
# Box plot of tract-level median household income in the county.
ggplot(San_Diego, aes(y = medincome)) +
  geom_boxplot() +
  labs(y = "Median Household Income")

# Scatter plots of median household income against each group's share.
ggplot(San_Diego, aes(x = pnhwhite, y = medincome)) +
  geom_point() +
  labs(x = "Percent White", y = "Median Household Income")

ggplot(San_Diego, aes(x = pnhblk, y = medincome)) +
  geom_point() +
  labs(x = "Percent Black", y = "Median Household Income")

ggplot(San_Diego, aes(x = pnhasn, y = medincome)) +
  geom_point() +
  labs(x = "Percent Asian", y = "Median Household Income")

ggplot(San_Diego, aes(x = phisp, y = medincome)) +
  geom_point() +
  labs(x = "Percent Hispanic", y = "Median Household Income")
Looking at Demographic Proportions in San Diego County
# Histograms (45 bins each) of per-tract counts for each group in the county.
ggplot(San_Diego, aes(x = nhwhite)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Whites")

ggplot(San_Diego, aes(x = nhblk)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Blacks")

ggplot(San_Diego, aes(x = nhasn)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Asians")

ggplot(San_Diego, aes(x = hisp)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Hispanics")
Looking at the Average Median Household Income for San Diego County
# Mean of the tract median household incomes across the county.
summarize(San_Diego, mean = mean(medincome))
## Simple feature collection with 1 feature and 1 field
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -117.5961 ymin: 32.53416 xmax: -116.0811 ymax: 33.50502
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## mean geometry
## 1 79566.18 POLYGON ((-117.0296 32.5422...
#Average median household income in San Diego County is $79566.18
Correlational Analysis on Median Household Income and Race/Ethnicity in San Diego County
# Correlation between median household income and pnhwhite (county tracts).
summarize(San_Diego, medincome_white = cor(medincome, pnhwhite))
## Simple feature collection with 1 feature and 1 field
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -117.5961 ymin: 32.53416 xmax: -116.0811 ymax: 33.50502
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_white geometry
## 1 0.5519117 POLYGON ((-117.0296 32.5422...
#Correlation Coefficient= 0.55 (medium positive correlation)
# Correlation between median household income and pnhblk (county tracts).
summarize(San_Diego, medincome_black = cor(medincome, pnhblk))
## Simple feature collection with 1 feature and 1 field
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -117.5961 ymin: 32.53416 xmax: -116.0811 ymax: 33.50502
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_black geometry
## 1 -0.3327813 POLYGON ((-117.0296 32.5422...
#Correlation Coefficient= -0.33 (low negative correlation)
# Correlation between median household income and pnhasn (county tracts).
summarize(San_Diego, medincome_asian = cor(medincome, pnhasn))
## Simple feature collection with 1 feature and 1 field
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -117.5961 ymin: 32.53416 xmax: -116.0811 ymax: 33.50502
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_asian geometry
## 1 0.2145339 POLYGON ((-117.0296 32.5422...
#Correlation Coefficient= 0.21 (low to nonexistent positive correlation)
# Correlation between median household income and phisp (county tracts).
summarize(San_Diego, medincome_hispanic = cor(medincome, phisp))
## Simple feature collection with 1 feature and 1 field
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -117.5961 ymin: 32.53416 xmax: -116.0811 ymax: 33.50502
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_hispanic geometry
## 1 -0.6348156 POLYGON ((-117.0296 32.5422...
#Correlation Coefficient= -0.63 (strong negative correlation)
Maps for San Diego County
# Quantile choropleth of tract-level median household income in the county,
# drawn without tract borders, with a scale bar (miles) and a compass.
SanDiegoM <- tm_shape(San_Diego, unit = "mi") +
  tm_polygons(
    col = "medincome", style = "quantile", palette = "Greys",
    border.alpha = 0
  ) +
  # `size` is deprecated in tm_scale_bar(); `text.size` is its replacement.
  tm_scale_bar(
    breaks = c(0, 10, 20), text.size = 1,
    position = c("left", "bottom")
  ) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(
    main.title = "Median Income in San Diego County",
    main.title.size = 0.95, frame = FALSE
  )
SanDiegoM
# Quantile choropleth of pnhwhite by tract in the county, drawn without
# tract borders, with a scale bar (miles) and a compass.
SanDiegoW <- tm_shape(San_Diego, unit = "mi") +
  tm_polygons(
    col = "pnhwhite", style = "quantile", palette = "Blues",
    border.alpha = 0
  ) +
  # `size` is deprecated in tm_scale_bar(); `text.size` is its replacement.
  tm_scale_bar(
    breaks = c(0, 10, 20), text.size = 1,
    position = c("left", "bottom")
  ) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(
    main.title = "Percent White in San Diego County",
    main.title.size = 0.95, frame = FALSE
  )
SanDiegoW
# Quantile choropleth of pnhblk by tract in the county, drawn without
# tract borders, with a scale bar (miles) and a compass.
SanDiegoB <- tm_shape(San_Diego, unit = "mi") +
  tm_polygons(
    col = "pnhblk", style = "quantile", palette = "Greens",
    border.alpha = 0
  ) +
  # `size` is deprecated in tm_scale_bar(); `text.size` is its replacement.
  tm_scale_bar(
    breaks = c(0, 10, 20), text.size = 1,
    position = c("left", "bottom")
  ) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(
    main.title = "Percent Black in San Diego County",
    main.title.size = 0.95, frame = FALSE
  )
SanDiegoB
# Quantile choropleth of pnhasn by tract in the county, drawn without
# tract borders, with a scale bar (miles) and a compass.
SanDiegoA <- tm_shape(San_Diego, unit = "mi") +
  tm_polygons(
    col = "pnhasn", style = "quantile", palette = "Purples",
    border.alpha = 0
  ) +
  # `size` is deprecated in tm_scale_bar(); `text.size` is its replacement.
  tm_scale_bar(
    breaks = c(0, 10, 20), text.size = 1,
    position = c("left", "bottom")
  ) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(
    main.title = "Percent Asian in San Diego County",
    main.title.size = 0.95, frame = FALSE
  )
SanDiegoA
# Quantile choropleth of phisp by tract in the county, drawn without
# tract borders, with a scale bar (miles) and a compass.
SanDiegoH <- tm_shape(San_Diego, unit = "mi") +
  tm_polygons(
    col = "phisp", style = "quantile", palette = "Reds",
    border.alpha = 0
  ) +
  # `size` is deprecated in tm_scale_bar(); `text.size` is its replacement.
  tm_scale_bar(
    breaks = c(0, 10, 20), text.size = 1,
    position = c("left", "bottom")
  ) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(
    main.title = "Percent Hispanic in San Diego County",
    main.title.size = 0.95, frame = FALSE
  )
SanDiegoH
Graphs for San Diego City
# Box plot of tract-level median household income in the city.
ggplot(SD.city.tracts, aes(y = medincome)) +
  geom_boxplot() +
  labs(y = "Median Household Income")

# Scatter plots of median household income against each group's share.
ggplot(SD.city.tracts, aes(x = pnhwhite, y = medincome)) +
  geom_point() +
  labs(x = "Percent White", y = "Median Household Income")

ggplot(SD.city.tracts, aes(x = pnhblk, y = medincome)) +
  geom_point() +
  labs(x = "Percent Black", y = "Median Household Income")

ggplot(SD.city.tracts, aes(x = pnhasn, y = medincome)) +
  geom_point() +
  labs(x = "Percent Asian", y = "Median Household Income")

ggplot(SD.city.tracts, aes(x = phisp, y = medincome)) +
  geom_point() +
  labs(x = "Percent Hispanic", y = "Median Household Income")
Looking at Demographic Proportions in San Diego City
# Histograms (45 bins each) of per-tract counts for each group in the city.
ggplot(SD.city.tracts, aes(x = nhwhite)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Whites")

ggplot(SD.city.tracts, aes(x = nhblk)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Blacks")

ggplot(SD.city.tracts, aes(x = nhasn)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Asians")

ggplot(SD.city.tracts, aes(x = hisp)) +
  geom_histogram(bins = 45) +
  labs(x = "Number of Hispanics")
Looking at the Average Median Household Income for San Diego City
# Mean of the tract median household incomes across the city.
summarize(SD.city.tracts, mean = mean(medincome))
## Simple feature collection with 1 feature and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## mean geometry
## 1 82493.75 MULTIPOLYGON (((-116.9266 3...
#Average median household income in San Diego City is $82493.75
Correlational Analysis on Median Household Income and Race/Ethnicity in San Diego City
# Correlation between tract median household income and percent White.
summarize(SD.city.tracts, medincome_white = cor(medincome, pnhwhite))
## Simple feature collection with 1 feature and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_white geometry
## 1 0.540845 MULTIPOLYGON (((-116.9266 3...
#Correlation Coefficient= 0.54 (medium positive correlation)
# Correlation between tract median household income and percent Black.
summarize(SD.city.tracts, medincome_black = cor(medincome, pnhblk))
## Simple feature collection with 1 feature and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_black geometry
## 1 -0.3959043 MULTIPOLYGON (((-116.9266 3...
#Correlation Coefficient= -0.40 (low to low-medium negative correlation)
# Correlation between tract median household income and percent Asian.
summarize(SD.city.tracts, medincome_asian = cor(medincome, pnhasn))
## Simple feature collection with 1 feature and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_asian geometry
## 1 0.240393 MULTIPOLYGON (((-116.9266 3...
#Correlation Coefficient= 0.24 (low to nonexistent positive correlation)
# Correlation between tract median household income and percent Hispanic.
summarize(SD.city.tracts, medincome_hispanic = cor(medincome, phisp))
## Simple feature collection with 1 feature and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## medincome_hispanic geometry
## 1 -0.6381943 MULTIPOLYGON (((-116.9266 3...
#Correlation Coefficient= -0.64 (strong negative correlation)
Maps for San Diego City
# Quantile choropleth of tract-level median household income for San Diego
# City, with a scale bar (map units set to miles) and a compass.
SD.CityM <- tm_shape(SD.city.tracts, unit = "mi") +
  tm_polygons(col = "medincome", style = "quantile", palette = "Greys",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Median Income in San Diego City",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SD.CityM
## Legend labels were too wide. The labels have been resized to 0.55, 0.55, 0.55, 0.51, 0.48. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
# Quantile choropleth of the tract-level percent White for San Diego City,
# with a scale bar (map units set to miles) and a compass.
SD.CityW <- tm_shape(SD.city.tracts, unit = "mi") +
  tm_polygons(col = "pnhwhite", style = "quantile", palette = "Blues",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Percent White in San Diego City",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SD.CityW
## Legend labels were too wide. The labels have been resized to 0.64, 0.64, 0.64, 0.64, 0.64. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
# Quantile choropleth of the tract-level percent Black for San Diego City,
# with a scale bar (map units set to miles) and a compass.
SD.CityB <- tm_shape(SD.city.tracts, unit = "mi") +
  tm_polygons(col = "pnhblk", style = "quantile", palette = "Greens",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Percent Black in San Diego City",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SD.CityB
## Legend labels were too wide. The labels have been resized to 0.64, 0.64, 0.64, 0.64, 0.64. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
# Quantile choropleth of the tract-level percent Asian for San Diego City,
# with a scale bar (map units set to miles) and a compass.
SD.CityA <- tm_shape(SD.city.tracts, unit = "mi") +
  tm_polygons(col = "pnhasn", style = "quantile", palette = "Purples",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Percent Asian in San Diego City",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SD.CityA
## Legend labels were too wide. The labels have been resized to 0.64, 0.64, 0.64, 0.64, 0.64. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
# Quantile choropleth of the tract-level percent Hispanic for San Diego City,
# with a scale bar (map units set to miles) and a compass.
SD.CityH <- tm_shape(SD.city.tracts, unit = "mi") +
  tm_polygons(col = "phisp", style = "quantile", palette = "Reds",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Percent Hispanic in San Diego City",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SD.CityH
## Legend labels were too wide. The labels have been resized to 0.64, 0.64, 0.64, 0.64, 0.64. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
Dissimilarity Index for San Diego City
# Attach totals of each group's population (and of tpopr) to every tract row,
# as the "...c" columns. These totals are the denominators used by the
# segregation indices computed from SD.city.tracts.
SD.city.tracts <- ungroup(
  mutate(
    SD.city.tracts,
    nhwhitec = sum(nhwhite),
    nonwhitec = sum(nonwhite),
    nhasnc = sum(nhasn),
    nhblkc = sum(nhblk),
    othc = sum(oth),
    hispc = sum(hisp),
    tpoprc = sum(tpopr)
  )
)
# Dissimilarity index of each group versus Whites (nhwhite).
# Per tract: |group share of the group's total - White share of the White
# total|; the index is half the sum of those absolute differences.
#   BWD = Black-White, AWD = Asian-White, HWD = Hispanic-White,
#   NWWD = non-White-White.
summarize(
  mutate(
    SD.city.tracts,
    d.wb = abs(nhblk / nhblkc - nhwhite / nhwhitec),
    d.wa = abs(nhasn / nhasnc - nhwhite / nhwhitec),
    d.wh = abs(hisp / hispc - nhwhite / nhwhitec),
    d.wnw = abs(nonwhite / nonwhitec - nhwhite / nhwhitec)
  ),
  BWD = 0.5 * sum(d.wb, na.rm = TRUE),
  AWD = 0.5 * sum(d.wa, na.rm = TRUE),
  HWD = 0.5 * sum(d.wh, na.rm = TRUE),
  NWWD = 0.5 * sum(d.wnw, na.rm = TRUE)
)
## Simple feature collection with 1 feature and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## BWD AWD HWD NWWD geometry
## 1 0.5719762 0.4678355 0.5453133 0.4466351 MULTIPOLYGON (((-116.9266 3...
#The Dissimilarity Index indicates the share of a group that would have to move to a different tract for that group and Whites to be evenly distributed across the city. For San Diego City, about 57% of Blacks, 47% of Asians, 55% of Hispanics, and 45% of non-Whites would need to move to achieve an even distribution relative to Whites in the City.
Interaction Index for San Diego City
# Interaction index of each group with Whites (nhwhite).
# Per tract: (group count / group total) * (White count / tract population
# tpopr); the index is the sum of those products over all tracts.
#   BWI = Black-White, AWI = Asian-White, HWI = Hispanic-White.
summarize(
  mutate(
    SD.city.tracts,
    i.wb = (nhblk / nhblkc) * (nhwhite / tpopr),
    i.wa = (nhasn / nhasnc) * (nhwhite / tpopr),
    i.wh = (hisp / hispc) * (nhwhite / tpopr)
  ),
  BWI = sum(i.wb, na.rm = TRUE),
  AWI = sum(i.wa, na.rm = TRUE),
  HWI = sum(i.wh, na.rm = TRUE)
)
## Simple feature collection with 1 feature and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.2822 ymin: 32.53471 xmax: -116.9057 ymax: 33.11418
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## BWI AWI HWI geometry
## 1 0.2936851 0.3771808 0.2729814 MULTIPOLYGON (((-116.9266 3...
#Each value indicates the chance that a person from a given racial/ethnic group interacts with a person who is White in San Diego City. According to the calculations, the chance of an Asian person interacting with a White person is about 38%, the chance of a Hispanic person interacting with a White person is about 27%, and the chance of a Black person interacting with a White person is about 29%. This means that 38 of every 100 people an Asian person meets in his or her neighborhood will be White, while 27 out of every 100 people a Hispanic person meets will be White. It also means that only 29 out of every 100 people a Black person meets in his or her neighborhood will be White, which indicates major segregation. In other words, there is higher segregation between Black and White people and between Hispanic and White people than between Asian and White people within San Diego City.
Multigroup Entropy Index in San Diego City
# Tract-level multigroup entropy score for San Diego City, stored as `ent`.
# Each term is p * log(1 / p) for one group's p* column (presumably the
# group's proportion of the tract population -- confirm upstream). When p is
# 0 the term evaluates to NaN (0 * Inf), so NaN terms are replaced with 0
# before the five terms are added together. The temporary e1..e5 columns are
# dropped at the end.
SD.city.tracts <- select(
  mutate(
    SD.city.tracts,
    e1 = pnhwhite * log(1 / pnhwhite),
    e2 = pnhasn * log(1 / pnhasn),
    e3 = pnhblk * log(1 / pnhblk),
    e4 = phisp * log(1 / phisp),
    e5 = poth * log(1 / poth),
    e1 = replace(e1, is.nan(e1), 0),
    e2 = replace(e2, is.nan(e2), 0),
    e3 = replace(e3, is.nan(e3), 0),
    e4 = replace(e4, is.nan(e4), 0),
    e5 = replace(e5, is.nan(e5), 0),
    ent = e1 + e2 + e3 + e4 + e5
  ),
  -c(e1:e5)
)
#This code adds the variable "ent" to the dataset for SD.city.tracts (San Diego City), which is the multigroup entropy score, a tract-level measure of diversity. With five racial/ethnic groups, the entropy score ranges from 0 (only one group is present in the tract) to log(5), about 1.609 (all five groups are equally represented).
# Natural log of 5: the largest value the five-group entropy score can take.
log(5)
## [1] 1.609438
#The code above calculates the maximum entropy score, which is 1.609 because we are analyzing 5 different racial/ethnic groups. The higher a tract's entropy score, the greater its diversity.
Multigroup Entropy Index for San Diego County
# Tract-level multigroup entropy score for San Diego County, stored as `ent`.
# Each term is p * log(1 / p) for one group's p* column (presumably the
# group's proportion of the tract population -- confirm upstream). When p is
# 0 the term evaluates to NaN (0 * Inf), so NaN terms are replaced with 0
# before the five terms are added together. The temporary e1..e5 columns are
# dropped at the end.
San_Diego <- select(
  mutate(
    San_Diego,
    e1 = pnhwhite * log(1 / pnhwhite),
    e2 = pnhasn * log(1 / pnhasn),
    e3 = pnhblk * log(1 / pnhblk),
    e4 = phisp * log(1 / phisp),
    e5 = poth * log(1 / poth),
    e1 = replace(e1, is.nan(e1), 0),
    e2 = replace(e2, is.nan(e2), 0),
    e3 = replace(e3, is.nan(e3), 0),
    e4 = replace(e4, is.nan(e4), 0),
    e5 = replace(e5, is.nan(e5), 0),
    ent = e1 + e2 + e3 + e4 + e5
  ),
  -c(e1:e5)
)
#This code adds the variable "ent" to the dataset for San_Diego (San Diego County), which is the multigroup entropy score, a tract-level measure of diversity. With five racial/ethnic groups, the entropy score ranges from 0 (only one group is present in the tract) to log(5), about 1.609 (all five groups are equally represented).
# Natural log of 5: the largest value the five-group entropy score can take.
log(5)
## [1] 1.609438
#The code above calculates the maximum entropy score, which is 1.609 for San Diego County because the same 5 racial/ethnic groups are analyzed. The higher a tract's entropy score, the greater its diversity.
Entropy Score for San Diego City and San Diego County
# Quantile choropleth of the tract-level entropy score (`ent`) for San Diego
# City, with a scale bar (map units set to miles) and a compass.
SD.CityE <- tm_shape(SD.city.tracts, unit = "mi") +
  tm_polygons(col = "ent", style = "quantile", palette = "Reds",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Entropy Score for San Diego City",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SD.CityE
## Legend labels were too wide. The labels have been resized to 0.64, 0.64, 0.64, 0.64, 0.64. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
# Quantile choropleth of the tract-level entropy score (`ent`) for San Diego
# County, with a scale bar (map units set to miles) and a compass.
SanDiegoE <- tm_shape(San_Diego, unit = "mi") +
  tm_polygons(col = "ent", style = "quantile", palette = "Reds",
              border.alpha = 0) +
  # `text.size` replaces the deprecated `size` argument of tm_scale_bar(),
  # which otherwise triggers a deprecation warning.
  tm_scale_bar(breaks = c(0, 10, 20), text.size = 1,
               position = c("left", "bottom")) +
  tm_compass(type = "4star", position = c("left", "top")) +
  tm_layout(main.title = "Entropy Score for San Diego County",
            main.title.size = 0.95, frame = FALSE)
# Draw the map.
SanDiegoE