# Load libraries
library(readxl)
library(tidyverse)
# Make the minimal theme the default for every ggplot drawn below
theme_minimal() |> theme_set()
Setup
Load and prepare the opportunity zone data for analysis.
# Read the tract-level opportunity zone workbook
data <- read_xlsx("data/urbaninstitute_tractlevelozanalysis_update01142021.xlsx")

# Recode the designation flag into explicit labels (a missing value means the
# tract was not designated), then drop the columns this analysis does not use
ozs <- data |>
  mutate(
    DesignatedOZ = if_else(is.na(DesignatedOZ), "not_designated", "designated")
  ) |>
  select(!c(dec_score, SE_Flag, pctown, Metro, Micro, NoCBSAType))

# Keep only the Massachusetts tracts
ozs_ma <- filter(ozs, state == "Massachusetts")
There are 677 tracts in Massachusetts. To get an overview of the data, I calculated the median value for each continuous variable, broken down by tracts designated as opportunity zones vs. not.
Code
# Median of each continuous variable, split by designation status.
# geoid, state, county and Type are left out of the summary table.
ozs_ma |>
  select(!c(geoid, state, county, Type)) |>
  gtsummary::tbl_summary(
    by = DesignatedOZ,
    statistic = list(gtsummary::all_continuous() ~ "{median}"),
    missing = "no"
  )
| Characteristic | designated N = 138¹ | not_designated N = 539¹ |
|---|---|---|
| Population | 3,734 | 4,139 |
| medhhincome | 39,875 | 52,163 |
| PovertyRate | 0.22 | 0.17 |
| unemprate | 0.10 | 0.08 |
| medvalue | 228,400 | 266,500 |
| medrent | 926 | 1,076 |
| severerentburden | 0.25 | 0.24 |
| vacancyrate | 0.08 | 0.08 |
| pctwhite | 0.62 | 0.61 |
| pctBlack | 0.05 | 0.06 |
| pctHispanic | 0.15 | 0.13 |
| pctAAPIalone | 0.02 | 0.03 |
| pctunder18 | 0.20 | 0.20 |
| pctover64 | 0.13 | 0.12 |
| HSorlower | 0.51 | 0.46 |
| BAorhigher | 0.21 | 0.25 |
| ¹ Median | | |
Exercise 1
Q1-1
In Massachusetts, what are the average poverty rates for Opportunity Zones and non-Opportunity Zones?
# Mean poverty rate within each designation group (missing rates are ignored)
ozs_ma |>
  group_by(DesignatedOZ) |>
  summarise(
    AvgPovertyRate = mean(PovertyRate, na.rm = TRUE)
  )
| DesignatedOZ | AvgPovertyRate |
|---|---|
| designated | 0.2591521 |
| not_designated | 0.1914590 |
In MA, the mean poverty rate for designated opportunity zones is 0.259, and the average rate for non-opportunity zones is 0.191.
Q1-2
What are the corresponding situations by county in Massachusetts?
# Mean poverty rate for every county x designation combination.
# `.groups = "drop"` returns an ungrouped tibble, so later steps are not
# evaluated per county by accident and no regrouping message is printed.
poverty_per_county <- ozs_ma |>
  group_by(county, DesignatedOZ) |>
  summarise(
    AvgPovertyRate = mean(PovertyRate, na.rm = TRUE),
    .groups = "drop"
  )

# One row per county, with the designated and non-designated means side by side
poverty_per_county |>
  pivot_wider(names_from = DesignatedOZ, values_from = AvgPovertyRate)
| county | designated | not_designated |
|---|---|---|
| Barnstable County | 0.1420523 | 0.0789744 |
| Berkshire County | 0.1910461 | 0.1388011 |
| Bristol County | 0.2712375 | 0.2102006 |
| Dukes County | NA | 0.1042385 |
| Essex County | 0.2314069 | 0.1968981 |
| Franklin County | 0.1588750 | 0.0980621 |
| Hampden County | 0.3918664 | 0.2363856 |
| Hampshire County | 0.2275977 | 0.1987574 |
| Middlesex County | 0.2054697 | 0.1585122 |
| Norfolk County | 0.1032625 | 0.1340163 |
| Plymouth County | 0.2541559 | 0.1207872 |
| Suffolk County | 0.3851553 | 0.2348872 |
| Worcester County | 0.2504993 | 0.2074332 |
Let’s remove Dukes County, since there is no poverty data for designated tracts in that county.
# Drop Dukes County from the county-level comparison
poverty_per_county <- filter(poverty_per_county, county != "Dukes County")
Let’s also create a bar plot comparing mean poverty in each county, broken down by designated vs non-designated tracts, and sorted by poverty rate.
# Dodged bar chart of mean poverty rate per county, filled by designation.
# Counties are ordered by the mean of their group rates; coord_flip() puts the
# county names on the vertical axis.
poverty_per_county |>
  ggplot(aes(
    x = reorder(county, -AvgPovertyRate),
    y = AvgPovertyRate,
    fill = DesignatedOZ
  )) +
  # geom_col() plots the values as given, like geom_bar(stat = "identity")
  geom_col(position = "dodge", alpha = 0.8) +
  labs(
    x = "County",
    y = "Average Poverty Rate",
    caption = "Source: Urban Institute (2018)"
  ) +
  coord_flip()
In almost every county, the average poverty rate is higher in designated opportunity zones than in non-designated tracts. The difference is particularly pronounced in Hampden County and Suffolk County. Norfolk County is the exception, where the opposite is true.
Q1-3
Which county has the greatest disparity in poverty rate between designated and non-designated tracts?
We can already see the answer in the bar plot above, but let’s find the answer explicitly by calculating the difference in poverty rate between designated and non-designated tracts for each county, and sorting by that difference.
# Gap between designated and non-designated mean poverty rates per county,
# sorted from the largest gap down; show the first three rows
poverty_per_county |>
  pivot_wider(
    names_from = DesignatedOZ,
    values_from = AvgPovertyRate
  ) |>
  mutate(Difference = designated - not_designated) |>
  arrange(desc(Difference)) |>
  head(n = 3)
| county | designated | not_designated | Difference |
|---|---|---|---|
| Hampden County | 0.3918664 | 0.2363856 | 0.1554809 |
| Suffolk County | 0.3851553 | 0.2348872 | 0.1502681 |
| Plymouth County | 0.2541559 | 0.1207872 | 0.1333688 |
The greatest difference in poverty rate between designated and non-designated tracts is in Hampden County, where the poverty rate in designated tracts is about 0.155 higher than in non-designated tracts. This county also has the highest poverty rate overall. It is followed closely by Suffolk County and then Plymouth County.
Exercise 2
Q2-1
Select one of the variables, create a graphical representation that contrasts its distribution in designated tracts and in undesignated tracts in Massachusetts.
Let’s take a look at medhhincome, using both a box plot and a density plot.
# Box plot: median household income by designation status
ggplot(ozs_ma, aes(x = DesignatedOZ, y = medhhincome, fill = DesignatedOZ)) +
  geom_boxplot(alpha = 0.8) +
  labs(
    x = "Opportunity Zone Eligible Tracts",
    y = "Median Household Income",
    fill = "Tracts"
  )

# Density plot: the same variable as overlapping distributions
ggplot(ozs_ma, aes(x = medhhincome, fill = DesignatedOZ)) +
  geom_density(alpha = 0.6) +
  labs(x = "Median Household Income", fill = "Tracts")
Though overlapping, median household income is generally higher in non-designated tracts than in designated tracts. The distribution of median household income is also wider in non-designated tracts.
Q2-2
Select two variables, create a graphical representation that describes how they relate (or don’t relate) to each other, including the direction of this relationship.
# Scatter plot of median rent against the share of Black residents, with one
# straight-line (lm) fit per designation group and no confidence band
ggplot(ozs_ma, aes(x = pctBlack, y = medrent, color = DesignatedOZ)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Proportion of Black Population",
    y = "Median Rent",
    caption = "Source: Urban Institute (2018)"
  )
The percentage of the population that is black does not seem correlated with median rent, as indicated by these relatively flat lines. Regardless of the proportion of black residents, median rent is generally lower in the designated tracts (which we would expect.)
Q2-3
What can we say about the difference in demographic/economic conditions reflected by these graphs between designated and not designated tracts?
Opportunity zones were selected to galvanize investment in “low-income and undercapitalized communities” (Urban.org). In general, those selection criteria are evident in the Massachusetts data. Compared to non-designated zones, the designated zones tend to have:
- Higher poverty rates (Figure 1)
- Lower median household incomes (Figure 2)
- Lower median rents (Figure 4)
What stands out to me in Figure 4 is the way the red, ‘designated’ line terminates abruptly as the black population increases. That is to say, there are a number of tracts with high proportions of black residents (over ~55%) and none of them are designated as opportunity zones. Since blackness often coincides with economic disadvantage in the US, this raises the question: are those tracts really better off than typical tracts that did get designated, or is racial discrimination at play in the designation process?
Bonus Analysis
First, let us confirm that none of the tracts with a black population over 55% were designated as opportunity zones.
# Count the tracts whose Black population share is at least 0.55, by
# designation status, to confirm that none of them were designated
ozs_ma |>
  filter(pctBlack >= 0.55) |>
  count(DesignatedOZ)
| DesignatedOZ | n |
|---|---|
| not_designated | 28 |
Indeed, there are 28 such tracts, and we see that none of them were designated. We will refer to these tracts as “majority black.”
How do these tracts compare to tracts that were designated as opportunity zones?
To find out, I created a new column indicating which of three groups each tract belongs to:
- Majority black (also implies non-designated)
- Designated OZ (also implies not majority black)
- The remaining data: non-designated AND not majority black
# Label each tract with one of three groups: "majority_black" when its Black
# population share is at least 0.55, otherwise its designation status.
# The rows of ozs_ma are left intact so later sections still see every
# Massachusetts tract; tracts without a group are excluded only from the
# plot data built below.
ozs_ma <- ozs_ma |>
  mutate(Group = if_else(pctBlack >= 0.55, "majority_black", DesignatedOZ))

# Keep the three metrics of interest and reshape to long format for faceting.
# fct_inorder() keeps the facets in column order instead of alphabetical order.
data_long <- ozs_ma |>
  filter(!is.na(Group)) |>
  select(Group, PovertyRate, medhhincome, medrent) |>
  pivot_longer(
    cols = !Group,
    names_to = "Metric",
    values_to = "Value"
  ) |>
  mutate(Metric = fct_inorder(Metric))

# One box plot panel per metric, each with its own y scale
ggplot(data_long, aes(x = Group, y = Value, fill = Group)) +
  geom_boxplot(alpha = 0.8) +
  facet_wrap(~ Metric, scales = "free_y") +
  # the caption states the same threshold the code applies (>= 0.55)
  labs(x = "Group", y = "Value", caption = "Majority Black: >= 55%") +
  guides(x = guide_axis(angle = 45))
Disconcertingly, for poverty rate and median household income, the majority black tracts (which were not designated) seem to resemble the designated tracts more than the non-designated tracts. Indeed, median poverty rate is even higher in the majority black tracts than among designated tracts.
For median rent, on the other hand, the majority black tracts appear more similar to the non-designated tracts; this is what we would expect, since they were also not designated.
The sample size of majority black tracts is small, and this is not a complete analysis; however, Figure 5 suggests a possibility of racial discrimination in the “opportunity zone” designation process that is worthy of a closer look.
Exercise 3
Median household income bar chart.
# Mean of tract-level median household income per county and designation
# status, drawn as dodged bars with counties ordered from highest to lowest.
ozs_ma |>
  group_by(county, DesignatedOZ) |>
  summarise(
    AvgIncome = mean(medhhincome, na.rm = TRUE),
    # hand ungrouped data to ggplot and avoid the regrouping message
    .groups = "drop"
  ) |>
  ggplot(aes(
    x = reorder(county, -AvgIncome),
    y = AvgIncome,
    fill = DesignatedOZ
  )) +
  geom_col(position = "dodge", alpha = 0.8) +
  scale_y_continuous(labels = scales::label_comma()) +
  # tilt the county names so they do not overlap
  guides(x = guide_axis(angle = 45)) +
  labs(
    x = "County",
    y = "Mean Median Household Income",
    caption = "Source: Urban Institute (2018)"
  )