library(pander)
library(knitr)
library(FactoMineR)
library(psych)
library(lattice)
library(GPArotation)
dat <- read.csv("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis/data-prepped/ez-plus-census-old.csv")
Adding labels…
dat$NTEE27 <- substr(dat$Nteecode, 0, 1)
dat$labels <- ifelse(dat$NTEE27=="A", "Arts", 0)
dat$labels <- ifelse(dat$NTEE27=="B", "Education", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="C", "Environment", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="D", "Animals", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="E", "Health", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="F", "Mental Health", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="G", "Disease", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="H", "Med Research", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="I", "Crime", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="J", "Employment", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="K", "Agriculture", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="L", "Housing", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="M", "Safety", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="N", "Sports", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="O"|dat$NTEE27=="o", "Youth", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="P", "Human Services", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="Q", "International", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="R", "Civil Rights", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="S", "Community", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="T", "Philanthropy", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="U", "Science", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="V", "Social Science", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="W", "Public Benefit", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="X", "Religion", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="Y", "Mutual", dat$labels)
dat$labels <- ifelse(dat$NTEE27=="Z", "Unknown", dat$labels)
Finding the means for each NTEE code…
aggdat <- aggregate(unemp~labels, data=dat, FUN=mean)
aggdat <- merge(aggdat, aggregate(poverty~labels, data=dat, FUN=mean), by="labels")
aggdat <- merge(aggdat, aggregate(medinc~labels, data=dat, FUN=mean), by="labels")
aggdat <- merge(aggdat, aggregate(ownerocc~labels, data=dat, FUN=mean), by="labels")
aggdat <- merge(aggdat, aggregate(black~labels, data=dat, FUN=mean), by="labels")
aggdat <- merge(aggdat, aggregate(h.density~labels, data=dat, FUN=mean), by="labels")
aggdat <- merge(aggdat, aggregate(hs~labels, data=dat, FUN=mean), by="labels")
First a high level look at all of the data that was calculated, starting with all of the data for the different NTEE codes.
kable(aggdat, digits=2)
| labels | unemp | poverty | medinc | ownerocc | black | h.density | hs |
|---|---|---|---|---|---|---|---|
| Agriculture | 0.08 | 0.16 | 61344.50 | 0.63 | 0.16 | 756.32 | 0.88 |
| Animals | 0.07 | 0.12 | 67151.71 | 0.69 | 0.08 | 671.86 | 0.89 |
| Arts | 0.07 | 0.15 | 68412.92 | 0.59 | 0.14 | 1636.64 | 0.89 |
| Civil Rights | 0.07 | 0.16 | 67373.18 | 0.55 | 0.18 | 1855.17 | 0.89 |
| Community | 0.09 | 0.18 | 57796.91 | 0.57 | 0.25 | 1194.73 | 0.87 |
| Crime | 0.08 | 0.17 | 59574.25 | 0.57 | 0.19 | 1016.51 | 0.87 |
| Disease | 0.07 | 0.12 | 74732.74 | 0.66 | 0.11 | 864.48 | 0.91 |
| Education | 0.07 | 0.14 | 69505.62 | 0.63 | 0.14 | 991.14 | 0.90 |
| Employment | 0.08 | 0.17 | 62930.35 | 0.56 | 0.24 | 1109.69 | 0.88 |
| Environment | 0.07 | 0.13 | 71381.03 | 0.65 | 0.09 | 951.88 | 0.91 |
| Health | 0.07 | 0.13 | 71439.71 | 0.63 | 0.15 | 1034.03 | 0.90 |
| Housing | 0.09 | 0.18 | 57511.88 | 0.58 | 0.27 | 1023.60 | 0.87 |
| Human Services | 0.08 | 0.15 | 64100.66 | 0.62 | 0.21 | 958.15 | 0.88 |
| International | 0.06 | 0.12 | 81027.45 | 0.62 | 0.11 | 1593.40 | 0.91 |
| Med Research | 0.06 | 0.11 | 81348.57 | 0.68 | 0.10 | 1087.69 | 0.92 |
| Mental Health | 0.07 | 0.15 | 65154.03 | 0.61 | 0.15 | 972.63 | 0.89 |
| Mutual | 0.07 | 0.14 | 66138.66 | 0.63 | 0.14 | 864.66 | 0.90 |
| Philanthropy | 0.06 | 0.12 | 75404.25 | 0.64 | 0.12 | 1140.99 | 0.91 |
| Public Benefit | 0.07 | 0.14 | 67789.71 | 0.62 | 0.12 | 877.39 | 0.89 |
| Religion | 0.08 | 0.15 | 63709.70 | 0.63 | 0.17 | 907.35 | 0.88 |
| Safety | 0.07 | 0.13 | 64478.45 | 0.68 | 0.09 | 598.65 | 0.89 |
| Science | 0.06 | 0.13 | 78782.89 | 0.60 | 0.11 | 1325.42 | 0.92 |
| Social Science | 0.07 | 0.15 | 71358.84 | 0.56 | 0.14 | 2020.86 | 0.91 |
| Sports | 0.07 | 0.12 | 69901.08 | 0.69 | 0.10 | 634.07 | 0.90 |
| Unknown | 0.08 | 0.14 | 62807.09 | 0.64 | 0.18 | 899.28 | 0.88 |
| Youth | 0.08 | 0.16 | 64203.62 | 0.61 | 0.25 | 1102.43 | 0.88 |
How strongly correlated are the different neighborhood demographics? Most are fairly correlated, as one would expect for neighborhood mores of wealth and need.
cordat <- aggdat[,-1]
kable(cor(cordat), digits=2)
| unemp | poverty | medinc | ownerocc | black | h.density | hs | |
|---|---|---|---|---|---|---|---|
| unemp | 1.00 | 0.94 | -0.87 | -0.63 | 0.94 | 0.00 | -0.90 |
| poverty | 0.94 | 1.00 | -0.84 | -0.78 | 0.88 | 0.16 | -0.85 |
| medinc | -0.87 | -0.84 | 1.00 | 0.37 | -0.70 | 0.29 | 0.94 |
| ownerocc | -0.63 | -0.78 | 0.37 | 1.00 | -0.69 | -0.68 | 0.40 |
| black | 0.94 | 0.88 | -0.70 | -0.69 | 1.00 | 0.14 | -0.80 |
| h.density | 0.00 | 0.16 | 0.29 | -0.68 | 0.14 | 1.00 | 0.25 |
| hs | -0.90 | -0.85 | 0.94 | 0.40 | -0.80 | 0.25 | 1.00 |
A few ways of looking at those relationships that stood out looking at pairs of demographics.
Some nonprofits clearly grouped together based on the SES of their neighborhoods and who they served.
Other demographics followed less of a pattern. You can tell a story about most of the nonprofits here over represented in Urban areas - International is driven by DC, Arts are generally downtown, Civil Rights is museums that serve black cities…
The lower density nonprofits follow an interesting pattern too. In the very bottom left corner you have Sports, Safety, and Animals - which are located away from city centers and in neighborhoods low on diversity. The medium density areas with large black populations are Housing, Employment, Youth Services, and Community. It’s almost like a Maslows hierarchy of needs for nonprofits.
That relationship becomes more clear when putting two measures of community need on the axes. It’s repetitive with the first graph, but this one clearly shows the types of nonprofits serving deprived communities as opposed to those serving larger societal needs.
plot(aggdat$poverty, aggdat$unemp, col="white", bty="l",
xlim=c(.1, .19), ylim=c(.06, .095),
ylab="Unemployment Rate", xlab="Poverty Rate")
text(aggdat$poverty, aggdat$unemp, labels=aggdat$labels, cex=.6)
text(.18, .095, "Basic Needs", cex=1.25)
text(.12, .095, "Non-Essentials", cex=1.25)
Let’s quickly look past two variables at a time to see how this highly correlated data combines and what the unique qualities are. We can create some factors, to understand the underlying story of the data.
psych::VSS.scree(cor(cordat))
factanal(x = cordat, factors = 2, rotation = "varimax")
##
## Call:
## factanal(x = cordat, factors = 2, rotation = "varimax")
##
## Uniquenesses:
## unemp poverty medinc ownerocc black h.density hs
## 0.068 0.008 0.056 0.005 0.209 0.173 0.081
##
## Loadings:
## Factor1 Factor2
## unemp 0.952 0.158
## poverty 0.932 0.353
## medinc -0.960 0.150
## ownerocc -0.519 -0.852
## black 0.835 0.305
## h.density -0.161 0.895
## hs -0.952 0.111
##
## Factor1 Factor2
## SS loadings 4.597 1.804
## Proportion Var 0.657 0.258
## Cumulative Var 0.657 0.914
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.62 on 8 degrees of freedom.
## The p-value is 6.71e-07
There are two unique factors in the data it appears.
fitAfterRotation <- factanal(cordat, factors = 2, rotation = "varimax")
print(fitAfterRotation$loadings, cutoff = .40, sort = TRUE)
##
## Loadings:
## Factor1 Factor2
## unemp 0.952
## poverty 0.932
## medinc -0.960
## black 0.835
## hs -0.952
## ownerocc -0.519 -0.852
## h.density 0.895
##
## Factor1 Factor2
## SS loadings 4.597 1.804
## Proportion Var 0.657 0.258
## Cumulative Var 0.657 0.914
f <- factanal(cordat, factors=2, rotation="varimax", scores="regression")
aggdatF <- cbind(aggdat$labels, f$scores)
It looks like they cluster around two qualities. One quality is the population each nonprofit serves, whether it is high or low SES. The other is about the built environment, and whether it is high density/high renter or not.
I wanted to sort these lists so that we could see which are higher/lower on both factors, but Markdown did not want me to.
kable(aggdatF, digits=2)
| Factor1 | Factor2 | |
|---|---|---|
| Agriculture | 1.00675350734045 | -0.927958174085942 |
| Animals | -0.316716555790826 | -1.89518408114915 |
| Arts | -0.204265110205847 | 1.01384092084051 |
| Civil Rights | 0.10898177938238 | 1.97462385565546 |
| Community | 1.92125218641282 | 0.299134289119646 |
| Crime | 1.28541334775766 | 0.73318072129628 |
| Disease | -1.10763313218803 | -0.580678072115209 |
| Education | -0.287129021627648 | -0.166371358767898 |
| Employment | 1.09080772831554 | 1.13871831161587 |
| Environment | -0.682264796429037 | -0.462402958707845 |
| Health | -0.553503882079068 | 0.0619659781024159 |
| Housing | 1.9652433867375 | 0.0780748468626988 |
| Human Services | 0.660277612581165 | -0.307340490336331 |
| International | -1.60407580375731 | 0.927739392431744 |
| Med Research | -1.83092257420469 | -0.426649168063112 |
| Mental Health | 0.16057185781134 | 0.117732121350106 |
| Mutual | 0.0712823286166333 | -0.324920764818245 |
| Philanthropy | -1.10406446275143 | 0.0672058949992523 |
| Public Benefit | -0.172467229905921 | -0.0445597768228767 |
| Religion | 0.686877893837539 | -0.610120219989694 |
| Safety | 0.0110539464611983 | -1.65563607526662 |
| Science | -1.20954169459105 | 1.2757276197809 |
| Social Science | -0.487649132806162 | 2.03284889423967 |
| Sports | -0.671371475898568 | -1.47491966661973 |
| Unknown | 0.44059839499325 | -0.703776436477831 |
| Youth | 0.822490901988095 | -0.140275603074027 |