Creating Factors
dat <- read.csv("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis//data-prepped/ez-plus-census-old.csv")
dat5 <- dat
dat5$NTEE27 <- substr(dat5$Nteecode, 0, 1)
dat5$labels <- ifelse(dat5$NTEE27=="A", "Arts", 0)
dat5$labels <- ifelse(dat5$NTEE27=="B", "Education", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="C", "Environment", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="D", "Animals", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="E", "Health", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="F", "Mental Health", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="G", "Disease", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="H", "Med Research", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="I", "Crime", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="J", "Employment", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="K", "Agriculture", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="L", "Housing", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="M", "Safety", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="N", "Sports", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="O"|dat5$NTEE27=="o", "Youth", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="P", "Human Services", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="Q", "International", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="R", "Civil Rights", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="S", "Community", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="T", "Philanthropy", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="U", "Science", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="V", "Social Science", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="W", "Public Benefit", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="X", "Religion", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="Y", "Mutual", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="Z", "Unknown", dat5$labels)
dat52 <- dat5[, c("labels", "EIN", "unemp", "poverty", "medinc", "ownerocc",
"black","h.density", "hs" )]
dat52$type <- 1
list <- dat5[, c("labels", "EIN")]
list <- list[!duplicated(list),]
censusbm <- readRDS("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis/data-census/census-bm-all.rds")
censusbm$EIN <- substr(censusbm$id, 9, 17)
censusbmx <- merge(censusbm, list, by="EIN")
censusbm2 <- censusbmx[, c("labels", "EIN", "unemp", "poverty", "medinc", "ownerocc",
"black","h.density", "hs" )]
censusbm2$type <- 0
binded <- rbind(dat52, censusbm2)
binded <- na.omit(binded)
cordat <- binded[, c( "unemp", "poverty", "medinc", "ownerocc",
"black","h.density", "hs")]
psych::VSS.scree(cor(cordat))

factanal(x = cordat, factors = 2, rotation = "varimax")
##
## Call:
## factanal(x = cordat, factors = 2, rotation = "varimax")
##
## Uniquenesses:
## unemp poverty medinc ownerocc black h.density hs
## 0.508 0.183 0.441 0.005 0.739 0.805 0.493
##
## Loadings:
## Factor1 Factor2
## unemp 0.701
## poverty 0.863 -0.268
## medinc -0.720 0.199
## ownerocc -0.450 0.890
## black 0.503
## h.density -0.441
## hs -0.708
##
## Factor1 Factor2
## SS loadings 2.714 1.110
## Proportion Var 0.388 0.159
## Cumulative Var 0.388 0.546
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 92285.95 on 8 degrees of freedom.
## The p-value is 0
fitAfterRotation <- factanal(cordat, factors = 2, rotation = "varimax")
loading <- print(fitAfterRotation$loadings, cutoff = .40, sort = TRUE)
##
## Loadings:
## Factor1 Factor2
## unemp 0.701
## poverty 0.863
## medinc -0.720
## black 0.503
## hs -0.708
## ownerocc -0.450 0.890
## h.density -0.441
##
## Factor1 Factor2
## SS loadings 2.714 1.110
## Proportion Var 0.388 0.159
## Cumulative Var 0.388 0.546
Plotting Relationship Between Factors
reg.dat <- read.csv("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis/data-prepped/RegressionData.csv")
agg.dat <- aggregate(NPOWealthFacZ~labels, data=reg.dat, FUN="mean")
agg.dat <- merge(agg.dat, aggregate(NPOUrbanFacZ~labels, data=reg.dat, FUN="mean"), by="labels")
plot(agg.dat$NPOWealthFacZ,agg.dat$NPOUrbanFacZ, col="white",
bty="u", main="Relationship of Wealth and Urban Density by NTEE Code",
xlab="Wealth",
ylab="Urban Density",
xlim=c(-.6, .6),
ylim=c(-.4, .4))
abline(h=0, lty=3, col="gray50")
abline(v=0, lty=3, col="gray50")
text(agg.dat$NPOWealthFacZ,agg.dat$NPOUrbanFacZ, labels = agg.dat$labels, cex=.6)

Regressions
reg.dat$BMStatus <- reg.dat$BMWealthFacZ-reg.dat$NPOWealthFacZ
reg1d <- (lm(BMWealthFacZ~NPOWealthFacZ+NPOUrbanFacZ+log(dist.to.npo)+log(ave.dist+1) +factor(labels)+factor(cbsaname),data=reg.dat))
reg2d <- (lm(BMStatus~NPOWealthFacZ+NPOUrbanFacZ+log(dist.to.npo)+log(ave.dist+1) +factor(labels)+factor(cbsaname),data=reg.dat))
stargazer(reg1d, reg2d,type="text",title ="Difference in Board Member and Community Vulnerability",
keep.stat=c("n", "adj.rsq"),
omit=c("labels", "cbsaname"),
dep.var.labels = c("Board Member Wealth", "Board Member Status"),
covariate.labels=c("Nonprofit Neighborhood Wealth",
"Nonprofit Neighborhood Urban Density",
"Average Distance of Board Members to NPO (log)",
"Average Distance Between Board Members (log)"),
add.lines=list(c("NTEE Code?","Yes", "Yes"), c("MSA?", "Yes", "Yes")))
##
## Difference in Board Member and Community Vulnerability
## ======================================================================================
## Dependent variable:
## ---------------------------------------
## Board Member Wealth Board Member Status
## (1) (2)
## --------------------------------------------------------------------------------------
## Nonprofit Neighborhood Wealth 0.297*** -0.703***
## (0.003) (0.003)
##
## Nonprofit Neighborhood Urban Density 0.017*** 0.017***
## (0.003) (0.003)
##
## Average Distance of Board Members to NPO (log) 0.038*** 0.038***
## (0.004) (0.004)
##
## Average Distance Between Board Members (log) -0.019*** -0.019***
## (0.004) (0.004)
##
## Constant -0.194*** -0.194***
## (0.027) (0.027)
##
## --------------------------------------------------------------------------------------
## NTEE Code? Yes Yes
## MSA? Yes Yes
## Observations 104,483 104,483
## Adjusted R2 0.172 0.348
## ======================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Partition Regression for NPO Wealth and BM Status
partition_reg <- function( y, x, controls, df )
{
fm1 <- as.formula( paste( "y ~ ", paste( controls, collapse= "+" ) ) )
fm2 <- as.formula( paste( "x ~ ", paste( controls, collapse= "+" ) ) )
reg1 <- lm( fm1, data=df )
reg2 <- lm( fm2, data=df )
center <- 1
for( i in 1:length(controls) )
{
ave.i <- mean( unlist( df[ controls[i] ] ), na.rm=T )
center <- c( center, ave.i )
}
y.c <- sum( reg1$coefficients * center ) + reg1$residuals
x.c <- sum( reg2$coefficients * center ) + reg2$residuals
summary( lm( y.c ~ x.c ) )
plot( x.c, y.c, bty="n", col=gray(0.5,0.5), pch=19, cex=1 )
lines( stats::lowess( x.c, y.c ), col="red", lwd=2 )
}
d6 <- reg.dat[, c("BMStatus", "NPOWealthFacZ","NPOUrbanFacZ","dist.to.npo","ave.dist")]
partition_reg( y=d6$BMStatus, x=d6$NPOWealthFacZ, controls=c("NPOUrbanFacZ","dist.to.npo","ave.dist"), df=d6)
