Creating Factors

dat <- read.csv("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis//data-prepped/ez-plus-census-old.csv")
dat5 <- dat
dat5$NTEE27 <- substr(dat5$Nteecode, 0, 1)
dat5$labels <- ifelse(dat5$NTEE27=="A", "Arts", 0)
dat5$labels <- ifelse(dat5$NTEE27=="B", "Education", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="C", "Environment", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="D", "Animals", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="E", "Health", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="F", "Mental Health", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="G", "Disease", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="H", "Med Research", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="I", "Crime", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="J", "Employment", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="K", "Agriculture", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="L", "Housing", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="M", "Safety", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="N", "Sports", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="O"|dat5$NTEE27=="o", "Youth", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="P", "Human Services", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="Q", "International", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="R", "Civil Rights", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="S", "Community", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="T", "Philanthropy", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="U", "Science", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="V", "Social Science", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="W", "Public Benefit", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="X", "Religion", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="Y", "Mutual", dat5$labels)
dat5$labels <- ifelse(dat5$NTEE27=="Z", "Unknown", dat5$labels)

dat52 <- dat5[, c("labels", "EIN", "unemp", "poverty", "medinc", "ownerocc", 
                  "black","h.density", "hs" )]
dat52$type <- 1

list <- dat5[, c("labels", "EIN")]
list <- list[!duplicated(list),]

censusbm <- readRDS("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis/data-census/census-bm-all.rds")
censusbm$EIN <- substr(censusbm$id, 9, 17)
censusbmx <- merge(censusbm, list, by="EIN")

censusbm2 <- censusbmx[, c("labels", "EIN", "unemp", "poverty", "medinc", "ownerocc", 
                           "black","h.density", "hs" )]
censusbm2$type <- 0
binded <- rbind(dat52, censusbm2)

binded <- na.omit(binded)
cordat <- binded[, c( "unemp", "poverty", "medinc", "ownerocc", 
                        "black","h.density", "hs")]

psych::VSS.scree(cor(cordat))

factanal(x = cordat, factors = 2, rotation = "varimax")
## 
## Call:
## factanal(x = cordat, factors = 2, rotation = "varimax")
## 
## Uniquenesses:
##     unemp   poverty    medinc  ownerocc     black h.density        hs 
##     0.508     0.183     0.441     0.005     0.739     0.805     0.493 
## 
## Loadings:
##           Factor1 Factor2
## unemp      0.701         
## poverty    0.863  -0.268 
## medinc    -0.720   0.199 
## ownerocc  -0.450   0.890 
## black      0.503         
## h.density         -0.441 
## hs        -0.708         
## 
##                Factor1 Factor2
## SS loadings      2.714   1.110
## Proportion Var   0.388   0.159
## Cumulative Var   0.388   0.546
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 92285.95 on 8 degrees of freedom.
## The p-value is 0
fitAfterRotation <- factanal(cordat, factors = 2, rotation = "varimax")

loading <- print(fitAfterRotation$loadings, cutoff = .40, sort = TRUE)
## 
## Loadings:
##           Factor1 Factor2
## unemp      0.701         
## poverty    0.863         
## medinc    -0.720         
## black      0.503         
## hs        -0.708         
## ownerocc  -0.450   0.890 
## h.density         -0.441 
## 
##                Factor1 Factor2
## SS loadings      2.714   1.110
## Proportion Var   0.388   0.159
## Cumulative Var   0.388   0.546

Plotting Relationship Between Factors

reg.dat <- read.csv("~/Dropbox (ASU)/Political Economy of NP Startups/data-and-analysis/data-prepped/RegressionData.csv")

agg.dat <- aggregate(NPOWealthFacZ~labels, data=reg.dat, FUN="mean")
agg.dat <- merge(agg.dat, aggregate(NPOUrbanFacZ~labels, data=reg.dat, FUN="mean"), by="labels")
plot(agg.dat$NPOWealthFacZ,agg.dat$NPOUrbanFacZ, col="white",
     bty="u", main="Relationship of Wealth and Urban Density by NTEE Code",
     xlab="Wealth",
     ylab="Urban Density",
     xlim=c(-.6, .6),
     ylim=c(-.4, .4))
abline(h=0, lty=3, col="gray50")
abline(v=0, lty=3, col="gray50")
text(agg.dat$NPOWealthFacZ,agg.dat$NPOUrbanFacZ, labels = agg.dat$labels, cex=.6)

Regressions

reg.dat$BMStatus <- reg.dat$BMWealthFacZ-reg.dat$NPOWealthFacZ

reg1d <- (lm(BMWealthFacZ~NPOWealthFacZ+NPOUrbanFacZ+log(dist.to.npo)+log(ave.dist+1) +factor(labels)+factor(cbsaname),data=reg.dat))

reg2d <- (lm(BMStatus~NPOWealthFacZ+NPOUrbanFacZ+log(dist.to.npo)+log(ave.dist+1) +factor(labels)+factor(cbsaname),data=reg.dat))

stargazer(reg1d, reg2d,type="text",title ="Difference in Board Member and Community Vulnerability",
          keep.stat=c("n", "adj.rsq"),
          omit=c("labels", "cbsaname"),
          dep.var.labels = c("Board Member Wealth", "Board Member Status"),
          covariate.labels=c("Nonprofit Neighborhood Wealth",
                             "Nonprofit Neighborhood Urban Density",
                             "Average Distance of Board Members to NPO (log)",
                             "Average Distance Between Board Members (log)"),
          add.lines=list(c("NTEE Code?","Yes", "Yes"), c("MSA?", "Yes", "Yes")))
## 
## Difference in Board Member and Community Vulnerability
## ======================================================================================
##                                                          Dependent variable:          
##                                                ---------------------------------------
##                                                Board Member Wealth Board Member Status
##                                                        (1)                 (2)        
## --------------------------------------------------------------------------------------
## Nonprofit Neighborhood Wealth                       0.297***            -0.703***     
##                                                      (0.003)             (0.003)      
##                                                                                       
## Nonprofit Neighborhood Urban Density                0.017***            0.017***      
##                                                      (0.003)             (0.003)      
##                                                                                       
## Average Distance of Board Members to NPO (log)      0.038***            0.038***      
##                                                      (0.004)             (0.004)      
##                                                                                       
## Average Distance Between Board Members (log)        -0.019***           -0.019***     
##                                                      (0.004)             (0.004)      
##                                                                                       
## Constant                                            -0.194***           -0.194***     
##                                                      (0.027)             (0.027)      
##                                                                                       
## --------------------------------------------------------------------------------------
## NTEE Code?                                             Yes                 Yes        
## MSA?                                                   Yes                 Yes        
## Observations                                         104,483             104,483      
## Adjusted R2                                           0.172               0.348       
## ======================================================================================
## Note:                                                      *p<0.1; **p<0.05; ***p<0.01

Partition Regression for NPO Wealth and BM Status

partition_reg <- function( y, x, controls, df )
{
  fm1 <- as.formula( paste( "y ~ ", paste( controls, collapse= "+" ) ) )
  fm2 <- as.formula( paste( "x ~ ", paste( controls, collapse= "+" ) ) )
  reg1 <- lm( fm1, data=df )
  reg2 <- lm( fm2, data=df )
  
  center <- 1
  for( i in 1:length(controls) )
  {
    ave.i <- mean( unlist( df[ controls[i] ] ), na.rm=T )
    center <- c( center, ave.i )
  }
  
  y.c <- sum( reg1$coefficients * center ) + reg1$residuals
  x.c <- sum( reg2$coefficients * center ) + reg2$residuals
  
  summary( lm( y.c ~ x.c ) )
  plot( x.c, y.c, bty="n", col=gray(0.5,0.5), pch=19, cex=1 )
  lines( stats::lowess( x.c, y.c ), col="red", lwd=2 )
}

d6 <- reg.dat[, c("BMStatus", "NPOWealthFacZ","NPOUrbanFacZ","dist.to.npo","ave.dist")]
partition_reg( y=d6$BMStatus, x=d6$NPOWealthFacZ, controls=c("NPOUrbanFacZ","dist.to.npo","ave.dist"), df=d6)