Analysis

Defining Variables

Once the data is cleaned, we can estimate the levels of blight under the following models. First we define our variables:

load("BlightData.RData")
N <- num.BG.considered
n <- nrow(blight.counts)
M <- blight.counts$AVG.RES
X <- sum(USPS$NOSTAT_RES)
m.10 <- blight.counts$surveyed10
m.12 <- blight.counts$surveyed12
m.14 <- blight.counts$surveyed14
weight.10 <- (M*N)/(m.10*n)
weight.12 <- (M*N)/(m.12*n)
weight.14 <- (M*N)/(m.14*n)

blighted.10 <- blight.counts$blighted10 #Model A
demo.10 <- blight.counts$lot10
demoNC.10 <- blight.counts$lotNC10
proc.10 <- blight.counts$proc10
BDP.10 <- blighted.10+demo.10+proc.10 #Model B
BDncP.10 <- blighted.10+demoNC.10+proc.10 #Model C

blighted.12 <- blight.counts$blighted12
demo.12 <- blight.counts$lot12
demoNC.12 <- blight.counts$lotNC12
proc.12 <- blight.counts$proc12
BDP.12 <- blighted.12+demo.12+proc.12
BDncP.12 <- blighted.12+demoNC.12+proc.12

blighted.14 <- blight.counts$blighted14
demo.14 <- blight.counts$lot14
demoNC.14 <- blight.counts$lotNC14
proc.14 <- blight.counts$proc14
BDP.14 <- blighted.14+demo.14+proc.14
BDncP.14 <- blighted.14+demoNC.14+proc.14

Blight Estimation

We can find the estimated blight in 2012 and 2014 based on each model:

wt.ratio <- function(var1,var2,weight1,weight2){
    #Calculates sample ratios for a two-stage sampling design
    #var1: a vector with the variables of interest in the auxiliary data
    #var2: same as above, for the data being estimated
    #weight1: a vector of sampling weights reflecting selection probabilities for units in the auxiliary data
    #weight2: same as above, for the data being estimated
    #value: the ratio between weight2 and weight1, weighted by selection probabilities
    return(sum(weight2*var2)/sum(weight1*var1))
}

ratioEstA.12 <- wt.ratio(blighted.10,blighted.12,weight.10,weight.12)
ratioEstA.14 <- wt.ratio(blighted.10,blighted.14,weight.10,weight.14)
ratioEstB.12 <- wt.ratio(BDP.10,BDP.12,weight.10,weight.12)
ratioEstB.14 <- wt.ratio(BDP.10,BDP.14,weight.10,weight.14)
ratioEstC.12 <- wt.ratio(BDncP.10,BDncP.12,weight.10,weight.12)
ratioEstC.14 <- wt.ratio(BDncP.10,BDncP.14,weight.10,weight.14)

estA.12 <- ratioEstA.12*X
estA.14 <- ratioEstA.14*X
estB.12 <- ratioEstB.12*X
estB.14 <- ratioEstB.14*X
estC.12 <- ratioEstC.12*X
estC.14 <- ratioEstC.14*X
blight.estimates <- data.frame(est.2010 <- rep(X,3), est.2012 <- c(estA.12,estB.12,estC.12), est.2014 <- c(estA.14,estB.14,estC.14))
colnames(blight.estimates) <- c("est.2010","est.2012", "est.2014")
rownames(blight.estimates) <- c("Model.A", "Model.B", "Model.C")
require(reshape2)
## Loading required package: reshape2
require(ggplot2)
## Loading required package: ggplot2
blight.estimates$Model <- rownames(blight.estimates)
estimates.molten <- melt(blight.estimates, variable.name="Year", value.name="Blighted.Addresses",id.vars=c("Model"))
colnames(estimates.molten) <- c("Model","Year","Blighted.Addresses")
ggplot(data=estimates.molten, aes(x = Year, y = Blighted.Addresses)) +
geom_bar(stat="identity",fill="dodgerblue",width=.75)+ 
facet_wrap("Model") + 
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-.3))+
theme(legend.position="none")+
labs(title = "Estimated Blight In New Orleans")

and the estimated reduction from 2010:

reduction <- X-blight.estimates[,2:3]
reduction$Model <- rownames(reduction)
reduction.molten <- melt(reduction, variable.name="Year", value.name="Blighted.Addresses",id.vars=c("Model"))
colnames(reduction.molten) <- c("Model","Year","Blighted.Addresses")
ggplot(data=reduction.molten, aes(x = Year, y = Blighted.Addresses)) +
geom_bar(stat="identity",fill="dodgerblue",width=.75)+ 
facet_wrap("Model") + 
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-.3))+
theme(legend.position="none")+
labs(title = "Estimated Blight Reduction In New Orleans from 2010")

This gives the following percent reductions since 2010:

options(digits=3)
1+((reduction[,1:2]-X)/X)
##         est.2012 est.2014
## Model.A    0.333    0.608
## Model.B    0.236    0.352
## Model.C    0.332    0.459

Confidence Intervals

Using the jackknife variance, we estimate confidence intervals around our estimates as:

jk.var <- function(var1, var2, weight1, weight2, X){
    #Calculates the jackknife variance from ratio estimation
    #var1: a vector with the variables of interest in the auxiliary data
    #var2: same as above, for the data being estimated
    #weight1: a vector of sampling weights reflecting selection probabilities for units in the auxiliary data
    #weight2: same as above, for the data being estimated
    #value: the jackknife variance of the estimates
    n <- length(var1)
    sample.estimate <- X*wt.ratio(var1,var2,weight1,weight2)
    jk.estimates = c()
    for(i in 1:n){
        var1.jk <- var1[-i]
        var2.jk <- var2[-i]
        weight1.jk <- weight1[-i]
        weight2.jk <- weight2[-i]
        jk.estimates[i] <- X*wt.ratio(var1.jk,var2.jk,weight1.jk,weight2.jk)
    }
    jk.variance <- ((n-1)/n)*sum((jk.estimates-sample.estimate)^2)
    return(jk.variance)
}

SE.est <- function(variance, n,crit.val=2.0345){
    #calculates confidence intervals
    #estimate: a point estimate
    #variance: the variance around the point estimate
    #n: the number of primary sampling units used to make the estimate
    #crit.val: the desired critical value to use to form the confidence interval, 2.0345 corresponds to 95% confidence at 33 (n-1) df
    #value: a vector of length 2 giving the low and high ends of a confidence interval 
    SE <- crit.val*(sqrt(variance)/sqrt(n))
    return(SE)
}
SE.A.12 <- SE.est(variance=jk.var(blighted.10,blighted.12,weight.10,weight.12,X=X),n=n)
SE.A.14 <- SE.est(variance=jk.var(blighted.10,blighted.14,weight.10,weight.14,X=X),n=n)
SE.B.12 <- SE.est(variance=jk.var(BDP.10,BDP.12,weight.10,weight.12,X=X),n=n)
SE.B.14 <- SE.est(variance=jk.var(BDP.10,BDP.14,weight.10,weight.14,X=X),n=n)
SE.C.12 <- SE.est(variance=jk.var(BDncP.10,BDncP.12,weight.10,weight.12,X=X),n=n)
SE.C.14 <- SE.est(variance=jk.var(BDncP.10,BDncP.14,weight.10,weight.14,X=X),n=n)

From this, we can plot 95% confidence intervals around our estimates:

SE.vec <- c(0,0,0,SE.A.12,SE.B.12,SE.C.12,SE.A.14,SE.B.14,SE.C.14)
estimates.molten$SE <- SE.vec
dodge <- position_dodge(width=0.9)
ggplot(data=estimates.molten, aes(x = Year, y = Blighted.Addresses,ymax=Blighted.Addresses+SE,ymin=Blighted.Addresses-SE)) +
geom_bar(stat="identity",fill="dodgerblue",position=dodge,width=.75)+
facet_wrap("Model") + 
geom_errorbar(position=dodge, width=0.25)+ 
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-1))+
theme(legend.position="none")+
labs(title = "Estimated Blight In New Orleans")

This implies reductions from 2010 of:

reduction.molten$SE <- SE.vec[-c(1:3)]
ggplot(data=reduction.molten, aes(x = Year, y = Blighted.Addresses, ymax=Blighted.Addresses+SE, ymin=Blighted.Addresses-SE)) +
geom_bar(stat="identity",fill="dodgerblue",width=.75)+ 
facet_wrap("Model") + 
geom_errorbar(position=dodge, width=0.25)+ 
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-2))+
theme(legend.position="none")+
labs(title = "Estimated Blight Reduction In New Orleans from 2010")

Discussion

The three models can be described as below:

Parameter Model A Model B Model C
UNO Survey Variables Blighted addresses only Blighted addresses, houses undergoing renovations, all vacant lots Blighted addresses, houses undergoing renovations, vacant lots that were also vacant in the previous survey
Parsimony Most straightforward Adds complexity Most complex
USPS Consistency Excludes lots and houses undergoing renovation that would likely be considered no-stats Includes vacant lots that likely wouldn’t be considered no-stats Most consistent

The models range from A, which is the most straightforward but possibly too simplistic, to C, which adds complexity in an attempt to align with the no-stat definition as closely as possible. Model B offers a middle ground, being more straightforward than C but more faithful to the USPS definitions than A.

Based on our results, we can likely dismiss Model A as a realistic model, particularly in light of the 2014-2015 results. A 60% reduction from 2010 to 2014-2015 is unrealistic and does not match with historical trends. The addition of lots and houses undergoing renovation appears necessary for estimation to work properly. Both Models B and C provide more realistic results, with Model B being more conservative and Model C more aggressive. Based on these two models, we can estimate that between 2010 and 2014-2015, New Orleans saw a reduction of over 15,000, and perhaps as many as 20,000, blighted addresses.