Once the data is cleaned, we can estimate the levels of blight under the following models. First we define our variables:
load("BlightData.RData")
N <- num.BG.considered
n <- nrow(blight.counts)
M <- blight.counts$AVG.RES
X <- sum(USPS$NOSTAT_RES)
m.10 <- blight.counts$surveyed10
m.12 <- blight.counts$surveyed12
m.14 <- blight.counts$surveyed14
weight.10 <- (M*N)/(m.10*n)
weight.12 <- (M*N)/(m.12*n)
weight.14 <- (M*N)/(m.14*n)
blighted.10 <- blight.counts$blighted10 #Model A
demo.10 <- blight.counts$lot10
demoNC.10 <- blight.counts$lotNC10
proc.10 <- blight.counts$proc10
BDP.10 <- blighted.10+demo.10+proc.10 #Model B
BDncP.10 <- blighted.10+demoNC.10+proc.10 #Model C
blighted.12 <- blight.counts$blighted12
demo.12 <- blight.counts$lot12
demoNC.12 <- blight.counts$lotNC12
proc.12 <- blight.counts$proc12
BDP.12 <- blighted.12+demo.12+proc.12
BDncP.12 <- blighted.12+demoNC.12+proc.12
blighted.14 <- blight.counts$blighted14
demo.14 <- blight.counts$lot14
demoNC.14 <- blight.counts$lotNC14
proc.14 <- blight.counts$proc14
BDP.14 <- blighted.14+demo.14+proc.14
BDncP.14 <- blighted.14+demoNC.14+proc.14
We can find the estimated blight in 2012 and 2014 based on each model:
wt.ratio <- function(var1,var2,weight1,weight2){
#Calculates sample ratios for a two-stage sampling design
#var1: a vector with the variables of interest in the auxiliary data
#var2: same as above, for the data being estimated
#weight1: a vector of sampling weights reflecting selection probabilities for units in the auxiliary data
#weight2: same as above, for the data being estimated
#value: the ratio between weight2 and weight1, weighted by selection probabilities
return(sum(weight2*var2)/sum(weight1*var1))
}
ratioEstA.12 <- wt.ratio(blighted.10,blighted.12,weight.10,weight.12)
ratioEstA.14 <- wt.ratio(blighted.10,blighted.14,weight.10,weight.14)
ratioEstB.12 <- wt.ratio(BDP.10,BDP.12,weight.10,weight.12)
ratioEstB.14 <- wt.ratio(BDP.10,BDP.14,weight.10,weight.14)
ratioEstC.12 <- wt.ratio(BDncP.10,BDncP.12,weight.10,weight.12)
ratioEstC.14 <- wt.ratio(BDncP.10,BDncP.14,weight.10,weight.14)
estA.12 <- ratioEstA.12*X
estA.14 <- ratioEstA.14*X
estB.12 <- ratioEstB.12*X
estB.14 <- ratioEstB.14*X
estC.12 <- ratioEstC.12*X
estC.14 <- ratioEstC.14*X
blight.estimates <- data.frame(est.2010 <- rep(X,3), est.2012 <- c(estA.12,estB.12,estC.12), est.2014 <- c(estA.14,estB.14,estC.14))
colnames(blight.estimates) <- c("est.2010","est.2012", "est.2014")
rownames(blight.estimates) <- c("Model.A", "Model.B", "Model.C")
require(reshape2)
## Loading required package: reshape2
require(ggplot2)
## Loading required package: ggplot2
blight.estimates$Model <- rownames(blight.estimates)
estimates.molten <- melt(blight.estimates, variable.name="Year", value.name="Blighted.Addresses",id.vars=c("Model"))
colnames(estimates.molten) <- c("Model","Year","Blighted.Addresses")
ggplot(data=estimates.molten, aes(x = Year, y = Blighted.Addresses)) +
geom_bar(stat="identity",fill="dodgerblue",width=.75)+
facet_wrap("Model") +
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-.3))+
theme(legend.position="none")+
labs(title = "Estimated Blight In New Orleans")
and the estimated reduction from 2010:
reduction <- X-blight.estimates[,2:3]
reduction$Model <- rownames(reduction)
reduction.molten <- melt(reduction, variable.name="Year", value.name="Blighted.Addresses",id.vars=c("Model"))
colnames(reduction.molten) <- c("Model","Year","Blighted.Addresses")
ggplot(data=reduction.molten, aes(x = Year, y = Blighted.Addresses)) +
geom_bar(stat="identity",fill="dodgerblue",width=.75)+
facet_wrap("Model") +
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-.3))+
theme(legend.position="none")+
labs(title = "Estimated Blight Reduction In New Orleans from 2010")
This gives the following percent reductions since 2010:
options(digits=3)
1+((reduction[,1:2]-X)/X)
## est.2012 est.2014
## Model.A 0.333 0.608
## Model.B 0.236 0.352
## Model.C 0.332 0.459
Using the jackknife variance, we estimate confidence intervals around our estimates as:
jk.var <- function(var1, var2, weight1, weight2, X){
#Calculates the jackknife variance from ratio estimation
#var1: a vector with the variables of interest in the auxiliary data
#var2: same as above, for the data being estimated
#weight1: a vector of sampling weights reflecting selection probabilities for units in the auxiliary data
#weight2: same as above, for the data being estimated
#value: the jackknife variance of the estimates
n <- length(var1)
sample.estimate <- X*wt.ratio(var1,var2,weight1,weight2)
jk.estimates = c()
for(i in 1:n){
var1.jk <- var1[-i]
var2.jk <- var2[-i]
weight1.jk <- weight1[-i]
weight2.jk <- weight2[-i]
jk.estimates[i] <- X*wt.ratio(var1.jk,var2.jk,weight1.jk,weight2.jk)
}
jk.variance <- ((n-1)/n)*sum((jk.estimates-sample.estimate)^2)
return(jk.variance)
}
SE.est <- function(variance, n,crit.val=2.0345){
#calculates confidence intervals
#estimate: a point estimate
#variance: the variance around the point estimate
#n: the number of primary sampling units used to make the estimate
#crit.val: the desired critical value to use to form the confidence interval, 2.0345 corresponds to 95% confidence at 33 (n-1) df
#value: a vector of length 2 giving the low and high ends of a confidence interval
SE <- crit.val*(sqrt(variance)/sqrt(n))
return(SE)
}
SE.A.12 <- SE.est(variance=jk.var(blighted.10,blighted.12,weight.10,weight.12,X=X),n=n)
SE.A.14 <- SE.est(variance=jk.var(blighted.10,blighted.14,weight.10,weight.14,X=X),n=n)
SE.B.12 <- SE.est(variance=jk.var(BDP.10,BDP.12,weight.10,weight.12,X=X),n=n)
SE.B.14 <- SE.est(variance=jk.var(BDP.10,BDP.14,weight.10,weight.14,X=X),n=n)
SE.C.12 <- SE.est(variance=jk.var(BDncP.10,BDncP.12,weight.10,weight.12,X=X),n=n)
SE.C.14 <- SE.est(variance=jk.var(BDncP.10,BDncP.14,weight.10,weight.14,X=X),n=n)
From this, we can plot 95% confidence intervals around our estimates:
SE.vec <- c(0,0,0,SE.A.12,SE.B.12,SE.C.12,SE.A.14,SE.B.14,SE.C.14)
estimates.molten$SE <- SE.vec
dodge <- position_dodge(width=0.9)
ggplot(data=estimates.molten, aes(x = Year, y = Blighted.Addresses,ymax=Blighted.Addresses+SE,ymin=Blighted.Addresses-SE)) +
geom_bar(stat="identity",fill="dodgerblue",position=dodge,width=.75)+
facet_wrap("Model") +
geom_errorbar(position=dodge, width=0.25)+
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-1))+
theme(legend.position="none")+
labs(title = "Estimated Blight In New Orleans")
This implies reductions from 2010 of:
reduction.molten$SE <- SE.vec[-c(1:3)]
ggplot(data=reduction.molten, aes(x = Year, y = Blighted.Addresses, ymax=Blighted.Addresses+SE, ymin=Blighted.Addresses-SE)) +
geom_bar(stat="identity",fill="dodgerblue",width=.75)+
facet_wrap("Model") +
geom_errorbar(position=dodge, width=0.25)+
geom_text (aes(label=sprintf("%1.0f",Blighted.Addresses), size=12,vjust=-2))+
theme(legend.position="none")+
labs(title = "Estimated Blight Reduction In New Orleans from 2010")
The three models can be described as below:
| Parameter | Model A | Model B | Model C |
|---|---|---|---|
| UNO Survey Variables | Blighted addresses only | Blighted addresses, houses undergoing renovations, all vacant lots | Blighted addresses, houses undergoing renovations, vacant lots that were also vacant in the previous survey |
| Parsimony | Most straightforward | Adds complexity | Most complex |
| USPS Consistency | Excludes lots and houses undergoing renovation that would likely be considered no-stats | Includes vacant lots that likely wouldn’t be considered no-stats | Most consistent |
The models range from A, which is the most straightforward but possibly too simplistic, to C, which adds complexity in an attempt to align with the no-stat definition as closely as possible. Model B offers a middle ground, being more straightforward than C but more faithful to the USPS definitions than A.
Based on our results, we can likely dismiss Model A as a realistic model, particularly in light of the 2014-2015 results. A 60% reduction from 2010 to 2014-2015 is unrealistic and does not match with historical trends. The addition of lots and houses undergoing renovation appears necessary for estimation to work properly. Both Models B and C provide more realistic results, with Model B being more conservative and Model C more aggressive. Based on these two models, we can estimate that between 2010 and 2014-2015, New Orleans saw a reduction of over 15,000, and perhaps as many as 20,000, blighted addresses.