HW1-Exploratory Analysis

library(foreign) 
 
## 1) Using the 2008 Texas Death File (2008_Texas_Deaths.dbf) available on the google drive, aggregate the number of deaths in San Antonio Census tracts for 2008 and merge them to the SA_Classdata.shp shapefile.


d99<-read.dbf("/Users/fikrewoldbitew/Google Drive/PhD_Courses/Spring 2019/SpatialDemography_7263/data/2008_Texas_Deaths.dbf")
d99$cofips<-substr(d99$D_TR2000, 3,5)
#get only bexar county, fips==029
bexar08<-subset(d99, d99$cofips=="029")
#form the tract ID
bexar08$tract<-substr(bexar08$D_TR2000,6,11)
#Cause of death is D_SUPC10

#aggregate by tract
bextr08<-data.frame(xtabs(~tract, bexar08, drop.unused.levels=T))

trs<-bextr08$tract

#make a dataframe with an id and deaths data
trmort<-data.frame(tract=bextr08$tract, d99=bextr08$Freq)
head(trmort)

##    tract d99
## 1 110100  41
## 2 110200   3
## 3 110300  32
## 4 110400  12
## 5 110500  15
## 6 110600  36

library(rgdal)
library(spdep)
library(RColorBrewer)
library(classInt)
library(maptools)

sa<-readShapePoly("/Users/fikrewoldbitew//Google Drive/PhD_Courses/Spring 2019/SpatialDemography_7263/data/SA_classdata.shp")

mdat<-sa@data
#merge the death data to the shapefile
mdat2<-merge(mdat, trmort, by.x="TractID", by.y="tract", all.x=T, sort=F)

sa@data<-mdat2

writePolyShape(sa, fn="SA_2008merged.shp")

library(spdep)
library(maptools)
library(RColorBrewer)
library(classInt)
library(ggplot2)


##2) Create mortality rate average, 2008

dat<-sa
#dat$mort<-apply(dat@data[, c("deaths08")],1,sum)
dat$mortrate<-1000*dat$d99/dat$acs_poptot
dat$mortrate<-ifelse(is.na(dat$mortrate)==T,0, dat$mortrate)
#get rid of tracts with very low populations
dat<-dat[which(dat$acs_poptot>100),]




## 3) Perform an explanatory analysis of the rate using Histograms, boxplots, thematic maps, you can use Geoda for this.

#Make Histogram Mortality Rate 2008


qplot(dat$mortrate, geom="histogram", binwidth=.05)

hist(dat$mortrate)

#Make Boxplots

#boxplot of mortality rate 2008
dat$hispq<-cut(dat$PHISPANIC, quantile(dat$PHISPANIC))
boxplot(mortrate~hispq, dat)

#ggplot(dat@data, aes(x=bextr08$tract, y=dat$mortrate)) + geom_boxplot() 
ggplot(dat@data, aes(x=PWHITE, y=dat$mortrate)) + geom_boxplot()

ggplot(dat@data, aes(x=PBLACK, y=dat$mortrate)) + geom_boxplot()

ggplot(dat@data, aes(x=PHISPANIC, y=dat$mortrate)) + geom_boxplot()

#na.omit(dat$mortrate)
#dat[complete.cases(dat$mortrate),]
#dat$mortrate[complete.cases(dat$mortrate)]

#Show a basic quantile map of the mortality rate
spplot(dat, "mortrate",
       main="Mortality Rate, 2008",
       at=quantile(dat$mortrate), 
       col.regions=brewer.pal(n=5, "Blues"))

sanb<-poly2nb(dat, queen=F)
summary(sanb)

## Neighbour list object:
## Number of regions: 234 
## Number of nonzero links: 1096 
## Percentage nonzero weights: 2.001607 
## Average number of links: 4.683761 
## Link number distribution:
## 
##  1  2  3  4  5  6  7  8  9 
##  4 10 31 62 66 33 23  3  2 
## 4 least connected regions:
## 61 82 147 205 with 1 link
## 2 most connected regions:
## 31 55 with 9 links

salw<-nb2listw(sanb, style="W") 
#the style = W row-standardizes the matrix

#make a k-nearest neighbor weight set, with k=3
knn<-knearneigh(x = coordinates(dat), k = 3)
knn3<-knn2nb(knn = knn)

plot(dat, main="Rook Neighbors")
plot(salw, coords=coordinates(dat), add=T, col=2)

plot(dat, main="k=3 Neighbors")
plot(knn3, coords=coordinates(dat), add=T, col=2)

#wmat<-nb2mat(sanb, style="W")
#dat$mort.w<-wmat%*%(dat$mortrate)

#head(dat@data[, c("mortrate", "mort.w")])

#spplot(dat, c("mortrate", "mort.w"),
#at=quantile(dat$mortrate), 
#col.regions=brewer.pal(n=5, "Blues"),
#main="Mortality Rate, 2008")

# 3) now to calculate moran's I for the mortality rate
moran.test(dat$mortrate, listw=salw)

## 
##  Moran I test under randomisation
## 
## data:  dat$mortrate  
## weights: salw    
## 
## Moran I statistic standard deviate = 3.4883, p-value = 0.000243
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.148344077      -0.004291845       0.001914605

moran.mc(dat$mortrate, listw=salw, nsim=999)

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  dat$mortrate 
## weights: salw  
## number of simulations + 1: 1000 
## 
## statistic = 0.14834, observed rank = 999, p-value = 0.001
## alternative hypothesis: greater

#Moran Scatter plot
moran.plot(dat$mortrate, listw=salw, main="Moran Scatterplot")

#Here, I use the false discovery rate correction, to adjust the p-value for all the tests, similar to a Bonferroni correction in ANOVA
locali<-localmoran(dat$mortrate, salw, p.adjust.method="fdr")
dat$locali<-locali[,1]
dat$localp<-locali[,5]

#We have to make our own cluster identifiers
dat$cl<-as.factor(ifelse(dat$localp<=.05,"Clustered","NotClustered"))


#Plots of the results
spplot(dat, "locali", main="Local Moran's I", 
       at=quantile(dat$locali), 
       col.regions=brewer.pal(n=4, "RdBu"))

spplot(dat, "cl", main="Local Moran Clusters", 
       col.regions=c(2,0))

n=dim(dat@data)[1]
n1=dim(dat@data)[1] -1
S0 = Szero(salw)
geary.mc(dat$mortrate, listw=salw, nsim=999, 
         alternative = "greater")

## 
##  Monte-Carlo simulation of Geary C
## 
## data:  dat$mortrate 
## weights: salw 
## number of simulations + 1: 1000 
## 
## statistic = 0.82804, observed rank = 1, p-value = 0.001
## alternative hypothesis: greater

#Cannot do
globalG.test(dat$mortrate, salw)

## 
##  Getis-Ord global G statistic
## 
## data:  dat$mortrate 
## weights: salw 
## 
## standard deviate = 2.0353, p-value = 0.02091
## alternative hypothesis: greater
## sample estimates:
## Global G statistic        Expectation           Variance 
##       4.404062e-03       4.291845e-03       3.039858e-09

#Local Getis-Org G analysis
localg<-localG(dat$mortrate, salw)
dat$localg<-as.numeric(localg)

#Plots
spplot(dat, "localg", main="Local Geary's G--Getis Ord G", at=c(-4, -3,-2,-1,0,1,2,3,4), 
       col.regions=brewer.pal(n=8, "RdBu"))

HW1-Exploratory Analysis

Fikrewold Bitew

January 24, 2019