setwd("/home/projects/analysis/DHS/R")
rm(list=ls())
library(data.table)
library(foreign)
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.9-2, (SVN revision 526)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /usr/share/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: (autodetected)
library(knitr)
# Load child records
dhs <- fread("../out/JG/2015.03.08/all_indicators_CHILD.csv")
# Check the list of surveys, recodes, and phases, some `hv000` codes don't seem to
# match the phase
xy <- dhs[, length(unique(latnum)), by=list(country_code_iso, hv000, hv001)]
xy <- xy[V1>1]
xy <- unique(xy$hv000)
dhs[hv000 %in% xy, list(
phase=unique(phase),
start=paste(unique(start_year), collapse=", "),
end=paste(unique(end_year), collapse=", ")), by=hv000]
## hv000 phase start end
## 1: SN2 2 1992, 1997 1993, 1997
## 2: SN2 3 1992, 1997 1993, 1997
## 3: BD3 3 1993, 1996, 1999 1994, 1997, 2000
## 4: BD3 4 1993, 1996, 1999 1994, 1997, 2000
## 5: CI3 3 1994, 1998 1994, 1999
## 6: TZ3 3 1996, 1999 1996, 1999
## 7: TZ3 4 1996, 1999 1996, 1999
## 8: EG4 4 2000, 2003, 2005 2000, 2003, 2005
## 9: EG4 5 2000, 2003, 2005 2000, 2003, 2005
## 10: ET4 4 2000, 2005 2000, 2005
## 11: ET4 5 2000, 2005 2000, 2005
## 12: RW4 4 2000, 2005 2000, 2005
## 13: RW4 5 2000, 2005 2000, 2005
## 14: KH5 5 2005, 2010 2005, 2010
## 15: KH5 6 2005, 2010 2005, 2010
## 16: JO5 5 2007, 2009 2007, 2009
## 17: JO5 6 2007, 2009 2007, 2009
## 18: PE6 5 2009, 2010, 2011 2009, 2010, 2011
## 19: PE6 6 2009, 2010, 2011 2009, 2010, 2011
## 20: SN6 6 2010, 2012 2011, 2013
Note that JG clarified that unique surveys may be identified using a combination of hv000 and start_year, and not just hv000. For completeness we should also match these to the svyCode identifiers used in the GIS file.
# Add unique survey codes
dhs[, svyCode := paste0(start_year, hv000)]
# Recode Namibia country iso from NA to "NA"
dhs[is.na(country_code_iso), country_code_iso := "NA"]
# Validate other raw data from CHILD records
table(dhs$child)
##
## 1 2 3 4 5 6
## 1826839 1463959 1076193 757166 523734 356706
# zscores
phist(dhs$ic_haz06, main="Height-for-Age Z-Score")
phist(dhs$ic_whz06, main="Weight-for-Height Z-Score")
phist(dhs$ic_waz06, main="Weight-for-Age Z-Score")
table(dhs$ic_stunted_moderate)
##
## 0 1
## 611035 349891
# Number of surveys, phases, clusters by country
setkey(dhs, country_code_iso, hv000, svyCode, phase, recode, hv001)
dhs[, list(
surveys=paste(unique(svyCode), collapse=", ")),
by=list(iso=country_code_iso)]
## iso surveys
## 1: AL 2008AL5
## 2: AM 2000AM4, 2005AM4, 2010AM6
## 3: AZ 2006AZ5
## 4: BD 1993BD3, 1996BD3, 1999BD3, 2004BD4, 2007BD5, 2011BD6
## 5: BF 1993BF2, 1998BF3, 2003BF4, 2010BF6
## 6: BI 2010BU6
## 7: BJ 1996BJ3, 2001BJ4, 2006BJ5, 2011BJ6
## 8: BO 1998BO3, 2003BO4, 2008BO5
## 9: BR 1996BR3
## 10: CD 2007CD5, 2013CD6
## 11: CF 1994CF3
## 12: CG 2005CG5, 2011CG6
## 13: CI 1994CI3, 1998CI3, 2011CI6
## 14: CM 2004CM4, 2011CM6
## 15: CO 1995CO3, 2000CO4, 2005CO4, 2010CO5
## 16: DO 1996DR3, 1999DR3, 2002DR4, 2007DR5, 2013DR6
## 17: EG 1992EG2, 1995EG3, 2000EG4, 2003EG4, 2005EG4, 2008EG5
## 18: ET 2000ET4, 2005ET4, 2011ET6
## 19: GA 2000GA3, 2012GA6
## 20: GH 1993GH2, 1998GH3, 2003GH4, 2008GH5
## 21: GN 1999GN3, 2005GN4, 2012GN6
## 22: GT 1995GU3, 1998GU3
## 23: GY 2009GY5
## 24: HN 2011HN6
## 25: HT 2000HT4, 2005HT5, 2012HT6
## 26: ID 1991ID2, 1994ID3, 1997ID3, 2002ID4, 2007ID5, 2012ID6
## 27: IN 1992IA2, 2005IA5
## 28: JO 1990JO2, 1997JO3, 2002JO4, 2007JO5, 2009JO5, 2012JO6
## 29: KE 1993KE2, 1998KE3, 2003KE4, 2008KE5
## 30: KG 1997KY3, 2012KY6
## 31: KH 2000KH4, 2005KH5, 2010KH5
## 32: KM 1996KM3, 2012KM6
## 33: KZ 1995KK3, 1999KK3
## 34: LR 2007LB5, 2013LB6
## 35: LS 2004LS4, 2009LS5
## 36: MA 1992MA2, 2003MA4
## 37: MD 2005MB4
## 38: MG 1992MD2, 1997MD3, 2003MD4, 2008MD5
## 39: ML 2006ML5, 2012ML6
## 40: MV 2009MV5
## 41: MW 1992MW2, 2000MW4, 2004MW4, 2010MW5
## 42: MZ 1997MZ3, 2003MZ4, 2011MZ6
## 43: NA 1992NM2, 2000NM4, 2006NM5, 2013NM6
## 44: NE 2006NI5, 2012NI6
## 45: NG 1990NG2, 1999NG3, 2003NG4, 2008NG5, 2013NG6
## 46: NI 1998NC3, 2001NC4
## 47: NP 1996NP3, 2001NP4, 2006NP5, 2011NP6
## 48: PE 1991PE2, 1996PE3, 2000PE4, 2003PE5, 2009PE6, 2010PE6, 2011PE6
## 49: PH 1993PH2, 1998PH3, 2003PH4, 2008PH5, 2013PH6
## 50: PK 1990PK2, 2006PK5, 2012PK6
## 51: PY 1990PY2
## 52: RW 1992RW2, 2000RW4, 2005RW4, 2007RW5, 2010RW6
## 53: SL 2008SL5, 2013SL6
## 54: SN 1992SN2, 1997SN2, 2010SN6, 2012SN6
## 55: ST 2008ST5
## 56: SZ 2006SZ5
## 57: TD 2004TD4
## 58: TJ 2012TJ6
## 59: TL 2009TL5
## 60: TR 1993TR2, 1998TR3, 2003TR4
## 61: TZ 1991TZ2, 1996TZ3, 1999TZ3, 2004TZ4, 2010TZ5
## 62: UA 2007UA5
## 63: UG 1995UG3, 2000UG4, 2006UG5, 2011UG6
## 64: UZ 1996UZ3
## 65: VN 1997VNT, 2002VNT
## 66: YE 1991YE2
## 67: ZA 1998ZA3
## 68: ZM 1992ZM2, 1996ZM3, 2001ZM4, 2007ZM5
## 69: ZW 1994ZW3, 1999ZW4, 2005ZW5, 2010ZW6
## iso surveys
# Note that VNT code (line 65) looks like an error, should be VN3 and VN4 instead?
dhs[hv000=="VNT", .N, keyby=list(start_year, regname)]
## start_year regname N
## 1: 1997 central coast 1567
## 2: 1997 central highlands 633
## 3: 1997 mekong river delta 3550
## 4: 1997 north central 2248
## 5: 1997 northern uplands 2915
## 6: 1997 red river delta 2650
## 7: 1997 southeast 1510
## 8: 2002 central coast 1506
## 9: 2002 central highlands 569
## 10: 2002 mekong river delta 2992
## 11: 2002 north central 2210
## 12: 2002 northern uplands 2852
## 13: 2002 red river delta 2445
## 14: 2002 southeast 1556
# Correct
dhs[hv000=="VNT" & start_year==1997, hv000 := "VN3"]
dhs[hv000=="VNT" & start_year==2002, hv000 := "VN4"]
dhs[, svyCode := paste0(start_year, hv000)]
# Generate mean zscores across survey clusters
zscore <- names(dhs)[32:40]
clust <- dhs[, lapply(.SD, function(x) weighted.mean(x==1, hv005_sample_weight, na.rm=T)),
by=list(country_code_iso, svyCode, hv001),
.SDcols=c(zscore, "hv005_sample_weight")]
# Drop sample weights
clust[, hv005_sample_weight := NULL]
# Verify results
phist(clust$ic_stunted_moderate, main="Stunting - Moderate")
phist(clust$ic_wasted_moderate, main="Wasting - Moderate")
phist(clust$ic_underweight_moderate, main="Underweight - Moderate")
# Merge in cluster coordinates from `dhs` into `clust`
tmp <- dhs[, list(
X=mean(longnum, na.rm=T),
Y=mean(latnum, na.rm=T)), by=list(svyCode, hv001)]
setkey(tmp, svyCode, hv001)
setkey(clust, svyCode, hv001)
clust <- tmp[clust]
# Number of surveys
clust[, length(unique(svyCode))]
## [1] 191
# Number of countries
clust[, length(unique(country_code_iso))]
## [1] 69
# Surveys with missing coords
tmp <- clust[, sum(is.na(X))/.N, keyby=list(iso=country_code_iso, svyCode)][V1==1]
tmp
## iso svyCode V1
## 1: AM 2000AM4 1
## 2: AM 2005AM4 1
## 3: AM 2010AM6 1
## 4: AZ 2006AZ5 1
## 5: BD 1993BD3 1
## 6: BD 1996BD3 1
## 7: BJ 2006BJ5 1
## 8: BO 1998BO3 1
## 9: BO 2003BO4 1
## 10: BR 1996BR3 1
## 11: CG 2005CG5 1
## 12: CG 2011CG6 1
## 13: CO 1995CO3 1
## 14: CO 2000CO4 1
## 15: CO 2005CO4 1
## 16: DO 1996DR3 1
## 17: DO 1999DR3 1
## 18: DO 2002DR4 1
## 19: GA 2000GA3 1
## 20: GT 1995GU3 1
## 21: GT 1998GU3 1
## 22: ID 1991ID2 1
## 23: ID 1994ID3 1
## 24: ID 1997ID3 1
## 25: ID 2007ID5 1
## 26: ID 2012ID6 1
## 27: IN 1992IA2 1
## 28: IN 2005IA5 1
## 29: JO 1990JO2 1
## 30: JO 1997JO3 1
## 31: JO 2009JO5 1
## 32: KE 1993KE2 1
## 33: KE 1998KE3 1
## 34: KG 1997KY3 1
## 35: KM 1996KM3 1
## 36: KZ 1995KK3 1
## 37: KZ 1999KK3 1
## 38: MA 1992MA2 1
## 39: MG 1992MD2 1
## 40: MG 2003MD4 1
## 41: MV 2009MV5 1
## 42: MW 1992MW2 1
## 43: MZ 1997MZ3 1
## 44: MZ 2003MZ4 1
## 45: NA 1992NM2 1
## 46: NA 2013NM6 1
## 47: NE 2006NI5 1
## 48: NE 2012NI6 1
## 49: NG 1999NG3 1
## 50: NI 1998NC3 1
## 51: NI 2001NC4 1
## 52: NP 1996NP3 1
## 53: PE 1991PE2 1
## 54: PE 1996PE3 1
## 55: PE 2010PE6 1
## 56: PE 2011PE6 1
## 57: PH 1993PH2 1
## 58: PH 1998PH3 1
## 59: PH 2013PH6 1
## 60: PK 1990PK2 1
## 61: PK 2012PK6 1
## 62: PY 1990PY2 1
## 63: RW 1992RW2 1
## 64: RW 2000RW4 1
## 65: SN 2012SN6 1
## 66: ST 2008ST5 1
## 67: TD 2004TD4 1
## 68: TR 1993TR2 1
## 69: TR 1998TR3 1
## 70: TR 2003TR4 1
## 71: TZ 1991TZ2 1
## 72: TZ 1996TZ3 1
## 73: TZ 2004TZ4 1
## 74: UA 2007UA5 1
## 75: UG 1995UG3 1
## 76: UZ 1996UZ3 1
## 77: VN 1997VN3 1
## 78: VN 2002VN4 1
## 79: YE 1991YE2 1
## 80: ZA 1998ZA3 1
## 81: ZM 1992ZM2 1
## 82: ZM 1996ZM3 1
## 83: ZM 2001ZM4 1
## 84: ZW 1994ZW3 1
## iso svyCode V1
# Percent of missing coords and zscores per survey across clusters
tmp <- tmp[, unique(svyCode)]
clust[!svyCode %in% tmp, list(
clusters=.N,
missing.coords=(100*sum(is.na(X))/.N),
missing.zscore=(100*sum(is.na(ic_stunted_moderate))/.N)),
by=list(country_code_iso, svyCode)][order(country_code_iso, svyCode)]
## country_code_iso svyCode clusters missing.coords missing.zscore
## 1: AL 2008AL5 450 0.0000 10.222
## 2: BD 1999BD3 341 0.0000 0.000
## 3: BD 2004BD4 361 0.5540 0.000
## 4: BD 2007BD5 361 0.0000 0.000
## 5: BD 2011BD6 600 0.0000 0.000
## 6: BF 1993BF2 230 0.0000 0.000
## 7: BF 1998BF3 210 0.9524 0.000
## 8: BF 2003BF4 400 0.7500 0.000
## 9: BF 2010BF6 573 5.5846 0.175
## 10: BI 2010BU6 376 0.0000 0.000
## 11: BJ 1996BJ3 200 0.0000 0.000
## 12: BJ 2001BJ4 247 0.0000 0.000
## 13: BJ 2011BJ6 750 0.0000 0.133
## 14: BO 2008BO5 999 0.2002 1.201
## 15: CD 2007CD5 300 2.3333 0.000
## 16: CD 2013CD6 536 8.2090 0.000
## 17: CF 1994CF3 231 0.0000 0.866
## 18: CI 1994CI3 246 0.0000 0.000
## 19: CI 1998CI3 140 0.0000 0.000
## 20: CI 2011CI6 351 2.8490 0.285
## 21: CM 2004CM4 465 0.4301 3.656
## 22: CM 2011CM6 578 0.1730 0.519
## 23: CO 2010CO5 4927 2.3544 11.264
## 24: DO 2007DR5 1428 0.2101 2.101
## 25: DO 2013DR6 524 0.0000 2.099
## 26: EG 1992EG2 546 0.0000 1.282
## 27: EG 1995EG3 934 0.0000 1.285
## 28: EG 2000EG4 999 0.2002 1.201
## 29: EG 2003EG4 975 5.2308 4.923
## 30: EG 2005EG4 1359 4.4886 0.957
## 31: EG 2008EG5 1264 1.5823 2.136
## 32: ET 2000ET4 539 0.7421 0.000
## 33: ET 2005ET4 535 1.3084 1.869
## 34: ET 2011ET6 596 4.1946 0.000
## 35: GA 2012GA6 335 1.1940 2.090
## 36: GH 1993GH2 400 0.0000 2.750
## 37: GH 1998GH3 400 0.0000 1.250
## 38: GH 2003GH4 412 0.4854 1.699
## 39: GH 2008GH5 411 1.7032 0.973
## 40: GN 1999GN3 293 0.0000 0.341
## 41: GN 2005GN4 295 1.3559 0.339
## 42: GN 2012GN6 300 0.0000 0.000
## 43: GY 2009GY5 325 4.0000 1.846
## 44: HN 2011HN6 1147 1.7437 0.697
## 45: HT 2000HT4 317 0.3155 0.000
## 46: HT 2005HT5 339 2.0649 0.295
## 47: HT 2012HT6 445 1.7978 0.449
## 48: ID 2002ID4 1391 5.2480 100.000
## 49: JO 2002JO4 498 0.6024 0.602
## 50: JO 2007JO5 928 0.4310 50.108
## 51: JO 2012JO6 806 0.0000 0.993
## 52: KE 2003KE4 400 0.2500 0.500
## 53: KE 2008KE5 398 0.2513 0.000
## 54: KG 2012KY6 316 0.6329 0.316
## 55: KH 2000KH4 471 0.2123 0.637
## 56: KH 2005KH5 557 1.6158 0.539
## 57: KH 2010KH5 611 0.6547 0.655
## 58: KM 2012KM6 252 3.9683 0.397
## 59: LR 2007LB5 298 2.3490 0.336
## 60: LR 2013LB6 322 0.0000 0.000
## 61: LS 2004LS4 405 5.9259 8.889
## 62: LS 2009LS5 400 1.2500 3.000
## 63: MA 2003MA4 480 0.0000 0.208
## 64: MD 2005MB4 400 0.2500 5.250
## 65: MG 1997MD3 269 0.3717 1.859
## 66: MG 2008MD5 594 1.5152 0.337
## 67: ML 2006ML5 407 0.4914 0.000
## 68: ML 2012ML6 413 0.0000 0.242
## 69: MW 2000MW4 559 0.0000 0.000
## 70: MW 2004MW4 521 0.1919 0.000
## 71: MW 2010MW5 849 2.5913 2.002
## 72: MZ 2011MZ6 610 0.0000 0.000
## 73: NA 2000NM4 259 0.0000 1.158
## 74: NA 2006NM5 500 1.8000 1.400
## 75: NG 1990NG2 298 0.3356 0.000
## 76: NG 2003NG4 362 0.5525 0.276
## 77: NG 2008NG5 886 0.0000 0.000
## 78: NG 2013NG6 896 0.7812 0.112
## 79: NP 2001NP4 251 0.0000 1.195
## 80: NP 2006NP5 260 0.0000 0.000
## 81: NP 2011NP6 289 0.0000 0.692
## 82: PE 2000PE4 1413 0.4246 1.911
## 83: PE 2003PE5 1849 23.9048 31.747
## 84: PE 2009PE6 1132 0.0883 0.972
## 85: PH 2003PH4 819 0.3663 100.000
## 86: PH 2008PH5 787 0.3812 100.000
## 87: PK 2006PK5 970 1.5464 100.000
## 88: RW 2005RW4 462 1.2987 0.216
## 89: RW 2007RW5 249 1.2048 100.000
## 90: RW 2010RW6 492 0.0000 0.000
## 91: SL 2008SL5 353 0.8499 3.116
## 92: SL 2013SL6 435 0.0000 0.230
## 93: SN 1992SN2 258 0.0000 0.388
## 94: SN 1997SN2 320 0.3125 100.000
## 95: SN 2010SN6 391 1.5345 0.000
## 96: SZ 2006SZ5 274 1.4599 1.095
## 97: TJ 2012TJ6 355 0.0000 0.282
## 98: TL 2009TL5 455 0.2198 0.000
## 99: TZ 1999TZ3 176 1.7045 0.000
## 100: TZ 2010TZ5 475 3.5789 0.000
## 101: UG 2000UG4 297 10.4377 0.000
## 102: UG 2006UG5 368 8.6957 0.815
## 103: UG 2011UG6 404 0.9901 1.980
## 104: ZM 2007ZM5 319 0.0000 0.000
## 105: ZW 1999ZW4 230 3.9130 1.739
## 106: ZW 2005ZW5 398 0.5025 1.005
## 107: ZW 2010ZW6 406 3.2020 0.000
## country_code_iso svyCode clusters missing.coords missing.zscore
# Flag which is the latest survey year for each country
clust[, phase.last := max(as.integer(substr(svyCode, 1,4))), by=country_code_iso]
clust[, phase.last := phase.last==as.integer(substr(svyCode, 1,4))]
# Verify
summary(clust$phase.last)
## Mode FALSE TRUE NA's
## logical 71198 42888 0
clust[phase.last==T, unique(svyCode), keyby=country_code_iso]
## country_code_iso V1
## 1: AL 2008AL5
## 2: AM 2010AM6
## 3: AZ 2006AZ5
## 4: BD 2011BD6
## 5: BF 2010BF6
## 6: BI 2010BU6
## 7: BJ 2011BJ6
## 8: BO 2008BO5
## 9: BR 1996BR3
## 10: CD 2013CD6
## 11: CF 1994CF3
## 12: CG 2011CG6
## 13: CI 2011CI6
## 14: CM 2011CM6
## 15: CO 2010CO5
## 16: DO 2013DR6
## 17: EG 2008EG5
## 18: ET 2011ET6
## 19: GA 2012GA6
## 20: GH 2008GH5
## 21: GN 2012GN6
## 22: GT 1998GU3
## 23: GY 2009GY5
## 24: HN 2011HN6
## 25: HT 2012HT6
## 26: ID 2012ID6
## 27: IN 2005IA5
## 28: JO 2012JO6
## 29: KE 2008KE5
## 30: KG 2012KY6
## 31: KH 2010KH5
## 32: KM 2012KM6
## 33: KZ 1999KK3
## 34: LR 2013LB6
## 35: LS 2009LS5
## 36: MA 2003MA4
## 37: MD 2005MB4
## 38: MG 2008MD5
## 39: ML 2012ML6
## 40: MV 2009MV5
## 41: MW 2010MW5
## 42: MZ 2011MZ6
## 43: NA 2013NM6
## 44: NE 2012NI6
## 45: NG 2013NG6
## 46: NI 2001NC4
## 47: NP 2011NP6
## 48: PE 2011PE6
## 49: PH 2013PH6
## 50: PK 2012PK6
## 51: PY 1990PY2
## 52: RW 2010RW6
## 53: SL 2013SL6
## 54: SN 2012SN6
## 55: ST 2008ST5
## 56: SZ 2006SZ5
## 57: TD 2004TD4
## 58: TJ 2012TJ6
## 59: TL 2009TL5
## 60: TR 2003TR4
## 61: TZ 2010TZ5
## 62: UA 2007UA5
## 63: UG 2011UG6
## 64: UZ 1996UZ3
## 65: VN 2002VN4
## 66: YE 1991YE2
## 67: ZA 1998ZA3
## 68: ZM 2007ZM5
## 69: ZW 2010ZW6
## country_code_iso V1
# Load DHS survey maps (prepared earlier) to look up `svyCode` and country names
gis.web <- readRDS("../temp/dhsMap_web_2015.03.06.rds")
gis.dt <- data.table(gis.web@data)
setkey(clust, country_code_iso)
setkey(gis.dt, ISO)
unmatched <- clust[!gis.dt][, unique(svyCode)]
unmatched
## character(0)
# => all country codes seem to match
# Vice-versa, is there any country in the gis file that doesn't have CHILD attributes?
unmatched <- gis.dt[!clust][, unique(cntryNameEN)]
unmatched
## [1] Afghanistan Angola Botswana
## [4] Cape Verde Ecuador Eritrea
## [7] Sri Lanka Mauritania Mexico
## [10] Russia Sudan El Salvador
## [13] Togo Thailand Turkmenistan
## [16] Tunisia Trinidad and Tobago Samoa
## 88 Levels: Afghanistan Albania Armenia Angola Azerbaijan ... Indonesia
# Merge country name from `gis.dt` into `clust`
clust$cntry <- gis.dt[clust, mult="first"][, cntryNameEN]
clust[is.na(cntry), .N, by=svyCode]
## Empty data.table (0 rows) of 2 cols: svyCode,N
# => all matched
# Number of clusters in each last phase survey with coords and data on stunting rates
tmp <- clust[phase.last==T, list(N=.N,
with.data=sum(!is.na(ic_stunted_moderate) & !is.na(X))),
by=list(country_code_iso, cntry, svyCode)]
tmp
## country_code_iso cntry svyCode N with.data
## 1: AL Albania 2008AL5 450 404
## 2: AM Armenia 2010AM6 308 0
## 3: AZ Azerbaijan 2006AZ5 318 0
## 4: BD Bangladesh 2011BD6 600 600
## 5: BF Burkina Faso 2010BF6 573 540
## 6: BI Burundi 2010BU6 376 376
## 7: BJ Benin 2011BJ6 750 749
## 8: BO Bolivia 2008BO5 999 985
## 9: BR Brazil 1996BR3 827 0
## 10: CD Congo Democratic Republic 2013CD6 536 492
## 11: CF CAR 1994CF3 231 229
## 12: CG Congo (Brazzaville) 2011CG6 384 0
## 13: CI Cote d'Ivoire 2011CI6 351 340
## 14: CM Cameroon 2011CM6 578 574
## 15: CO Colombia 2010CO5 4927 4278
## 16: DO Dominican Republic 2013DR6 524 513
## 17: EG Egypt 2008EG5 1264 1217
## 18: ET Ethiopia 2011ET6 596 571
## 19: GA Gabon 2012GA6 335 324
## 20: GH Ghana 2008GH5 411 400
## 21: GN Guinea 2012GN6 300 300
## 22: GT Guatemala 1998GU3 278 0
## 23: GY Guyana 2009GY5 325 306
## 24: HN Honduras 2011HN6 1147 1119
## 25: HT Haiti 2012HT6 445 435
## 26: ID Indonesia 2012ID6 1831 0
## 27: IN India 2005IA5 3850 0
## 28: JO Jordan 2012JO6 806 798
## 29: KE Kenya 2008KE5 398 397
## 30: KG Kyrgyz Republic 2012KY6 316 313
## 31: KH Cambodia 2010KH5 611 603
## 32: KM Comoros 2012KM6 252 241
## 33: KZ Kazakhstan 1999KK3 251 0
## 34: LR Liberia 2013LB6 322 322
## 35: LS Lesotho 2009LS5 400 383
## 36: MA Morocco 2003MA4 480 479
## 37: MD Moldova 2005MB4 400 378
## 38: MG Madagascar 2008MD5 594 583
## 39: ML Mali 2012ML6 413 412
## 40: MV Maldives 2009MV5 270 0
## 41: MW Malawi 2010MW5 849 810
## 42: MZ Mozambique 2011MZ6 610 610
## 43: NA Namibia 2013NM6 549 0
## 44: NE Niger 2012NI6 476 0
## 45: NG Nigeria 2013NG6 896 888
## 46: NI Nicaragua 2001NC4 609 0
## 47: NP Nepal 2011NP6 289 287
## 48: PE Peru 2011PE6 1132 0
## 49: PH Philippines 2013PH6 800 0
## 50: PK Pakistan 2012PK6 498 0
## 51: PY Paraguay 1990PY2 264 0
## 52: RW Rwanda 2010RW6 492 492
## 53: SL Sierra Leone 2013SL6 435 434
## 54: SN Senegal 2012SN6 200 0
## 55: ST Sao Tome and Principe 2008ST5 104 0
## 56: SZ Swaziland 2006SZ5 274 267
## 57: TD Chad 2004TD4 196 0
## 58: TJ Tajikistan 2012TJ6 355 354
## 59: TL Timor-Leste 2009TL5 455 454
## 60: TR Turkey 2003TR4 686 0
## 61: TZ Tanzania 2010TZ5 475 458
## 62: UA Ukraine 2007UA5 500 0
## 63: UG Uganda 2011UG6 404 392
## 64: UZ Uzbekistan 1996UZ3 168 0
## 65: VN Vietnam 2002VN4 205 0
## 66: YE Yemen 1991YE2 257 0
## 67: ZA South Africa 1998ZA3 958 0
## 68: ZM Zambia 2007ZM5 319 319
## 69: ZW Zimbabwe 2010ZW6 406 393
## country_code_iso cntry svyCode N with.data
# Actual number of data points to map
sum(tmp$with.data)
## [1] 25819
# Save all cluster values for re-use
saveRDS(clust, "../temp/dhsClusters.rds", compress=T)
# Remove clusters with missing coords and convert to spatial points
gis.clust <- clust[!is.na(X)]
setcolorder(gis.clust, c(1:5, 15:16, 6:14))
setnames(gis.clust, 1, "iso")
# Convert to spatial
gis.clust <- SpatialPointsDataFrame(gis.clust[, list(X,Y)], as.data.frame(gis.clust),
proj4string=CRS("+init=epsg:4326"))
spplot(gis.clust, "ic_stunted_moderate", colorkey=T, cex=.4)
# Export to shapefile
writeOGR(gis.clust, "../maps", "dhsMap_ClusterCentroids", "ESRI Shapefile",
overwrite=T)
## Warning in writeOGR(gis.clust, "../out/r15.05", "dhsMap_ClusterCentroids", "ESRI
## Shapefile", : Field names abbreviated for ESRI Shapefile driver
# Check if generated DBF contains any negative values?
tmp <- readOGR("../maps", "dhsMap_ClusterCentroids")
## OGR data source with driver: ESRI Shapefile
## Source: "../out/r15.05", layer: "dhsMap_ClusterCentroids"
## with 58969 features
## It has 16 fields
tmp.dt <- data.table(tmp@data)
phist(tmp.dt$ic_stntd_m, main="Stunting - Moderate")