setwd("/home/projects/analysis/DHS/R")
rm(list=ls())

library(data.table)
library(foreign)
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.9-2, (SVN revision 526)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /usr/share/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: (autodetected)
library(knitr)
# Load child records
dhs <- fread("../out/JG/2015.03.08/all_indicators_CHILD.csv")
# Check the list of surveys, recodes, and phases, some `hv000` codes don't seem to
# match the phase
xy <- dhs[, length(unique(latnum)), by=list(country_code_iso, hv000, hv001)]
xy <- xy[V1>1]
xy <- unique(xy$hv000)

dhs[hv000 %in% xy, list(
  phase=unique(phase),
  start=paste(unique(start_year), collapse=", "),
  end=paste(unique(end_year), collapse=", ")), by=hv000]
##     hv000 phase            start              end
##  1:   SN2     2       1992, 1997       1993, 1997
##  2:   SN2     3       1992, 1997       1993, 1997
##  3:   BD3     3 1993, 1996, 1999 1994, 1997, 2000
##  4:   BD3     4 1993, 1996, 1999 1994, 1997, 2000
##  5:   CI3     3       1994, 1998       1994, 1999
##  6:   TZ3     3       1996, 1999       1996, 1999
##  7:   TZ3     4       1996, 1999       1996, 1999
##  8:   EG4     4 2000, 2003, 2005 2000, 2003, 2005
##  9:   EG4     5 2000, 2003, 2005 2000, 2003, 2005
## 10:   ET4     4       2000, 2005       2000, 2005
## 11:   ET4     5       2000, 2005       2000, 2005
## 12:   RW4     4       2000, 2005       2000, 2005
## 13:   RW4     5       2000, 2005       2000, 2005
## 14:   KH5     5       2005, 2010       2005, 2010
## 15:   KH5     6       2005, 2010       2005, 2010
## 16:   JO5     5       2007, 2009       2007, 2009
## 17:   JO5     6       2007, 2009       2007, 2009
## 18:   PE6     5 2009, 2010, 2011 2009, 2010, 2011
## 19:   PE6     6 2009, 2010, 2011 2009, 2010, 2011
## 20:   SN6     6       2010, 2012       2011, 2013

Note that JG clarified that unique surveys may be identified using a combination of hv000 and start_year, and not just hv000. For completeness we should also match these to the svyCode identifiers used in the GIS file.

# Add unique survey codes
dhs[, svyCode := paste0(start_year, hv000)]

# Recode Namibia country iso from NA to "NA"
dhs[is.na(country_code_iso), country_code_iso := "NA"]
# Validate other raw data from CHILD records
table(dhs$child)
##
##       1       2       3       4       5       6
## 1826839 1463959 1076193  757166  523734  356706
# zscores
phist(dhs$ic_haz06, main="Height-for-Age Z-Score")

phist(dhs$ic_whz06, main="Weight-for-Height Z-Score")

phist(dhs$ic_waz06, main="Weight-for-Age Z-Score")

table(dhs$ic_stunted_moderate)
##
##      0      1
## 611035 349891
# Number of surveys, phases, clusters by country
setkey(dhs, country_code_iso, hv000, svyCode, phase, recode, hv001)
dhs[, list(
  surveys=paste(unique(svyCode), collapse=", ")),
  by=list(iso=country_code_iso)]
##     iso                                                       surveys
##  1:  AL                                                       2008AL5
##  2:  AM                                     2000AM4, 2005AM4, 2010AM6
##  3:  AZ                                                       2006AZ5
##  4:  BD          1993BD3, 1996BD3, 1999BD3, 2004BD4, 2007BD5, 2011BD6
##  5:  BF                            1993BF2, 1998BF3, 2003BF4, 2010BF6
##  6:  BI                                                       2010BU6
##  7:  BJ                            1996BJ3, 2001BJ4, 2006BJ5, 2011BJ6
##  8:  BO                                     1998BO3, 2003BO4, 2008BO5
##  9:  BR                                                       1996BR3
## 10:  CD                                              2007CD5, 2013CD6
## 11:  CF                                                       1994CF3
## 12:  CG                                              2005CG5, 2011CG6
## 13:  CI                                     1994CI3, 1998CI3, 2011CI6
## 14:  CM                                              2004CM4, 2011CM6
## 15:  CO                            1995CO3, 2000CO4, 2005CO4, 2010CO5
## 16:  DO                   1996DR3, 1999DR3, 2002DR4, 2007DR5, 2013DR6
## 17:  EG          1992EG2, 1995EG3, 2000EG4, 2003EG4, 2005EG4, 2008EG5
## 18:  ET                                     2000ET4, 2005ET4, 2011ET6
## 19:  GA                                              2000GA3, 2012GA6
## 20:  GH                            1993GH2, 1998GH3, 2003GH4, 2008GH5
## 21:  GN                                     1999GN3, 2005GN4, 2012GN6
## 22:  GT                                              1995GU3, 1998GU3
## 23:  GY                                                       2009GY5
## 24:  HN                                                       2011HN6
## 25:  HT                                     2000HT4, 2005HT5, 2012HT6
## 26:  ID          1991ID2, 1994ID3, 1997ID3, 2002ID4, 2007ID5, 2012ID6
## 27:  IN                                              1992IA2, 2005IA5
## 28:  JO          1990JO2, 1997JO3, 2002JO4, 2007JO5, 2009JO5, 2012JO6
## 29:  KE                            1993KE2, 1998KE3, 2003KE4, 2008KE5
## 30:  KG                                              1997KY3, 2012KY6
## 31:  KH                                     2000KH4, 2005KH5, 2010KH5
## 32:  KM                                              1996KM3, 2012KM6
## 33:  KZ                                              1995KK3, 1999KK3
## 34:  LR                                              2007LB5, 2013LB6
## 35:  LS                                              2004LS4, 2009LS5
## 36:  MA                                              1992MA2, 2003MA4
## 37:  MD                                                       2005MB4
## 38:  MG                            1992MD2, 1997MD3, 2003MD4, 2008MD5
## 39:  ML                                              2006ML5, 2012ML6
## 40:  MV                                                       2009MV5
## 41:  MW                            1992MW2, 2000MW4, 2004MW4, 2010MW5
## 42:  MZ                                     1997MZ3, 2003MZ4, 2011MZ6
## 43:  NA                            1992NM2, 2000NM4, 2006NM5, 2013NM6
## 44:  NE                                              2006NI5, 2012NI6
## 45:  NG                   1990NG2, 1999NG3, 2003NG4, 2008NG5, 2013NG6
## 46:  NI                                              1998NC3, 2001NC4
## 47:  NP                            1996NP3, 2001NP4, 2006NP5, 2011NP6
## 48:  PE 1991PE2, 1996PE3, 2000PE4, 2003PE5, 2009PE6, 2010PE6, 2011PE6
## 49:  PH                   1993PH2, 1998PH3, 2003PH4, 2008PH5, 2013PH6
## 50:  PK                                     1990PK2, 2006PK5, 2012PK6
## 51:  PY                                                       1990PY2
## 52:  RW                   1992RW2, 2000RW4, 2005RW4, 2007RW5, 2010RW6
## 53:  SL                                              2008SL5, 2013SL6
## 54:  SN                            1992SN2, 1997SN2, 2010SN6, 2012SN6
## 55:  ST                                                       2008ST5
## 56:  SZ                                                       2006SZ5
## 57:  TD                                                       2004TD4
## 58:  TJ                                                       2012TJ6
## 59:  TL                                                       2009TL5
## 60:  TR                                     1993TR2, 1998TR3, 2003TR4
## 61:  TZ                   1991TZ2, 1996TZ3, 1999TZ3, 2004TZ4, 2010TZ5
## 62:  UA                                                       2007UA5
## 63:  UG                            1995UG3, 2000UG4, 2006UG5, 2011UG6
## 64:  UZ                                                       1996UZ3
## 65:  VN                                              1997VNT, 2002VNT
## 66:  YE                                                       1991YE2
## 67:  ZA                                                       1998ZA3
## 68:  ZM                            1992ZM2, 1996ZM3, 2001ZM4, 2007ZM5
## 69:  ZW                            1994ZW3, 1999ZW4, 2005ZW5, 2010ZW6
##     iso                                                       surveys
# Note that VNT code (line 65) looks like an error, should be VN3 and VN4 instead?
dhs[hv000=="VNT", .N, keyby=list(start_year, regname)]
##     start_year            regname    N
##  1:       1997      central coast 1567
##  2:       1997  central highlands  633
##  3:       1997 mekong river delta 3550
##  4:       1997      north central 2248
##  5:       1997   northern uplands 2915
##  6:       1997    red river delta 2650
##  7:       1997          southeast 1510
##  8:       2002      central coast 1506
##  9:       2002  central highlands  569
## 10:       2002 mekong river delta 2992
## 11:       2002      north central 2210
## 12:       2002   northern uplands 2852
## 13:       2002    red river delta 2445
## 14:       2002          southeast 1556
# Correct
dhs[hv000=="VNT" & start_year==1997, hv000 := "VN3"]
dhs[hv000=="VNT" & start_year==2002, hv000 := "VN4"]
dhs[, svyCode := paste0(start_year, hv000)]
# Generate mean zscores across survey clusters
zscore <- names(dhs)[32:40]
clust <- dhs[, lapply(.SD, function(x) weighted.mean(x==1, hv005_sample_weight, na.rm=T)),
  by=list(country_code_iso, svyCode, hv001),
  .SDcols=c(zscore, "hv005_sample_weight")]
# Drop sample weights
clust[, hv005_sample_weight := NULL]
# Verify results
phist(clust$ic_stunted_moderate, main="Stunting - Moderate")

phist(clust$ic_wasted_moderate, main="Wasting - Moderate")

phist(clust$ic_underweight_moderate, main="Underweight - Moderate")

# Merge in cluster coordinates from `dhs` into `clust`
tmp <- dhs[, list(
  X=mean(longnum, na.rm=T),
  Y=mean(latnum, na.rm=T)), by=list(svyCode, hv001)]

setkey(tmp, svyCode, hv001)
setkey(clust, svyCode, hv001)
clust <- tmp[clust]

# Number of surveys
clust[, length(unique(svyCode))]
## [1] 191
# Number of countries
clust[, length(unique(country_code_iso))]
## [1] 69
# Surveys with missing coords
tmp <- clust[, sum(is.na(X))/.N, keyby=list(iso=country_code_iso, svyCode)][V1==1]
tmp
##     iso svyCode V1
##  1:  AM 2000AM4  1
##  2:  AM 2005AM4  1
##  3:  AM 2010AM6  1
##  4:  AZ 2006AZ5  1
##  5:  BD 1993BD3  1
##  6:  BD 1996BD3  1
##  7:  BJ 2006BJ5  1
##  8:  BO 1998BO3  1
##  9:  BO 2003BO4  1
## 10:  BR 1996BR3  1
## 11:  CG 2005CG5  1
## 12:  CG 2011CG6  1
## 13:  CO 1995CO3  1
## 14:  CO 2000CO4  1
## 15:  CO 2005CO4  1
## 16:  DO 1996DR3  1
## 17:  DO 1999DR3  1
## 18:  DO 2002DR4  1
## 19:  GA 2000GA3  1
## 20:  GT 1995GU3  1
## 21:  GT 1998GU3  1
## 22:  ID 1991ID2  1
## 23:  ID 1994ID3  1
## 24:  ID 1997ID3  1
## 25:  ID 2007ID5  1
## 26:  ID 2012ID6  1
## 27:  IN 1992IA2  1
## 28:  IN 2005IA5  1
## 29:  JO 1990JO2  1
## 30:  JO 1997JO3  1
## 31:  JO 2009JO5  1
## 32:  KE 1993KE2  1
## 33:  KE 1998KE3  1
## 34:  KG 1997KY3  1
## 35:  KM 1996KM3  1
## 36:  KZ 1995KK3  1
## 37:  KZ 1999KK3  1
## 38:  MA 1992MA2  1
## 39:  MG 1992MD2  1
## 40:  MG 2003MD4  1
## 41:  MV 2009MV5  1
## 42:  MW 1992MW2  1
## 43:  MZ 1997MZ3  1
## 44:  MZ 2003MZ4  1
## 45:  NA 1992NM2  1
## 46:  NA 2013NM6  1
## 47:  NE 2006NI5  1
## 48:  NE 2012NI6  1
## 49:  NG 1999NG3  1
## 50:  NI 1998NC3  1
## 51:  NI 2001NC4  1
## 52:  NP 1996NP3  1
## 53:  PE 1991PE2  1
## 54:  PE 1996PE3  1
## 55:  PE 2010PE6  1
## 56:  PE 2011PE6  1
## 57:  PH 1993PH2  1
## 58:  PH 1998PH3  1
## 59:  PH 2013PH6  1
## 60:  PK 1990PK2  1
## 61:  PK 2012PK6  1
## 62:  PY 1990PY2  1
## 63:  RW 1992RW2  1
## 64:  RW 2000RW4  1
## 65:  SN 2012SN6  1
## 66:  ST 2008ST5  1
## 67:  TD 2004TD4  1
## 68:  TR 1993TR2  1
## 69:  TR 1998TR3  1
## 70:  TR 2003TR4  1
## 71:  TZ 1991TZ2  1
## 72:  TZ 1996TZ3  1
## 73:  TZ 2004TZ4  1
## 74:  UA 2007UA5  1
## 75:  UG 1995UG3  1
## 76:  UZ 1996UZ3  1
## 77:  VN 1997VN3  1
## 78:  VN 2002VN4  1
## 79:  YE 1991YE2  1
## 80:  ZA 1998ZA3  1
## 81:  ZM 1992ZM2  1
## 82:  ZM 1996ZM3  1
## 83:  ZM 2001ZM4  1
## 84:  ZW 1994ZW3  1
##     iso svyCode V1
# Percent of missing coords and zscores per survey across clusters
tmp <- tmp[, unique(svyCode)]
clust[!svyCode %in% tmp, list(
  clusters=.N,
  missing.coords=(100*sum(is.na(X))/.N),
  missing.zscore=(100*sum(is.na(ic_stunted_moderate))/.N)),
  by=list(country_code_iso, svyCode)][order(country_code_iso, svyCode)]
##      country_code_iso svyCode clusters missing.coords missing.zscore
##   1:               AL 2008AL5      450         0.0000         10.222
##   2:               BD 1999BD3      341         0.0000          0.000
##   3:               BD 2004BD4      361         0.5540          0.000
##   4:               BD 2007BD5      361         0.0000          0.000
##   5:               BD 2011BD6      600         0.0000          0.000
##   6:               BF 1993BF2      230         0.0000          0.000
##   7:               BF 1998BF3      210         0.9524          0.000
##   8:               BF 2003BF4      400         0.7500          0.000
##   9:               BF 2010BF6      573         5.5846          0.175
##  10:               BI 2010BU6      376         0.0000          0.000
##  11:               BJ 1996BJ3      200         0.0000          0.000
##  12:               BJ 2001BJ4      247         0.0000          0.000
##  13:               BJ 2011BJ6      750         0.0000          0.133
##  14:               BO 2008BO5      999         0.2002          1.201
##  15:               CD 2007CD5      300         2.3333          0.000
##  16:               CD 2013CD6      536         8.2090          0.000
##  17:               CF 1994CF3      231         0.0000          0.866
##  18:               CI 1994CI3      246         0.0000          0.000
##  19:               CI 1998CI3      140         0.0000          0.000
##  20:               CI 2011CI6      351         2.8490          0.285
##  21:               CM 2004CM4      465         0.4301          3.656
##  22:               CM 2011CM6      578         0.1730          0.519
##  23:               CO 2010CO5     4927         2.3544         11.264
##  24:               DO 2007DR5     1428         0.2101          2.101
##  25:               DO 2013DR6      524         0.0000          2.099
##  26:               EG 1992EG2      546         0.0000          1.282
##  27:               EG 1995EG3      934         0.0000          1.285
##  28:               EG 2000EG4      999         0.2002          1.201
##  29:               EG 2003EG4      975         5.2308          4.923
##  30:               EG 2005EG4     1359         4.4886          0.957
##  31:               EG 2008EG5     1264         1.5823          2.136
##  32:               ET 2000ET4      539         0.7421          0.000
##  33:               ET 2005ET4      535         1.3084          1.869
##  34:               ET 2011ET6      596         4.1946          0.000
##  35:               GA 2012GA6      335         1.1940          2.090
##  36:               GH 1993GH2      400         0.0000          2.750
##  37:               GH 1998GH3      400         0.0000          1.250
##  38:               GH 2003GH4      412         0.4854          1.699
##  39:               GH 2008GH5      411         1.7032          0.973
##  40:               GN 1999GN3      293         0.0000          0.341
##  41:               GN 2005GN4      295         1.3559          0.339
##  42:               GN 2012GN6      300         0.0000          0.000
##  43:               GY 2009GY5      325         4.0000          1.846
##  44:               HN 2011HN6     1147         1.7437          0.697
##  45:               HT 2000HT4      317         0.3155          0.000
##  46:               HT 2005HT5      339         2.0649          0.295
##  47:               HT 2012HT6      445         1.7978          0.449
##  48:               ID 2002ID4     1391         5.2480        100.000
##  49:               JO 2002JO4      498         0.6024          0.602
##  50:               JO 2007JO5      928         0.4310         50.108
##  51:               JO 2012JO6      806         0.0000          0.993
##  52:               KE 2003KE4      400         0.2500          0.500
##  53:               KE 2008KE5      398         0.2513          0.000
##  54:               KG 2012KY6      316         0.6329          0.316
##  55:               KH 2000KH4      471         0.2123          0.637
##  56:               KH 2005KH5      557         1.6158          0.539
##  57:               KH 2010KH5      611         0.6547          0.655
##  58:               KM 2012KM6      252         3.9683          0.397
##  59:               LR 2007LB5      298         2.3490          0.336
##  60:               LR 2013LB6      322         0.0000          0.000
##  61:               LS 2004LS4      405         5.9259          8.889
##  62:               LS 2009LS5      400         1.2500          3.000
##  63:               MA 2003MA4      480         0.0000          0.208
##  64:               MD 2005MB4      400         0.2500          5.250
##  65:               MG 1997MD3      269         0.3717          1.859
##  66:               MG 2008MD5      594         1.5152          0.337
##  67:               ML 2006ML5      407         0.4914          0.000
##  68:               ML 2012ML6      413         0.0000          0.242
##  69:               MW 2000MW4      559         0.0000          0.000
##  70:               MW 2004MW4      521         0.1919          0.000
##  71:               MW 2010MW5      849         2.5913          2.002
##  72:               MZ 2011MZ6      610         0.0000          0.000
##  73:               NA 2000NM4      259         0.0000          1.158
##  74:               NA 2006NM5      500         1.8000          1.400
##  75:               NG 1990NG2      298         0.3356          0.000
##  76:               NG 2003NG4      362         0.5525          0.276
##  77:               NG 2008NG5      886         0.0000          0.000
##  78:               NG 2013NG6      896         0.7812          0.112
##  79:               NP 2001NP4      251         0.0000          1.195
##  80:               NP 2006NP5      260         0.0000          0.000
##  81:               NP 2011NP6      289         0.0000          0.692
##  82:               PE 2000PE4     1413         0.4246          1.911
##  83:               PE 2003PE5     1849        23.9048         31.747
##  84:               PE 2009PE6     1132         0.0883          0.972
##  85:               PH 2003PH4      819         0.3663        100.000
##  86:               PH 2008PH5      787         0.3812        100.000
##  87:               PK 2006PK5      970         1.5464        100.000
##  88:               RW 2005RW4      462         1.2987          0.216
##  89:               RW 2007RW5      249         1.2048        100.000
##  90:               RW 2010RW6      492         0.0000          0.000
##  91:               SL 2008SL5      353         0.8499          3.116
##  92:               SL 2013SL6      435         0.0000          0.230
##  93:               SN 1992SN2      258         0.0000          0.388
##  94:               SN 1997SN2      320         0.3125        100.000
##  95:               SN 2010SN6      391         1.5345          0.000
##  96:               SZ 2006SZ5      274         1.4599          1.095
##  97:               TJ 2012TJ6      355         0.0000          0.282
##  98:               TL 2009TL5      455         0.2198          0.000
##  99:               TZ 1999TZ3      176         1.7045          0.000
## 100:               TZ 2010TZ5      475         3.5789          0.000
## 101:               UG 2000UG4      297        10.4377          0.000
## 102:               UG 2006UG5      368         8.6957          0.815
## 103:               UG 2011UG6      404         0.9901          1.980
## 104:               ZM 2007ZM5      319         0.0000          0.000
## 105:               ZW 1999ZW4      230         3.9130          1.739
## 106:               ZW 2005ZW5      398         0.5025          1.005
## 107:               ZW 2010ZW6      406         3.2020          0.000
##      country_code_iso svyCode clusters missing.coords missing.zscore
# Flag which is the latest survey year for each country
clust[, phase.last := max(as.integer(substr(svyCode, 1,4))), by=country_code_iso]
clust[, phase.last := phase.last==as.integer(substr(svyCode, 1,4))]
# Verify
summary(clust$phase.last)
##    Mode   FALSE    TRUE    NA's
## logical   71198   42888       0
clust[phase.last==T, unique(svyCode), keyby=country_code_iso]
##     country_code_iso      V1
##  1:               AL 2008AL5
##  2:               AM 2010AM6
##  3:               AZ 2006AZ5
##  4:               BD 2011BD6
##  5:               BF 2010BF6
##  6:               BI 2010BU6
##  7:               BJ 2011BJ6
##  8:               BO 2008BO5
##  9:               BR 1996BR3
## 10:               CD 2013CD6
## 11:               CF 1994CF3
## 12:               CG 2011CG6
## 13:               CI 2011CI6
## 14:               CM 2011CM6
## 15:               CO 2010CO5
## 16:               DO 2013DR6
## 17:               EG 2008EG5
## 18:               ET 2011ET6
## 19:               GA 2012GA6
## 20:               GH 2008GH5
## 21:               GN 2012GN6
## 22:               GT 1998GU3
## 23:               GY 2009GY5
## 24:               HN 2011HN6
## 25:               HT 2012HT6
## 26:               ID 2012ID6
## 27:               IN 2005IA5
## 28:               JO 2012JO6
## 29:               KE 2008KE5
## 30:               KG 2012KY6
## 31:               KH 2010KH5
## 32:               KM 2012KM6
## 33:               KZ 1999KK3
## 34:               LR 2013LB6
## 35:               LS 2009LS5
## 36:               MA 2003MA4
## 37:               MD 2005MB4
## 38:               MG 2008MD5
## 39:               ML 2012ML6
## 40:               MV 2009MV5
## 41:               MW 2010MW5
## 42:               MZ 2011MZ6
## 43:               NA 2013NM6
## 44:               NE 2012NI6
## 45:               NG 2013NG6
## 46:               NI 2001NC4
## 47:               NP 2011NP6
## 48:               PE 2011PE6
## 49:               PH 2013PH6
## 50:               PK 2012PK6
## 51:               PY 1990PY2
## 52:               RW 2010RW6
## 53:               SL 2013SL6
## 54:               SN 2012SN6
## 55:               ST 2008ST5
## 56:               SZ 2006SZ5
## 57:               TD 2004TD4
## 58:               TJ 2012TJ6
## 59:               TL 2009TL5
## 60:               TR 2003TR4
## 61:               TZ 2010TZ5
## 62:               UA 2007UA5
## 63:               UG 2011UG6
## 64:               UZ 1996UZ3
## 65:               VN 2002VN4
## 66:               YE 1991YE2
## 67:               ZA 1998ZA3
## 68:               ZM 2007ZM5
## 69:               ZW 2010ZW6
##     country_code_iso      V1
# Load DHS survey maps (prepared earlier) to look up `svyCode` and country names
gis.web <- readRDS("../temp/dhsMap_web_2015.03.06.rds")
gis.dt <- data.table(gis.web@data)

setkey(clust, country_code_iso)
setkey(gis.dt, ISO)
unmatched <- clust[!gis.dt][, unique(svyCode)]
unmatched
## character(0)
# => all country codes seem to match

# Vice-versa, is there any country in the gis file that doesn't have CHILD attributes?
unmatched <- gis.dt[!clust][, unique(cntryNameEN)]
unmatched
##  [1] Afghanistan         Angola              Botswana
##  [4] Cape Verde          Ecuador             Eritrea
##  [7] Sri Lanka           Mauritania          Mexico
## [10] Russia              Sudan               El Salvador
## [13] Togo                Thailand            Turkmenistan
## [16] Tunisia             Trinidad and Tobago Samoa
## 88 Levels: Afghanistan Albania Armenia Angola Azerbaijan ... Indonesia
# Merge country name from `gis.dt` into `clust`
clust$cntry <- gis.dt[clust, mult="first"][, cntryNameEN]
clust[is.na(cntry), .N, by=svyCode]
## Empty data.table (0 rows) of 2 cols: svyCode,N
# => all matched

# Number of clusters in each last phase survey with coords and data on stunting rates
tmp <- clust[phase.last==T, list(N=.N,
  with.data=sum(!is.na(ic_stunted_moderate) & !is.na(X))),
  by=list(country_code_iso, cntry, svyCode)]
tmp
##     country_code_iso                     cntry svyCode    N with.data
##  1:               AL                   Albania 2008AL5  450       404
##  2:               AM                   Armenia 2010AM6  308         0
##  3:               AZ                Azerbaijan 2006AZ5  318         0
##  4:               BD                Bangladesh 2011BD6  600       600
##  5:               BF              Burkina Faso 2010BF6  573       540
##  6:               BI                   Burundi 2010BU6  376       376
##  7:               BJ                     Benin 2011BJ6  750       749
##  8:               BO                   Bolivia 2008BO5  999       985
##  9:               BR                    Brazil 1996BR3  827         0
## 10:               CD Congo Democratic Republic 2013CD6  536       492
## 11:               CF                       CAR 1994CF3  231       229
## 12:               CG       Congo (Brazzaville) 2011CG6  384         0
## 13:               CI             Cote d'Ivoire 2011CI6  351       340
## 14:               CM                  Cameroon 2011CM6  578       574
## 15:               CO                  Colombia 2010CO5 4927      4278
## 16:               DO        Dominican Republic 2013DR6  524       513
## 17:               EG                     Egypt 2008EG5 1264      1217
## 18:               ET                  Ethiopia 2011ET6  596       571
## 19:               GA                     Gabon 2012GA6  335       324
## 20:               GH                     Ghana 2008GH5  411       400
## 21:               GN                    Guinea 2012GN6  300       300
## 22:               GT                 Guatemala 1998GU3  278         0
## 23:               GY                    Guyana 2009GY5  325       306
## 24:               HN                  Honduras 2011HN6 1147      1119
## 25:               HT                     Haiti 2012HT6  445       435
## 26:               ID                 Indonesia 2012ID6 1831         0
## 27:               IN                     India 2005IA5 3850         0
## 28:               JO                    Jordan 2012JO6  806       798
## 29:               KE                     Kenya 2008KE5  398       397
## 30:               KG           Kyrgyz Republic 2012KY6  316       313
## 31:               KH                  Cambodia 2010KH5  611       603
## 32:               KM                   Comoros 2012KM6  252       241
## 33:               KZ                Kazakhstan 1999KK3  251         0
## 34:               LR                   Liberia 2013LB6  322       322
## 35:               LS                   Lesotho 2009LS5  400       383
## 36:               MA                   Morocco 2003MA4  480       479
## 37:               MD                   Moldova 2005MB4  400       378
## 38:               MG                Madagascar 2008MD5  594       583
## 39:               ML                      Mali 2012ML6  413       412
## 40:               MV                  Maldives 2009MV5  270         0
## 41:               MW                    Malawi 2010MW5  849       810
## 42:               MZ                Mozambique 2011MZ6  610       610
## 43:               NA                   Namibia 2013NM6  549         0
## 44:               NE                     Niger 2012NI6  476         0
## 45:               NG                   Nigeria 2013NG6  896       888
## 46:               NI                 Nicaragua 2001NC4  609         0
## 47:               NP                     Nepal 2011NP6  289       287
## 48:               PE                      Peru 2011PE6 1132         0
## 49:               PH               Philippines 2013PH6  800         0
## 50:               PK                  Pakistan 2012PK6  498         0
## 51:               PY                  Paraguay 1990PY2  264         0
## 52:               RW                    Rwanda 2010RW6  492       492
## 53:               SL              Sierra Leone 2013SL6  435       434
## 54:               SN                   Senegal 2012SN6  200         0
## 55:               ST     Sao Tome and Principe 2008ST5  104         0
## 56:               SZ                 Swaziland 2006SZ5  274       267
## 57:               TD                      Chad 2004TD4  196         0
## 58:               TJ                Tajikistan 2012TJ6  355       354
## 59:               TL               Timor-Leste 2009TL5  455       454
## 60:               TR                    Turkey 2003TR4  686         0
## 61:               TZ                  Tanzania 2010TZ5  475       458
## 62:               UA                   Ukraine 2007UA5  500         0
## 63:               UG                    Uganda 2011UG6  404       392
## 64:               UZ                Uzbekistan 1996UZ3  168         0
## 65:               VN                   Vietnam 2002VN4  205         0
## 66:               YE                     Yemen 1991YE2  257         0
## 67:               ZA              South Africa 1998ZA3  958         0
## 68:               ZM                    Zambia 2007ZM5  319       319
## 69:               ZW                  Zimbabwe 2010ZW6  406       393
##     country_code_iso                     cntry svyCode    N with.data
# Actual number of data points to map
sum(tmp$with.data)
## [1] 25819
# Save all cluster values for  re-use
saveRDS(clust, "../temp/dhsClusters.rds", compress=T)

# Remove clusters with missing coords and convert to spatial points
gis.clust <- clust[!is.na(X)]
setcolorder(gis.clust, c(1:5, 15:16, 6:14))
setnames(gis.clust, 1, "iso")

# Convert to spatial
gis.clust <- SpatialPointsDataFrame(gis.clust[, list(X,Y)], as.data.frame(gis.clust),
  proj4string=CRS("+init=epsg:4326"))

spplot(gis.clust, "ic_stunted_moderate", colorkey=T, cex=.4)

# Export to shapefile
writeOGR(gis.clust, "../maps", "dhsMap_ClusterCentroids", "ESRI Shapefile",
  overwrite=T)
## Warning in writeOGR(gis.clust, "../out/r15.05", "dhsMap_ClusterCentroids", "ESRI
## Shapefile", : Field names abbreviated for ESRI Shapefile driver
# Check if generated DBF contains any negative values?
tmp <- readOGR("../maps", "dhsMap_ClusterCentroids")
## OGR data source with driver: ESRI Shapefile
## Source: "../out/r15.05", layer: "dhsMap_ClusterCentroids"
## with 58969 features
## It has 16 fields
tmp.dt <- data.table(tmp@data)
phist(tmp.dt$ic_stntd_m, main="Stunting - Moderate")