Calorie intake and income in Vietnam

Code R paper: Huong Trinh Thi, Michel Simioni, Christine Thomas-Agnan, Assessing the nonlinearity of the calorie-income relationship: An estimation strategy - With new insights on nutritional transition in Vietnam, World Development, Volume 110, October 2018, Pages 192-204, ISSN 0305-750X, https://doi.org/10.1016/j.worlddev.2018.05.030. (https://www.sciencedirect.com/science/article/pii/S0305750X18301785)

RData are here:https://drive.google.com/drive/folders/1m7NBR2b0FkgBuk-isjOq8d9mPr5p2og5?usp=sharing.

Load data and packages

setwd("C:/Users/trinh/Documents/Study/Public paper/World Development 2018/Data")
load("VHLSS.RData")
load("CV.VHLSS.2004.RData")
load("CV.VHLSS.2006.RData")
load("CV.VHLSS.2008.RData")
load("CV.VHLSS.2010.RData")
load("CV.VHLSS.2012.RData")
load("CV.VHLSS.2014.RData")
load("DecomVHLSS0604.RData")
load("DecomVHLSS0804.RData")
load("DecomVHLSS1004.RData")
load("DecomVHLSS1204.RData")
load("DecomVHLSS1404.RData")
load("Muc1A.RData")
load("Weight.2012.RData")
require(mgcv)

## Loading required package: mgcv

## Loading required package: nlme

## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.

require(xtable)

## Loading required package: xtable

require(reshape)

## Loading required package: reshape

require(qpcR)

## Loading required package: qpcR

## Loading required package: MASS

## Loading required package: minpack.lm

## Loading required package: rgl

## Loading required package: robustbase

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:reshape':
## 
##     expand

All Functions by myself for this paper (Great thanks to Tibo)

#function to create a description data
multi.fun<- function(x)
{
  #x is the panel data with all year
  #fator.nam is the name of all factors in the data
  #conti.name is the name of all continues variables in the data
  cbind(freq = table(x),
        percentage = round(prop.table(table(x))*100, 2))
}

fun.DescriptionData <- function(don1, factor.name, conti.name)
    {
  result <- NULL
    years = sort(unique(don1[,"year"]))
    for (k in years)
        {
        don = don1[don1[,"year"] == k,]
        percen.factor <- length(don$PCE)
        names.year <- conti.name
        for (i in conti.name)
        {percen.factor<-c(percen.factor,
                                paste(round(mean(don[, i]), 1),
                                      "(",round(sd(don[, i]), 1), ")"))}
        for(j in factor.name)
              {
              names.year = c(names.year, levels(don[, j]))
              persent <- multi.fun(don[,j])
              percen.factor = c(percen.factor, paste(persent[,"percentage"], "%")) }
        result<-cbind(result, percen.factor)
    }
    result <- data.frame(result)
    n <- length(names.year)
  rownames(result) = c("Number Obs", paste(names.year[1]), paste(names.year[2]),
                       names.year[3:n])
  names(result) <- years
  return(result) 
}

f.smooth.DLM.link <- function(don,year,model.DLM)
{ # this code for both GAMGAULog and GAMGAMLog, VHLSS and CHNS
  DON.new <- NULL
    n<-length(VHLSS[which(VHLSS$year==year),"ID"])
    DON <- VHLSS[which(VHLSS$year==year),]
    DON.new <- data.frame(PCE=seq(min(DON$PCE),max(DON$PCE),length.out=n),
                          URBAN=factor(rep("RURAL",n)),
                          HSIZE=factor(rep("HSIZE4",n)),
                          ETHNIC=factor(rep("KINH" ,n)),
                          WA=factor(rep("WA1",n)),
                          EDUCH=factor(rep("EDUCH1",n)),
                          GENDER=factor(rep("MALE",n)),
                          AREA=factor(rep("Mekong",n)))

  pre.DLM <- predict(model.DLM, type="response", newdata=DON.new ,  se.fit=TRUE)
  seq.PCE <- DON.new$PCE
  e.fit.DLM <- exp(pre.DLM$fit+ (summary(model.DLM)$sigma)^2/2)
  fit.up95.DLM <-exp(pre.DLM$fit + (summary(model.DLM)$sigma)^2/2 -1.96*pre.DLM$se.fit)
  fit.low95.DLM<-exp(pre.DLM$fit + (summary(model.DLM)$sigma)^2/2 +1.96*pre.DLM$se.fit)
  return(c(seq.PCE,e.fit.DLM, fit.up95.DLM,fit.low95.DLM))
}


f.smooth.GAMLog.link <- function(don,year,model.gam)
{ # this code for both GAMGAULog and GAMGAMLog, VHLSS and CHNS
  DON.new <- NULL

    n<-length(VHLSS[which(VHLSS$year==year),"ID"])
    DON <- VHLSS[which(VHLSS$year==year),]
    DON.new <- data.frame(PCE=seq(min(DON$PCE),max(DON$PCE),length.out=n),
                          URBAN=factor(rep("RURAL",n)),
                          HSIZE=factor(rep("HSIZE4",n)),
                          ETHNIC=factor(rep("KINH" ,n)),
                          WA=factor(rep("WA1",n)),
                          EDUCH=factor(rep("EDUCH1",n)),
                          GENDER=factor(rep("MALE",n)),
                          AREA=factor(rep("Mekong",n)))

  pre.GAMLog <- predict(model.gam, type="link", newdata=DON.new ,  se.fit=TRUE)
  seq.PCE <- DON.new$PCE
  e.fit.GAMLog <- exp(pre.GAMLog$fit)
  fit.up95.GAMLog <-exp(pre.GAMLog$fit -1.96*pre.GAMLog$se.fit)
  fit.low95.GAMLog<-exp(pre.GAMLog$fit+1.96*pre.GAMLog$se.fit)
  return(c(seq.PCE,e.fit.GAMLog, fit.up95.GAMLog,fit.low95.GAMLog))
}


f.smooth.GAMId.link <- function(don,year,model.gam)
{ # this code for both GAMGAULog and GAMGAMLog, VHLSS and CHNS
  DON.new <- NULL
    n<-length(VHLSS[which(VHLSS$year==year),"ID"])
    DON <- VHLSS[which(VHLSS$year==year),]
    DON.new <- data.frame(PCE=seq(min(DON$PCE),max(DON$PCE),length.out=n),
                          URBAN=factor(rep("RURAL",n)),
                          HSIZE=factor(rep("HSIZE4",n)),
                          ETHNIC=factor(rep("KINH" ,n)),
                          WA=factor(rep("WA1",n)),
                          EDUCH=factor(rep("EDUCH1",n)),
                          GENDER=factor(rep("MALE",n)),
                          AREA=factor(rep("Mekong",n)))

  pre.GAMLog <- predict(model.gam, type="link", newdata=DON.new ,  se.fit=TRUE)
  seq.PCE <- DON.new$PCE
  e.fit.GAMLog <- pre.GAMLog$fit
  fit.up95.GAMLog <-pre.GAMLog$fit -1.96*pre.GAMLog$se.fit
  fit.low95.GAMLog<-pre.GAMLog$fit+1.96*pre.GAMLog$se.fit
  return(c(seq.PCE,e.fit.GAMLog, fit.up95.GAMLog,fit.low95.GAMLog))
}

####----------------------Function Table 2: t-paired test results
#function to take main information from 1 paired test
res.paired <- function(a,b)
{
  test.p <- t.test(a,b, paired=TRUE)
  star.paired = ifelse(test.p$p.value > 0.1, " ",
                    ifelse(test.p$p.value <=0.1 & test.p$p.value > 0.05, ".",
                           ifelse(test.p$p.value <= 0.05 & test.p$p.value > 0.01, "*",
                                  ifelse(test.p$p.value <= 0.01 & test.p$p.value >0.001, "**", "***"))))
  res<- paste0(round(test.p$statistic,2),star.paired )
  return(res)
}


paire.year <- function(CV,year.t)
{
  list.model<- c("LLD", "GAMGauId", "GAMGauLog")
  yeat.test <- c(year.t,paste(""),paste(""))
  pairedtest<- data.frame(yeat.test,list.model,
                          rbind(cbind(res.paired(CV[,1],CV[,2]), res.paired(CV[,1],CV[,4]),res.paired(CV[,1],CV[,3])),
                          cbind(paste0(" "), res.paired(CV[,2],CV[,4]), res.paired(CV[,2],CV[,3])),
                          cbind(paste0(" "), paste0(" "), res.paired(CV[,4],CV[,3]))))

  #row.names(pairedtest) <- list.model
  names(pairedtest) <- c("year", "Models" ,"GAMGauId", "GAMGauLog", "GAMGamLog" )
  return(pairedtest)
}#Done!! bravo!!!

##------------------------Table 4 ----------------------
####- Linearity test

#function to take star for smooth term
star.smooth <- function(mo.gam)
  # function to take value for s_NL(.)=0 in this line "Linearity Test when GAM choosen"
  # also, to take F value when GAM is choosen s(.)
{
  sta <- ifelse(summary(mo.gam)$s.table[4]>0.1, " ",
                ifelse(summary(mo.gam)$s.table[4]<=0.1 & summary(mo.gam)$s.table[4] >0.05, ".",
                       ifelse(summary(mo.gam)$s.table[4]<=0.05 & summary(mo.gam)$s.table[4] >0.01, "*",
                              ifelse(summary(mo.gam)$s.table[4]<=0.01 & summary(mo.gam)$s.table[4] >0.001, "**", "***"))))
  res <- c(paste0( round(summary(mo.gam)$s.table[3],3),sta))
  return(res)
}

#function to take the coefficient of t value for HHINC when GAM is choosen, \hat{gamma_1}

test.HHINC <- function(model.gamtest)
{
  a.HHINC <- 2*pnorm(abs(coefficients(model.gamtest)["PCE"]/sqrt(diag(model.gamtest$Vp)[2])),lower.tail=FALSE)
  star.HHINC = ifelse(a.HHINC>0.1, " ",
                      ifelse(a.HHINC<=0.1 & a.HHINC >0.05, ".",
                             ifelse(a.HHINC<=0.05 & a.HHINC >0.01, "*",
                                    ifelse(a.HHINC<=0.01 & a.HHINC >0.001, "**", "***"))))
  res <- paste0(round(summary(model.gamtest)$p.t["PCE"], 3), star.HHINC)
  return(res)
}

### Need to ask again Christine or Michel for this calculation
Significant.test.DLM <- function(modelDLM, year)
{
  VHLSS.DLM.without <-  lm(log(PCCI)~ URBAN + HSIZE + ETHNIC + WA + EDUCH + GENDER+AREA,
                                data=VHLSS[which(VHLSS$year == year),])
  df.urss <- dim(VHLSS[which(VHLSS$year == year),])[1] - length(coefficients(modelDLM))
  URSS <- RSS(modelDLM)
  RRSS <- RSS(VHLSS.DLM.without)
  F.value <- ((RRSS-URSS)/2)/((URSS)/(df.urss)) # follow F-distribution with degree of freedom: (2, df.urss)
  p.value <- pf(F.value, 2, df.urss, lower.tail = F)
  star.pvalue = ifelse(p.value > 0.1 , " ",
                      ifelse(p.value <= 0.1 & p.value >0.05, ".",
                             ifelse(p.value <= 0.05 &p.value >0.01, "*",
                                    ifelse(p.value <= 0.01 & p.value > 0.001, "**", "***"))))
  return(paste0(round(F.value, 2), star.pvalue))
}
## function to take \alpha_1 and \alpha2

alpha1 <- function(model.DLM)
{
  p.v.DLM = 2*pnorm(abs(summary(model.DLM)$coefficients[, 1]/summary(model.DLM)$coefficients[, 2]),lower.tail=FALSE)
  star.DLM = ifelse(p.v.DLM > 0.1, " ",
                    ifelse(p.v.DLM <=0.1 & p.v.DLM > 0.05, ".",
                           ifelse(p.v.DLM <= 0.05 & p.v.DLM > 0.01, "*",
                                  ifelse(p.v.DLM <= 0.01 & p.v.DLM >0.001, "**", "***"))))
  res <- paste0(round(coefficients(model.DLM)["log(PCE)"],3),star.DLM["log(PCE)"] )
  return(res)
}

alpha2 <- function(model.DLM)
{
  p.v.DLM = 2*pnorm(abs(summary(model.DLM)$coefficients[, 1]/summary(model.DLM)$coefficients[, 2]),lower.tail=FALSE)
  star.DLM = ifelse(p.v.DLM > 0.1, " ",
                    ifelse(p.v.DLM <=0.1 & p.v.DLM > 0.05, ".",
                           ifelse(p.v.DLM <= 0.05 & p.v.DLM > 0.01, "*",
                                  ifelse(p.v.DLM <= 0.01 & p.v.DLM >0.001, "**", "***"))))
  res <- paste0(round(coefficients(model.DLM)["I((log(PCE))^2)"],3),star.DLM["I((log(PCE))^2)"] )
  return(res)
}

count.Weight <- function(household)
{
  #E1 = 0.8 elderly > 65 year olds + male
  # E2 = 0.69 elderly > 65 year olds and female
  # A1 = 0.97 Male,, 18-65 year olds
  # A2 = 0.83 female 18 - 65 year olds
  # Y1 = 0.78 male 5 - 17 year olds
  # Y2 = 
    
  #c2: the number of child 0-2 aged
  #c3: The number of child 3-5 aged
  #c4: the number of child 6-13 aged
  #c5 The number of child 14-17 aged
  #c6: The number of child 18-21 aged
  #c7 the nubmer of child+male 14-17 aged
  #c8 The number of child + male 18-21 aged
  #c9 the number of child 0-2 aged and male
  #c10 c3: The number of child 3-5 aged and male
  #c11 the number of child 6-13 aged and male
  #c12 is dummy variable whether age of household is in 26-75 age or not
  c1 <- length(household[which(household$RELATIONSHIP!="head" & household$AGE > 17),"ID"])
  c2 <- length(household[which(household$AGE < 2.1& household$SEX=="F"),"ID"])
  c3 <- length(household[which(household$AGE < 2.1& household$SEX=="M"),"ID"])
  
  c4 <- length(household[which(2< household$AGE & household$AGE < 5.1& household$SEX=="F"),"ID"])
  c5 <- length(household[which(2< household$AGE & household$AGE < 5.1& household$SEX=="M"),"ID"])
  
  c6 <- length(household[which(5< household$AGE & household$AGE < 13.1& household$SEX=="F"),"ID"])
  c7 <- length(household[which(5< household$AGE & household$AGE < 13.1& household$SEX=="M"),"ID"])
  
  c8 <- length(household[which(13<household$AGE & household$AGE < 17.1& household$SEX=="F"),"ID"])
  c9 <- length(household[which(13<household$AGE & household$AGE < 17.1& household$SEX=="M"),"ID"])
  
  c11 <- household[which(household$RELATIONSHIP=="head"),"SEX"]
  c12 <- household[which(household$RELATIONSHIP=="head"),"AGE"]
  return(c(c1,c2,c3,c4,c5,c6,c7,c8,c9,c11,c12))
}

Create Table 1: Description dataset

factor.name<-c("URBAN","HSIZE","ETHNIC","GENDER","WA","EDUCH","AREA")
conti.name<-c("PCE","PCCI")
DescriptionVHLSS<-fun.DescriptionData(VHLSS,factor.name,conti.name)
xtable(DescriptionVHLSS,align=c(rep("|c",length(names(DescriptionVHLSS))+1),"|"))

## % latex table generated in R 3.3.2 by xtable 1.8-2 package
## % Sat Jun 02 00:16:46 2018
## \begin{table}[ht]
## \centering
## \begin{tabular}{|c|c|c|c|c|c|c|}
##   \hline
##  & 2004 & 2006 & 2008 & 2010 & 2012 & 2014 \\ 
##   \hline
## Number Obs & 8269 & 8325 & 8305 & 8469 & 8465 & 8427 \\ 
##   PCE & 335.3 ( 211.8 ) & 374.6 ( 239.4 ) & 435.8 ( 272.3 ) & 570.5 ( 337.2 ) & 597.7 ( 342.2 ) & 622.2 ( 343.6 ) \\ 
##   PCCI & 3297.7 ( 805.4 ) & 3275.9 ( 845.9 ) & 2820.6 ( 651.3 ) & 3634.6 ( 980.1 ) & 3614.7 ( 1120.7 ) & 3655.2 ( 1120.3 ) \\ 
##   RURAL & 76.68 \% & 75.58 \% & 74.89 \% & 72.51 \% & 71.59 \% & 70.96 \% \\ 
##   URBAN & 23.32 \% & 24.42 \% & 25.11 \% & 27.49 \% & 28.41 \% & 29.04 \% \\ 
##   HSIZE2 & 10.39 \% & 12.11 \% & 14.04 \% & 15.91 \% & 17.35 \% & 18.96 \% \\ 
##   HSIZE3 & 15.36 \% & 16.56 \% & 17.07 \% & 19.87 \% & 18.96 \% & 19.97 \% \\ 
##   HSIZE4 & 30.61 \% & 31.27 \% & 31.78 \% & 33.81 \% & 32.56 \% & 31.09 \% \\ 
##   HSIZE5 & 21.74 \% & 20.72 \% & 19.45 \% & 16.81 \% & 17.52 \% & 16.6 \% \\ 
##   HSIZE6 & 21.9 \% & 19.34 \% & 17.66 \% & 13.6 \% & 13.61 \% & 13.37 \% \\ 
##   KINH0 & 15.12 \% & 15.75 \% & 15.26 \% & 17.74 \% & 17.78 \% & 17.24 \% \\ 
##   KINH & 84.88 \% & 84.25 \% & 84.74 \% & 82.26 \% & 82.22 \% & 82.76 \% \\ 
##   MALE & 77.1 \% & 76.36 \% & 76.36 \% & 76.14 \% & 76.28 \% & 75.63 \% \\ 
##   FEMALE & 22.9 \% & 23.64 \% & 23.64 \% & 23.86 \% & 23.72 \% & 24.37 \% \\ 
##   WA0 & 30.83 \% & 39.5 \% & 36.03 \% & 37.62 \% & 34.78 \% & 31.1 \% \\ 
##   WA1 & 69.17 \% & 60.5 \% & 63.97 \% & 62.38 \% & 65.22 \% & 68.9 \% \\ 
##   EDUCH1 & 54.92 \% & 53.27 \% & 52.04 \% & 52.08 \% & 51.23 \% & 49.64 \% \\ 
##   EDUCH2 & 41.07 \% & 42.52 \% & 43.82 \% & 42.48 \% & 43.35 \% & 44.35 \% \\ 
##   EDUCH3 & 4.01 \% & 4.2 \% & 4.14 \% & 5.43 \% & 5.41 \% & 6.02 \% \\ 
##   RedRiver & 21.44 \% & 21 \% & 21 \% & 16.81 \% & 16.57 \% & 21.23 \% \\ 
##   MidlNorthMount & 19.58 \% & 19.54 \% & 18.92 \% & 13.44 \% & 13.55 \% & 18.1 \% \\ 
##   NorthCoastCent & 20.01 \% & 20.29 \% & 20.46 \% & 22.03 \% & 21.61 \% & 21.53 \% \\ 
##   CentHigh & 6.41 \% & 6.22 \% & 6.42 \% & 6.88 \% & 6.91 \% & 6.49 \% \\ 
##    Southeastern & 11.79 \% & 12.2 \% & 12.53 \% & 11.35 \% & 11.64 \% & 11.72 \% \\ 
##   Mekong & 20.76 \% & 20.74 \% & 20.67 \% & 29.48 \% & 29.72 \% & 20.93 \% \\ 
##    \hline
## \end{tabular}
## \end{table}

All regression models in papers

There are total 24 regression models (4 models per year)

VHLSS.DLM.2004 <- lm(log(PCCI)~log(PCE)+I((log(PCE))^2)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA,
                     data=VHLSS[which(VHLSS$year==2004),])
VHLSS.DLM.2006 <- lm(log(PCCI)~log(PCE)+I((log(PCE))^2)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA,
                     data=VHLSS[which(VHLSS$year==2006),])
VHLSS.DLM.2008 <- lm(log(PCCI)~log(PCE)+I((log(PCE))^2)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA,
                     data=VHLSS[which(VHLSS$year==2008),])
VHLSS.DLM.2010 <- lm(log(PCCI)~log(PCE)+I((log(PCE))^2)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA,
                     data=VHLSS[which(VHLSS$year==2010),])
VHLSS.DLM.2012 <- lm(log(PCCI)~log(PCE)+I((log(PCE))^2)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA,
                     data=VHLSS[which(VHLSS$year==2012),])
VHLSS.DLM.2014 <- lm(log(PCCI)~log(PCE)+I((log(PCE))^2)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA,
                     data=VHLSS[which(VHLSS$year==2014),])
# Model for VHLSS data
fomular.VHLSS <- PCCI ~ s(PCE)+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA
VHLSS.GAMGAUId.2004 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "identity"),
                           data=VHLSS[which(VHLSS$year=="2004"),])
VHLSS.GAMGAULog.2004 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2004"),])
VHLSS.GAMGAMLog.2004 <- gam(fomular.VHLSS, method="REML", family  = Gamma(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2004"),])

VHLSS.GAMGAUId.2006 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "identity"),
                           data=VHLSS[which(VHLSS$year=="2006"),])
VHLSS.GAMGAULog.2006 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2006"),])
VHLSS.GAMGAMLog.2006 <- gam(fomular.VHLSS, method="REML", family  = Gamma(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2006"),])

VHLSS.GAMGAUId.2008 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "identity"),
                           data=VHLSS[which(VHLSS$year=="2008"),])
VHLSS.GAMGAULog.2008 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2008"),])
VHLSS.GAMGAMLog.2008 <- gam(fomular.VHLSS, method="REML", family  = Gamma(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2008"),])

VHLSS.GAMGAUId.2010 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "identity"),
                           data=VHLSS[which(VHLSS$year=="2010"),])
VHLSS.GAMGAULog.2010 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2010"),])
VHLSS.GAMGAMLog.2010 <- gam(fomular.VHLSS, method="REML", family  = Gamma(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2010"),])

VHLSS.GAMGAUId.2012 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "identity"),
                           data=VHLSS[which(VHLSS$year=="2012"),])
VHLSS.GAMGAULog.2012 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2012"),])
VHLSS.GAMGAMLog.2012 <- gam(fomular.VHLSS, method="REML", family  = Gamma(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2012"),])

VHLSS.GAMGAUId.2014 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "identity"),
                           data=VHLSS[which(VHLSS$year=="2014"),])
VHLSS.GAMGAULog.2014 <- gam(fomular.VHLSS, method="REML", family  = gaussian(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2014"),])
VHLSS.GAMGAMLog.2014 <- gam(fomular.VHLSS, method="REML", family  = Gamma(link = "log"),
                            data=VHLSS[which(VHLSS$year=="2014"),])

R-square off all models

Rsquare.VHLSS <- data.frame(rbind(
  c(summary(VHLSS.DLM.2004)$r.squared,summary(VHLSS.GAMGAUId.2004)$r.sq,summary(VHLSS.GAMGAULog.2004)$r.sq,summary(VHLSS.GAMGAMLog.2004)$r.sq),
  c(summary(VHLSS.DLM.2006)$r.squared,summary(VHLSS.GAMGAUId.2006)$r.sq,summary(VHLSS.GAMGAULog.2006)$r.sq,summary(VHLSS.GAMGAMLog.2006)$r.sq),
  c(summary(VHLSS.DLM.2008)$r.squared,summary(VHLSS.GAMGAUId.2008)$r.sq,summary(VHLSS.GAMGAULog.2008)$r.sq,summary(VHLSS.GAMGAMLog.2008)$r.sq),
  c(summary(VHLSS.DLM.2010)$r.squared,summary(VHLSS.GAMGAUId.2010)$r.sq,summary(VHLSS.GAMGAULog.2010)$r.sq,summary(VHLSS.GAMGAMLog.2010)$r.sq),
  c(summary(VHLSS.DLM.2012)$r.squared,summary(VHLSS.GAMGAUId.2012)$r.sq,summary(VHLSS.GAMGAULog.2012)$r.sq,summary(VHLSS.GAMGAMLog.2012)$r.sq),
  c(summary(VHLSS.DLM.2014)$r.squared,summary(VHLSS.GAMGAUId.2014)$r.sq,summary(VHLSS.GAMGAULog.2014)$r.sq,summary(VHLSS.GAMGAMLog.2014)$r.sq)))
names(Rsquare.VHLSS) <- c("DLM","GAMGAUId","GAMGAULog","GAMGAMLog")
row.names(Rsquare.VHLSS) <- c("2004","2006","2008","2010","2012","2014")
xtable(t(Rsquare.VHLSS),digits=4)

## % latex table generated in R 3.3.2 by xtable 1.8-2 package
## % Sat Jun 02 00:20:19 2018
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
##   \hline
##  & 2004 & 2006 & 2008 & 2010 & 2012 & 2014 \\ 
##   \hline
## DLM & 0.0879 & 0.0862 & 0.1583 & 0.1082 & 0.1135 & 0.1483 \\ 
##   GAMGAUId & 0.0812 & 0.0775 & 0.1504 & 0.0971 & 0.0970 & 0.1311 \\ 
##   GAMGAULog & 0.0813 & 0.0774 & 0.1494 & 0.0965 & 0.0956 & 0.1293 \\ 
##   GAMGAMLog & 0.0812 & 0.0773 & 0.1491 & 0.0963 & 0.0953 & 0.1287 \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 2: t-paired test results.

paire.VHLSS.2004 <- paire.year(CV.VHLSS.2004,2004)
paire.VHLSS.2006 <- paire.year(CV.VHLSS.2006,2006)
paire.VHLSS.2008 <- paire.year(CV.VHLSS.2008,2008)
paire.VHLSS.2010 <- paire.year(CV.VHLSS.2010,2010)
paire.VHLSS.2012 <- paire.year(CV.VHLSS.2012,2012)
paire.VHLSS.2014 <- paire.year(CV.VHLSS.2014,2014)
paire.VHLSS <- data.frame(rbind(paire.VHLSS.2004,paire.VHLSS.2006,paire.VHLSS.2008,
                                paire.VHLSS.2010, paire.VHLSS.2012, paire.VHLSS.2014))
xtable(paire.VHLSS)

## % latex table generated in R 3.3.2 by xtable 1.8-2 package
## % Sat Jun 02 00:20:19 2018
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllll}
##   \hline
##  & year & Models & GAMGauId & GAMGauLog & GAMGamLog \\ 
##   \hline
## 1 & 2004 & LLD & -11.64*** & -10.2*** & -14.7*** \\ 
##   2 &  & GAMGauId &   & 4.67*** & -7.78*** \\ 
##   3 &  & GAMGauLog &   &   & -11.89*** \\ 
##   4 & 2006 & LLD & 17.14*** & 12.79*** & 9.3*** \\ 
##   5 &  & GAMGauId &   & -19.6*** & -29.49*** \\ 
##   6 &  & GAMGauLog &   &   & -11.9*** \\ 
##   7 & 2008 & LLD & 62.38*** & 21.77*** & 13.67*** \\ 
##   8 &  & GAMGauId &   & -87.88*** & -95.8*** \\ 
##   9 &  & GAMGauLog &   &   & -19.98*** \\ 
##   10 & 2010 & LLD & 19.26*** & -10.02*** & -16.74*** \\ 
##   11 &  & GAMGauId &   & -73.06*** & -79.93*** \\ 
##   12 &  & GAMGauLog &   &   & -15.04*** \\ 
##   13 & 2012 & LLD & 58.25*** & 2.41* & -5.34*** \\ 
##   14 &  & GAMGauId &   & -164.72*** & -149.59*** \\ 
##   15 &  & GAMGauLog &   &   & -16.28*** \\ 
##   16 & 2014 & LLD & 70.01*** & -23.97*** & -49.93*** \\ 
##   17 &  & GAMGauId &   & -174.34*** & -163.31*** \\ 
##   18 &  & GAMGauLog &   &   & -31.15*** \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 4: Results of significance and linearity tests

#--- Table for linearity Test

#VHLSS
fomular.VHLSS.testLN <- PCCI~s(PCE,m=c(2,0))+PCE+URBAN+HSIZE+ETHNIC+WA+EDUCH+GENDER+AREA

GAMGauID.2006.TestLinear.VHLSS <- gam(fomular.VHLSS.testLN,  
                                      family  = gaussian(link = "identity"),
                                      data=VHLSS[which(VHLSS$year=="2006"),])

GAMGauID.2008.TestLinear.VHLSS <- gam(fomular.VHLSS.testLN,  
                                      family  = gaussian(link = "identity"), data=VHLSS[which(VHLSS$year=="2008"),])

GAMGauID.2010.TestLinear.VHLSS <- gam(fomular.VHLSS.testLN,  
                                      family  = gaussian(link = "identity"),
                                      data=VHLSS[which(VHLSS$year=="2010"),])

GAMGauID.2012.TestLinear.VHLSS <- gam(fomular.VHLSS.testLN, 
                                      family  = gaussian(link = "identity"),
                                      data=VHLSS[which(VHLSS$year=="2012"),])
GAMGauID.2014.TestLinear.VHLSS <- gam(fomular.VHLSS.testLN, 
                                      family  = gaussian(link = "identity"),
                                      data=VHLSS[which(VHLSS$year=="2014"),])

### Linearity test for VHLSS
VHLSS.line1 <- cbind(Significant.test.DLM(VHLSS.DLM.2004, 2004), 
                     paste0("---"),paste0("---"),
                     paste0("---"), paste0("---"), paste0("---"))
VHLSS.line2 <- cbind(alpha1(VHLSS.DLM.2004),
                     paste0("---"), paste0("---"),
                     paste0("---"), paste0("---"), paste0("---"))
VHLSS.line3 <- cbind(alpha2(VHLSS.DLM.2004),
                     paste0("---"),   paste0("---"),
                     paste0("---"), paste0("---"), paste0("---"))
VHLSS.line4 <- cbind(paste0("---"), 
                     star.smooth(VHLSS.GAMGAUId.2006), 
                     star.smooth(VHLSS.GAMGAUId.2008),
                     star.smooth(VHLSS.GAMGAUId.2010), 
                     star.smooth(VHLSS.GAMGAUId.2012), 
                     star.smooth(VHLSS.GAMGAUId.2014))

VHLSS.line5 <- cbind( paste0("---"), 
                      test.HHINC(GAMGauID.2006.TestLinear.VHLSS), 
                      test.HHINC(GAMGauID.2008.TestLinear.VHLSS),
                      test.HHINC(GAMGauID.2010.TestLinear.VHLSS), 
                      test.HHINC(GAMGauID.2012.TestLinear.VHLSS),
                      test.HHINC(GAMGauID.2014.TestLinear.VHLSS))

VHLSS.line6 <- cbind( paste0("---"),
                      star.smooth(GAMGauID.2006.TestLinear.VHLSS), 
                      star.smooth(GAMGauID.2008.TestLinear.VHLSS),
                      star.smooth(GAMGauID.2010.TestLinear.VHLSS), 
                      star.smooth(GAMGauID.2012.TestLinear.VHLSS),
                      star.smooth(GAMGauID.2014.TestLinear.VHLSS))
test.VHLSS <- data.frame(rbind(VHLSS.line1, VHLSS.line2, VHLSS.line3, VHLSS.line4, VHLSS.line5, VHLSS.line6))
names(test.VHLSS) <- c("2004", "2006", "2008", "2010", "2012", "2014")
row.names(test.VHLSS) <- c("H0alpha12", "alpha1", "alpha2", "H0S(.)", "gamma1", "s_NL(.)")
xtable(test.VHLSS)

## % latex table generated in R 3.3.2 by xtable 1.8-2 package
## % Sat Jun 02 00:20:22 2018
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllllll}
##   \hline
##  & 2004 & 2006 & 2008 & 2010 & 2012 & 2014 \\ 
##   \hline
## H0alpha12 & 205.97*** & --- & --- & --- & --- & --- \\ 
##   alpha1 & 0.63*** & --- & --- & --- & --- & --- \\ 
##   alpha2 & -0.045*** & --- & --- & --- & --- & --- \\ 
##   H0S(.) & --- & 47.964*** & 107.72*** & 86.464*** & 71.641*** & 101.726*** \\ 
##   gamma1 & --- & 5.342*** & 6.839*** & 4.39*** & 4.091*** & 5.18*** \\ 
##   s\_NL(.) & --- & 19.147*** & 40.183*** & 21.729*** & 17.506*** & 23.949*** \\ 
##    \hline
## \end{tabular}
## \end{table}

Figure 1: Estimated calorie-income relationships for Vietnam.

Get predict values for only referred models

#-- year 2004 : DLM
income.2004.VHLSS <- matrix(unlist(f.smooth.DLM.link("Vietnam","2004",VHLSS.DLM.2004)), 
                            ncol = 4,byrow = FALSE)
#--year 2006 - 2014 GAMGAUId
income.2006.VHLSS <- matrix(unlist(f.smooth.GAMId.link("Vietnam","2006",VHLSS.GAMGAUId.2006)),
                            ncol=4,byrow=FALSE)
income.2008.VHLSS <- matrix(unlist(f.smooth.GAMId.link("Vietnam","2008",VHLSS.GAMGAUId.2008)),
                            ncol=4,byrow=FALSE)
income.2010.VHLSS <- matrix(unlist(f.smooth.GAMId.link("Vietnam","2010",VHLSS.GAMGAUId.2010)),
                            ncol=4,byrow=FALSE)
income.2012.VHLSS <- matrix(unlist(f.smooth.GAMId.link("Vietnam","2012",VHLSS.GAMGAUId.2012)),
                            ncol=4,byrow=FALSE)
income.2014.VHLSS <- matrix(unlist(f.smooth.GAMId.link("Vietnam","2014",VHLSS.GAMGAUId.2014)),
                            ncol=4,byrow=FALSE)
pal=rainbow(6)
plot(income.2014.VHLSS[,1], income.2014.VHLSS[,2], type="n", lwd=3,
     xlab="Per capita expenditure (PCE)", 
     ylab="Estimated per capita calorie intakeI",ylim=c(2200,4900),xlim=c(0,2200))
polygon(c(income.2014.VHLSS[,1], rev(income.2014.VHLSS[,1])), 
        c(income.2014.VHLSS[,3],rev(income.2014.VHLSS[,4])), col="grey90",
        border=NA)
polygon(c(income.2012.VHLSS[,1], rev(income.2012.VHLSS[,1])), 
        c(income.2012.VHLSS[,3],rev(income.2012.VHLSS[,4])), col="grey85",
        border=NA)
polygon(c(income.2010.VHLSS[,1], rev(income.2010.VHLSS[,1])), 
        c(income.2010.VHLSS[,3],rev(income.2010.VHLSS[,4])), col="grey80",
        border=NA)
polygon(c(income.2008.VHLSS[,1], rev(income.2008.VHLSS[,1])), 
        c(income.2008.VHLSS[,3],rev(income.2008.VHLSS[,4])), col="grey75",
        border=NA)
polygon(c(income.2006.VHLSS[,1], rev(income.2006.VHLSS[,1])), 
        c(income.2006.VHLSS[,3],rev(income.2006.VHLSS[,4])), col="grey70",
        border=NA)
polygon(c(income.2004.VHLSS[,1], rev(income.2004.VHLSS[,1])), 
        c(income.2004.VHLSS[,3],rev(income.2004.VHLSS[,4])), col="grey65",
        border=NA)
lines(income.2004.VHLSS[,1],income.2004.VHLSS[,2],  lwd=2,col=pal[1])
lines(income.2006.VHLSS[,1],income.2006.VHLSS[,2],  lwd=2,col=pal[2])
lines(income.2008.VHLSS[,1],income.2008.VHLSS[,2],  lwd=2,col=pal[3])
lines(income.2010.VHLSS[,1],income.2010.VHLSS[,2],  lwd=2,col=pal[4])
lines(income.2012.VHLSS[,1],income.2012.VHLSS[,2],  lwd=2,col=pal[5])
lines(income.2014.VHLSS[,1],income.2014.VHLSS[,2],  lwd=2,col=pal[6])
#abline(1,0)
legend("topleft",legend = c("2004","2006","2008","2010","2012","2014"),
       col=pal[1:6],lty=1,cex=1,  lwd=2)

Figure 2: . Decomposition of average per capita calorie intake difference

DecomVHLSS0604$year <- "2006"
DecomVHLSS0604.original <- DecomVHLSS0604[1001,]
DecomVHLSS0604 <- DecomVHLSS0604[1:1000,]
DecomVHLSS0604.melt <- melt(DecomVHLSS0604,id="year")

DecomVHLSS0804$year <- "2008"
DecomVHLSS0804.original <- DecomVHLSS0804[1001,]
DecomVHLSS0804 <- DecomVHLSS0804[1:1000,]
DecomVHLSS0804.melt <- melt(DecomVHLSS0804,id="year")

DecomVHLSS1004$year <- "2010"
DecomVHLSS1004.original <- DecomVHLSS1004[1001,]
DecomVHLSS1004 <- DecomVHLSS1004[1:1000,]
DecomVHLSS1004.melt <- melt(DecomVHLSS1004,id="year")

DecomVHLSS1204$year <- "2012"
DecomVHLSS1204.original <- DecomVHLSS1204[1001,]
DecomVHLSS1204 <- DecomVHLSS1204[1:1000,]
DecomVHLSS1204.melt <- melt(DecomVHLSS1204,id="year")

DecomVHLSS1404$year <- "2014"
DecomVHLSS1404.original <- DecomVHLSS1404[1001,]
DecomVHLSS1404 <- DecomVHLSS1404[1:1000,]
DecomVHLSS1404.melt <- melt(DecomVHLSS1404,id="year")


DecomVHLSS.melt <- rbind(DecomVHLSS0604.melt,DecomVHLSS0804.melt, DecomVHLSS1004.melt, DecomVHLSS1204.melt, DecomVHLSS1404.melt)
DecomVHLSS.melt$year <- factor(DecomVHLSS.melt$year)
DecomVHLSS.melt$variable <-factor(DecomVHLSS.melt$variable)
row.names(DecomVHLSS.melt) <- seq(1,length(DecomVHLSS.melt$year),by=1)

cx <- 1   # cex for axis label text default is 1. bigger numbers make the text bigger, smaller, smaller.
x.ttl <- ""  # blank x-axis label
y.ttl <- "Difference values(Kcal)"
ttl <- "VHLSS data (2004 is a reference year)"
y.lim <- c(min(DecomVHLSS.melt$value), max(DecomVHLSS.melt$value)+1.5)  # y-axis limits. +2 at top to give room for legend
x.lim <- c(0.5,5.5)
shifts <- c(-0.15, 0 ,0.15) 
years <- c("2006", "2008", "2010", "2012", "2014")
variables <- c("db","dX","toldiff")
col <- c("red","green","blue")

plot(x=0, y=0, xlim=x.lim, ylim=y.lim, xaxt='n', col='white', xlab=x.ttl, ylab=y.ttl,
     main=ttl, cex.main=cx, cex.axis=cx, cex.lab=cx)
axis(side=1, at=1:length(unique(DecomVHLSS.melt$year)), labels=years, cex.axis=cx, cex.lab=cx)   # put on the x-axis labels
grid(nx=NA, ny=NULL, col='darkgrey')
grid(nx=5, ny=NA, col='black')
lines(x=c(-1,100), y=c(0,0), col='darkgrey')
for (t.id in 1:length(unique(DecomVHLSS.melt$year))) {
  for (i in 1:length(unique(DecomVHLSS.melt$variable))) {   # t.id <- 1  i <- 1  
    inds <- row.names(DecomVHLSS.melt[which(DecomVHLSS.melt$variable == variables[i] & DecomVHLSS.melt$year == years[t.id]),])  # rows for this variable and year what we'll plot.
    boxplot(DecomVHLSS.melt[inds,"value"], at=t.id+shifts[i], col=col[i], add=TRUE, xaxt='n', yaxt='n', 
            bty='n', boxwex=0.5,medlwd = 0.5, boxlwd = 0.5, cex=0.1,outline = FALSE) 
  }
}
legend(x='bottom', legend=c("Structure effect","Composition effect","Total effect"), xpd = TRUE,fill=col, horiz=TRUE, cex=cx, bg='white', bty='n',inset = c(1, 1))

Fig. 3. Comparison of equivalence scales using 2012 VHLSS data

Weight.2012 <- Weight.2012[which(Weight.2012$ID%in%VHLSS[which(VHLSS$year==2012),"ID"]), ]
levels(Weight.2012$HSIZE)[12:13] <- c(NA, NA)
Weight.2012$OECD <- 1 + Weight.2012$c1R*0.7 + (Weight.2012$HSIZER - Weight.2012$c1R - 1)*0.5 
Weight.2012$OECDModi <- 1 + Weight.2012$c1R*0.5 + (Weight.2012$HSIZER - Weight.2012$c1R - 1)*0.3 

Weight.2012$OECD <- 1 + Weight.2012$c1R*0.7 + (Weight.2012$HSIZER - Weight.2012$c1R - 1)*0.5 
Muc1A$ID= paste(Muc1A$PROVINCE,Muc1A$DISTRICT,Muc1A$COMMUNE,Muc1A$LOCATION,Muc1A$PROFILE.NU,sep="_")

Weight.2012.volume <- Weight.2012[,c("Weight","OECD", "OECDModi",  "HSIZE" )]
levels(Weight.2012.volume$HSIZE)[1] <- "NA"
Weight.2012.volume.melt <- melt(Weight.2012.volume,id = "HSIZE")


cx <- 1   # cex for axis label text default is 1. bigger numbers make the text bigger, smaller, smaller.
x.ttl <- "Household size"  # blank x-axis label
y.ttl <- "Equivalence scale"
ttl <- "Compare between our scale, OECD scale and OECD-modified equivalence scale"
y.lim <- c(1,10)  # y-axis limits. +2 at top to give room for legend
x.lim <- c(1.5,11.5)
shifts <- c(-0.15, 0, 0, 0.15) 
HSIZEs <- levels(Weight.2012.volume$HSIZE)
variables <- c("Weight", "OECD", "OECDModi")
col <- c("green","yellow", "blue")

cx <- 1   # cex for axis label text default is 1. bigger numbers make the text bigger, smaller, smaller.
x.ttl <- "Household size"  # blank x-axis label
y.ttl <- "Equivalence scale"
ttl <- "Compare between our scale, OECD scale and OECD-modified equivalence scale"
y.lim <- c(1,10)  # y-axis limits. +2 at top to give room for legend
x.lim <- c(1.5,11.5)
shifts <- c(-0.15, 0, 0, 0.15) 
HSIZEs <- levels(Weight.2012.volume$HSIZE)
variables <- c("Weight", "OECD", "OECDModi")
col <- c("green","yellow", "blue")

plot(x=0, y=0, xlim=x.lim, ylim=y.lim, xaxt='n', col='white', xlab=x.ttl, ylab=y.ttl,
     main=ttl, cex.main=cx, cex.axis=cx, cex.lab=cx)
axis(side=1, at=1:length(levels(Weight.2012.volume$HSIZE)), 
     labels=HSIZEs, cex.axis=cx, cex.lab=cx)   # put on the x-axis labels
grid(nx=NA, ny=NULL, col='darkgrey')
grid(nx=11, ny=NA, col='black')
lines(x=c(-1,100), y=c(0,0), col='darkgrey')
for (t.id in 1:length(levels(Weight.2012.volume.melt$HSIZE))) {
  for (i in 1:length(levels(Weight.2012.volume.melt$variable))) {   # t.id <- 1  i <- 1  
    inds <- row.names(Weight.2012.volume.melt[which(Weight.2012.volume.melt$variable == variables[i] & Weight.2012.volume.melt$HSIZE == HSIZEs[t.id]),])  # rows for this variable and year what we'll plot.
    boxplot(Weight.2012.volume.melt[inds,"value"], at=t.id+shifts[i], col=col[i], add=TRUE, xaxt='n', yaxt='n', 
            bty='n', boxwex=0.5,medlwd = 1, boxlwd = 0.5, cex=0.1,outline = FALSE) 
  }
}
legend(x='bottom', legend=c("Our Scale", 
                            "OECD scale",
                            "OECD-modified scale"), 
       xpd = TRUE,fill=col, horiz=TRUE,
       cex=cx, bg='white', bty='n',inset = c(1, 1))

Calorie intake and income in Vietnam_World Development 2018

Trinh Thi Huong

1 June 2018