Model 1: Bayesian tobit regression estimates a linear regression model with a censored dependent variable using a Gibbs sampler. The dependent variable may be censored from below and/or from above. For other linear regression models with fully observed dependent variables, see Bayesian regression, maximum likelihood normal regression, or least squares.

Model 2: Tobit regression : Linear Regression for a Left-Censored Dependent Variable

Tobit regression estimates a linear regression model for a left-censored dependent variable, where the dependent variable is censored from below. While the classical tobit model has values censored at 0, you may select another censoring point. For other linear regression models with fully observed dependent variables, see Bayesian regression (), maximum likelihood normal regression (), or least squares ().

reading data

library(foreign)

## Warning: package 'foreign' was built under R version 3.2.5

r=read.dta("C:/Users/BINH THANG/Dropbox/Korea/STudy/Thesis/data management/DataR/dataR3.dta")


r$logC=log10(r$cost_inc)
qqnorm(r$logC)
qqline(r$logC)

r1=subset(r, cost_inc<180000)

attach(r1)
newdata2=r1

define varibles

newdata2$c1=as.factor(newdata2$c1)
newdata2$h1=as.factor(newdata2$h1)
newdata2$label1=as.factor(newdata2$label1)
newdata2$policy=as.factor(newdata2$policy)
newdata2$educ2=as.factor(newdata2$educ2)
newdata2$age_group=as.factor(newdata2$age_group)
newdata2$d1a=as.factor(newdata2$d1a)
newdata2$selfhealth=as.factor(newdata2$selfhealth)
newdata2$b18a=as.factor(newdata2$b18a)
newdata2$b16a=as.factor(newdata2$b16a)
newdata2$b6a=as.factor(newdata2$b6a)
newdata2$smostt=as.factor(newdata2$smostt)
newdata2$reasons1=as.factor(newdata2$reasons1)
newdata2$ter_in=as.factor(newdata2$ter_in)
newdata2$group_age1=as.factor(newdata2$group_age1)


attach(newdata2)

## The following objects are masked from r1:
## 
##     a1, advice, advice1, ag, age_group, anticam, anticam2, b1,
##     b10, b10a1, b10a2, b10a3, b10a4, b10a5, b10a6, b10a7, b11,
##     b11a, b11a2, b11a3, b12, b12a, b13, b14, b15, b16, b16a, b17,
##     b18, b18a, b1a, b2, b2a1, b3, b4, b5, b5a, b6, b6a, b6a1,
##     b6a2, b6a3, b6a4, b6a5, b6a6, b6a7, b6a7a, b7, b8, b9, br,
##     branch, branch1, branch2, branch3, c1, c2, c23a, c3, c4, c5,
##     c6, c7, c8, c9, COST, cost_inc, cost1, costincrease, ct, d1,
##     d10, d11, d1a, d2, d3, d3a, d4, d5, d6, d7, d8, d9, Decision,
##     e1, e2, edu, educ1, educ2, f1, ghi1, ghi2, ghiro2,
##     giadinhkoUH, group_age, group_age1, h1, h10a, h10a1, h10a10,
##     h10a11, h10a11a, h10a2, h10a3, h10a4, h10a5, h10a6, h10a7,
##     h10a8, h10a9, h12, h12a, h12a_1, h12log, h13, h2, h3, h4, h5,
##     h6, h7, h8, h9, ha000, itc1, itc2, l1, l2, l3, l4, l5, l6,
##     label1, label2, logC, moneyspent, msdt, n01, n02, n03, n05,
##     n06, n07, n08, n1, n10, n100, n101, n102, n103, n11, n12, n13,
##     n14, n15, n16, n1b, n2, n3, n35, n36, n37, n38, n39,
##     n3posterb, n4, n40, n41, n42, n43, n44, n45, n46, n47, n48,
##     n49, n5, n50, n51, n52, n53, n54, n55, n56, n57, n58, n59, n6,
##     n60, n61, n61a, n62, n62a, n63, n63a, n64, n64a, n65, n65a,
##     n66, n66a, n67, n67a, n68, n68a, n7, n77, n78, n7tren, n8,
##     n88, n89, n8nha, n9, n97, n98, n99, n9khach, noEnvi2, occup1,
##     policy, policy_a, policy2, reasons, reasons1, s1, Screening,
##     selfhealth, SH1, smostt, ter_fa1, ter_in, tertile_fa,
##     tertile_indi, test, unitsdiffi1, var242, w1, w2, w3, w4

Model 1: Zelig- Bayes tobit regression

Packages and library of

library(Zelig)

## Warning: package 'Zelig' was built under R version 3.2.5

fomulate for tobit:

z.out<- zelig(logC~c1+h1+ d1a+educ2+ter_in+selfhealth+smostt+b16a
               +b6a+b18a+label1+anticam+noEnvi2 , 
               above = 150000, thin=5, mcmc=30000, 
               data=newdata2, model = "tobit.bayes", verbose = FALSE)

## How to cite this model in Zelig:
##   Ben Goodrich, and Ying Lu. 2013.
##   tobit-bayes: Bayesian Tobit Regression for a Censored Dependent Variable
##   in Christine Choirat, Christopher Gandrud, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
##   "Zelig: Everyone's Statistical Software," http://zeligproject.org/

the fist model have been lauched, then we need to check the appropriate of its

z.out$geweke.diag()

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    (Intercept)            c12            h12     d1amarried         educ22 
##       -0.44071        2.21660       -1.21844        0.58881       -1.50756 
##         educ23        ter_in2        ter_in3 selfhealthgood   smosttmedium 
##       -1.09873       -0.27344        0.52289       -0.79391        0.80603 
##    smosttheavy          b16a1         b6ayes        b18abad        label11 
##        2.01286       -0.26267       -0.10248        1.28494        0.37587 
##        anticam        noEnvi2         sigma2 
##        0.01164       -0.40376        1.68245

z.out$heidel.diag()

##                                              
##                Stationarity start     p-value
##                test         iteration        
## (Intercept)    passed          1      0.2601 
## c12            passed          1      0.4263 
## h12            passed          1      0.0908 
## d1amarried     passed          1      0.4008 
## educ22         passed          1      0.2747 
## educ23         passed          1      0.7013 
## ter_in2        passed          1      0.9368 
## ter_in3        passed          1      0.7119 
## selfhealthgood passed          1      0.4562 
## smosttmedium   passed          1      0.5788 
## smosttheavy    passed          1      0.1209 
## b16a1          passed          1      0.3687 
## b6ayes         passed          1      0.9550 
## b18abad        passed          1      0.7146 
## label11        passed          1      0.1633 
## anticam        passed          1      0.2518 
## noEnvi2        passed          1      0.6839 
## sigma2         passed       1201      0.1991 
##                                            
##                Halfwidth Mean     Halfwidth
##                test                        
## (Intercept)    passed     4.68471 1.28e-03 
## c12            passed    -0.01343 1.05e-03 
## h12            passed    -0.01371 5.65e-04 
## d1amarried     passed    -0.02091 5.62e-04 
## educ22         passed     0.04477 7.19e-04 
## educ23         passed     0.13419 8.57e-04 
## ter_in2        passed     0.02736 6.33e-04 
## ter_in3        passed     0.00988 6.46e-04 
## selfhealthgood passed     0.06320 5.36e-04 
## smosttmedium   passed     0.04946 8.39e-04 
## smosttheavy    passed     0.06811 8.67e-04 
## b16a1          passed    -0.02811 7.67e-04 
## b6ayes         passed     0.02445 7.47e-04 
## b18abad        passed     0.02048 5.49e-04 
## label11        passed    -0.05608 7.97e-04 
## anticam        passed     0.03708 6.96e-04 
## noEnvi2        passed    -0.02891 6.86e-04 
## sigma2         passed     0.04223 8.45e-05

z.out$raftery.diag()

## 
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95 
##                                                       
##                 Burn-in  Total Lower bound  Dependence
##                 (M)      (N)   (Nmin)       factor (I)
##  (Intercept)    10       19595 3746         5.23      
##  c12            10       18300 3746         4.89      
##  h12            10       18805 3746         5.02      
##  d1amarried     10       18550 3746         4.95      
##  educ22         10       19065 3746         5.09      
##  educ23         10       17805 3746         4.75      
##  ter_in2        10       19595 3746         5.23      
##  ter_in3        10       19330 3746         5.16      
##  selfhealthgood 10       18050 3746         4.82      
##  smosttmedium   10       19595 3746         5.23      
##  smosttheavy    10       19065 3746         5.09      
##  b16a1          10       18805 3746         5.02      
##  b6ayes         10       19065 3746         5.09      
##  b18abad        10       19065 3746         5.09      
##  label11        10       18550 3746         4.95      
##  anticam        10       19065 3746         5.09      
##  noEnvi2        10       18550 3746         4.95      
##  sigma2         10       18050 3746         4.82

Summary of model (bayestobit)

summary(z.out)

## Model: 
## 
## Iterations = 1001:30996
## Thinning interval = 5 
## Number of chains = 1 
## Sample size per chain = 6000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                     Mean       SD  Naive SE Time-series SE
## (Intercept)     4.684706 0.050611 6.534e-04      6.534e-04
## c12            -0.013431 0.041690 5.382e-04      5.382e-04
## h12            -0.013713 0.022324 2.882e-04      2.882e-04
## d1amarried     -0.020912 0.022820 2.946e-04      2.869e-04
## educ22          0.044774 0.028421 3.669e-04      3.669e-04
## educ23          0.134188 0.033865 4.372e-04      4.372e-04
## ter_in2         0.027360 0.025031 3.232e-04      3.232e-04
## ter_in3         0.009879 0.026087 3.368e-04      3.298e-04
## selfhealthgood  0.063202 0.021200 2.737e-04      2.737e-04
## smosttmedium    0.049463 0.033146 4.279e-04      4.279e-04
## smosttheavy     0.068112 0.034245 4.421e-04      4.421e-04
## b16a1          -0.028112 0.030308 3.913e-04      3.913e-04
## b6ayes          0.024450 0.029517 3.811e-04      3.811e-04
## b18abad         0.020480 0.021701 2.802e-04      2.802e-04
## label11        -0.056085 0.031700 4.092e-04      4.068e-04
## anticam         0.037077 0.027494 3.549e-04      3.549e-04
## noEnvi2        -0.028910 0.027093 3.498e-04      3.498e-04
## sigma2          0.042267 0.002947 3.804e-05      3.735e-05
## 
## 2. Quantiles for each variable:
## 
##                     2.5%       25%       50%       75%    97.5%
## (Intercept)     4.586583  4.650269  4.684561  4.718158 4.784808
## c12            -0.094011 -0.042747 -0.013990  0.014669 0.070011
## h12            -0.057805 -0.029169 -0.013430  0.001506 0.028932
## d1amarried     -0.064900 -0.036830 -0.020701 -0.005672 0.023746
## educ22         -0.009766  0.025622  0.044711  0.063964 0.099748
## educ23          0.067688  0.111522  0.133881  0.156508 0.203352
## ter_in2        -0.021818  0.010752  0.027531  0.044110 0.075235
## ter_in3        -0.041075 -0.007543  0.009952  0.027935 0.059826
## selfhealthgood  0.021290  0.048938  0.063383  0.077298 0.104967
## smosttmedium   -0.015816  0.026429  0.049301  0.072186 0.114391
## smosttheavy     0.001336  0.045077  0.067668  0.091133 0.135846
## b16a1          -0.087850 -0.048738 -0.027801 -0.008075 0.031444
## b6ayes         -0.034444  0.004709  0.024592  0.044389 0.081542
## b18abad        -0.022042  0.005806  0.020043  0.035384 0.062613
## label11        -0.117363 -0.077907 -0.056577 -0.034322 0.005799
## anticam        -0.016578  0.018359  0.036783  0.055823 0.092080
## noEnvi2        -0.080444 -0.047361 -0.029230 -0.010459 0.023822
## sigma2          0.036789  0.040262  0.042114  0.044171 0.048420
## 
## Next step: Use 'setx' method

Setting values for the explanatory variables to their sample averages:

x.out <- setx(z.out)
# Simulating quantities of interest from the posterior distribution given x.out.
s.out1 <- sim(z.out, x = x.out)

summary(s.out1)

## 
##  sim x :
##  -----
## ev
##       mean         sd      50%     2.5%    97.5%
## 1 4.753942 0.02927181 4.753937 4.696019 4.811676
## pv
##          mean        sd      50%     2.5%    97.5%
## [1,] 4.751533 0.2041466 4.752736 4.349231 5.145806

Simulating First Differences

Set explanatory variables to their default(mean/mode) values, with high (80th percentile) and low (20th percentile) cost increased (logC):

x.high <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.8))
x.low <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.2))

#Estimating the first difference for the effect of high versus low liquidity ratio on duration( durable):

s.out2 <- sim(z.out, x = x.high, x1 = x.low)

summary(s.out2)

## 
##  sim x :
##  -----
## ev
##       mean         sd      50%     2.5%    97.5%
## 1 4.753942 0.02927181 4.753937 4.696019 4.811676
## pv
##          mean        sd      50%     2.5%    97.5%
## [1,] 4.751221 0.2094996 4.753536 4.340672 5.159351
## 
##  sim x1 :
##  -----
## ev
##       mean         sd      50%     2.5%    97.5%
## 1 4.753942 0.02927181 4.753937 4.696019 4.811676
## pv
##         mean        sd      50%    2.5%    97.5%
## [1,] 4.74939 0.2089809 4.750064 4.33654 5.156338
## fd
##   mean sd 50% 2.5% 97.5%
## 1    0  0   0    0     0

plot(s.out1)

Model 2: tobit regression

z.out <- zelig(logC~c1+h1+ d1a+educ2+ter_in+selfhealth+smostt+b16a
               +b6a+b18a+label1+anticam+noEnvi2 , 
               above = 180000, model = "tobit", data = newdata2)

## How to cite this model in Zelig:
##   Christian Kleiber, and Achim Zeileis. 2011.
##   tobit: Linear regression for Left-Censored Dependent Variable
##   in Christine Choirat, Christopher Gandrud, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
##   "Zelig: Everyone's Statistical Software," http://zeligproject.org/

Summarize estimated paramters:

summary(z.out)

## Model: 
## 
## Call:
## z5$zelig(formula = logC ~ c1 + h1 + d1a + educ2 + ter_in + selfhealth + 
##     smostt + b16a + b6a + b18a + label1 + anticam + noEnvi2, 
##     above = 180000, data = newdata2)
## 
## Observations: (27 observations deleted due to missingness)
##          Total  Left-censored     Uncensored Right-censored 
##            411              0            411              0 
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)     4.68562    0.05039  92.990  < 2e-16
## c12            -0.01382    0.04056  -0.341  0.73322
## h12            -0.01302    0.02176  -0.599  0.54941
## d1amarried     -0.02092    0.02263  -0.925  0.35517
## educ22          0.04423    0.02769   1.597  0.11018
## educ23          0.13424    0.03295   4.074 4.61e-05
## ter_in2         0.02740    0.02470   1.109  0.26725
## ter_in3         0.01013    0.02547   0.398  0.69092
## selfhealthgood  0.06336    0.02068   3.063  0.00219
## smosttmedium    0.04890    0.03284   1.489  0.13642
## smosttheavy     0.06760    0.03402   1.987  0.04690
## b16a1          -0.02847    0.02971  -0.958  0.33790
## b6ayes          0.02441    0.02915   0.837  0.40253
## b18abad         0.02005    0.02155   0.930  0.35226
## label11        -0.05626    0.03128  -1.798  0.07212
## anticam         0.03720    0.02723   1.367  0.17176
## noEnvi2        -0.02852    0.02633  -1.083  0.27870
## Log(scale)     -1.60446    0.03488 -46.001  < 2e-16
## 
## Scale: 0.201 
## 
## Gaussian distribution
## Number of Newton-Raphson Iterations: 3 
## Log-likelihood: 76.25 on 18 Df
## Wald-statistic: 49.44 on 16 Df, p-value: 2.8115e-05 
## 
## Next step: Use 'setx' method

Setting values for the explanatory variables to their sample averages:

x.out <- setx(z.out)

Simulating quantities of interest from the posterior distribution given x.out.

s.out1 <- sim(z.out, x = x.out)
summary(s.out1)

## 
##  sim x :
##  -----
## ev
##      mean         sd     50%     2.5%    97.5%
## 1 4.75348 0.02854102 4.75342 4.698589 4.809213
## pv
##          mean        sd      50%    2.5%    97.5%
## [1,] 4.750777 0.2007466 4.754827 4.36875 5.173069

Simulating First Differences Set explanatory variables to their default(mean/mode) values, with high (80th percentile) and low (20th percentile) liquidity ratio (quant):

x.high <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.8))
x.low <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.2))

Estimating the first difference for the effect of high versus low liquidity ratio on duration(durable):

s.out2 <- sim(z.out, x = x.high, x1 = x.low)
summary(s.out2)

## 
##  sim x :
##  -----
## ev
##       mean        sd      50%    2.5%    97.5%
## 1 4.751835 0.0288039 4.749947 4.69953 4.810419
## pv
##          mean        sd      50%     2.5%    97.5%
## [1,] 4.752416 0.2059493 4.754019 4.344547 5.175231
## 
##  sim x1 :
##  -----
## ev
##       mean        sd      50%    2.5%    97.5%
## 1 4.751835 0.0288039 4.749947 4.69953 4.810419
## pv
##          mean        sd      50%     2.5%    97.5%
## [1,] 4.757742 0.1938928 4.760406 4.394867 5.134123
## fd
##   mean sd 50% 2.5% 97.5%
## 1    0  0   0    0     0

plot(s.out1)

Ref: http://docs.zeligproject.org/en/latest/zelig-tobit.html http://docs.zeligproject.org/en/latest/zelig-tobitbayes.html

See you

Binh Thang

zelig-tobitbayes and zelig-tobit

Thang, TB

21 March 2017

reading data

define varibles

Model 1: Zelig- Bayes tobit regression

fomulate for tobit:

the fist model have been lauched, then we need to check the appropriate of its

Next

next

Summary of model (bayestobit)

Setting values for the explanatory variables to their sample averages:

Simulating First Differences

Model 2: tobit regression

Setting values for the explanatory variables to their sample averages: