Model 1: Bayesian tobit regression estimates a linear regression model with a censored dependent variable using a Gibbs sampler. The dependent variable may be censored from below and/or from above. For other linear regression models with fully observed dependent variables, see Bayesian regression, maximum likelihood normal regression, or least squares.
Model 2: Tobit regression : Linear Regression for a Left-Censored Dependent Variable
Tobit regression estimates a linear regression model for a left-censored dependent variable, where the dependent variable is censored from below. While the classical tobit model has values censored at 0, you may select another censoring point. For other linear regression models with fully observed dependent variables, see Bayesian regression (), maximum likelihood normal regression (), or least squares ().
library(foreign)
## Warning: package 'foreign' was built under R version 3.2.5
r=read.dta("C:/Users/BINH THANG/Dropbox/Korea/STudy/Thesis/data management/DataR/dataR3.dta")
r$logC=log10(r$cost_inc)
qqnorm(r$logC)
qqline(r$logC)
r1=subset(r, cost_inc<180000)
attach(r1)
newdata2=r1
newdata2$c1=as.factor(newdata2$c1)
newdata2$h1=as.factor(newdata2$h1)
newdata2$label1=as.factor(newdata2$label1)
newdata2$policy=as.factor(newdata2$policy)
newdata2$educ2=as.factor(newdata2$educ2)
newdata2$age_group=as.factor(newdata2$age_group)
newdata2$d1a=as.factor(newdata2$d1a)
newdata2$selfhealth=as.factor(newdata2$selfhealth)
newdata2$b18a=as.factor(newdata2$b18a)
newdata2$b16a=as.factor(newdata2$b16a)
newdata2$b6a=as.factor(newdata2$b6a)
newdata2$smostt=as.factor(newdata2$smostt)
newdata2$reasons1=as.factor(newdata2$reasons1)
newdata2$ter_in=as.factor(newdata2$ter_in)
newdata2$group_age1=as.factor(newdata2$group_age1)
attach(newdata2)
## The following objects are masked from r1:
##
## a1, advice, advice1, ag, age_group, anticam, anticam2, b1,
## b10, b10a1, b10a2, b10a3, b10a4, b10a5, b10a6, b10a7, b11,
## b11a, b11a2, b11a3, b12, b12a, b13, b14, b15, b16, b16a, b17,
## b18, b18a, b1a, b2, b2a1, b3, b4, b5, b5a, b6, b6a, b6a1,
## b6a2, b6a3, b6a4, b6a5, b6a6, b6a7, b6a7a, b7, b8, b9, br,
## branch, branch1, branch2, branch3, c1, c2, c23a, c3, c4, c5,
## c6, c7, c8, c9, COST, cost_inc, cost1, costincrease, ct, d1,
## d10, d11, d1a, d2, d3, d3a, d4, d5, d6, d7, d8, d9, Decision,
## e1, e2, edu, educ1, educ2, f1, ghi1, ghi2, ghiro2,
## giadinhkoUH, group_age, group_age1, h1, h10a, h10a1, h10a10,
## h10a11, h10a11a, h10a2, h10a3, h10a4, h10a5, h10a6, h10a7,
## h10a8, h10a9, h12, h12a, h12a_1, h12log, h13, h2, h3, h4, h5,
## h6, h7, h8, h9, ha000, itc1, itc2, l1, l2, l3, l4, l5, l6,
## label1, label2, logC, moneyspent, msdt, n01, n02, n03, n05,
## n06, n07, n08, n1, n10, n100, n101, n102, n103, n11, n12, n13,
## n14, n15, n16, n1b, n2, n3, n35, n36, n37, n38, n39,
## n3posterb, n4, n40, n41, n42, n43, n44, n45, n46, n47, n48,
## n49, n5, n50, n51, n52, n53, n54, n55, n56, n57, n58, n59, n6,
## n60, n61, n61a, n62, n62a, n63, n63a, n64, n64a, n65, n65a,
## n66, n66a, n67, n67a, n68, n68a, n7, n77, n78, n7tren, n8,
## n88, n89, n8nha, n9, n97, n98, n99, n9khach, noEnvi2, occup1,
## policy, policy_a, policy2, reasons, reasons1, s1, Screening,
## selfhealth, SH1, smostt, ter_fa1, ter_in, tertile_fa,
## tertile_indi, test, unitsdiffi1, var242, w1, w2, w3, w4
Packages and library of
library(Zelig)
## Warning: package 'Zelig' was built under R version 3.2.5
z.out<- zelig(logC~c1+h1+ d1a+educ2+ter_in+selfhealth+smostt+b16a
+b6a+b18a+label1+anticam+noEnvi2 ,
above = 150000, thin=5, mcmc=30000,
data=newdata2, model = "tobit.bayes", verbose = FALSE)
## How to cite this model in Zelig:
## Ben Goodrich, and Ying Lu. 2013.
## tobit-bayes: Bayesian Tobit Regression for a Censored Dependent Variable
## in Christine Choirat, Christopher Gandrud, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
## "Zelig: Everyone's Statistical Software," http://zeligproject.org/
z.out$geweke.diag()
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## (Intercept) c12 h12 d1amarried educ22
## -0.44071 2.21660 -1.21844 0.58881 -1.50756
## educ23 ter_in2 ter_in3 selfhealthgood smosttmedium
## -1.09873 -0.27344 0.52289 -0.79391 0.80603
## smosttheavy b16a1 b6ayes b18abad label11
## 2.01286 -0.26267 -0.10248 1.28494 0.37587
## anticam noEnvi2 sigma2
## 0.01164 -0.40376 1.68245
z.out$heidel.diag()
##
## Stationarity start p-value
## test iteration
## (Intercept) passed 1 0.2601
## c12 passed 1 0.4263
## h12 passed 1 0.0908
## d1amarried passed 1 0.4008
## educ22 passed 1 0.2747
## educ23 passed 1 0.7013
## ter_in2 passed 1 0.9368
## ter_in3 passed 1 0.7119
## selfhealthgood passed 1 0.4562
## smosttmedium passed 1 0.5788
## smosttheavy passed 1 0.1209
## b16a1 passed 1 0.3687
## b6ayes passed 1 0.9550
## b18abad passed 1 0.7146
## label11 passed 1 0.1633
## anticam passed 1 0.2518
## noEnvi2 passed 1 0.6839
## sigma2 passed 1201 0.1991
##
## Halfwidth Mean Halfwidth
## test
## (Intercept) passed 4.68471 1.28e-03
## c12 passed -0.01343 1.05e-03
## h12 passed -0.01371 5.65e-04
## d1amarried passed -0.02091 5.62e-04
## educ22 passed 0.04477 7.19e-04
## educ23 passed 0.13419 8.57e-04
## ter_in2 passed 0.02736 6.33e-04
## ter_in3 passed 0.00988 6.46e-04
## selfhealthgood passed 0.06320 5.36e-04
## smosttmedium passed 0.04946 8.39e-04
## smosttheavy passed 0.06811 8.67e-04
## b16a1 passed -0.02811 7.67e-04
## b6ayes passed 0.02445 7.47e-04
## b18abad passed 0.02048 5.49e-04
## label11 passed -0.05608 7.97e-04
## anticam passed 0.03708 6.96e-04
## noEnvi2 passed -0.02891 6.86e-04
## sigma2 passed 0.04223 8.45e-05
z.out$raftery.diag()
##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## (Intercept) 10 19595 3746 5.23
## c12 10 18300 3746 4.89
## h12 10 18805 3746 5.02
## d1amarried 10 18550 3746 4.95
## educ22 10 19065 3746 5.09
## educ23 10 17805 3746 4.75
## ter_in2 10 19595 3746 5.23
## ter_in3 10 19330 3746 5.16
## selfhealthgood 10 18050 3746 4.82
## smosttmedium 10 19595 3746 5.23
## smosttheavy 10 19065 3746 5.09
## b16a1 10 18805 3746 5.02
## b6ayes 10 19065 3746 5.09
## b18abad 10 19065 3746 5.09
## label11 10 18550 3746 4.95
## anticam 10 19065 3746 5.09
## noEnvi2 10 18550 3746 4.95
## sigma2 10 18050 3746 4.82
summary(z.out)
## Model:
##
## Iterations = 1001:30996
## Thinning interval = 5
## Number of chains = 1
## Sample size per chain = 6000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) 4.684706 0.050611 6.534e-04 6.534e-04
## c12 -0.013431 0.041690 5.382e-04 5.382e-04
## h12 -0.013713 0.022324 2.882e-04 2.882e-04
## d1amarried -0.020912 0.022820 2.946e-04 2.869e-04
## educ22 0.044774 0.028421 3.669e-04 3.669e-04
## educ23 0.134188 0.033865 4.372e-04 4.372e-04
## ter_in2 0.027360 0.025031 3.232e-04 3.232e-04
## ter_in3 0.009879 0.026087 3.368e-04 3.298e-04
## selfhealthgood 0.063202 0.021200 2.737e-04 2.737e-04
## smosttmedium 0.049463 0.033146 4.279e-04 4.279e-04
## smosttheavy 0.068112 0.034245 4.421e-04 4.421e-04
## b16a1 -0.028112 0.030308 3.913e-04 3.913e-04
## b6ayes 0.024450 0.029517 3.811e-04 3.811e-04
## b18abad 0.020480 0.021701 2.802e-04 2.802e-04
## label11 -0.056085 0.031700 4.092e-04 4.068e-04
## anticam 0.037077 0.027494 3.549e-04 3.549e-04
## noEnvi2 -0.028910 0.027093 3.498e-04 3.498e-04
## sigma2 0.042267 0.002947 3.804e-05 3.735e-05
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) 4.586583 4.650269 4.684561 4.718158 4.784808
## c12 -0.094011 -0.042747 -0.013990 0.014669 0.070011
## h12 -0.057805 -0.029169 -0.013430 0.001506 0.028932
## d1amarried -0.064900 -0.036830 -0.020701 -0.005672 0.023746
## educ22 -0.009766 0.025622 0.044711 0.063964 0.099748
## educ23 0.067688 0.111522 0.133881 0.156508 0.203352
## ter_in2 -0.021818 0.010752 0.027531 0.044110 0.075235
## ter_in3 -0.041075 -0.007543 0.009952 0.027935 0.059826
## selfhealthgood 0.021290 0.048938 0.063383 0.077298 0.104967
## smosttmedium -0.015816 0.026429 0.049301 0.072186 0.114391
## smosttheavy 0.001336 0.045077 0.067668 0.091133 0.135846
## b16a1 -0.087850 -0.048738 -0.027801 -0.008075 0.031444
## b6ayes -0.034444 0.004709 0.024592 0.044389 0.081542
## b18abad -0.022042 0.005806 0.020043 0.035384 0.062613
## label11 -0.117363 -0.077907 -0.056577 -0.034322 0.005799
## anticam -0.016578 0.018359 0.036783 0.055823 0.092080
## noEnvi2 -0.080444 -0.047361 -0.029230 -0.010459 0.023822
## sigma2 0.036789 0.040262 0.042114 0.044171 0.048420
##
## Next step: Use 'setx' method
x.out <- setx(z.out)
# Simulating quantities of interest from the posterior distribution given x.out.
s.out1 <- sim(z.out, x = x.out)
summary(s.out1)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 4.753942 0.02927181 4.753937 4.696019 4.811676
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 4.751533 0.2041466 4.752736 4.349231 5.145806
Set explanatory variables to their default(mean/mode) values, with high (80th percentile) and low (20th percentile) cost increased (logC):
x.high <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.8))
x.low <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.2))
#Estimating the first difference for the effect of high versus low liquidity ratio on duration( durable):
s.out2 <- sim(z.out, x = x.high, x1 = x.low)
summary(s.out2)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 4.753942 0.02927181 4.753937 4.696019 4.811676
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 4.751221 0.2094996 4.753536 4.340672 5.159351
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 4.753942 0.02927181 4.753937 4.696019 4.811676
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 4.74939 0.2089809 4.750064 4.33654 5.156338
## fd
## mean sd 50% 2.5% 97.5%
## 1 0 0 0 0 0
plot(s.out1)
z.out <- zelig(logC~c1+h1+ d1a+educ2+ter_in+selfhealth+smostt+b16a
+b6a+b18a+label1+anticam+noEnvi2 ,
above = 180000, model = "tobit", data = newdata2)
## How to cite this model in Zelig:
## Christian Kleiber, and Achim Zeileis. 2011.
## tobit: Linear regression for Left-Censored Dependent Variable
## in Christine Choirat, Christopher Gandrud, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
## "Zelig: Everyone's Statistical Software," http://zeligproject.org/
Summarize estimated paramters:
summary(z.out)
## Model:
##
## Call:
## z5$zelig(formula = logC ~ c1 + h1 + d1a + educ2 + ter_in + selfhealth +
## smostt + b16a + b6a + b18a + label1 + anticam + noEnvi2,
## above = 180000, data = newdata2)
##
## Observations: (27 observations deleted due to missingness)
## Total Left-censored Uncensored Right-censored
## 411 0 411 0
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.68562 0.05039 92.990 < 2e-16
## c12 -0.01382 0.04056 -0.341 0.73322
## h12 -0.01302 0.02176 -0.599 0.54941
## d1amarried -0.02092 0.02263 -0.925 0.35517
## educ22 0.04423 0.02769 1.597 0.11018
## educ23 0.13424 0.03295 4.074 4.61e-05
## ter_in2 0.02740 0.02470 1.109 0.26725
## ter_in3 0.01013 0.02547 0.398 0.69092
## selfhealthgood 0.06336 0.02068 3.063 0.00219
## smosttmedium 0.04890 0.03284 1.489 0.13642
## smosttheavy 0.06760 0.03402 1.987 0.04690
## b16a1 -0.02847 0.02971 -0.958 0.33790
## b6ayes 0.02441 0.02915 0.837 0.40253
## b18abad 0.02005 0.02155 0.930 0.35226
## label11 -0.05626 0.03128 -1.798 0.07212
## anticam 0.03720 0.02723 1.367 0.17176
## noEnvi2 -0.02852 0.02633 -1.083 0.27870
## Log(scale) -1.60446 0.03488 -46.001 < 2e-16
##
## Scale: 0.201
##
## Gaussian distribution
## Number of Newton-Raphson Iterations: 3
## Log-likelihood: 76.25 on 18 Df
## Wald-statistic: 49.44 on 16 Df, p-value: 2.8115e-05
##
## Next step: Use 'setx' method
x.out <- setx(z.out)
Simulating quantities of interest from the posterior distribution given x.out.
s.out1 <- sim(z.out, x = x.out)
summary(s.out1)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 4.75348 0.02854102 4.75342 4.698589 4.809213
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 4.750777 0.2007466 4.754827 4.36875 5.173069
Simulating First Differences Set explanatory variables to their default(mean/mode) values, with high (80th percentile) and low (20th percentile) liquidity ratio (quant):
x.high <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.8))
x.low <- setx(z.out, logC = quantile(newdata2$logC, prob = 0.2))
Estimating the first difference for the effect of high versus low liquidity ratio on duration(durable):
s.out2 <- sim(z.out, x = x.high, x1 = x.low)
summary(s.out2)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 4.751835 0.0288039 4.749947 4.69953 4.810419
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 4.752416 0.2059493 4.754019 4.344547 5.175231
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 4.751835 0.0288039 4.749947 4.69953 4.810419
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 4.757742 0.1938928 4.760406 4.394867 5.134123
## fd
## mean sd 50% 2.5% 97.5%
## 1 0 0 0 0 0
plot(s.out1)
Ref: http://docs.zeligproject.org/en/latest/zelig-tobit.html http://docs.zeligproject.org/en/latest/zelig-tobitbayes.html
See you
Binh Thang