library(foreign)
## Warning: package 'foreign' was built under R version 3.2.5
r=read.dta("C:/Users/BINH THANG/Dropbox/Korea/STudy/Thesis/data management/DataR/dataR2.dta")
r1 <- subset(r, costincrease <=150000)
attach(r1)
newdata2=r1
attach(newdata2 )
## The following objects are masked from r1:
##
## a1, advice, advice1, ag, age_group, anticam, anticam2, b1,
## b10, b10a1, b10a2, b10a3, b10a4, b10a5, b10a6, b10a7, b11,
## b11a, b11a2, b11a3, b12, b12a, b13, b14, b15, b16, b16a, b17,
## b18, b18a, b1a, b2, b2a1, b3, b4, b5, b5a, b6, b6a, b6a1,
## b6a2, b6a3, b6a4, b6a5, b6a6, b6a7, b6a7a, b7, b8, b9, br,
## branch, branch1, branch2, c1, c2, c23a, c3, c4, c5, c6, c7,
## c8, c9, COST, cost1, cost2, costincrease, ct, d1, d10, d11,
## d1a, d2, d3, d3a, d4, d5, d6, d7, d8, d9, Decision, e1, e2,
## edu, educ1, educ2, f1, ghi1, ghi2, ghiro2, giadinhkoUH,
## group_age, group_age1, h1, h10a, h10a1, h10a10, h10a11,
## h10a11a, h10a2, h10a3, h10a4, h10a5, h10a6, h10a7, h10a8,
## h10a9, h12, h12a, h12a_1, h12log, h13, h2, h3, h4, h5, h6, h7,
## h8, h9, ha000, itc1, itc2, l1, l2, l3, l4, l5, l6, label1,
## label2, moneyspent, msdt, n01, n02, n03, n05, n06, n07, n08,
## n1, n10, n100, n101, n102, n103, n11, n12, n13, n14, n15, n16,
## n1b, n2, n3, n35, n36, n37, n38, n39, n3posterb, n4, n40, n41,
## n42, n43, n44, n45, n46, n47, n48, n49, n5, n50, n51, n52,
## n53, n54, n55, n56, n57, n58, n59, n6, n60, n61, n61a, n62,
## n62a, n63, n63a, n64, n64a, n65, n65a, n66, n66a, n67, n67a,
## n68, n68a, n7, n77, n78, n7tren, n8, n88, n89, n8nha, n9, n97,
## n98, n99, n9khach, noEnvi2, occup1, policy, policy_a, policy2,
## reasons, reasons1, s1, Screening, selfhealth, SH1, smostt,
## ter_fa1, ter_in, tertile_fa, tertile_indi, test, unitsdiffi1,
## w1, w2, w3, w4
define varibles
newdata2$c1=as.factor(newdata2$c1)
newdata2$h1=as.factor(newdata2$h1)
newdata2$label1=as.factor(newdata2$label1)
newdata2$policy=as.factor(newdata2$policy)
newdata2$educ2=as.factor(newdata2$educ2)
newdata2$age_group=as.factor(newdata2$age_group)
newdata2$d1a=as.factor(newdata2$d1a)
newdata2$selfhealth=as.factor(newdata2$selfhealth)
newdata2$b18a=as.factor(newdata2$b18a)
newdata2$b16a=as.factor(newdata2$b16a)
newdata2$b6a=as.factor(newdata2$b6a)
newdata2$smostt=as.factor(newdata2$smostt)
newdata2$reasons1=as.factor(newdata2$reasons1)
newdata2$ter_in=as.factor(newdata2$ter_in)
newdata2$group_age1=as.factor(newdata2$group_age1)
attach(newdata2)
## The following objects are masked from newdata2 (pos = 3):
##
## a1, advice, advice1, ag, age_group, anticam, anticam2, b1,
## b10, b10a1, b10a2, b10a3, b10a4, b10a5, b10a6, b10a7, b11,
## b11a, b11a2, b11a3, b12, b12a, b13, b14, b15, b16, b16a, b17,
## b18, b18a, b1a, b2, b2a1, b3, b4, b5, b5a, b6, b6a, b6a1,
## b6a2, b6a3, b6a4, b6a5, b6a6, b6a7, b6a7a, b7, b8, b9, br,
## branch, branch1, branch2, c1, c2, c23a, c3, c4, c5, c6, c7,
## c8, c9, COST, cost1, cost2, costincrease, ct, d1, d10, d11,
## d1a, d2, d3, d3a, d4, d5, d6, d7, d8, d9, Decision, e1, e2,
## edu, educ1, educ2, f1, ghi1, ghi2, ghiro2, giadinhkoUH,
## group_age, group_age1, h1, h10a, h10a1, h10a10, h10a11,
## h10a11a, h10a2, h10a3, h10a4, h10a5, h10a6, h10a7, h10a8,
## h10a9, h12, h12a, h12a_1, h12log, h13, h2, h3, h4, h5, h6, h7,
## h8, h9, ha000, itc1, itc2, l1, l2, l3, l4, l5, l6, label1,
## label2, moneyspent, msdt, n01, n02, n03, n05, n06, n07, n08,
## n1, n10, n100, n101, n102, n103, n11, n12, n13, n14, n15, n16,
## n1b, n2, n3, n35, n36, n37, n38, n39, n3posterb, n4, n40, n41,
## n42, n43, n44, n45, n46, n47, n48, n49, n5, n50, n51, n52,
## n53, n54, n55, n56, n57, n58, n59, n6, n60, n61, n61a, n62,
## n62a, n63, n63a, n64, n64a, n65, n65a, n66, n66a, n67, n67a,
## n68, n68a, n7, n77, n78, n7tren, n8, n88, n89, n8nha, n9, n97,
## n98, n99, n9khach, noEnvi2, occup1, policy, policy_a, policy2,
## reasons, reasons1, s1, Screening, selfhealth, SH1, smostt,
## ter_fa1, ter_in, tertile_fa, tertile_indi, test, unitsdiffi1,
## w1, w2, w3, w4
## The following objects are masked from r1:
##
## a1, advice, advice1, ag, age_group, anticam, anticam2, b1,
## b10, b10a1, b10a2, b10a3, b10a4, b10a5, b10a6, b10a7, b11,
## b11a, b11a2, b11a3, b12, b12a, b13, b14, b15, b16, b16a, b17,
## b18, b18a, b1a, b2, b2a1, b3, b4, b5, b5a, b6, b6a, b6a1,
## b6a2, b6a3, b6a4, b6a5, b6a6, b6a7, b6a7a, b7, b8, b9, br,
## branch, branch1, branch2, c1, c2, c23a, c3, c4, c5, c6, c7,
## c8, c9, COST, cost1, cost2, costincrease, ct, d1, d10, d11,
## d1a, d2, d3, d3a, d4, d5, d6, d7, d8, d9, Decision, e1, e2,
## edu, educ1, educ2, f1, ghi1, ghi2, ghiro2, giadinhkoUH,
## group_age, group_age1, h1, h10a, h10a1, h10a10, h10a11,
## h10a11a, h10a2, h10a3, h10a4, h10a5, h10a6, h10a7, h10a8,
## h10a9, h12, h12a, h12a_1, h12log, h13, h2, h3, h4, h5, h6, h7,
## h8, h9, ha000, itc1, itc2, l1, l2, l3, l4, l5, l6, label1,
## label2, moneyspent, msdt, n01, n02, n03, n05, n06, n07, n08,
## n1, n10, n100, n101, n102, n103, n11, n12, n13, n14, n15, n16,
## n1b, n2, n3, n35, n36, n37, n38, n39, n3posterb, n4, n40, n41,
## n42, n43, n44, n45, n46, n47, n48, n49, n5, n50, n51, n52,
## n53, n54, n55, n56, n57, n58, n59, n6, n60, n61, n61a, n62,
## n62a, n63, n63a, n64, n64a, n65, n65a, n66, n66a, n67, n67a,
## n68, n68a, n7, n77, n78, n7tren, n8, n88, n89, n8nha, n9, n97,
## n98, n99, n9khach, noEnvi2, occup1, policy, policy_a, policy2,
## reasons, reasons1, s1, Screening, selfhealth, SH1, smostt,
## ter_fa1, ter_in, tertile_fa, tertile_indi, test, unitsdiffi1,
## w1, w2, w3, w4
Install packages and library
library("brms")
## Warning: package 'brms' was built under R version 3.2.5
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
## Loading 'brms' package (version 1.5.1). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
library("caret")
## Warning: package 'caret' was built under R version 3.2.5
## Loading required package: lattice
library("coda")
## Warning: package 'coda' was built under R version 3.2.5
library("rstan", lib.loc="~/R/win-library/3.2")
## Warning: package 'rstan' was built under R version 3.2.5
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 3.2.5
## rstan (Version 2.14.1, packaged: 2016-12-28 14:55:41 UTC, GitRev: 5fa1e80eb817)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
##
## Attaching package: 'rstan'
## The following object is masked from 'package:coda':
##
## traceplot
options(mc.cores = parallel::detectCores())
set up for fomulation
set.seed(1234)
prior1=get_prior(costincrease~age_group+h1+ d1a+educ2+ter_in+selfhealth+smostt
+b16a+b18a+label1+anticam+noEnvi2,family=gaussian, data=newdata2)
## Warning: Rows containing NAs were excluded from the model
bayesfit1<- brm(costincrease~age_group+h1+d1a+educ2+ter_in+selfhealth+smostt +b16a+b18a+label1+anticam+noEnvi2, prior1, family=gaussian,chains=2,iter=2000, data=newdata2)
## Warning: Rows containing NAs were excluded from the model
## Compiling the C++ model
## Start sampling
summary(bayesfit1)
## Family: gaussian (identity)
## Formula: costincrease ~ age_group + h1 + d1a + educ2 + ter_in + selfhealth + smostt + b16a + b18a + label1 + anticam + noEnvi2
## Data: newdata2 (Number of observations: 419)
## Samples: 2 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 2000
## WAIC: Not computed
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 36039.75 7517.07 21360.62 50140.56 2000 1
## age_groupgr3039 -2006.73 5437.15 -12429.75 9021.74 2000 1
## age_groupgr4049 -5676.09 5694.10 -16895.12 5605.34 1216 1
## age_groupgr5059 -4616.29 6327.77 -16515.61 7629.13 1260 1
## age_group60plus -3903.46 8248.35 -19973.21 11767.36 2000 1
## h12 -1617.62 3545.27 -8617.95 5204.14 2000 1
## d1amarried 1202.94 4656.86 -7744.76 10685.90 1368 1
## educ22 1658.49 4533.02 -7948.57 10377.87 1424 1
## educ23 15128.85 5540.19 4208.66 26633.07 1571 1
## ter_in2 2898.04 3998.30 -5059.23 10693.48 2000 1
## ter_in3 -2820.22 4236.21 -11050.26 5299.33 2000 1
## selfhealthgood 10797.56 3337.61 4442.10 17290.55 2000 1
## smosttmedium 6796.78 4274.07 -1635.12 15328.24 1862 1
## smosttheavy 13252.01 4427.84 4306.24 21362.36 1721 1
## b16a1 -6188.76 4923.68 -15729.23 3379.40 2000 1
## b18abad 4045.06 3394.51 -2782.22 10604.69 2000 1
## label11 -7680.83 5199.16 -17936.49 2150.96 2000 1
## anticam 4382.93 4575.13 -4862.72 13063.27 2000 1
## noEnvi2 -2937.81 4408.35 -11349.79 5901.16 2000 1
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 32701.59 1207.73 30445.5 35267.18 2000 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
bayesfit1$fit
## Inference for Stan model: gaussian(identity) brms-model.
## 2 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=2000.
##
## mean se_mean sd 2.5% 25% 50%
## b_Intercept 36039.75 168.09 7517.07 21360.62 31040.12 36063.67
## b_age_groupgr3039 -2006.73 121.58 5437.15 -12429.75 -5698.80 -2033.87
## b_age_groupgr4049 -5676.09 163.26 5694.10 -16895.12 -9510.89 -5825.80
## b_age_groupgr5059 -4616.29 178.25 6327.77 -16515.61 -8905.28 -4770.05
## b_age_group60plus -3903.46 184.44 8248.35 -19973.21 -9234.95 -3890.23
## b_h12 -1617.62 79.27 3545.27 -8617.95 -4043.95 -1597.48
## b_d1amarried 1202.94 125.90 4656.86 -7744.76 -1990.37 1154.91
## b_educ22 1658.49 120.12 4533.02 -7948.57 -1384.57 1813.71
## b_educ23 15128.85 139.77 5540.19 4208.66 11569.86 15023.83
## b_ter_in2 2898.04 89.40 3998.30 -5059.23 192.07 2942.82
## b_ter_in3 -2820.22 94.72 4236.21 -11050.26 -5491.11 -2927.17
## b_selfhealthgood 10797.56 74.63 3337.61 4442.10 8490.60 10804.58
## b_smosttmedium 6796.78 99.05 4274.07 -1635.12 3912.49 6729.08
## b_smosttheavy 13252.01 106.72 4427.84 4306.24 10334.40 13284.35
## b_b16a1 -6188.76 110.10 4923.68 -15729.23 -9439.37 -6296.64
## b_b18abad 4045.06 75.90 3394.51 -2782.22 1757.34 4046.56
## b_label11 -7680.83 116.26 5199.16 -17936.49 -11174.56 -7568.79
## b_anticam 4382.93 102.30 4575.13 -4862.72 1425.17 4463.16
## b_noEnvi2 -2937.81 98.57 4408.35 -11349.79 -5803.94 -2986.15
## sigma 32701.59 27.01 1207.73 30445.50 31865.66 32668.87
## lp__ -4554.95 0.11 3.26 -4561.84 -4556.89 -4554.57
## 75% 97.5% n_eff Rhat
## b_Intercept 41133.63 50140.56 2000 1
## b_age_groupgr3039 1596.50 9021.74 2000 1
## b_age_groupgr4049 -1814.25 5605.34 1216 1
## b_age_groupgr5059 -420.39 7629.13 1260 1
## b_age_group60plus 1311.36 11767.36 2000 1
## b_h12 757.98 5204.14 2000 1
## b_d1amarried 4335.09 10685.90 1368 1
## b_educ22 4721.98 10377.87 1424 1
## b_educ23 18810.82 26633.07 1571 1
## b_ter_in2 5573.92 10693.48 2000 1
## b_ter_in3 54.58 5299.33 2000 1
## b_selfhealthgood 13024.32 17290.55 2000 1
## b_smosttmedium 9796.30 15328.24 1862 1
## b_smosttheavy 16249.00 21362.36 1721 1
## b_b16a1 -2892.35 3379.40 2000 1
## b_b18abad 6400.21 10604.69 2000 1
## b_label11 -4141.70 2150.96 2000 1
## b_anticam 7375.01 13063.27 2000 1
## b_noEnvi2 102.87 5901.16 2000 1
## sigma 33477.44 35267.18 2000 1
## lp__ -4552.64 -4549.48 831 1
##
## Samples were drawn using NUTS(diag_e) at Tue Mar 21 00:19:48 2017.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
test hypothesis
hypothesis(bayesfit1,"smosttmedium >smosttheavy ",alpha=0.5)
## Hypothesis Tests for class b:
## Estimate Est.Error l-50% CI u-50% CI Evid.Ratio
## (smosttmedium)-(s... > 0 -6455.23 3643.45 -6451.85 Inf 0.04
## ---
## '*': The expected value under the hypothesis lies outside the 50% CI.
hypothesis(bayesfit1,"smosttmedium <smosttheavy ",alpha=0.5)
## Hypothesis Tests for class b:
## Estimate Est.Error l-50% CI u-50% CI Evid.Ratio
## (smosttmedium)-(s... < 0 -6455.23 3643.45 -Inf -6451.85 24.64 *
## ---
## '*': The expected value under the hypothesis lies outside the 50% CI.