In this assignment, I apply Bayesian Methods from a sample that includes data from 4,871 individuals born between 1978 and 1988 (ages 26-38) in the State of Texas. Data was gathered from ACS-IPUMS 2014-2016.
The outcome is set as adjusted income and the covariates are sex, being born in the US, number of years in the US, race and education level.
library (car)
## Loading required package: carData
library(brms)
## Loading required package: Rcpp
## Loading required package: ggplot2
## Loading 'brms' package (version 2.6.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.18.1, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
library(haven)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
`
load("C:/Users/PCMcC/Documents/Hierarchical Models/hierarchichal methods class/ipums_subset2.Rdata")
sub3 <- filter(sub2, birthyr %in% 1978:1988, year %in% 2014:2016, gq != 3, gq != 4, gq!=6, statefip==48)
save(sub3, file="C:/Users/PCMcC/Documents/Hierarchical Models/hierarchichal methods class/ipums_subset3.Rdata")
rm(sub2)
#gc()
#set.seed(1115)
names(sub3) #print the column names
## [1] "year" "datanum" "serial" "hhwt" "hhtype"
## [6] "stateicp" "statefip" "county" "countyfips" "metro"
## [11] "city" "puma" "gq" "hhincome" "foodstmp"
## [16] "pernum" "perwt" "sex" "age" "birthyr"
## [21] "fertyr" "race" "raced" "hispan" "hispand"
## [26] "bpl" "bpld" "citizen" "yrsusa1" "language"
## [31] "languaged" "school" "educ" "educd" "gradeatt"
## [36] "gradeattd" "degfield" "degfieldd" "empstat" "empstatd"
## [41] "labforce" "incwage" "poverty" "trantime"
myvars<-c("statefip","county","countyfips","metro","city","puma","hhincome","incwage","educd","empstatd","gradeatt","race", "hispan", "hispand","raced","sex","empstat","age","trantime","poverty","birthyr","year","gq","hhtype","yrsusa1","bpl")
sub3<-sub3[,myvars]
#rm(sub3); gc()
#RACE/ETHNICITY
sub3$hispanic <- Recode(sub3$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
sub3$race_rec <- Recode(sub3$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
sub3$race_eth <- interaction(sub3$hispanic, sub3$race_rec, sep = "_")
sub3$race_eth <- as.factor(ifelse(substr(as.character(sub3$race_eth),1,8) == "Hispanic", "Hispanic", as.character(sub3$race_eth)))
sub3$race_eth <- relevel(sub3$race_eth, ref = "NonHispanic_White")
#SEX
sub3$sex2<- Recode (sub3$sex, recodes= "1=1; 2=2; else=NA")
sub3$male<- Recode (sub3$sex, recodes= "1=1; 2=0; else=NA")
sub3$female<- Recode (sub3$sex, recodes= "2=1; 1=0; else=NA")
#CITIZEN STATUS (1=born in the US or to US citizens, 0=not US citizen)
#sub3$citizenstatus <- Recode(sub3$citizen, recodes = "1:2=1; 3:5=0; else=NA")
#sub3$usborn <-Recode(sub3$citizenstatus, recodes="1=1; 0=0")
#sub3$notusborn <-Recode(sub3$citizenstatus, recodes="0=1; 1=0")
#table(sub3$usborn)
#EDUCATION LEVEL
sub3$educ_level<- Recode(sub3$educd, recodes = "2:61='0Less_than_HS';62:64='1_HSD/GED';65:80='2_somecollege';90:100='2_somecollege'; 81:83='3_AssocDegree';101='4_bachelordegree'; 110:116='4_BAplus_GradDegree'; else=NA")
sub3$educ_level<-as.factor(sub3$educ_level)
#College or more(1=college degree or higher and 0=Associates degree or lower)
sub3$collegeormore<- Recode(sub3$educd, recodes = "84:116=1; 2:83=0")
#USBORN
sub3$usborn <- Recode(sub3$bpl, recodes = "1:120=1; 121:900=0; else=NA")
#EMPLOYMENT
sub3$employedetailed <- Recode(sub3$empstatd, recodes = "10:12=1; 20:22=0; else=NA")
sub3$employed<-Recode(sub3$empstat, recodes = "1=1; else=NA")
sub3$unemployed<-Recode(sub3$empstat, recodes = "2=1; else=NA")
sub3$not_in_laborforce<-Recode(sub3$empstat, recodes = "3=1; else=NA")
#INCOME ADJUSTED FOR INFLATION
#sub$edu_scal_inc <- ave(sub$incwage, sub$male, FUN = scale)
sub3$inc_adj <- ifelse(sub3$year == 2005, sub3$incwage*1.23,
ifelse(sub3$year == 2006, sub3$incwage*1.18,
ifelse(sub3$year == 2007, sub3$incwage*1.16,
ifelse(sub3$year %in% c(2008,2009), sub3$incwage*1.1,
ifelse(sub3$year == 2010, sub3$incwage*1.1,
ifelse(sub3$year == 2011, sub3$incwage*1.07,
ifelse(sub3$year == 2012,sub3$incwage*1.05,
ifelse(sub3$year == 2013, sub3$incwage*1.03,
ifelse(sub3$year == 2014, sub3$incwage*1.01,
ifelse(sub3$year == 2015, sub3$incwage*1.01, sub3$incwage))))))))))
#TRAVEL TIME TO WORK (does not include zero time commute)
sub3$traveltime<-ifelse(sub3$trantime==000, NA, sub3$trantime)
#IN HS/COLLEGE OR NOT IN HS/COLLEGE
#sub3$inhs<- Recode(sub3$gradeatt, recodes = "5=1; else=0")
#sub3$incollege <- Recode(sub3$gradeatt, recodes = "6:7=1; else=0")
#YEARS IN THE US (0 indicates less than 1 year or NA - I eliminated all NAs)
sub3$yrsusa2<-Recode(sub3$yrsusa1, recodes="0=NA")
#HOUSEHOLD TYPE (1=married family, 2=one male or female living alone, 3=male or female living with someone)
sub3$housetype<-Recode(sub3$hhtype, recodes = "1=1; c(2,3,4,6)=2; c(5,7)=3; 9=NA" )
#METRO AREA (1=not in metro area, 2=metro central city, 3=suburban, 4=central/principal city unknown)
sub3$metroarea<-Recode(sub3$metro,recodes="1=1; 2=2; 3=3; else='NA'")
sub3$urban<-Recode(sub3$metroarea, recodes="2:3=1; 1=0")
sub3$rural<-Recode(sub3$metroarea, recodes="1=1; 2:3=0")
library(tidycensus)
usacs<-get_acs(geography = "county", year = 2015,
variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
"DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
"DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
"DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
"DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
"DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE", "DP02_0113PE", "DP04_0003PE") ,
summary_var = "B01001_001",
geometry = F, output = "wide")
## Getting data from the 2011-2015 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs2<-mutate(usacs,totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100, nwhite=DP05_0032E,nblack=DP05_0033E, pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100, phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,ppov=DP03_0119PE/100, pforn=DP02_0092PE/100,plep=DP02_0113PE/100, pvacant=DP04_0003PE/100, GEOID=GEOID)
myv <- c("GEOID","NAME", "totpop", "pwhite", "pblack", "phisp", "pfemhh", "phsormore", "punemp", "medhhinc", "ppov", "pforn","plep")
acsvars <- usacs2[myv]
acsvars <-filter(acsvars,GEOID %in% 48001:48507)
acsvars$GEOID<- substring(acsvars$GEOID,3)
acsvars$GEOID<-as.numeric(acsvars$GEOID)
#head(acsvars)
#I fix the error:: `x` and `labels` must be same type with the following:
sub3[] <- lapply(sub3, unclass)
joindata<-merge(acsvars, sub3, by.x="GEOID", by.y="countyfips", all.x=T)
#names(joindata)
jdvars<-c("GEOID","NAME","metro","city","puma","hhincome","incwage","educd","empstatd","race", "hispan", "usborn","raced","sex2","age","birthyr","year","gq","hhtype","yrsusa2","totpop", "pwhite", "pblack", "phisp", "pfemhh", "phsormore", "punemp", "medhhinc", "ppov","male", "inc_adj", "pforn","plep","educ_level","race_eth","collegeormore")
joindata2<-joindata[,jdvars]
joindata2<-joindata2%>%
filter(complete.cases(.))
#rm(joindata); gc()
joindata2$yearsintheUSz<-scale(joindata2$yrsusa2, center = T, scale=T)
joindata2$agez<-scale(joindata2$age, center = T, scale=T)
joindata2$inc_adjz<-scale(joindata2$inc_adj, center= T, scale=T)
The simple regression shows that all of the covariates with the exeption of being born in the US have a significant relationship to income.
fit.lm<- glm(inc_adjz~male+agez+usborn+race_eth+educ_level+yearsintheUSz, data=joindata2)
summary(fit.lm)
##
## Call:
## glm(formula = inc_adjz ~ male + agez + usborn + race_eth + educ_level +
## yearsintheUSz, data = joindata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9286 -0.4095 -0.0586 0.2511 10.6956
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.751235 0.046251 -16.242 < 2e-16 ***
## male 0.559351 0.024735 22.614 < 2e-16 ***
## agez 0.092449 0.012610 7.331 2.65e-13 ***
## usborn -0.921420 0.862671 -1.068 0.285528
## race_eth -0.087959 0.019733 -4.457 8.48e-06 ***
## educ_level 0.239952 0.007251 33.092 < 2e-16 ***
## yearsintheUSz 0.046061 0.013425 3.431 0.000606 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.7429637)
##
## Null deviance: 4870.0 on 4870 degrees of freedom
## Residual deviance: 3613.8 on 4864 degrees of freedom
## AIC: 12385
##
## Number of Fisher Scoring iterations: 2
confint(fit.lm)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -0.84188619 -0.6605847
## male 0.51087195 0.6078298
## agez 0.06773355 0.1171636
## usborn -2.61222452 0.7693852
## race_eth -0.12663538 -0.0492835
## educ_level 0.22574006 0.2541635
## yearsintheUSz 0.01974911 0.0723728
#removing files from global environment
rm("joindata")
rm("sub3")
#To find the number of cores on my system
library(parallel)
detectCores()
## [1] 4
options(mc.cores = parallel::detectCores())
# # Set path of Rtools
# Sys.setenv(PATH = paste(Sys.getenv("PATH"), "*InstallDirectory*/Rtools/bin/",
# "*InstallDirectory*/Rtools/mingw_64/bin", sep = ";")) #for 64 bit version
# Sys.setenv(BINPREF = "*InstallDirectory*/Rtools/mingw_64/bin")
# library(devtools)
# Now you can install transformr then gganimate#
#devtools::install_github("thomasp85/transformr")
#devtools::install_github("dgrtwo/gganimate")
# #install.packages("StanHeaders", repos = "http://cran.us.r-project.org")
# library("StanHeaders")
# usethis::edit_r_environ()
# #devtools::install_github("stan-dev/rstan", ref = "develop", subdir = "rstan/rstan")
#
# dotR <- file.path(Sys.getenv("HOME"), ".R")
# if (!file.exists(dotR))
# dir.create(dotR)
# M <- file.path(dotR, "Makevars.win")
# if (!file.exists(M))
# file.create(M)
# cat("\nCXX14FLAGS=-O3 -Wno-unused-variable -Wno-unused-function",
# "CXX14 = $(BINPREF)g++ -m$(WIN) -std=c++1y",
# "CXX11FLAGS=-O3 -Wno-unused-variable -Wno-unused-function",
# file = M, sep = "\n", append = TRUE)
#
The first bayesian model, shows an RHAT=1. The results of this model are similar to those obtained by the simple regression model. Being a male, longer years living in the US and older age groups provide an advantage of higher incomes. However, race seems to be negatively related to income. Finally, education seems to have a positive relationship with income however, not as large as the one presented by males vs females.
Plots show a unimodal distribution resembling the “fuzzy caterpillar” expected.
#install.packages("brm", repos = "http://cran.us.r-project.org")
#install.packages("brms", repos = "http://cran.us.r-project.org")
#install.packages("rstan", repos = "http://cran.us.r-project.org")
#install.packages("rstanarm", repos = "http://cran.us.r-project.org")
#library(rstanarm)
#library(rstan)
#library(brm)
library(brms)
#devtools::install_github('stan-dev/rstan', ref = 'develop', subdir = 'rstan/rstan')
options(mc.cores = parallel::detectCores())
fit.1<-brm(inc_adjz~male+race_eth+educ_level+usborn, data=joindata2, family=gaussian(),
warmup=1000, #burnin for 500 interations for each chain = 1000 burnin
iter=5000, chains=4, #2*5000 = 10000 - 1000 burnin = 9000 total iterations
cores=4,seed = 1100, #number of computer cores, 1 per chain is good.
refresh = 0)
## Compiling the C++ model
## Start sampling
summary(fit.lm)
##
## Call:
## glm(formula = inc_adjz ~ male + agez + usborn + race_eth + educ_level +
## yearsintheUSz, data = joindata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9286 -0.4095 -0.0586 0.2511 10.6956
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.751235 0.046251 -16.242 < 2e-16 ***
## male 0.559351 0.024735 22.614 < 2e-16 ***
## agez 0.092449 0.012610 7.331 2.65e-13 ***
## usborn -0.921420 0.862671 -1.068 0.285528
## race_eth -0.087959 0.019733 -4.457 8.48e-06 ***
## educ_level 0.239952 0.007251 33.092 < 2e-16 ***
## yearsintheUSz 0.046061 0.013425 3.431 0.000606 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.7429637)
##
## Null deviance: 4870.0 on 4870 degrees of freedom
## Residual deviance: 3613.8 on 4864 degrees of freedom
## AIC: 12385
##
## Number of Fisher Scoring iterations: 2
print(summary(fit.1), digits=3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: inc_adjz ~ male + race_eth + educ_level + usborn
## Data: joindata2 (Number of observations: 4871)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.709 0.045 -0.798 -0.620 23394 1.000
## male 0.563 0.025 0.514 0.611 20583 1.000
## race_eth -0.098 0.020 -0.137 -0.059 20625 1.000
## educ_level 0.232 0.007 0.218 0.246 20782 1.000
## usborn -0.819 0.856 -2.506 0.859 20047 1.000
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.869 0.009 0.852 0.887 20771 1.000
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit.1, N=2)
Here, we change the default priors for the model parameters to be Gaussian for the \(\beta\)’s and Inverse Gamma for the residual variances
prior1<-c(set_prior("normal(0,1)", class="b"),#prior for the beta's
set_prior("normal(0,1)", class="Intercept"), #prior for the intercept
set_prior("inv_gamma(.5,.5)", class="sigma"))#prior for the residual std. deviation
fit.1b<-brm(inc_adjz~male+race_eth+educ_level+usborn, data=joindata2, family=gaussian(),
warmup=1000, #burnin for 500 interations for each chain = 1000 burnin
iter=5000, chains=4, #2*5000 = 10000 - 1000 burnin = 9000 total iterations
cores=4,seed = 1100, #number of computer cores, 1 per chain is good.
save_model="fit_lm_gamma_stan.txt", refresh = 0
)
## Compiling the C++ model
## recompiling to avoid crashing R session
## Start sampling
library(brms)
prior<-c(set_prior("normal(0,1)", class="b"),#prior for the beta's
set_prior("cauchy(0,5)", class="sigma"),#prior for the residual std. deviation
set_prior("gamma(1,1.5)", class="nu")) #prior for the t distribution d.f.
fit.lm.b.t<-brm(inc_adjz~male+race_eth+educ_level+usborn, data=joindata2, family=student(),
prior = prior,
warmup=1000,
iter=5000, chains=4,
cores=4,seed = 1100,
save_model="fit_lm_b_t_stan.txt", refresh=0)
## Compiling the C++ model
## Start sampling
print(summary(fit.lm.b.t), digits=3)
## Family: student
## Links: mu = identity; sigma = identity; nu = identity
## Formula: inc_adjz ~ male + race_eth + educ_level + usborn
## Data: joindata2 (Number of observations: 4871)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.605 0.024 -0.652 -0.557 16520 1.000
## male 0.494 0.012 0.471 0.517 13774 1.000
## race_eth -0.033 0.011 -0.056 -0.011 15361 1.000
## educ_level 0.061 0.005 0.051 0.072 9675 1.000
## usborn -0.225 0.420 -1.058 0.721 10863 1.000
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.293 0.008 0.276 0.309 7971 1.000
## nu 1.521 0.061 1.405 1.642 8184 1.000
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit.lm.b.t, N=2)
#Custom Summary Function A comparison between modified and default priors show that they are very similar confirming the robusticity of the analysis.
out1<-as.data.frame(fit.1)
out2<-as.data.frame(fit.1b)
mysum<-function(x){
mu=mean(x,na.rm=T)
sdev=sd(x,na.rm=T)
lci=quantile(x, p=.025,na.rm=T)
uci=quantile(x, p=.975,na.rm=T)
list(mu=mu, sd=sdev, lci=lci, uci=uci)
}
sapply(out1, mysum )
## b_Intercept b_male b_race_eth b_educ_level b_usborn sigma
## mu -0.7091118 0.5625697 -0.09809845 0.2322387 -0.8185917 0.8688939
## sd 0.0453085 0.0246808 0.01979048 0.007106638 0.8557206 0.008854295
## lci -0.7978184 0.5139353 -0.1366965 0.2182395 -2.506197 0.8516538
## uci -0.6200543 0.6112396 -0.05942645 0.2460771 0.8590223 0.8865665
## lp__
## mu -6232.665
## sd 1.757167
## lci -6237.09
## uci -6230.286
sapply(out2, mysum)
## b_Intercept b_male b_race_eth b_educ_level b_usborn sigma
## mu -0.7091118 0.5625697 -0.09809845 0.2322387 -0.8185917 0.8688939
## sd 0.0453085 0.0246808 0.01979048 0.007106638 0.8557206 0.008854295
## lci -0.7978184 0.5139353 -0.1366965 0.2182395 -2.506197 0.8516538
## uci -0.6200543 0.6112396 -0.05942645 0.2460771 0.8590223 0.8865665
## lp__
## mu -6232.665
## sd 1.757167
## lci -6237.09
## uci -6230.286
A comparison of each of the three models after modifying the posterior shows similar patterns. The USborn variable includes zero on all 3 models while the remaining variables, particularly the male variable (0.5-6.0) shows siginificant findings.
posterior_interval(fit.1, prob=0.9, type="central")
## 5% 95%
## b_Intercept -0.7835461 -6.343480e-01
## b_male 0.5219836 6.033911e-01
## b_race_eth -0.1309635 -6.561054e-02
## b_educ_level 0.2205722 2.439621e-01
## b_usborn -2.2219833 5.890871e-01
## sigma 0.8543759 8.836138e-01
## lp__ -6236.0218471 -6.230491e+03
posterior_interval(fit.1b, prob=0.9, type="central")
## 5% 95%
## b_Intercept -0.7835461 -6.343480e-01
## b_male 0.5219836 6.033911e-01
## b_race_eth -0.1309635 -6.561054e-02
## b_educ_level 0.2205722 2.439621e-01
## b_usborn -2.2219833 5.890871e-01
## sigma 0.8543759 8.836138e-01
## lp__ -6236.0218471 -6.230491e+03
posterior_interval(fit.lm.b.t, prob=0.9, type="central")
## 5% 95%
## b_Intercept -6.447860e-01 -5.643874e-01
## b_male 4.751248e-01 5.131386e-01
## b_race_eth -5.200887e-02 -1.500036e-02
## b_educ_level 5.213940e-02 6.978204e-02
## b_usborn -8.700491e-01 4.753107e-01
## sigma 2.790624e-01 3.066498e-01
## nu 1.423450e+00 1.623173e+00
## lp__ -4.452393e+03 -4.446122e+03
If the models are invariant to the prior, the two lines should be very close to each other which in this case, they are.
ggplot()+geom_density(data=out1, aes(b_educ_level), col="blue", alpha=.5, lwd=1.5)+geom_density(data=out2, aes(b_educ_level), col="green", alpha=.5, lwd=1.5)
ggplot()+geom_density(data=out1, aes(sigma), col="blue", alpha=.5, lwd=1.5)+geom_density(data=out2, aes(sigma), col="green", alpha=.5, lwd=1.5)
#Calculation of Bayesian P-Values
Below is the function to calculate Bayesian p-values based on if the posterior mean of the parameter is positive or negative.
bayespval<-function(x){
if(mean(x)<0) {
pval=mean(x<0)
}
else{
pval=mean(x>0)
}
list(pval=pval)
}
sapply(out1, bayespval)
## $b_Intercept.pval
## [1] 1
##
## $b_male.pval
## [1] 1
##
## $b_race_eth.pval
## [1] 1
##
## $b_educ_level.pval
## [1] 1
##
## $b_usborn.pval
## [1] 0.830875
##
## $sigma.pval
## [1] 1
##
## $lp__.pval
## [1] 1
library("brms")
WAIC1<-WAIC(fit.1)
WAIC2<-WAIC(fit.lm.b.t)
compare_ic(WAIC1, WAIC2, ic=c("waic"))
## WAIC SE
## fit.1 12484.27 482.84
## fit.lm.b.t 8885.98 190.87
## fit.1 - fit.lm.b.t 3598.29 436.39
Which certainly looks like the Normal model is preferred here, because of the lower WAIC score. We can also evaluate the model fit to the sensitivity of the prior from the normal model:
WAIC(fit.1, fit.1b, compare = T)
## WAIC SE
## fit.1 12484.27 482.84
## fit.1b 12484.27 482.84
## fit.1 - fit.1b 0.00 0.00
The WAIC shows that there is no difference between the two models.
#install.packages("loo")
#install.packages("blmeco")
library ("loo")
## This is loo version 2.0.0.
## **NOTE: As of version 2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session. Visit mc-stan.org/loo/news for details on other changes.
##
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
##
## loo
library ("blmeco")
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'blmeco'
## The following object is masked from 'package:brms':
##
## WAIC
This technique is used for judging relative model fit as the predictive capacity of the model. The LOO method uses the posterior predictive distribution for the model. The posterior predictive distribution is the posterior distribution of new data, given the parameters we have obtained from our model.
Results of the LOOIC, shows that the model with the alternative likelihoods is a better fit.
library("loo")
loo(fit.1 , fit.lm.b.t)
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'fit.1'. It
## is recommended to set 'reloo = TRUE' in order to calculate the ELPD without
## the assumption that these observations are negligible. This will refit
## the model 1 times to compute the ELPDs for the problematic observations
## directly.
## LOOIC SE
## fit.1 12485.26 482.83
## fit.lm.b.t 8886.12 190.87
## fit.1 - fit.lm.b.t 3599.14 436.37