Observe the data that we have
summary(df)
## SFSP.Kitchens Media Public.Transport Total.Meals
## No :374 High: 52 No :427 High: 73
## Yes: 79 Low :401 Yes: 26 Low :380
Here’s the BN we’ll test it against.
library(DiagrammeR)
grViz("
digraph dot {
graph [layout = dot]
node [shape = oval,
style = filled,
color = black]
node [color=black, fillcolor=white]
SummerSchool Awareness Transport Meals
edge [color = black,font=Arial]
SummerSchool -> Meals
Awareness -> Transport
Transport -> Meals
SummerSchool -> Awareness
SummerSchool -> Transport
}")
Now let’s find the CEG of interest and see what stage struture it finds:
#find the CEG
df <- CheckAndCleanData(df)
## Your data do not contain rows with <NA>, absent, or whitespace values
#ceg:::ContingencyTable(stratified.event.tree = set, data = df)
set <- Stratified.event.tree(df)
## ~~~ Stratified.event.tree: initializator ~~~
plot(set)
## pdf
## 2
prior <- ceg:::PriorDistribution(set,3)
sst <- Stratified.staged.tree(df)
## ~~~ Stratified.event.tree: initializator ~~~
## ~~~ OAHC: initializator ~~~
## Warning in order.max.score[ref:(tree@num.situation[level] - 2)] <-
## order.max.score[(ref + : number of items to replace is not a multiple of
## replacement length
## ~~~ OAHC: initializator ~~~
## ~~~ OAHC: initializator ~~~
## ~~~ OAHC: initializator ~~~
## ~~~ Stratified.staged.tree: initializator ~~~
plot(sst)
## pdf
## 2
meals.ceg <- ceg:::Stratified.ceg.model(stratified.staged.tree = sst)
## ~~~ CEGStretified: initializator ~~~
plot(meals.ceg)
## png
## 2
Because my code relies on a totally different, more cumbersome CEG structure, we’ll write this so it works with the diagnostics. (I need to figure out how to get this to interface with Rodriguo’s stuff.)
#find the diagnostics of the fit structure
#first desribe the structure of the CEG
cega.stage.key <- list()
count(df) -> cega.stage.key[[1]]
## Warning in summarise_impl(.data, dots): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored
df %>% count(SFSP.Kitchens) -> cega.stage.key[[2]]
df %>% count(SFSP.Kitchens, Media) -> cega.stage.key[[3]]
df %>% count(SFSP.Kitchens, Media, Public.Transport) -> cega.stage.key[[4]]
df %>% count(SFSP.Kitchens, Media, Public.Transport, Total.Meals) -> cega.stage.key[[5]]
#define a stage key for each cut in the data
#Q: how does this change for asymmetries?
cega.stage.key[[1]]$stage <- c("cega.w0")
cega.stage.key[[2]]$stage <- c("cega.w1", "cega.w2")
cega.stage.key[[3]]$stage <- c("cega.w3", "cega.w4", "cega.w5", "cega.w5")
cega.stage.key[[4]]$stage <- c("cega.w6", "cega.w7", "cega.w7", "cega.w8", "cega.w6", "cega.w6", "cega.w6")#this contains the structure
cega.stage.key[[5]]$stage <- rep("cega.winf", length(cega.stage.key[[5]]$n))
cega.stages <- list("cega.w0", "cega.w1", "cega.w2", "cega.w3", "cega.w4", "cega.w5", "cega.w6", "cega.w7", "cega.w8")
cega.w0 <- df %>% count(SFSP.Kitchens)
cega.w1 <- df %>% filter(SFSP.Kitchens=="No") %>% count(Media)
cega.w2 <- df %>% filter(SFSP.Kitchens=="Yes") %>% count(Media)
cega.w3 <- df %>% filter(SFSP.Kitchens=="No", Media=="Low") %>% count(Public.Transport)
cega.w4 <- df %>% filter(SFSP.Kitchens=="No", Media=="High") %>% count(Public.Transport)
cega.w5 <- df %>% filter((SFSP.Kitchens=="Yes"& Media=="High") | (SFSP.Kitchens=="Yes"& Media=="Low")) %>% count(Public.Transport)
cega.w6 <- df %>% filter((SFSP.Kitchens=="No"& Media=="High" & Public.Transport=="No") |
(SFSP.Kitchens=="Yes"& Media=="High" & Public.Transport=="No") |
(SFSP.Kitchens=="Yes"& Media=="Low" & Public.Transport=="No") |
(SFSP.Kitchens=="Yes"& Media=="Low" & Public.Transport=="Yes") ) %>% count(Total.Meals)
cega.w7 <- df %>% filter((SFSP.Kitchens=="No" & Media=="High" & Public.Transport=="Yes") |
(SFSP.Kitchens=="No" & Media=="Low" & Public.Transport=="No")) %>% count(Total.Meals)
cega.w8 <- df %>% filter((SFSP.Kitchens=="No" & Media=="Low" & Public.Transport=="Yes")) %>% count(Total.Meals)
cega.struct <- list(cega.w0, cega.w1, cega.w2, cega.w3, cega.w4, cega.w5, cega.w6, cega.w7, cega.w8)#this is the observed values for each of the stages
Now we’re ready to run the diagnostics for the BN and the CEG.
We’ll compare the effect of w4 on w7 for the CEG, and compare this to the parent child monitor for BNs for the Meals node given that the parents are set to SummerSchool=Yes, Awareness=No, and Public.transport=No.
ceg.child.parent.monitor(df,"cega.w7",4,"cega.w4",cega.stages,cega.stage.key,cega.struct, n=500,learn=T) -> pach47l
ceg.child.parent.monitor(df,"cega.w7",4,"cega.w4",cega.stages,cega.stage.key,cega.struct, n=500,learn=F) -> pach47;
bnTl <-bn.parent.child.monitor(df,parents=c("SFSP.Kitchens","Media","Public.Transport"),parent.values = c("No","Low","No"),child = "Total.Meals",n=500,learn=F)
bnT <-bn.parent.child.monitor(df,parents=c("SFSP.Kitchens","Media","Public.Transport"),parent.values = c("No","Low","No"),child = "Total.Meals",n=500,learn=T)
#plottin
The CEG has a slight edge when we examine the cuulative log penalties with learning.
#with learning
plot(bnTl[,1],col='blue',pch=18,type='b')
lines(pach47l[,1], col='red',pch=19,type='b')
legend("topleft", legend=c("CEG", "BN"),
col=c("red", "blue"), lty = 1:2, cex=0.8)
Without learning, the CEG (again in red) loses out to the BN.
#without learning
plot(bnT[,1],col='blue',pch=19)
lines(pach47[,1],col='red',pch=18, type='b')
legend("topleft", legend=c("CEG", "BN"),
col=c("red", "blue"), lty = 1:2, cex=0.8)