Real Data Meals Example

Observe the data that we have

summary(df)

##  SFSP.Kitchens  Media     Public.Transport Total.Meals
##  No :374       High: 52   No :427          High: 73   
##  Yes: 79       Low :401   Yes: 26          Low :380

Here’s the BN we’ll test it against.

library(DiagrammeR)
grViz("
digraph dot {

      graph [layout = dot]

      node [shape = oval,
      style = filled,
      color = black]

      node [color=black, fillcolor=white]
      SummerSchool Awareness Transport Meals

      edge [color = black,font=Arial]
      SummerSchool -> Meals
      Awareness -> Transport
      Transport -> Meals
      SummerSchool -> Awareness
      SummerSchool -> Transport

      }")

Now let’s find the CEG of interest and see what stage struture it finds:

#find the CEG
df <- CheckAndCleanData(df)

## Your data do not contain rows with <NA>, absent, or whitespace  values

#ceg:::ContingencyTable(stratified.event.tree = set, data = df)
set <- Stratified.event.tree(df)

## ~~~ Stratified.event.tree: initializator ~~~

plot(set)

## pdf 
##   2

prior <- ceg:::PriorDistribution(set,3)

sst <- Stratified.staged.tree(df)

## ~~~ Stratified.event.tree: initializator ~~~ 
## ~~~ OAHC: initializator ~~~

## Warning in order.max.score[ref:(tree@num.situation[level] - 2)] <-
## order.max.score[(ref + : number of items to replace is not a multiple of
## replacement length

## ~~~ OAHC: initializator ~~~ 
## ~~~ OAHC: initializator ~~~ 
## ~~~ OAHC: initializator ~~~ 
## ~~~ Stratified.staged.tree: initializator ~~~

plot(sst)

## pdf 
##   2

meals.ceg <- ceg:::Stratified.ceg.model(stratified.staged.tree = sst)

## ~~~ CEGStretified: initializator ~~~

plot(meals.ceg)

## png 
##   2

Because my code relies on a totally different, more cumbersome CEG structure, we’ll write this so it works with the diagnostics. (I need to figure out how to get this to interface with Rodriguo’s stuff.)

#find the diagnostics of the fit structure
#first desribe the structure of the CEG 
cega.stage.key <- list()
count(df) -> cega.stage.key[[1]]

## Warning in summarise_impl(.data, dots): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored

df %>% count(SFSP.Kitchens) -> cega.stage.key[[2]]
df %>% count(SFSP.Kitchens, Media) -> cega.stage.key[[3]]
df %>% count(SFSP.Kitchens, Media, Public.Transport) -> cega.stage.key[[4]]
df %>% count(SFSP.Kitchens, Media, Public.Transport, Total.Meals) -> cega.stage.key[[5]]
#define a stage key for each cut in the data
#Q: how does this change for asymmetries?
cega.stage.key[[1]]$stage <- c("cega.w0")
cega.stage.key[[2]]$stage <- c("cega.w1", "cega.w2")
cega.stage.key[[3]]$stage <- c("cega.w3", "cega.w4", "cega.w5", "cega.w5")
cega.stage.key[[4]]$stage <- c("cega.w6", "cega.w7", "cega.w7", "cega.w8", "cega.w6", "cega.w6", "cega.w6")#this contains the structure
cega.stage.key[[5]]$stage <- rep("cega.winf", length(cega.stage.key[[5]]$n))

cega.stages <- list("cega.w0", "cega.w1", "cega.w2", "cega.w3", "cega.w4", "cega.w5", "cega.w6", "cega.w7", "cega.w8")

cega.w0 <- df %>% count(SFSP.Kitchens)
cega.w1 <- df %>% filter(SFSP.Kitchens=="No") %>% count(Media)
cega.w2 <- df %>% filter(SFSP.Kitchens=="Yes") %>% count(Media)
cega.w3 <- df %>% filter(SFSP.Kitchens=="No", Media=="Low") %>% count(Public.Transport)
cega.w4 <- df %>% filter(SFSP.Kitchens=="No", Media=="High") %>% count(Public.Transport)
cega.w5 <- df %>% filter((SFSP.Kitchens=="Yes"& Media=="High") | (SFSP.Kitchens=="Yes"& Media=="Low")) %>% count(Public.Transport)
cega.w6 <- df %>% filter((SFSP.Kitchens=="No"& Media=="High" & Public.Transport=="No") |
                           (SFSP.Kitchens=="Yes"& Media=="High" & Public.Transport=="No") |
                           (SFSP.Kitchens=="Yes"& Media=="Low" & Public.Transport=="No") |
                           (SFSP.Kitchens=="Yes"& Media=="Low" & Public.Transport=="Yes") ) %>% count(Total.Meals)
cega.w7 <- df %>% filter((SFSP.Kitchens=="No" & Media=="High"  & Public.Transport=="Yes") |  
                           (SFSP.Kitchens=="No" & Media=="Low"  & Public.Transport=="No")) %>% count(Total.Meals)
cega.w8 <- df %>% filter((SFSP.Kitchens=="No" & Media=="Low"  & Public.Transport=="Yes")) %>% count(Total.Meals)
cega.struct <- list(cega.w0, cega.w1, cega.w2, cega.w3, cega.w4, cega.w5, cega.w6, cega.w7, cega.w8)#this is the observed values for each of the stages

Now we’re ready to run the diagnostics for the BN and the CEG.

We’ll compare the effect of w4 on w7 for the CEG, and compare this to the parent child monitor for BNs for the Meals node given that the parents are set to SummerSchool=Yes, Awareness=No, and Public.transport=No.

ceg.child.parent.monitor(df,"cega.w7",4,"cega.w4",cega.stages,cega.stage.key,cega.struct, n=500,learn=T) -> pach47l
ceg.child.parent.monitor(df,"cega.w7",4,"cega.w4",cega.stages,cega.stage.key,cega.struct, n=500,learn=F) -> pach47;
bnTl <-bn.parent.child.monitor(df,parents=c("SFSP.Kitchens","Media","Public.Transport"),parent.values = c("No","Low","No"),child = "Total.Meals",n=500,learn=F)
bnT <-bn.parent.child.monitor(df,parents=c("SFSP.Kitchens","Media","Public.Transport"),parent.values = c("No","Low","No"),child = "Total.Meals",n=500,learn=T)
#plottin

The CEG has a slight edge when we examine the cuulative log penalties with learning.

#with learning
plot(bnTl[,1],col='blue',pch=18,type='b')
lines(pach47l[,1], col='red',pch=19,type='b')
legend("topleft", legend=c("CEG", "BN"),
       col=c("red", "blue"), lty = 1:2, cex=0.8)

Without learning, the CEG (again in red) loses out to the BN.

#without learning
plot(bnT[,1],col='blue',pch=19)
lines(pach47[,1],col='red',pch=18, type='b')
legend("topleft", legend=c("CEG", "BN"),
       col=c("red", "blue"), lty = 1:2, cex=0.8)

Real Data Meals Example

Rachel Wilkerson

1 February 2018