Untitled

packageVersion("gRbase")

## [1] '1.7.2'

packageVersion("gRain")

## [1] '1.2.4'

packageVersion("gRim")

## [1] '0.1.17'

source("http://bioconductor.org/biocLite.R");

## Bioconductor version 3.1 (BiocInstaller 1.18.5), ?biocLite for help
## A newer version of Bioconductor is available for this version of R,
##   ?BiocUpgrade for help

#biocLite(c("graph","RBGL","Rgraphviz"))

#3 Conditional probability tables (CPTs)
#CPTs are just multiway arrays WITH dimnames attribute. For
#example p(tja):
library(gRain)

## Loading required package: gRbase

yn <- c("yes","no");
x <- c(5,95,1,99)
# Vanilla R
t.a <- array(x, dim=c(2,2), 
             dimnames=list(tub=yn,asia=yn))
t.a

##      asia
## tub   yes no
##   yes   5  1
##   no   95 99

# Alternative specification: parray() from gRbase
t.a <- parray(c("tub","asia"), levels=list(yn,yn), 
              values=x)
t.a

##      asia
## tub   yes no
##   yes   5  1
##   no   95 99

# with a formula interface
t.a <- parray(~tub:asia, levels=list(yn,yn), values=x)
t.a

##      asia
## tub   yes no
##   yes   5  1
##   no   95 99

# Alternative (partial) specification
t.a <- cptable(~tub | asia, values=c(5,95,1,99), 
               levels=yn)
t.a

## {v,pa(v)} : chr [1:2] "tub" "asia"
##     <NA> <NA>
## yes    5    1
## no    95   99

#An introduction to the gRain package
#Specify chest clinic network. Can be done in many ways; one is
#from a list of CPTs:
library(gRain)
yn <- c("yes","no")
a <- cptable(~asia, values=c(1,99), levels=yn)
t.a <- cptable(~tub | asia, values=c(5,95,1,99), levels=yn)
s <- cptable(~smoke, values=c(5,5), levels=yn)
l.s <- cptable(~lung | smoke, values=c(1,9,1,99), levels=yn)
b.s <- cptable(~bronc | smoke, values=c(6,4,3,7), levels=yn)
e.lt <- cptable(~either | lung:tub,values=c(1,0,1,0,1,0,0,1),
                  levels=yn)
x.e <- cptable(~xray | either, values=c(98,2,5,95), levels=yn)
d.be <- cptable(~dysp | bronc:either, values=c(9,1,7,3,8,2,1,9),
                  levels=yn)

cpt.list <- compileCPT(list(a, t.a, s, l.s, b.s, e.lt, x.e, d.be))
cpt.list

## CPTspec with probabilities:
##  P( asia )
##  P( tub | asia )
##  P( smoke )
##  P( lung | smoke )
##  P( bronc | smoke )
##  P( either | lung tub )
##  P( xray | either )
##  P( dysp | bronc either )

cpt.list$asia

## asia
##  yes   no 
## 0.01 0.99

cpt.list$tub

##      asia
## tub    yes   no
##   yes 0.05 0.01
##   no  0.95 0.99

ftable(cpt.list$either, row.vars=1) # Notice: logical variable

##        lung yes     no   
##        tub  yes no yes no
## either                   
## yes           1  1   1  0
## no            0  0   0  1

# Create network from CPT list:
bnet <- grain(cpt.list)
# Compile network (details follow)
bnet <- compile(bnet)
bnet

## Independence network: Compiled: TRUE Propagated: FALSE 
##   Nodes: chr [1:8] "asia" "tub" "smoke" "lung" "bronc" "either" ...

#Querying the network
# Query network to find marginal probabilities of diseases
querygrain(bnet, nodes=c("tub","lung","bronc"))

## $tub
## tub
##    yes     no 
## 0.0104 0.9896 
## 
## $lung
## lung
##   yes    no 
## 0.055 0.945 
## 
## $bronc
## bronc
##  yes   no 
## 0.45 0.55

#6 Setting evidence
# Set evidence and query network again
bnet.ev<-setEvidence(bnet, nodes = c("asia","dysp"),
                         states = c("yes","yes"))
querygrain(bnet.ev, nodes=c("tub","lung","bronc"))

## $tub
## tub
##        yes         no 
## 0.08775096 0.91224904 
## 
## $lung
## lung
##        yes         no 
## 0.09952515 0.90047485 
## 
## $bronc
## bronc
##       yes        no 
## 0.8114021 0.1885979

# Set additional evidence and query again
bnet.ev<-setEvidence(bnet.ev, nodes = "xray", states = "yes")
querygrain(bnet.ev, nodes=c("tub","lung","bronc"))

## $tub
## tub
##       yes        no 
## 0.3917117 0.6082883 
## 
## $lung
## lung
##       yes        no 
## 0.4442705 0.5557295 
## 
## $bronc
## bronc
##       yes        no 
## 0.6288218 0.3711782

# Probability of observing the evidence (the normalizing constant)
pEvidence(bnet.ev)

## [1] 0.0009882268

# Get joint dist of tub, lung, bronc given evidence
x<-querygrain(bnet.ev, nodes=c("tub","lung","bronc"),
                type="joint")
ftable(x, row.vars=1)

##     lung          yes                      no            
##     bronc         yes          no         yes          no
## tub                                                      
## yes       0.014056997 0.008156529 0.186757240 0.182740955
## no        0.267082934 0.154974048 0.160924606 0.025306692

# Get distribution of tub given lung, bronc and evidence
x<-querygrain(bnet.ev, nodes=c("tub","lung","bronc"),
                  type="conditional")
ftable(x, row.vars=1)

##      bronc       yes                  no          
##      tub         yes        no       yes        no
## lung                                              
## yes        0.0500000 0.9500000 0.0500000 0.9500000
## no         0.5371498 0.4628502 0.8783611 0.1216389

# Remove evidence
bnet.ev<-retractEvidence(bnet.ev, nodes="asia")
bnet.ev

## Independence network: Compiled: TRUE Propagated: TRUE 
##   Nodes: chr [1:8] "asia" "tub" "smoke" "lung" "bronc" "either" ...
##   Evidence:
##   nodes is.hard.evidence hard.state
## 1  dysp             TRUE        yes
## 2  xray             TRUE        yes
##   pEvidence: 0.070670

## collection of CPTs: p(v|pa(v))
cpt.list

## CPTspec with probabilities:
##  P( asia )
##  P( tub | asia )
##  P( smoke )
##  P( lung | smoke )
##  P( bronc | smoke )
##  P( either | lung tub )
##  P( xray | either )
##  P( dysp | bronc either )

## form joint p(V)= prod p(v|pa(v))
joint <- cpt.list$asia
for (i in 2:length(cpt.list)){
  joint <- tableMult( joint, cpt.list[[i]] )
}
dim(joint)

##   dysp  bronc either   xray   lung    tub  smoke   asia 
##      2      2      2      2      2      2      2      2

head( as.data.frame.table( joint ) )

##   dysp bronc either xray lung tub smoke asia      Freq
## 1  yes   yes    yes  yes  yes yes   yes  yes 1.323e-05
## 2   no   yes    yes  yes  yes yes   yes  yes 1.470e-06
## 3  yes    no    yes  yes  yes yes   yes  yes 6.860e-06
## 4   no    no    yes  yes  yes yes   yes  yes 2.940e-06
## 5  yes   yes     no  yes  yes yes   yes  yes 0.000e+00
## 6   no   yes     no  yes  yes yes   yes  yes 0.000e+00

## form marginal p(a,b,d) by marginalization
marg <- tableMargin(joint, ~asia+bronc+dysp)
dim( marg )

##  asia bronc  dysp 
##     2     2     2

ftable( marg )

##            dysp         yes          no
## asia bronc                             
## yes  yes        0.003652425 0.000847575
##      no         0.000848950 0.004651050
## no   yes        0.359932815 0.085567185
##      no         0.071536410 0.472963590

## Set entries not consistent with asia=yes and dysp=yes
## equal to zero
marg <- tableSetSliceValue(marg, c("asia","dysp"), c("yes","yes"),
                               complement=T)
ftable(marg)

##            dysp         yes          no
## asia bronc                             
## yes  yes        0.003652425 0.000000000
##      no         0.000848950 0.000000000
## no   yes        0.000000000 0.000000000
##      no         0.000000000 0.000000000

result <- tableMargin(marg, ~bronc);
result <- result / sum( result ); result

## bronc
##       yes        no 
## 0.8114021 0.1885979

#8 Message passing  a small example
require(gRbase); require(Rgraphviz)

## Loading required package: Rgraphviz
## Loading required package: graph
## Loading required package: grid

d<-dag( ~smoke + bronc|smoke + dysp|bronc ); plot(d)

library(gRain)
yn <- c("yes","no")
s <- parray("smoke", list(yn), c(.5, .5))
b.s <- parray(c("bronc","smoke"), list(yn,yn), c(6,4, 3,7))
d.b <- parray(c("dysp","bronc"), list(yn, yn), c(9,1, 2,8))
s; b.s; d.b

## smoke
## yes  no 
## 0.5 0.5

##      smoke
## bronc yes no
##   yes   6  3
##   no    4  7

##      bronc
## dysp  yes no
##   yes   9  2
##   no    1  8

#p(s; b; d) = p(s)p(bjs)p(djb)
joint <- tableMult( tableMult(s, b.s), d.b) ; ftable(joint)

##            smoke  yes   no
## dysp bronc                
## yes  yes         27.0 13.5
##      no           4.0  7.0
## no   yes          3.0  1.5
##      no          16.0 28.0

dm <-moralize(d);
jtree<-ug(~smoke.bronc:bronc.dysp);
par(mfrow=c(1,3)); plot(d); plot(dm); plot(jtree)

par(mfrow=c(1,3)); plot(d); plot(dm); plot(jtree)

q1.sb <- tableMult(s, b.s); q1.sb

##      smoke
## bronc yes  no
##   yes   3 1.5
##   no    2 3.5

q2.bd <- d.b; q2.bd

##      bronc
## dysp  yes no
##   yes   9  2
##   no    1  8

#Collect Evidence
plot( jtree )

q1.b <- tableMargin(q1.sb, "bronc"); q1.b

## bronc
## yes  no 
## 4.5 5.5

q2.bd <- tableMult(q2.bd, q1.b); q2.bd

##      dysp
## bronc  yes   no
##   yes 40.5  4.5
##   no  11.0 44.0

q1.sb <- tableDiv(q1.sb, q1.b); q1.sb

##      smoke
## bronc       yes        no
##   yes 0.6666667 0.3333333
##   no  0.3636364 0.6363636

#Recall that the joint distribution is
joint

## , , smoke = yes
## 
##      bronc
## dysp  yes no
##   yes  27  4
##   no    3 16
## 
## , , smoke = no
## 
##      bronc
## dysp   yes no
##   yes 13.5  7
##   no   1.5 28

q1.sb

##      smoke
## bronc       yes        no
##   yes 0.6666667 0.3333333
##   no  0.3636364 0.6363636

tableMargin(joint, c("smoke","bronc"))

##      bronc
## smoke yes no
##   yes  30 20
##   no   15 35

q2.bd

##      dysp
## bronc  yes   no
##   yes 40.5  4.5
##   no  11.0 44.0

tableMargin(joint, c("bronc","dysp"))

##      dysp
## bronc  yes   no
##   yes 40.5  4.5
##   no  11.0 44.0

tableMargin(q1.sb, "bronc") # or

## bronc
## yes  no 
##   1   1

tableMargin(q2.bd, "bronc")

## bronc
## yes  no 
##  45  55

#8.3 Setting evidence
#Next consider the case where we have the evidence that dysp=yes.
q1.sb <- tableMult(s, b.s)
q2.bd <- d.b
q2.bd <- tableSetSliceValue(q2.bd, "dysp", "yes", complement=T)#; q2.bronc??

# Repeat all the same steps as before
q1.b <- tableMargin(q1.sb, "bronc"); q1.b

## bronc
## yes  no 
## 4.5 5.5

q2.bd <- tableMult(q2.bd, q1.b); q2.bd

##      dysp
## bronc  yes no
##   yes 40.5  0
##   no  11.0  0

q1.sb <- tableDiv(q1.sb, q1.b); q1.sb

##      smoke
## bronc       yes        no
##   yes 0.6666667 0.3333333
##   no  0.3636364 0.6363636

q2.b <- tableMargin(q2.bd, "bronc"); q2.b

## bronc
##  yes   no 
## 40.5 11.0

q1.sb <- tableMult(q1.sb, q2.b); q1.sb

##      smoke
## bronc yes   no
##   yes  27 13.5
##   no    4  7.0

ftable( joint )

##            smoke  yes   no
## dysp bronc                
## yes  yes         27.0 13.5
##      no           4.0  7.0
## no   yes          3.0  1.5
##      no          16.0 28.0

q1.sb

##      smoke
## bronc yes   no
##   yes  27 13.5
##   no    4  7.0

tableMargin(joint, c("smoke","bronc"))

##      bronc
## smoke yes no
##   yes  30 20
##   no   15 35

q2.bd

##      dysp
## bronc  yes no
##   yes 40.5  0
##   no  11.0  0

tableMargin(joint, c("bronc","dysp"))

##      dysp
## bronc  yes   no
##   yes 40.5  4.5
##   no  11.0 44.0

##9 Message passing  the bigger picture
#Moral graph: marry parents and drop directions:
par(mfrow=c(1,2)); plot(bnet$dag); plot(moralize(bnet$dag))

par(mfrow=c(1,2)); plot(moralize(bnet$dag));
plot(triangulate(moralize(bnet$dag)))

par(mfrow=c(1,2)); plot(bnet$ug); plot(jTree( bnet$ug ))

str( jTree( bnet$ug )$cliques )

## List of 6
##  $ : chr [1:2] "asia" "tub"
##  $ : chr [1:3] "either" "lung" "tub"
##  $ : chr [1:3] "either" "lung" "bronc"
##  $ : chr [1:3] "smoke" "lung" "bronc"
##  $ : chr [1:3] "either" "dysp" "bronc"
##  $ : chr [1:2] "either" "xray"

#10 Conditional independence
#Consider again the toy example:
plot(dag(~smoke+bronc|smoke+dysp|bronc))
#p(s; b; d) = p(s)p(b|s)p(d|b)

#Clear that s??djb under all these models:
par(mfrow = c(1,4))

plot(dag(~smoke+bronc|smoke+dysp|bronc))
plot(dag(~bronc+smoke|bronc+dysp|bronc))
plot(dag(~dysp+smoke|bronc+bronc|dysp))
plot(ug(~smoke:bronc+bronc:dysp))

plot(dag( ~smoke + dysp + bronc|smoke:dysp ))

#11 Towards data
#Building CPTs from data:
## Example: Simulated data from chest network
data(chestSim1000, package="gRbase")
head(chestSim1000)

##   asia tub smoke lung bronc either xray dysp
## 1   no  no    no   no   yes     no   no  yes
## 2   no  no   yes   no   yes     no   no  yes
## 3   no  no   yes   no    no     no   no   no
## 4   no  no    no   no    no     no   no   no
## 5   no  no   yes   no   yes     no   no  yes
## 6   no  no   yes  yes   yes    yes  yes  yes

#11.1 Extracting CPTs
## Extract empirical distributions
s <- xtabs(~smoke, chestSim1000); s

## smoke
## yes  no 
## 465 535

b.s <- xtabs(~bronc+smoke, chestSim1000); b.s

##      smoke
## bronc yes  no
##   yes 276 160
##   no  189 375

d.b <- xtabs(~dysp+bronc, chestSim1000); d.b

##      bronc
## dysp  yes  no
##   yes 360  68
##   no   76 496

## Normalize to CPTs if desired (not necessary because
## we can always normalize at the end)
s <- as.parray(s, normalize="first"); s

## smoke
##   yes    no 
## 0.465 0.535

b.s <- as.parray(b.s, normalize="first"); b.s

##      smoke
## bronc       yes        no
##   yes 0.5935484 0.2990654
##   no  0.4064516 0.7009346

d.b <- as.parray(d.b, normalize="first"); d.b

##      bronc
## dysp        yes        no
##   yes 0.8256881 0.1205674
##   no  0.1743119 0.8794326

cpt.list <- compileCPT(list(s, b.s, d.b)); cpt.list

## CPTspec with probabilities:
##  P( smoke )
##  P( bronc | smoke )
##  P( dysp | bronc )

net <- grain( cpt.list ); net

## Independence network: Compiled: FALSE Propagated: FALSE 
##   Nodes: chr [1:3] "smoke" "bronc" "dysp"

#But we could just as well extract CPTs for this model,
plot(dag(~bronc + smoke|bronc + dysp|bronc))

#in the sense that the joint distribution will become the same:
## Extract empirical distributions
b <- xtabs(~bronc, chestSim1000);
s.b <- xtabs(~smoke+bronc, chestSim1000);
d.b <- xtabs(~dysp+bronc, chestSim1000);

#Notice, that in this case
plot(dag( ~smoke + dysp + bronc|smoke:dysp ))

s <- xtabs(~smoke, chestSim1000);
d <- xtabs(~dysp, chestSim1000);
b.sd <- xtabs(~bronc+smoke+dysp, chestSim1000);

#11.2 Extracting clique marginals
#Alternatively, we consider the undirected graph
plot(ug( ~smoke:bronc+bronc:dysp ))

#We might as well extract clique marginals directly:
q1.sb <- xtabs(~smoke+bronc, data=chestSim1000); q1.sb

##      bronc
## smoke yes  no
##   yes 276 189
##   no  160 375

q2.db <- xtabs(~bronc+dysp, data=chestSim1000); q2.db

##      dysp
## bronc yes  no
##   yes 360  76
##   no   68 496

q1.sb <- tableDiv(q1.sb, tableMargin(q1.sb, ~smoke)); q1.sb

##      bronc
## smoke       yes        no
##   yes 0.5935484 0.4064516
##   no  0.2990654 0.7009346

#12 Learning the model structure
data(lizardRAW, package="gRbase")
dim(lizardRAW)

## [1] 409   3

head(lizardRAW, 4)

##   diam height species
## 1   >4  >4.75    dist
## 2   >4  >4.75    dist
## 3  <=4 <=4.75   anoli
## 4   >4 <=4.75   anoli

lizard<-xtabs(~., data=lizardRAW)
dim( lizard )

## [1] 2 2 2

ftable( lizard )

##             species anoli dist
## diam height                   
## <=4  <=4.75            86   73
##      >4.75             32   61
## >4   <=4.75            35   70
##      >4.75             11   41

#One estimate of the probabilities is by the relative frquencies:
lizardProb <- lizard/sum(lizard); ftable(lizardProb)

##             species      anoli       dist
## diam height                              
## <=4  <=4.75         0.21026895 0.17848411
##      >4.75          0.07823961 0.14914425
## >4   <=4.75         0.08557457 0.17114914
##      >4.75          0.02689487 0.10024450

tableMargin(lizard, ~height+species)

##         species
## height   anoli dist
##   <=4.75   121  143
##   >4.75     43  102

tableMargin(lizardProb, ~height+species)

##         species
## height       anoli      dist
##   <=4.75 0.2958435 0.3496333
##   >4.75  0.1051345 0.2493888

lizard

## , , species = anoli
## 
##      height
## diam  <=4.75 >4.75
##   <=4     86    32
##   >4      35    11
## 
## , , species = dist
## 
##      height
## diam  <=4.75 >4.75
##   <=4     73    61
##   >4      70    41

#12.3 Hierarchical loglinear models

#12.4 Dependence graphs
par(mfrow=c(1,4))
plot( ug(~D:S + H:S ))
plot( ug(~D:S + H:S + D:H ))
plot( ug(~D:H:S ))
plot( ug(~D + H:S ))

par(mfrow=c(1,1))
#12.5 The Global Markov property
plot( ug(~A:B:C+B:C:D+D:E+E:F ))

#12.7 Fitting loglinear models
#Iterative proportional scaling is implemented in loglin():
ll1 <- loglin(lizard, list(c("species","diam"),
                               c("species","height")))

## 2 iterations: deviation 0

#2 iterations: deviation 0
str( ll1 )

## List of 4
##  $ lrt    : num 2.03
##  $ pearson: num 2.02
##  $ df     : num 2
##  $ margin :List of 2
##   ..$ : chr [1:2] "species" "diam"
##   ..$ : chr [1:2] "species" "height"

#A formula based interface to loglin() is provided by loglm():
library(MASS)
ll2 <- loglm(~species:diam + species:height, data=lizard); ll2

## Call:
## loglm(formula = ~species:diam + species:height, data = lizard)
## 
## Statistics:
##                       X^2 df  P(> X^2)
## Likelihood Ratio 2.025647  2 0.3631921
## Pearson          2.017364  2 0.3646994

coef( ll2 )

## $`(Intercept)`
## [1] 3.787314
## 
## $diam
##        <=4         >4 
##  0.2825882 -0.2825882 
## 
## $height
##     <=4.75      >4.75 
##  0.3431156 -0.3431156 
## 
## $species
##      anoli       dist 
## -0.3090218  0.3090218 
## 
## $diam.species
##      species
## diam       anoli       dist
##   <=4  0.1884334 -0.1884334
##   >4  -0.1884334  0.1884334
## 
## $height.species
##         species
## height        anoli       dist
##   <=4.75  0.1741797 -0.1741797
##   >4.75  -0.1741797  0.1741797

#The dmod() function also provides an interface to loglin(), and
#dmod() oers much more; see later.
library(gRim)
ll3 <- dmod(~species:diam + species:height, data=lizard); ll3

## Model: A dModel with 3 variables
##  graphical :  TRUE  decomposable :  TRUE
##  -2logL    :        1604.43 mdim :    5 aic :      1614.43 
##  ideviance :          23.01 idf  :    2 bic :      1634.49 
##  deviance  :           2.03 df   :    2

par(mfrow=c(1,4))
plot( ug(~D:S + H:S )) ## graphical
plot( ug(~D:S + H:S + D:H )) ## not graphical
plot( ug(~D:H:S )) ## graphical
plot( ug(~D + H:S )) ## graphical

par(mfrow=c(1,1))

par(mfrow=c(1,3))
plot(ug(~A:B:C + B:C:D)) ## graphical, decomposable
plot(ug(~A:B + A:C + B:C:D)) ## not graphical, not decomposable
plot(ug(~A:B + A:C + B:D + C:D)) ## graphical, not decomposable

par(mfrow=c(1,1))

#12.9 ML estimation in decomposable models
n.ds <- tableMargin(lizard, ~diam+species); n.ds

##      species
## diam  anoli dist
##   <=4   118  134
##   >4     46  111

n.ds <- tableMargin(lizard, c("diam", "species"))
n.hs <- tableMargin(lizard, c("height", "species"))
n.s <- tableMargin(lizard, c("species"))
ec <- tableDiv( tableMult(n.ds, n.hs), n.s) ## expected counts
ftable( ec )

##                diam      <=4       >4
## species height                       
## anoli   <=4.75      87.06098 33.93902
##         >4.75       30.93902 12.06098
## dist    <=4.75      78.21224 64.78776
##         >4.75       55.78776 46.21224

ftable( fitted(ll2) )

## Re-fitting to get fitted values

##             species    anoli     dist
## diam height                          
## <=4  <=4.75         87.06098 78.21224
##      >4.75          30.93902 55.78776
## >4   <=4.75         33.93902 64.78776
##      >4.75          12.06098 46.21224

#13 Decomposable models and Bayesian networks
library(gRim)
args(ciTest_table)

## function (x, set = NULL, statistic = "dev", method = "chisq", 
##     adjust.df = TRUE, slice.info = TRUE, L = 20, B = 200, ...) 
## NULL

ciTest(lizard, set=c("diam","height","species"))

## Testing diam _|_ height | species 
## Statistic (DEV):    2.026 df: 2 p-value: 0.3632 method: CHISQ

cit <- ciTest(lizard, set=~diam+height+species, slice.info=T)
cit

## Testing diam _|_ height | species 
## Statistic (DEV):    2.026 df: 2 p-value: 0.3632 method: CHISQ

names(cit)

## [1] "statistic" "p.value"   "df"        "statname"  "method"    "adjust.df"
## [7] "varNames"  "slice"

cit$slice

##   statistic   p.value df species
## 1 0.1779795 0.6731154  1   anoli
## 2 1.8476671 0.1740550  1    dist

#14.2 Example: University admissions
#Example: Admission to graduate school at UC at Berkley in 1973
#for the six largest departments classied by sex and gender.
ftable(UCBAdmissions)

##                 Dept   A   B   C   D   E   F
## Admit    Gender                             
## Admitted Male        512 353 120 138  53  22
##          Female       89  17 202 131  94  24
## Rejected Male        313 207 205 279 138 351
##          Female       19   8 391 244 299 317

#Is there evidence of sexual discrimination?
ag <- tableMargin(UCBAdmissions, ~Admit+Gender); ag

##           Gender
## Admit      Male Female
##   Admitted 1198    557
##   Rejected 1493   1278

as.parray( ag, normalize="first" )

##           Gender
## Admit           Male    Female
##   Admitted 0.4451877 0.3035422
##   Rejected 0.5548123 0.6964578

s<-ciTest(UCBAdmissions, ~Admit+Gender+Dept, slice.info=T); s

## Testing Admit _|_ Gender | Dept 
## Statistic (DEV):   21.736 df: 6 p-value: 0.0014 method: CHISQ

#Hence, admit and gender are not independent within each Dept.
#However, most contribution to the deviance comes from
#department A:
s$slice

##    statistic      p.value df Dept
## 1 19.0540099 1.270705e-05  1    A
## 2  0.2586429 6.110540e-01  1    B
## 3  0.7509844 3.861648e-01  1    C
## 4  0.2978665 5.852230e-01  1    D
## 5  0.9903864 3.196480e-01  1    E
## 6  0.3836167 5.356740e-01  1    F

#So what happens in department A?
x <- tableSlice(UCBAdmissions, margin="Dept", level="A"); x

##           Gender
## Admit      Male Female
##   Admitted  512     89
##   Rejected  313     19

as.parray(x, normalize="first")

##           Gender
## Admit           Male    Female
##   Admitted 0.6206061 0.8240741
##   Rejected 0.3793939 0.1759259

#Why were we mislead at the beginning?
x <- tableMargin(UCBAdmissions, ~Admit+Dept);
x

##           Dept
## Admit        A   B   C   D   E   F
##   Admitted 601 370 322 269 147  46
##   Rejected 332 215 596 523 437 668

as.parray(x, norm="first")

##           Dept
## Admit              A         B         C         D         E          F
##   Admitted 0.6441586 0.6324786 0.3507625 0.3396465 0.2517123 0.06442577
##   Rejected 0.3558414 0.3675214 0.6492375 0.6603535 0.7482877 0.93557423

#15 Loglinear models  the gRim package
#Coronary artery disease data:
data(cad1, package="gRbase")
head(cad1)

##      Sex   AngPec        AMI QWave QWavecode    STcode STchange SuffHeartF
## 1   Male     None NotCertain    No    Usable    Usable       No         No
## 2   Male Atypical NotCertain    No    Usable    Usable       No         No
## 3 Female     None   Definite    No    Usable    Usable       No         No
## 4   Male     None NotCertain    No    Usable Nonusable       No         No
## 5   Male     None NotCertain    No    Usable Nonusable       No         No
## 6   Male     None NotCertain    No    Usable Nonusable       No         No
##   Hypertrophi Hyperchol Smoker Inherit Heartfail CAD
## 1          No        No     No      No        No  No
## 2          No        No     No      No        No  No
## 3          No        No     No      No        No  No
## 4          No        No     No      No        No  No
## 5          No        No     No      No        No  No
## 6          No        No     No      No        No  No

use <- c(1,2,3,9:14)
cad1 <- cad1[,use]
head( cad1, 4 )

##      Sex   AngPec        AMI Hypertrophi Hyperchol Smoker Inherit
## 1   Male     None NotCertain          No        No     No      No
## 2   Male Atypical NotCertain          No        No     No      No
## 3 Female     None   Definite          No        No     No      No
## 4   Male     None NotCertain          No        No     No      No
##   Heartfail CAD
## 1        No  No
## 2        No  No
## 3        No  No
## 4        No  No

#Some (random) model:
m1 <- dmod(~Sex:Smoker:CAD + CAD:Hyperchol:AMI, data=cad1); m1

## Model: A dModel with 5 variables
##  graphical :  TRUE  decomposable :  TRUE
##  -2logL    :        1293.88 mdim :   13 aic :      1319.88 
##  ideviance :         112.54 idf  :    8 bic :      1364.91 
##  deviance  :          16.38 df   :   18

par(mfrow=c(1,1))
plot( m1 )

str( terms( m1 ) )

## List of 2
##  $ : chr [1:3] "Sex" "Smoker" "CAD"
##  $ : chr [1:3] "CAD" "Hyperchol" "AMI"

formula( m1 )

## ~Sex * Smoker * CAD + CAD * Hyperchol * AMI

# Default: a graphNEL object
DG <- ugList( terms( m1 ) ); DG

## A graphNEL graph with undirected edges
## Number of Nodes = 5 
## Number of Edges = 6

# Alternative: an adjacency matrix
a <- ugList( terms( m1 ), result="matrix" ); a

##           Sex Smoker CAD Hyperchol AMI
## Sex         0      1   1         0   0
## Smoker      1      0   1         0   0
## CAD         1      1   0         1   1
## Hyperchol   0      0   1         0   1
## AMI         0      0   1         1   0

A <- ugList( terms( m1 ), result="dgCMatrix" )

#15.1 Model specication shortcuts
#Shortcuts for specifying some models
mar <- c("Sex","AngPec","AMI","CAD")
str(terms(dmod(~.^., data=cad1, margin=mar))) ## Saturated model

## List of 1
##  $ : chr [1:4] "Sex" "AngPec" "AMI" "CAD"

str(terms(dmod(~.^1, data=cad1, margin=mar))) ## Independence model

## List of 4
##  $ : chr "Sex"
##  $ : chr "AngPec"
##  $ : chr "AMI"
##  $ : chr "CAD"

str(terms(dmod(~.^3, data=cad1, margin=mar))) ## All 3-factor model

## List of 4
##  $ : chr [1:3] "Sex" "AngPec" "AMI"
##  $ : chr [1:3] "Sex" "AngPec" "CAD"
##  $ : chr [1:3] "Sex" "AMI" "CAD"
##  $ : chr [1:3] "AngPec" "AMI" "CAD"

#15.2 Altering graphical models
#Natural operations on graphical models: add and delete edges
m1 <- dmod(~Sex:Smoker:CAD + CAD:Hyperchol:AMI, data=cad1); m1

## Model: A dModel with 5 variables
##  graphical :  TRUE  decomposable :  TRUE
##  -2logL    :        1293.88 mdim :   13 aic :      1319.88 
##  ideviance :         112.54 idf  :    8 bic :      1364.91 
##  deviance  :          16.38 df   :   18

m2 <- update(m1,items =
                 list(dedge=~Hyperchol:CAD, # drop edge
                      aedge=~Smoker:AMI)) # add edge
par(mfrow=c(1,2)); plot( m1 ); plot( m2 )

#15.3 Model comparison
#Models are compared with compareModels().
m1 <- dmod(~Sex:Smoker:CAD + CAD:Hyperchol:AMI, data=cad1); m1

## Model: A dModel with 5 variables
##  graphical :  TRUE  decomposable :  TRUE
##  -2logL    :        1293.88 mdim :   13 aic :      1319.88 
##  ideviance :         112.54 idf  :    8 bic :      1364.91 
##  deviance  :          16.38 df   :   18

m3 <- update(m1, items=list(dedge=~Sex:Smoker+Hyperchol:AMI))
compareModels( m1, m3 )

## Large:
##   :"Sex" "Smoker" "CAD"
##   :"CAD" "Hyperchol" "AMI"
## Small:
##   :"Sex" "CAD"
##   :"Smoker" "CAD"
##   :"CAD" "Hyperchol"
##   :"CAD" "AMI"
## -2logL:     8.93 df: 4 AIC(k= 2.0):     0.93 p.value: 0.346446

par(mfrow=c(1,2)); plot( m1 ); plot( m3 )

#15.4 Decomposable models  deleting edges
#Result: If A1 is a decompsable model and we remove an edge
#e = fu; vg which is contained in one clique C only, then the new
#model A2 will also be decomposable.
par(mfrow=c(1,3))
plot(ug(~A:B:C+B:C:D))
plot(ug(~A:C+B:C+B:C:D))
plot(ug(~A:B+A:C+B:D+C:D))

par(mfrow=c(1,1))

#More tricky when adding edge to a decomposable model
plot(ug(~A:B+B:C+C:D), "circo")

UG <- ug(~A:B+B:C+C:D)
mcs(UG)

## [1] "A" "B" "C" "D"

UG1 <- addEdge("A","D",UG)
mcs(UG1)

## character(0)

UG2 <- addEdge("A","C",UG)
mcs(UG2)

## [1] "A" "B" "C" "D"

#15.6 Test for adding and deleting edges
#Done with testdelete() and testadd()
m1 <- dmod(~Sex:Smoker:CAD + CAD:Hyperchol:AMI, data=cad1)
plot( m1 )
testdelete( m1, edge=c("Hyperchol", "AMI") )

## dev:    4.981 df:  2 p.value: 0.08288 AIC(k=2.0):    1.0 edge: Hyperchol:AMI 
## host:  CAD Hyperchol AMI 
## Notice: Test performed in saturated marginal model

m1 <- dmod(~Sex:Smoker:CAD + CAD:Hyperchol:AMI, data=cad1)
plot( m1 )

testadd( m1, edge=c("Smoker", "Hyperchol"))

## dev:    1.658 df:  2 p.value: 0.43654 AIC(k=2.0):    2.3 edge: Smoker:Hyperchol 
## host:  CAD Smoker Hyperchol 
## Notice: Test performed in saturated marginal model

args(stepwise.iModel)

## function (object, criterion = "aic", alpha = NULL, type = "decomposable", 
##     search = "all", steps = 1000, k = 2, direction = "backward", 
##     fixinMAT = NULL, fixoutMAT = NULL, details = 0, trace = 2, 
##     ...) 
## NULL

msat <- dmod( ~.^., data=cad1 )
mnew1 <- stepwise( msat, details=1, k=2 ) # use aic

## STEPWISE:  
##  criterion: aic ( k = 2 ) 
##  direction: backward 
##  type     : decomposable 
##  search   : all 
##  steps    : 1000 
## . BACKWARD: type=decomposable search=all, criterion=aic(2.00), alpha=0.00 
## . Initial model: is graphical=TRUE is decomposable=TRUE
##   change.AIC  -10.1543 Edge deleted: Sex CAD
##   change.AIC  -10.8104 Edge deleted: Sex AngPec
##   change.AIC  -18.3658 Edge deleted: AngPec Smoker
##   change.AIC  -13.6019 Edge deleted: Hyperchol AngPec
##   change.AIC  -10.1275 Edge deleted: Sex Heartfail
##   change.AIC  -10.3829 Edge deleted: Hyperchol Heartfail
##   change.AIC   -7.1000 Edge deleted: AMI Sex
##   change.AIC   -9.2019 Edge deleted: Hyperchol Sex
##   change.AIC   -9.0764 Edge deleted: Inherit Hyperchol
##   change.AIC   -5.1589 Edge deleted: Heartfail Smoker
##   change.AIC   -4.6758 Edge deleted: Inherit Heartfail
##   change.AIC   -1.7378 Edge deleted: Sex Smoker
##   change.AIC   -6.3261 Edge deleted: Smoker Inherit
##   change.AIC   -6.2579 Edge deleted: CAD Inherit

plot( mnew1 )

msat <- dmod( ~.^., data=cad1 )
mnew2 <- stepwise( msat, details=1, k=log(nrow(cad1)) ) # use bic

## STEPWISE:  
##  criterion: aic ( k = 5.46 ) 
##  direction: backward 
##  type     : decomposable 
##  search   : all 
##  steps    : 1000 
## . BACKWARD: type=decomposable search=all, criterion=aic(5.46), alpha=0.00 
## . Initial model: is graphical=TRUE is decomposable=TRUE
##   change.AIC -100.0382 Edge deleted: Sex AngPec
##   change.AIC -103.1520 Edge deleted: Hyperchol AngPec
##   change.AIC  -74.2967 Edge deleted: Smoker AngPec
##   change.AIC  -67.8590 Edge deleted: Sex Hyperchol
##   change.AIC  -60.3907 Edge deleted: AngPec Hypertrophi
##   change.AIC  -51.9489 Edge deleted: Heartfail Hyperchol
##   change.AIC  -50.8580 Edge deleted: Sex CAD
##   change.AIC  -43.8873 Edge deleted: AngPec Heartfail
##   change.AIC  -41.3702 Edge deleted: AMI Sex
##   change.AIC  -43.6158 Edge deleted: AMI Heartfail
##   change.AIC  -40.2509 Edge deleted: Hyperchol Inherit
##   change.AIC  -26.3511 Edge deleted: AngPec AMI
##   change.AIC  -31.4947 Edge deleted: Inherit AMI
##   change.AIC  -25.5315 Edge deleted: Heartfail CAD
##   change.AIC  -31.2732 Edge deleted: Inherit Heartfail
##   change.AIC  -22.9457 Edge deleted: AMI Hypertrophi
##   change.AIC  -17.9850 Edge deleted: Smoker AMI
##   change.AIC  -15.7814 Edge deleted: Sex Heartfail
##   change.AIC  -15.5931 Edge deleted: Smoker Sex
##   change.AIC  -18.5186 Edge deleted: Inherit Smoker
##   change.AIC  -13.8092 Edge deleted: Hyperchol Smoker
##   change.AIC  -12.4648 Edge deleted: AngPec Inherit
##   change.AIC   -6.5068 Edge deleted: Smoker Heartfail
##   change.AIC   -9.2031 Edge deleted: Hypertrophi Smoker
##   change.AIC   -5.9470 Edge deleted: AMI Hyperchol
##   change.AIC   -5.0227 Edge deleted: Hypertrophi Hyperchol
##   change.AIC   -4.0234 Edge deleted: Sex Inherit
##   change.AIC   -6.8882 Edge deleted: Hypertrophi Inherit
##   change.AIC   -3.1347 Edge deleted: Hypertrophi Sex

plot( mnew2 )

#16 From graph and data to network
#Create graphs from models:
ug1 <- ugList( terms( mnew1 ) )
ug2 <- ugList( terms( mnew2 ) )
par(mfrow=c(1,2)); plot( ug1 ); plot( ug2 )

#Create Bayesian networks from (graph, data):
bn1 <- compile( grain( ug1, data=cad1, smooth=0.1 )); bn1

## Independence network: Compiled: TRUE Propagated: FALSE 
##   Nodes: chr [1:9] "Hypertrophi" "AMI" "CAD" "Smoker" "Hyperchol" ...

bn2 <- compile( grain( ug2, data=cad1, smooth=0.1 )); bn2

## Independence network: Compiled: TRUE Propagated: FALSE 
##   Nodes: chr [1:9] "CAD" "AngPec" "Hypertrophi" "Heartfail" "Smoker" ...

querygrain( bn1, "CAD")

## $CAD
## CAD
##        No       Yes 
## 0.5459866 0.4540134

z<-setEvidence( bn1, nodes=c("AngPec", "Hypertrophi"),
                  c("Typical","Yes"))

# alternative form
z<-setEvidence( bn1,
                    nslist=list(AngPec="Typical", Hypertrophi="Yes"))
querygrain( z, "CAD")

#17 Prediction
#Dataset with missing values
data(cad2, package="gRbase")
dim( cad2 )

## [1] 67 14

head( cad2, 4 )

##      Sex   AngPec        AMI QWave QWavecode    STcode STchange SuffHeartF
## 1   Male     None NotCertain    No    Usable    Usable      Yes        Yes
## 2 Female     None NotCertain    No    Usable    Usable      Yes        Yes
## 3 Female     None NotCertain    No Nonusable Nonusable       No         No
## 4   Male Atypical   Definite    No    Usable    Usable       No        Yes
##   Hypertrophi Hyperchol Smoker Inherit Heartfail CAD
## 1          No        No   <NA>      No        No  No
## 2          No        No   <NA>      No        No  No
## 3          No       Yes   <NA>      No        No  No
## 4          No       Yes   <NA>      No        No  No

args(predict.grain)

## function (object, response, predictors = setdiff(names(newdata), 
##     response), newdata, type = "class", ...) 
## NULL

#function (object, response, predictors = setdiff(names(newdata),
#                                                 response), newdata, type = "class", ...)
#  NULL
p1 <- predict(bn1, newdata=cad2, response="CAD")
head( p1$pred$CAD )

## [1] "No"  "No"  "No"  "No"  "No"  "Yes"

z <- data.frame(CAD.obs=cad2$CAD, CAD.pred=p1$pred$CAD)
head( z ) # class assigned by highest probability

##   CAD.obs CAD.pred
## 1      No       No
## 2      No       No
## 3      No       No
## 4      No       No
## 5      No       No
## 6      No      Yes

xtabs(~., data=z)

##        CAD.pred
## CAD.obs No Yes
##     No  32   9
##     Yes  9  17

#Can be more informative to look at conditional probabilities:
q1 <- predict(bn1, newdata=cad2, response="CAD",
                  type="distribution")
head( q1$pred$CAD )

##             No        Yes
## [1,] 0.9741595 0.02584049
## [2,] 0.9741595 0.02584049
## [3,] 0.8982885 0.10171154
## [4,] 0.5348625 0.46513750
## [5,] 0.7865913 0.21340871
## [6,] 0.4509980 0.54900198

head( p1$pred$CAD )

## [1] "No"  "No"  "No"  "No"  "No"  "Yes"

head( cad2$CAD)

## [1] No No No No No No
## Levels: No Yes

par(mfrow=c(1,1))

#18 Other packages
require( bnlearn )

## Loading required package: bnlearn
## 
## Attaching package: 'bnlearn'
## 
##  以下のオブジェクトは 'package:gRbase' からマスクされています: 
## 
##      children, parents

a = bn.fit(hc( cad1 ), cad1)
bn = as.grain(a)
plot(bn)

require( bnlearn )
a = bn.fit(hc(learning.test ),learning.test)
bn = as.grain(a)
plot(bn)

library(bnlearn)

norm <- rnorm(4000)
Data <- matrix(norm, nrow=1000, ncol=4, byrow=T)
colnames(Data) <- c("Height", "BMI", "SBP", "FBS")
Data <- data.frame(Data)
head(Data)

##       Height        BMI        SBP        FBS
## 1  0.3152757 -1.6255617  0.2820571 -0.3756693
## 2  0.1862342  1.0407168 -1.6418698 -0.1539259
## 3 -1.1549326  0.6071451  0.3628388 -0.7618045
## 4 -0.2965907  0.1610033 -0.2913580 -1.0498726
## 5  1.4597698  0.3122938  0.1030056  0.8036521
## 6 -0.8637087 -0.6718771  0.1441947  0.1156066

Data2 <- Data
Data2$Height <- 170 + Data$Height*10
Data2$SBP <- 120 + Data$SBP*10
Data2$BMI <- 22 + Data$Height*2 + Data$BMI*2 + Data$SBP*2
Data2$FBS <- 90 + (Data2$BMI-22)*5 + Data$FBS*10
Data2 <- data.frame(Data2)
head(Data2)

##     Height      BMI      SBP       FBS
## 1 173.1528 19.94354 122.8206  75.96102
## 2 171.8623 21.17016 103.5813  84.31155
## 3 158.4507 21.63010 123.6284  80.53247
## 4 167.0341 21.14611 117.0864  75.23182
## 5 184.5977 25.75014 121.0301 116.78721
## 6 161.3629 19.21722 121.4419  77.24216

res.Data2 = gs(Data2)
plot(res.Data2)

res.arc2 <- set.arc(res.Data2, "FBS", "BMI")    #←HeightをFBSに修正
arc.strength(res.arc2, Data2)

##     from  to      strength
## 1 Height BMI  7.923986e-66
## 2    SBP BMI  8.244726e-63
## 3    FBS BMI 2.701750e-135

#---回帰係数を求める
fitted = bn.fit(res.arc2, Data2)
(Coef <- coefficients(fitted))

## $Height
## (Intercept) 
##    170.4514 
## 
## $BMI
##  (Intercept)       Height          SBP          FBS 
## -16.86251210   0.10439402   0.10260526   0.09747203 
## 
## $SBP
## (Intercept) 
##    120.4283 
## 
## $FBS
## (Intercept) 
##    91.40537

#---絵に上書き
XMax <- max(axTicks(1))
YMax <- max(axTicks(2))

#---手動で位置を設定する
plot(res.Data2)
#切片
text(XMax*0.05, YMax*0.6, round(Coef$Height, digits=2), adj=0)
text(XMax*0.85, YMax*0.6, round(Coef$SBP, digits=2), adj=0)
text(XMax*0.35, YMax*0.9, round(Coef$FBS, digits=2), adj=0)
text(XMax*0.35, YMax*0.05, round(Coef$BMI[1], digits=2), adj=0)
#回帰係数
text(XMax*0.3, YMax*0.3, round(Coef$BMI[2], digits=3), adj=0)
text(XMax*0.65, YMax*0.3, round(Coef$BMI[3], digits=3), adj=0)
text(XMax*0.45, YMax*0.5, round(Coef$BMI[4], digits=3), adj=0)