R lab for Chapter 2

This is an R Markdown document for Chapter 2. Please install the “vcd” package.

    rm(list=ls())
    library(vcd)  # install.packages("vcd")
## Warning: package 'vcd' was built under R version 4.1.2
## Loading required package: grid

Create the frequency tables.

1. Case form (original data)

    head(Arthritis) # in vcd package
##   ID Treatment  Sex Age Improved
## 1 57   Treated Male  27     Some
## 2 46   Treated Male  29     None
## 3 77   Treated Male  30     None
## 4 17   Treated Male  32   Marked
## 5 36   Treated Male  46   Marked
## 6 23   Treated Male  58   Marked
    dim(Arthritis)
## [1] 84  5
    class(Arthritis)
## [1] "data.frame"
    names(Arthritis)
## [1] "ID"        "Treatment" "Sex"       "Age"       "Improved"
    str(Arthritis)
## 'data.frame':    84 obs. of  5 variables:
##  $ ID       : int  57 46 77 17 36 23 75 39 33 55 ...
##  $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Sex      : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Age      : int  27 29 30 32 46 58 59 59 63 63 ...
##  $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...
    summary(Arthritis)
##        ID          Treatment      Sex          Age          Improved 
##  Min.   : 1.00   Placebo:43   Female:59   Min.   :23.00   None  :42  
##  1st Qu.:21.75   Treated:41   Male  :25   1st Qu.:46.00   Some  :14  
##  Median :42.50                            Median :57.00   Marked:28  
##  Mean   :42.50                            Mean   :53.36              
##  3rd Qu.:63.25                            3rd Qu.:63.00              
##  Max.   :84.00                            Max.   :74.00
    table(Arthritis$Treatment,Arthritis$Improved) # two-way table
##          
##           None Some Marked
##   Placebo   29    7      7
##   Treated   13    7     21
    # cf) three-way table
    structable(Arthritis[,-c(1,4)]) 
##                    Sex Female Male
## Treatment Improved                
## Placebo   None             19   10
##           Some              7    0
##           Marked            6    1
## Treated   None              6    7
##           Some              5    2
##           Marked           16    5
    table(Arthritis[,-c(1,4)])
## , , Improved = None
## 
##          Sex
## Treatment Female Male
##   Placebo     19   10
##   Treated      6    7
## 
## , , Improved = Some
## 
##          Sex
## Treatment Female Male
##   Placebo      7    0
##   Treated      5    2
## 
## , , Improved = Marked
## 
##          Sex
## Treatment Female Male
##   Placebo      6    1
##   Treated     16    5

2. Frequency form: page 41

    GP <- expand.grid(Gender=c("Female", "Male"),Party=c("Demo", "Indep", "Rep"))
    Count <- c(762,484,327,239,468,477)
    
    freqData <- data.frame(GP,Count)    
    names(freqData)    
## [1] "Gender" "Party"  "Count"
    str(freqData)
## 'data.frame':    6 obs. of  3 variables:
##  $ Gender: Factor w/ 2 levels "Female","Male": 1 2 1 2 1 2
##  $ Party : Factor w/ 3 levels "Demo","Indep",..: 1 1 2 2 3 3
##  $ Count : num  762 484 327 239 468 477
    sum(freqData$Count) # total 
## [1] 2757
    tabData <- xtabs(Count~Gender+Party, data=freqData)
            
    c0 <- chisq.test(tabData) # pearson's chisq independence test
    
    names(c0)
## [1] "statistic" "parameter" "p.value"   "method"    "data.name" "observed" 
## [7] "expected"  "residuals" "stdres"

3. Table form: page 41

    tabData <- matrix(c(762,484,327,239,468,477),nrow=2,ncol=3)
    dimnames(tabData) <- list(Gender=c("Femaile","Male"), Party=c("Demo", "Indep", "Rep"))
    class(tabData)    
## [1] "matrix" "array"
    tabData <- as.table(tabData)    
    
    c0 <- chisq.test(tabData)
    names(c0)
## [1] "statistic" "parameter" "p.value"   "method"    "data.name" "observed" 
## [7] "expected"  "residuals" "stdres"
    nn <- c0$observed
    ee <- c0$expected
    
    X2 <- sum((nn-ee)^2 / ee)
    G2 <- 2*sum(nn*log(nn/ee))

    chisqTest.fun <- function(tab=tab){
        c0 <- chisq.test(tab)
        
        nn <- c0$observed
        ee <- c0$expected
        
        X2 <- sum((nn-ee)^2 / ee)
        
        G2 <- 2*sum(nn*log(nn/ee))
        p.val <- 1-pchisq(G2,df=c0$parameter)
        
        output <- list(Expected=ee,Observed=nn,stdRes=c0$stdres,X2=X2,X2.pValue=c0$p.value,G2=G2,G2.pValue=p.val)    
        return(output)
    }
    
    chisqTest.fun(tabData)
## $Expected
##          Party
## Gender        Demo    Indep      Rep
##   Femaile 703.6714 319.6453 533.6834
##   Male    542.3286 246.3547 411.3166
## 
## $Observed
##          Party
## Gender    Demo Indep Rep
##   Femaile  762   327 468
##   Male     484   239 477
## 
## $stdRes
##          Party
## Gender          Demo      Indep        Rep
##   Femaile  4.5020535  0.6994517 -5.3159455
##   Male    -4.5020535 -0.6994517  5.3159455
## 
## $X2
## [1] 30.07015
## 
## $X2.pValue
## [1] 2.953589e-07
## 
## $G2
## [1] 30.01669
## 
## $G2.pValue
## [1] 3.033598e-07
    assocstats(tabData)
##                     X^2 df   P(> X^2)
## Likelihood Ratio 30.017  2 3.0336e-07
## Pearson          30.070  2 2.9536e-07
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.104 
## Cramer's V        : 0.104

Test for ordered variable

    tmp <- matrix(c(17066,14464,788,126,37,48,38,5,1,1),nrow=5,ncol=2)
    dimnames(tmp) <- list(Alcohol=c("0","<1","1-2","3-5",">=6"),  Nonnorm=c("No", "Yes"))
    
    chisq.test(tmp)    
## Warning in chisq.test(tmp): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  tmp
## X-squared = 12.082, df = 4, p-value = 0.01675
    chisqTest.fun(tab=tmp)    
## Warning in chisq.test(tab): Chi-squared approximation may be incorrect
## $Expected
##        Nonnorm
## Alcohol          No        Yes
##     0   17065.13888 48.8611162
##     <1  14460.59624 41.4037576
##     1-2   790.73596  2.2640449
##     3-5   126.63741  0.3625898
##     >=6    37.89151  0.1084914
## 
## $Observed
##        Nonnorm
## Alcohol    No Yes
##     0   17066  48
##     <1  14464  38
##     1-2   788   5
##     3-5   126   1
##     >=6    37   1
## 
## $stdRes
##        Nonnorm
## Alcohol         No        Yes
##     0    0.1790737 -0.1790737
##     <1   0.7112005 -0.7112005
##     1-2 -1.8434826  1.8434826
##     3-5 -1.0621365  1.0621365
##     >=6 -2.7120776  2.7120776
## 
## $X2
## [1] 12.08205
## 
## $X2.pValue
## [1] 0.0167514
## 
## $G2
## [1] 6.201998
## 
## $G2.pValue
## [1] 0.1845623
    Tab <- as.table(tmp)
    
    tmp <- matrix(c(17,14,7,1,3,4,3,5,1,1),nrow=5,ncol=2)
    dimnames(tmp) <- list(Alchole=c("0","<1","1-2","3-5",">=6"),  Nonnorm=c("No", "Yes"))
    
    ord.tab <- tmp
    
    cnt <- c(17,14,7,1,3,4,3,5,1,1)
    eg <- expand.grid(dimnames(tmp))
    tab.df <- data.frame(eg,cnt)
    
    tab.df$Alchole <- rep(1:5,times=2)
    tab.df$Nonnorm <- rep(0:1,each=5)
    
    res <- tab.df[,1:2][rep(1:nrow(tab.df),tab.df$cnt),]
    x <- as.numeric(res$Alchole)
    y <- as.numeric(res$Nonnorm)
    cor(res)
##           Alchole   Nonnorm
## Alchole 1.0000000 0.1503248
## Nonnorm 0.1503248 1.0000000
    cor(x,y)
## [1] 0.1503248
    res<-apply(tab.df[,1:2],2,function(x){ rep(x,tab.df$cnt) })
    
    tab.df[c(1,1,1,1,1,1,1),]
##     Alchole Nonnorm cnt
## 1         1       0  17
## 1.1       1       0  17
## 1.2       1       0  17
## 1.3       1       0  17
## 1.4       1       0  17
## 1.5       1       0  17
## 1.6       1       0  17