This is an R Markdown document for Chapter 2. Please install the “vcd” package.
rm(list=ls())
library(vcd) # install.packages("vcd")
## Warning: package 'vcd' was built under R version 4.1.2
## Loading required package: grid
head(Arthritis) # in vcd package
## ID Treatment Sex Age Improved
## 1 57 Treated Male 27 Some
## 2 46 Treated Male 29 None
## 3 77 Treated Male 30 None
## 4 17 Treated Male 32 Marked
## 5 36 Treated Male 46 Marked
## 6 23 Treated Male 58 Marked
dim(Arthritis)
## [1] 84 5
class(Arthritis)
## [1] "data.frame"
names(Arthritis)
## [1] "ID" "Treatment" "Sex" "Age" "Improved"
str(Arthritis)
## 'data.frame': 84 obs. of 5 variables:
## $ ID : int 57 46 77 17 36 23 75 39 33 55 ...
## $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : int 27 29 30 32 46 58 59 59 63 63 ...
## $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...
summary(Arthritis)
## ID Treatment Sex Age Improved
## Min. : 1.00 Placebo:43 Female:59 Min. :23.00 None :42
## 1st Qu.:21.75 Treated:41 Male :25 1st Qu.:46.00 Some :14
## Median :42.50 Median :57.00 Marked:28
## Mean :42.50 Mean :53.36
## 3rd Qu.:63.25 3rd Qu.:63.00
## Max. :84.00 Max. :74.00
table(Arthritis$Treatment,Arthritis$Improved) # two-way table
##
## None Some Marked
## Placebo 29 7 7
## Treated 13 7 21
# cf) three-way table
structable(Arthritis[,-c(1,4)])
## Sex Female Male
## Treatment Improved
## Placebo None 19 10
## Some 7 0
## Marked 6 1
## Treated None 6 7
## Some 5 2
## Marked 16 5
table(Arthritis[,-c(1,4)])
## , , Improved = None
##
## Sex
## Treatment Female Male
## Placebo 19 10
## Treated 6 7
##
## , , Improved = Some
##
## Sex
## Treatment Female Male
## Placebo 7 0
## Treated 5 2
##
## , , Improved = Marked
##
## Sex
## Treatment Female Male
## Placebo 6 1
## Treated 16 5
GP <- expand.grid(Gender=c("Female", "Male"),Party=c("Demo", "Indep", "Rep"))
Count <- c(762,484,327,239,468,477)
freqData <- data.frame(GP,Count)
names(freqData)
## [1] "Gender" "Party" "Count"
str(freqData)
## 'data.frame': 6 obs. of 3 variables:
## $ Gender: Factor w/ 2 levels "Female","Male": 1 2 1 2 1 2
## $ Party : Factor w/ 3 levels "Demo","Indep",..: 1 1 2 2 3 3
## $ Count : num 762 484 327 239 468 477
sum(freqData$Count) # total
## [1] 2757
tabData <- xtabs(Count~Gender+Party, data=freqData)
c0 <- chisq.test(tabData) # pearson's chisq independence test
names(c0)
## [1] "statistic" "parameter" "p.value" "method" "data.name" "observed"
## [7] "expected" "residuals" "stdres"
tabData <- matrix(c(762,484,327,239,468,477),nrow=2,ncol=3)
dimnames(tabData) <- list(Gender=c("Femaile","Male"), Party=c("Demo", "Indep", "Rep"))
class(tabData)
## [1] "matrix" "array"
tabData <- as.table(tabData)
c0 <- chisq.test(tabData)
names(c0)
## [1] "statistic" "parameter" "p.value" "method" "data.name" "observed"
## [7] "expected" "residuals" "stdres"
nn <- c0$observed
ee <- c0$expected
X2 <- sum((nn-ee)^2 / ee)
G2 <- 2*sum(nn*log(nn/ee))
chisqTest.fun <- function(tab=tab){
c0 <- chisq.test(tab)
nn <- c0$observed
ee <- c0$expected
X2 <- sum((nn-ee)^2 / ee)
G2 <- 2*sum(nn*log(nn/ee))
p.val <- 1-pchisq(G2,df=c0$parameter)
output <- list(Expected=ee,Observed=nn,stdRes=c0$stdres,X2=X2,X2.pValue=c0$p.value,G2=G2,G2.pValue=p.val)
return(output)
}
chisqTest.fun(tabData)
## $Expected
## Party
## Gender Demo Indep Rep
## Femaile 703.6714 319.6453 533.6834
## Male 542.3286 246.3547 411.3166
##
## $Observed
## Party
## Gender Demo Indep Rep
## Femaile 762 327 468
## Male 484 239 477
##
## $stdRes
## Party
## Gender Demo Indep Rep
## Femaile 4.5020535 0.6994517 -5.3159455
## Male -4.5020535 -0.6994517 5.3159455
##
## $X2
## [1] 30.07015
##
## $X2.pValue
## [1] 2.953589e-07
##
## $G2
## [1] 30.01669
##
## $G2.pValue
## [1] 3.033598e-07
assocstats(tabData)
## X^2 df P(> X^2)
## Likelihood Ratio 30.017 2 3.0336e-07
## Pearson 30.070 2 2.9536e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.104
## Cramer's V : 0.104
tmp <- matrix(c(17066,14464,788,126,37,48,38,5,1,1),nrow=5,ncol=2)
dimnames(tmp) <- list(Alcohol=c("0","<1","1-2","3-5",">=6"), Nonnorm=c("No", "Yes"))
chisq.test(tmp)
## Warning in chisq.test(tmp): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tmp
## X-squared = 12.082, df = 4, p-value = 0.01675
chisqTest.fun(tab=tmp)
## Warning in chisq.test(tab): Chi-squared approximation may be incorrect
## $Expected
## Nonnorm
## Alcohol No Yes
## 0 17065.13888 48.8611162
## <1 14460.59624 41.4037576
## 1-2 790.73596 2.2640449
## 3-5 126.63741 0.3625898
## >=6 37.89151 0.1084914
##
## $Observed
## Nonnorm
## Alcohol No Yes
## 0 17066 48
## <1 14464 38
## 1-2 788 5
## 3-5 126 1
## >=6 37 1
##
## $stdRes
## Nonnorm
## Alcohol No Yes
## 0 0.1790737 -0.1790737
## <1 0.7112005 -0.7112005
## 1-2 -1.8434826 1.8434826
## 3-5 -1.0621365 1.0621365
## >=6 -2.7120776 2.7120776
##
## $X2
## [1] 12.08205
##
## $X2.pValue
## [1] 0.0167514
##
## $G2
## [1] 6.201998
##
## $G2.pValue
## [1] 0.1845623
Tab <- as.table(tmp)
tmp <- matrix(c(17,14,7,1,3,4,3,5,1,1),nrow=5,ncol=2)
dimnames(tmp) <- list(Alchole=c("0","<1","1-2","3-5",">=6"), Nonnorm=c("No", "Yes"))
ord.tab <- tmp
cnt <- c(17,14,7,1,3,4,3,5,1,1)
eg <- expand.grid(dimnames(tmp))
tab.df <- data.frame(eg,cnt)
tab.df$Alchole <- rep(1:5,times=2)
tab.df$Nonnorm <- rep(0:1,each=5)
res <- tab.df[,1:2][rep(1:nrow(tab.df),tab.df$cnt),]
x <- as.numeric(res$Alchole)
y <- as.numeric(res$Nonnorm)
cor(res)
## Alchole Nonnorm
## Alchole 1.0000000 0.1503248
## Nonnorm 0.1503248 1.0000000
cor(x,y)
## [1] 0.1503248
res<-apply(tab.df[,1:2],2,function(x){ rep(x,tab.df$cnt) })
tab.df[c(1,1,1,1,1,1,1),]
## Alchole Nonnorm cnt
## 1 1 0 17
## 1.1 1 0 17
## 1.2 1 0 17
## 1.3 1 0 17
## 1.4 1 0 17
## 1.5 1 0 17
## 1.6 1 0 17