Materi ini diambil dari:
Friendly, M., Meyer, D. 2015. Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data. (http://ddar.datavis.ca/pages/home)
count <- c(17, 20, 15, 40)
count
## [1] 17 20 15 40
(sex <- c("female", "male", "female", "male"))
## [1] "female" "male" "female" "male"
(passed <- c(TRUE, TRUE, FALSE, FALSE))
## [1] TRUE TRUE FALSE FALSE
seq(10, 100, by=10)
## [1] 10 20 30 40 50 60 70 80 90 100
seq(0,1, length.out=11)
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
(sex <- rep(c("female", "male"), times=2))
## [1] "female" "male" "female" "male"
(sex <- rep(c("female", "male"), length.out=4))
## [1] "female" "male" "female" "male"
(passed <- rep(c(TRUE, FALSE), each=2))
## [1] TRUE TRUE FALSE FALSE
(matA <- matrix(1:8, nrow=2,ncol=4))
## [,1] [,2] [,3] [,4]
## [1,] 1 3 5 7
## [2,] 2 4 6 8
(matB <- matrix(1:8, nrow=2, ncol=4, byrow=TRUE))
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
(matC <- matrix(1:8, nrow=2, ncol=4))
## [,1] [,2] [,3] [,4]
## [1,] 1 3 5 7
## [2,] 2 4 6 8
dim(matA)
## [1] 2 4
str(matA)
## int [1:2, 1:4] 1 2 3 4 5 6 7 8
dimnames(matA) <- list(c("M", "F"), LETTERS[1:4])
matA
## A B C D
## M 1 3 5 7
## F 2 4 6 8
str(matA)
## int [1:2, 1:4] 1 2 3 4 5 6 7 8
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:2] "M" "F"
## ..$ : chr [1:4] "A" "B" "C" "D"
dimnames(matA) <- list(sex=c("M", "F"), group=LETTERS[1:4])
#bisa juga ditulis sebagai: names(dimnames(matA)) <- c("Sex", "Group")
matA
## group
## sex A B C D
## M 1 3 5 7
## F 2 4 6 8
str(matA)
## int [1:2, 1:4] 1 2 3 4 5 6 7 8
## - attr(*, "dimnames")=List of 2
## ..$ sex : chr [1:2] "M" "F"
## ..$ group: chr [1:4] "A" "B" "C" "D"
rbind(matA, c(10, 20))
## A B C D
## M 1 3 5 7
## F 2 4 6 8
## 10 20 10 20
cbind(matA, c(10, 20))
## A B C D
## M 1 3 5 7 10
## F 2 4 6 8 20
t(matA)
## sex
## group M F
## A 1 2
## B 3 4
## C 5 6
## D 7 8
2 * matA / 100
## group
## sex A B C D
## M 0.02 0.06 0.10 0.14
## F 0.04 0.08 0.12 0.16
dims <- c(2, 4, 2)
(arrayA <- array(1:16, dim=dims))
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 3 5 7
## [2,] 2 4 6 8
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 9 11 13 15
## [2,] 10 12 14 16
str(arrayA)
## int [1:2, 1:4, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
(arrayB <- array(1:16, dim = c(2,8)))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 1 3 5 7 9 11 13 15
## [2,] 2 4 6 8 10 12 14 16
str(arrayB)
## int [1:2, 1:8] 1 2 3 4 5 6 7 8 9 10 ...
dimnames(arrayA) <- list(sex=c("M", "F"),
group=letters[1:4],
time=c("Pre", "Post"))
arrayA
## , , time = Pre
##
## group
## sex a b c d
## M 1 3 5 7
## F 2 4 6 8
##
## , , time = Post
##
## group
## sex a b c d
## M 9 11 13 15
## F 10 12 14 16
str(arrayA)
## int [1:2, 1:4, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "dimnames")=List of 3
## ..$ sex : chr [1:2] "M" "F"
## ..$ group: chr [1:4] "a" "b" "c" "d"
## ..$ time : chr [1:2] "Pre" "Post"
set.seed(12345)
n<-100
A<-factor(sample(c("a1", "a2"), n, replace=TRUE))
B<-factor(sample(c("b1", "b2"), n, replace=TRUE))
sex<-factor(sample(c("M", "F"), n, replace=TRUE))
age<-round(rnorm(n, mean=30, sd=5))
mydata<-data.frame(A, B, sex, age)
head(mydata,5)
## A B sex age
## 1 a2 b1 F 22
## 2 a1 b1 M 33
## 3 a2 b1 F 31
## 4 a2 b2 M 26
## 5 a2 b1 M 29
str(mydata)
## 'data.frame': 100 obs. of 4 variables:
## $ A : Factor w/ 2 levels "a1","a2": 2 1 2 2 2 2 2 1 1 2 ...
## $ B : Factor w/ 2 levels "b1","b2": 1 1 1 2 1 1 1 1 2 1 ...
## $ sex: Factor w/ 2 levels "F","M": 1 2 1 2 2 2 2 1 2 2 ...
## $ age: num 22 33 31 26 29 29 38 28 30 27 ...
mydata[1,2]
## [1] b1
## Levels: b1 b2
mydata$sex
## [1] F M F M M M M F M M M M M M M M M F F M M M F F F M F M M M F M F F F F F
## [38] M M M F F F M F M M M F F M F F M F M M F F F F F M F M M M M M F M M M M
## [75] F M F F F M M F M F F F F F M M F M M M F F F F M F
## Levels: F M
#install.packages("vcd")
data(Arthritis, package="vcd")
names(Arthritis)
## [1] "ID" "Treatment" "Sex" "Age" "Improved"
str(Arthritis)
## 'data.frame': 84 obs. of 5 variables:
## $ ID : int 57 46 77 17 36 23 75 39 33 55 ...
## $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : int 27 29 30 32 46 58 59 59 63 63 ...
## $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...
head(Arthritis, 5)
## ID Treatment Sex Age Improved
## 1 57 Treated Male 27 Some
## 2 46 Treated Male 29 None
## 3 77 Treated Male 30 None
## 4 17 Treated Male 32 Marked
## 5 36 Treated Male 46 Marked
tmp <- expand.grid(sex=c("female", "male"),
party=c("dem", "indep", "rep"))
tmp
## sex party
## 1 female dem
## 2 male dem
## 3 female indep
## 4 male indep
## 5 female rep
## 6 male rep
GSS <- data.frame(tmp, count=c(279, 165, 73, 47, 225, 191))
GSS
## sex party count
## 1 female dem 279
## 2 male dem 165
## 3 female indep 73
## 4 male indep 47
## 5 female rep 225
## 6 male rep 191
data("HairEyeColor", package="datasets")
str(HairEyeColor)
## 'table' num [1:4, 1:4, 1:2] 32 53 10 3 11 50 10 30 10 25 ...
## - attr(*, "dimnames")=List of 3
## ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
## ..$ Eye : chr [1:4] "Brown" "Blue" "Hazel" "Green"
## ..$ Sex : chr [1:2] "Male" "Female"
dim(HairEyeColor)
## [1] 4 4 2
dimnames(HairEyeColor)
## $Hair
## [1] "Black" "Brown" "Red" "Blond"
##
## $Eye
## [1] "Brown" "Blue" "Hazel" "Green"
##
## $Sex
## [1] "Male" "Female"
sum(HairEyeColor) #banyaknya kasus
## [1] 592
table() dan
xtabs()GSS.tab <- matrix(c(279, 73, 225,
165, 47, 191),
nrow=2, ncol=3, byrow=TRUE)
dimnames(GSS.tab) <- list(sex=c("female", "male"),
party=c("dem", "indep", "rep"))
GSS.tab
## party
## sex dem indep rep
## female 279 73 225
## male 165 47 191
tableGSS.tab <- as.table(GSS.tab)
str(GSS.tab)
## 'table' num [1:2, 1:3] 279 165 73 47 225 191
## - attr(*, "dimnames")=List of 2
## ..$ sex : chr [1:2] "female" "male"
## ..$ party: chr [1:3] "dem" "indep" "rep"
Job SatisfactionJobSat <- matrix(c(1, 2, 1, 0,
3, 3, 6, 1,
10, 10, 14, 9,
6, 7, 12, 11),
nrow=4, ncol=4)
dimnames(JobSat) <-
list(income=c("< 15k", "15-25k", "25-40k", "> 40k"),
satisfaction=c("VeryD", "LittleD", "ModerateS", "VeryS"))
JobSat <- as.table(JobSat)
JobSat
## satisfaction
## income VeryD LittleD ModerateS VeryS
## < 15k 1 3 10 6
## 15-25k 2 3 10 7
## 25-40k 1 6 14 12
## > 40k 0 1 9 11
dimnames(JobSat)$income <-
paste(1:4, dimnames(JobSat)$income, sep=":")
dimnames(JobSat)$satisfaction <-
paste(1:4, dimnames(JobSat)$satisfaction, sep=":")
JobSat
## satisfaction
## income 1:VeryD 2:LittleD 3:ModerateS 4:VeryS
## 1:< 15k 1 3 10 6
## 2:15-25k 2 3 10 7
## 3:25-40k 1 6 14 12
## 4:> 40k 0 1 9 11
data("HairEyeColor")
HEC <- HairEyeColor[,c(1, 3, 4, 2),] #menghubah urutan eye color
str(HEC)
## 'table' num [1:4, 1:4, 1:2] 32 53 10 3 10 25 7 5 3 15 ...
## - attr(*, "dimnames")=List of 3
## ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
## ..$ Eye : chr [1:4] "Brown" "Hazel" "Green" "Blue"
## ..$ Sex : chr [1:2] "Male" "Female"
str(UCBAdmissions)
## 'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
## - attr(*, "dimnames")=List of 3
## ..$ Admit : chr [1:2] "Admitted" "Rejected"
## ..$ Gender: chr [1:2] "Male" "Female"
## ..$ Dept : chr [1:6] "A" "B" "C" "D" ...
UCB <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(UCB)$Admit <- c("Yes", "No")
names(dimnames(UCB)) <- c("Sex", "Admitted", "Department")
str(UCB)
## 'table' num [1:2, 1:2, 1:6] 512 89 313 19 353 17 207 8 120 202 ...
## - attr(*, "dimnames")=List of 3
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Admitted : chr [1:2] "Yes" "No"
## ..$ Department: chr [1:6] "A" "B" "C" "D" ...
table() dan
xtabs()table()table(mydata$A, mydata$B)
##
## b1 b2
## a1 25 21
## a2 22 32
(mytab <- table(mydata[,1:2]))
## B
## A b1 b2
## a1 25 21
## a2 22 32
margin.table(mytab)
## [1] 100
margin.table(mytab,1)
## A
## a1 a2
## 46 54
margin.table(mytab,2)
## B
## b1 b2
## 47 53
addmargins(mytab)
## B
## A b1 b2 Sum
## a1 25 21 46
## a2 22 32 54
## Sum 47 53 100
prop.table(mytab)
## B
## A b1 b2
## a1 0.25 0.21
## a2 0.22 0.32
prop.table(mytab,1)
## B
## A b1 b2
## a1 0.5434783 0.4565217
## a2 0.4074074 0.5925926
prop.table(mytab,2)
## B
## A b1 b2
## a1 0.5319149 0.3962264
## a2 0.4680851 0.6037736
mytab<-table(mydata[,c("A","B","sex")])
ftable(mytab)
## sex F M
## A B
## a1 b1 14 11
## b2 8 13
## a2 b1 7 15
## b2 18 14
xtabs()mytable<-xtabs(~A+B+sex,data=mydata)
ftable(mytable)
## sex F M
## A B
## a1 b1 14 11
## b2 8 13
## a2 b1 7 15
## b2 18 14
summary(mytable)
## Call: xtabs(formula = ~A + B + sex, data = mydata)
## Number of cases in table: 100
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 6.356, df = 4, p-value = 0.1741
(GSStab<-xtabs(count~sex+party,data=GSS))
## party
## sex dem indep rep
## female 279 73 225
## male 165 47 191
summary(GSStab)
## Call: xtabs(formula = count ~ sex + party, data = GSS)
## Number of cases in table: 980
## Number of factors: 2
## Test for independence of all factors:
## Chisq = 7.01, df = 2, p-value = 0.03005
structable() dan
ftable()ftable(UCB) #default
## Department A B C D E F
## Sex Admitted
## Male Yes 512 353 120 138 53 22
## No 313 207 205 279 138 351
## Female Yes 89 17 202 131 94 24
## No 19 8 391 244 299 317
#ftable(Admit+Gender~Dept,data=UCB) #formula method
library(vcd)
## Warning: package 'vcd' was built under R version 4.3.3
## Loading required package: grid
structable(HairEyeColor)
## Eye Brown Blue Hazel Green
## Hair Sex
## Black Male 32 11 10 3
## Female 36 9 5 2
## Brown Male 53 50 25 15
## Female 66 34 29 14
## Red Male 10 10 7 7
## Female 16 7 7 7
## Blond Male 3 30 5 8
## Female 4 64 5 8
structable(Hair+Sex~Eye,HairEyeColor)#specifycol~rowvariables
## Hair Black Brown Red Blond
## Sex Male Female Male Female Male Female Male Female
## Eye
## Brown 32 36 53 66 10 16 3 4
## Blue 11 9 50 34 10 7 30 64
## Hazel 10 5 25 29 7 7 5 5
## Green 3 2 15 14 7 7 8 8
HairEyeColor[,,"Female"]
## Eye
## Hair Brown Blue Hazel Green
## Black 36 9 5 2
## Brown 66 34 29 14
## Red 16 7 7 7
## Blond 4 64 5 8
apply(HairEyeColor, 3, sum)
## Male Female
## 279 313
HairEyeColor[c("Black", "Brown"), c("Hazel", "Green"),]
## , , Sex = Male
##
## Eye
## Hair Hazel Green
## Black 10 3
## Brown 25 15
##
## , , Sex = Female
##
## Eye
## Hair Hazel Green
## Black 5 2
## Brown 29 14
hec<-structable(Eye~Sex+Hair,data=HairEyeColor)
hec
## Eye Brown Blue Hazel Green
## Sex Hair
## Male Black 32 11 10 3
## Brown 53 50 25 15
## Red 10 10 7 7
## Blond 3 30 5 8
## Female Black 36 9 5 2
## Brown 66 34 29 14
## Red 16 7 7 7
## Blond 4 64 5 8
hec[[c("Male","Brown"),]]
## Eye Brown Blue Hazel Green
##
## 53 50 25 15
Art.tab <- table(Arthritis[,c("Treatment", "Sex", "Improved")])
str(Art.tab)
## 'table' int [1:2, 1:2, 1:3] 19 6 10 7 7 5 0 2 6 16 ...
## - attr(*, "dimnames")=List of 3
## ..$ Treatment: chr [1:2] "Placebo" "Treated"
## ..$ Sex : chr [1:2] "Female" "Male"
## ..$ Improved : chr [1:3] "None" "Some" "Marked"
ftable(Art.tab)
## Improved None Some Marked
## Treatment Sex
## Placebo Female 19 7 6
## Male 10 0 1
## Treated Female 6 5 16
## Male 7 2 5
#install.packages("vcdExtra")
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 4.3.3
## Loading required package: gnm
## Warning: package 'gnm' was built under R version 4.3.3
##
## Attaching package: 'vcdExtra'
## The following object is masked _by_ '.GlobalEnv':
##
## JobSat
Art.df <- expand.dft(Art.tab)
str(Art.df)
## 'data.frame': 84 obs. of 3 variables:
## $ Treatment: chr "Placebo" "Placebo" "Placebo" "Placebo" ...
## $ Sex : chr "Female" "Female" "Female" "Female" ...
## $ Improved : chr "None" "None" "None" "None" ...
data("HorseKicks", package = "vcd")
HorseKicks
## nDeaths
## 0 1 2 3 4
## 109 65 22 3 1
library(xtable)
xtable(HorseKicks)
## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
## \hline
## & nDeaths \\
## \hline
## 0 & 109 \\
## 1 & 65 \\
## 2 & 22 \\
## 3 & 3 \\
## 4 & 1 \\
## \hline
## \end{tabular}
## \end{table}
xtable(HorseKicks)
## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
## \hline
## & nDeaths \\
## \hline
## 0 & 109 \\
## 1 & 65 \\
## 2 & 22 \\
## 3 & 3 \\
## 4 & 1 \\
## \hline
## \end{tabular}
## \end{table}
tab <- as.data.frame(HorseKicks)
colnames(tab) <- c("nDeaths", "Freq")
print(xtable(tab), include.rownames = FALSE,
include.colnames = TRUE)
## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{lr}
## \hline
## nDeaths & Freq \\
## \hline
## 0 & 109 \\
## 1 & 65 \\
## 2 & 22 \\
## 3 & 3 \\
## 4 & 1 \\
## \hline
## \end{tabular}
## \end{table}