Materi ini diambil dari:

Friendly, M., Meyer, D. 2015. Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data. (http://ddar.datavis.ca/pages/home)

Bekerja dengan Data menggunakan R

Data Vektor

  • Menginput data vektor secara manual
count <- c(17, 20, 15, 40)
count
## [1] 17 20 15 40
(sex <- c("female", "male", "female", "male"))
## [1] "female" "male"   "female" "male"
(passed <- c(TRUE, TRUE, FALSE, FALSE))
## [1]  TRUE  TRUE FALSE FALSE
  • Menginput data vektor menggunakan suatu fungsi
seq(10, 100, by=10)
##  [1]  10  20  30  40  50  60  70  80  90 100
seq(0,1, length.out=11)
##  [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
(sex <- rep(c("female", "male"), times=2))
## [1] "female" "male"   "female" "male"
(sex <- rep(c("female", "male"), length.out=4))
## [1] "female" "male"   "female" "male"
(passed <- rep(c(TRUE, FALSE), each=2))
## [1]  TRUE  TRUE FALSE FALSE

Data Matriks

  • Menginput data Matriks secara manual
(matA <- matrix(1:8, nrow=2,ncol=4))
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8
(matB <- matrix(1:8, nrow=2, ncol=4, byrow=TRUE))
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
(matC <- matrix(1:8, nrow=2, ncol=4))
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8
  • Fungsi pada objek berbentuk Matriks
dim(matA)
## [1] 2 4
str(matA)
##  int [1:2, 1:4] 1 2 3 4 5 6 7 8
dimnames(matA) <- list(c("M", "F"), LETTERS[1:4])
matA
##   A B C D
## M 1 3 5 7
## F 2 4 6 8
str(matA)
##  int [1:2, 1:4] 1 2 3 4 5 6 7 8
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:2] "M" "F"
##   ..$ : chr [1:4] "A" "B" "C" "D"
  • memberikan label pada baris dan kolom matriks
dimnames(matA) <- list(sex=c("M", "F"), group=LETTERS[1:4])
#bisa juga ditulis sebagai: names(dimnames(matA)) <- c("Sex", "Group")
matA
##    group
## sex A B C D
##   M 1 3 5 7
##   F 2 4 6 8
str(matA)
##  int [1:2, 1:4] 1 2 3 4 5 6 7 8
##  - attr(*, "dimnames")=List of 2
##   ..$ sex  : chr [1:2] "M" "F"
##   ..$ group: chr [1:4] "A" "B" "C" "D"
  • menggabungkan matriks
rbind(matA, c(10, 20))
##    A  B  C  D
## M  1  3  5  7
## F  2  4  6  8
##   10 20 10 20
cbind(matA, c(10, 20))
##   A B C D   
## M 1 3 5 7 10
## F 2 4 6 8 20
  • memutar matriks
t(matA)
##      sex
## group M F
##     A 1 2
##     B 3 4
##     C 5 6
##     D 7 8
  • komputasi dasar pada object matriks
2 * matA / 100
##    group
## sex    A    B    C    D
##   M 0.02 0.06 0.10 0.14
##   F 0.04 0.08 0.12 0.16

Arrays

dims <- c(2, 4, 2)

(arrayA <- array(1:16, dim=dims))
## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]    9   11   13   15
## [2,]   10   12   14   16
str(arrayA)
##  int [1:2, 1:4, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
(arrayB <- array(1:16, dim = c(2,8)))
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,]    1    3    5    7    9   11   13   15
## [2,]    2    4    6    8   10   12   14   16
str(arrayB)
##  int [1:2, 1:8] 1 2 3 4 5 6 7 8 9 10 ...
  • memberikan label
dimnames(arrayA) <- list(sex=c("M", "F"),
                         group=letters[1:4],
                         time=c("Pre", "Post"))

arrayA
## , , time = Pre
## 
##    group
## sex a b c d
##   M 1 3 5 7
##   F 2 4 6 8
## 
## , , time = Post
## 
##    group
## sex  a  b  c  d
##   M  9 11 13 15
##   F 10 12 14 16
str(arrayA)
##  int [1:2, 1:4, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ sex  : chr [1:2] "M" "F"
##   ..$ group: chr [1:4] "a" "b" "c" "d"
##   ..$ time : chr [1:2] "Pre" "Post"

Data Frames

set.seed(12345)
n<-100
A<-factor(sample(c("a1", "a2"), n, replace=TRUE))
B<-factor(sample(c("b1", "b2"), n, replace=TRUE))
sex<-factor(sample(c("M", "F"), n, replace=TRUE))
age<-round(rnorm(n, mean=30, sd=5))
mydata<-data.frame(A, B, sex, age)
head(mydata,5)
##    A  B sex age
## 1 a2 b1   F  22
## 2 a1 b1   M  33
## 3 a2 b1   F  31
## 4 a2 b2   M  26
## 5 a2 b1   M  29
str(mydata)
## 'data.frame':    100 obs. of  4 variables:
##  $ A  : Factor w/ 2 levels "a1","a2": 2 1 2 2 2 2 2 1 1 2 ...
##  $ B  : Factor w/ 2 levels "b1","b2": 1 1 1 2 1 1 1 1 2 1 ...
##  $ sex: Factor w/ 2 levels "F","M": 1 2 1 2 2 2 2 1 2 2 ...
##  $ age: num  22 33 31 26 29 29 38 28 30 27 ...
  • mengekstrak data frame
mydata[1,2]
## [1] b1
## Levels: b1 b2
mydata$sex
##   [1] F M F M M M M F M M M M M M M M M F F M M M F F F M F M M M F M F F F F F
##  [38] M M M F F F M F M M M F F M F F M F M M F F F F F M F M M M M M F M M M M
##  [75] F M F F F M M F M F F F F F M M F M M M F F F F M F
## Levels: F M

Contoh-contoh Data

Bentuk Data Kasus

#install.packages("vcd")
data(Arthritis, package="vcd")

names(Arthritis)
## [1] "ID"        "Treatment" "Sex"       "Age"       "Improved"
str(Arthritis)
## 'data.frame':    84 obs. of  5 variables:
##  $ ID       : int  57 46 77 17 36 23 75 39 33 55 ...
##  $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Sex      : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Age      : int  27 29 30 32 46 58 59 59 63 63 ...
##  $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...
head(Arthritis, 5)
##   ID Treatment  Sex Age Improved
## 1 57   Treated Male  27     Some
## 2 46   Treated Male  29     None
## 3 77   Treated Male  30     None
## 4 17   Treated Male  32   Marked
## 5 36   Treated Male  46   Marked

Bentuk Data Frekuensi

  • Contoh data General Social Survey
tmp <- expand.grid(sex=c("female", "male"),
                   party=c("dem", "indep", "rep")) 
tmp
##      sex party
## 1 female   dem
## 2   male   dem
## 3 female indep
## 4   male indep
## 5 female   rep
## 6   male   rep
GSS <- data.frame(tmp, count=c(279, 165, 73, 47, 225, 191))
GSS
##      sex party count
## 1 female   dem   279
## 2   male   dem   165
## 3 female indep    73
## 4   male indep    47
## 5 female   rep   225
## 6   male   rep   191

Bentuk Data Tabel

data("HairEyeColor", package="datasets")
str(HairEyeColor)
##  'table' num [1:4, 1:4, 1:2] 32 53 10 3 11 50 10 30 10 25 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
##   ..$ Eye : chr [1:4] "Brown" "Blue" "Hazel" "Green"
##   ..$ Sex : chr [1:2] "Male" "Female"
dim(HairEyeColor)
## [1] 4 4 2
dimnames(HairEyeColor)
## $Hair
## [1] "Black" "Brown" "Red"   "Blond"
## 
## $Eye
## [1] "Brown" "Blue"  "Hazel" "Green"
## 
## $Sex
## [1] "Male"   "Female"
sum(HairEyeColor) #banyaknya kasus
## [1] 592
  • Menggunakan fungsi table() dan xtabs()
GSS.tab <- matrix(c(279, 73, 225, 
                    165, 47, 191),
                  nrow=2, ncol=3, byrow=TRUE)
dimnames(GSS.tab) <- list(sex=c("female", "male"),
                          party=c("dem", "indep", "rep"))
GSS.tab
##         party
## sex      dem indep rep
##   female 279    73 225
##   male   165    47 191
  • Mengubah objek menjadi kelas table
GSS.tab <- as.table(GSS.tab)
str(GSS.tab)
##  'table' num [1:2, 1:3] 279 165 73 47 225 191
##  - attr(*, "dimnames")=List of 2
##   ..$ sex  : chr [1:2] "female" "male"
##   ..$ party: chr [1:3] "dem" "indep" "rep"
  • Menginput tabel secara manual, contoh data Job Satisfaction
JobSat <- matrix(c(1, 2, 1, 0,
                   3, 3, 6, 1, 
                   10, 10, 14, 9,
                   6, 7, 12, 11),
                 nrow=4, ncol=4)
dimnames(JobSat) <-
  list(income=c("< 15k", "15-25k", "25-40k", "> 40k"),
       satisfaction=c("VeryD", "LittleD", "ModerateS", "VeryS"))
JobSat <- as.table(JobSat)
JobSat
##         satisfaction
## income   VeryD LittleD ModerateS VeryS
##   < 15k      1       3        10     6
##   15-25k     2       3        10     7
##   25-40k     1       6        14    12
##   > 40k      0       1         9    11

Tabel yang berisi faktor terurut dan tidak terurut

dimnames(JobSat)$income <-
  paste(1:4, dimnames(JobSat)$income, sep=":")

dimnames(JobSat)$satisfaction <-
  paste(1:4, dimnames(JobSat)$satisfaction, sep=":")

JobSat
##           satisfaction
## income     1:VeryD 2:LittleD 3:ModerateS 4:VeryS
##   1:< 15k        1         3          10       6
##   2:15-25k       2         3          10       7
##   3:25-40k       1         6          14      12
##   4:> 40k        0         1           9      11
data("HairEyeColor")

HEC <- HairEyeColor[,c(1, 3, 4, 2),] #menghubah urutan eye color
str(HEC)
##  'table' num [1:4, 1:4, 1:2] 32 53 10 3 10 25 7 5 3 15 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
##   ..$ Eye : chr [1:4] "Brown" "Hazel" "Green" "Blue"
##   ..$ Sex : chr [1:2] "Male" "Female"
str(UCBAdmissions)
##  'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Admit : chr [1:2] "Admitted" "Rejected"
##   ..$ Gender: chr [1:2] "Male" "Female"
##   ..$ Dept  : chr [1:6] "A" "B" "C" "D" ...
UCB <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(UCB)$Admit <- c("Yes", "No")
names(dimnames(UCB)) <- c("Sex", "Admitted", "Department")
str(UCB)
##  'table' num [1:2, 1:2, 1:6] 512 89 313 19 353 17 207 8 120 202 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Sex       : chr [1:2] "Male" "Female"
##   ..$ Admitted  : chr [1:2] "Yes" "No"
##   ..$ Department: chr [1:6] "A" "B" "C" "D" ...

Menyusun tabel menggunakan fungsi table() dan xtabs()

fungsi table()

table(mydata$A, mydata$B)
##     
##      b1 b2
##   a1 25 21
##   a2 22 32
(mytab <- table(mydata[,1:2]))
##     B
## A    b1 b2
##   a1 25 21
##   a2 22 32
margin.table(mytab)
## [1] 100
margin.table(mytab,1)
## A
## a1 a2 
## 46 54
margin.table(mytab,2)
## B
## b1 b2 
## 47 53
addmargins(mytab)
##      B
## A      b1  b2 Sum
##   a1   25  21  46
##   a2   22  32  54
##   Sum  47  53 100
prop.table(mytab)
##     B
## A      b1   b2
##   a1 0.25 0.21
##   a2 0.22 0.32
prop.table(mytab,1)
##     B
## A           b1        b2
##   a1 0.5434783 0.4565217
##   a2 0.4074074 0.5925926
prop.table(mytab,2)
##     B
## A           b1        b2
##   a1 0.5319149 0.3962264
##   a2 0.4680851 0.6037736
mytab<-table(mydata[,c("A","B","sex")])
ftable(mytab)
##       sex  F  M
## A  B           
## a1 b1     14 11
##    b2      8 13
## a2 b1      7 15
##    b2     18 14

fungsi xtabs()

mytable<-xtabs(~A+B+sex,data=mydata)
ftable(mytable)
##       sex  F  M
## A  B           
## a1 b1     14 11
##    b2      8 13
## a2 b1      7 15
##    b2     18 14
summary(mytable)
## Call: xtabs(formula = ~A + B + sex, data = mydata)
## Number of cases in table: 100 
## Number of factors: 3 
## Test for independence of all factors:
##  Chisq = 6.356, df = 4, p-value = 0.1741
(GSStab<-xtabs(count~sex+party,data=GSS))
##         party
## sex      dem indep rep
##   female 279    73 225
##   male   165    47 191
summary(GSStab)
## Call: xtabs(formula = count ~ sex + party, data = GSS)
## Number of cases in table: 980 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 7.01, df = 2, p-value = 0.03005

Mencetak Tabel dengan fungsi structable() dan ftable()

ftable(UCB) #default
##                 Department   A   B   C   D   E   F
## Sex    Admitted                                   
## Male   Yes                 512 353 120 138  53  22
##        No                  313 207 205 279 138 351
## Female Yes                  89  17 202 131  94  24
##        No                   19   8 391 244 299 317
#ftable(Admit+Gender~Dept,data=UCB) #formula method
library(vcd)
## Warning: package 'vcd' was built under R version 4.3.3
## Loading required package: grid
structable(HairEyeColor)
##              Eye Brown Blue Hazel Green
## Hair  Sex                              
## Black Male          32   11    10     3
##       Female        36    9     5     2
## Brown Male          53   50    25    15
##       Female        66   34    29    14
## Red   Male          10   10     7     7
##       Female        16    7     7     7
## Blond Male           3   30     5     8
##       Female         4   64     5     8
structable(Hair+Sex~Eye,HairEyeColor)#specifycol~rowvariables
##       Hair Black        Brown         Red        Blond       
##       Sex   Male Female  Male Female Male Female  Male Female
## Eye                                                          
## Brown         32     36    53     66   10     16     3      4
## Blue          11      9    50     34   10      7    30     64
## Hazel         10      5    25     29    7      7     5      5
## Green          3      2    15     14    7      7     8      8

Subsetting Data

Subsetting tables

HairEyeColor[,,"Female"]
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8
apply(HairEyeColor, 3, sum)
##   Male Female 
##    279    313
HairEyeColor[c("Black", "Brown"), c("Hazel", "Green"),]
## , , Sex = Male
## 
##        Eye
## Hair    Hazel Green
##   Black    10     3
##   Brown    25    15
## 
## , , Sex = Female
## 
##        Eye
## Hair    Hazel Green
##   Black     5     2
##   Brown    29    14

Subsetting Structables

hec<-structable(Eye~Sex+Hair,data=HairEyeColor)
hec
##              Eye Brown Blue Hazel Green
## Sex    Hair                            
## Male   Black        32   11    10     3
##        Brown        53   50    25    15
##        Red          10   10     7     7
##        Blond         3   30     5     8
## Female Black        36    9     5     2
##        Brown        66   34    29    14
##        Red          16    7     7     7
##        Blond         4   64     5     8
hec[[c("Male","Brown"),]]
## Eye Brown Blue Hazel Green
##                           
##        53   50    25    15

Bentuk Kasus menjadi Bentuk Tabel Frekuensi

Art.tab <- table(Arthritis[,c("Treatment", "Sex", "Improved")])
str(Art.tab)
##  'table' int [1:2, 1:2, 1:3] 19 6 10 7 7 5 0 2 6 16 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Treatment: chr [1:2] "Placebo" "Treated"
##   ..$ Sex      : chr [1:2] "Female" "Male"
##   ..$ Improved : chr [1:3] "None" "Some" "Marked"
ftable(Art.tab)
##                  Improved None Some Marked
## Treatment Sex                             
## Placebo   Female            19    7      6
##           Male              10    0      1
## Treated   Female             6    5     16
##           Male               7    2      5

Mengembalikan bentuk tabel frekuensi menjadi bentuk kasus

#install.packages("vcdExtra")
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 4.3.3
## Loading required package: gnm
## Warning: package 'gnm' was built under R version 4.3.3
## 
## Attaching package: 'vcdExtra'
## The following object is masked _by_ '.GlobalEnv':
## 
##     JobSat
Art.df <- expand.dft(Art.tab)
str(Art.df)
## 'data.frame':    84 obs. of  3 variables:
##  $ Treatment: chr  "Placebo" "Placebo" "Placebo" "Placebo" ...
##  $ Sex      : chr  "Female" "Female" "Female" "Female" ...
##  $ Improved : chr  "None" "None" "None" "None" ...

Mempublish tabel ke dalam LatEx atau HTML

data("HorseKicks", package = "vcd")
HorseKicks
## nDeaths
##   0   1   2   3   4 
## 109  65  22   3   1
library(xtable)
xtable(HorseKicks)
## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
##   \hline
##  & nDeaths \\ 
##   \hline
## 0 & 109 \\ 
##   1 &  65 \\ 
##   2 &  22 \\ 
##   3 &   3 \\ 
##   4 &   1 \\ 
##    \hline
## \end{tabular}
## \end{table}
xtable(HorseKicks)
## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
##   \hline
##  & nDeaths \\ 
##   \hline
## 0 & 109 \\ 
##   1 &  65 \\ 
##   2 &  22 \\ 
##   3 &   3 \\ 
##   4 &   1 \\ 
##    \hline
## \end{tabular}
## \end{table}
tab <- as.data.frame(HorseKicks)
colnames(tab) <- c("nDeaths", "Freq")
print(xtable(tab), include.rownames = FALSE,
      include.colnames = TRUE)
## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{lr}
##   \hline
## nDeaths & Freq \\ 
##   \hline
## 0 & 109 \\ 
##   1 &  65 \\ 
##   2 &  22 \\ 
##   3 &   3 \\ 
##   4 &   1 \\ 
##    \hline
## \end{tabular}
## \end{table}