Materi ini diambil dari:

Friendly, M., Meyer, D. 2015. Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data. (http://ddar.datavis.ca/pages/home)

Bekerja dengan Data menggunakan R

Data Vektor

Menginput data vektor secara manual

count <- c(17, 20, 15, 40)
count

## [1] 17 20 15 40

(sex <- c("female", "male", "female", "male"))

## [1] "female" "male"   "female" "male"

(passed <- c(TRUE, TRUE, FALSE, FALSE))

## [1]  TRUE  TRUE FALSE FALSE

Menginput data vektor menggunakan suatu fungsi

seq(10, 100, by=10)

##  [1]  10  20  30  40  50  60  70  80  90 100

seq(0,1, length.out=11)

##  [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0

(sex <- rep(c("female", "male"), times=2))

## [1] "female" "male"   "female" "male"

(sex <- rep(c("female", "male"), length.out=4))

## [1] "female" "male"   "female" "male"

(passed <- rep(c(TRUE, FALSE), each=2))

## [1]  TRUE  TRUE FALSE FALSE

Data Matriks

Menginput data Matriks secara manual

(matA <- matrix(1:8, nrow=2,ncol=4))

##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8

(matB <- matrix(1:8, nrow=2, ncol=4, byrow=TRUE))

##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8

(matC <- matrix(1:8, nrow=2, ncol=4))

##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8

Fungsi pada objek berbentuk Matriks

dim(matA)

## [1] 2 4

str(matA)

##  int [1:2, 1:4] 1 2 3 4 5 6 7 8

dimnames(matA) <- list(c("M", "F"), LETTERS[1:4])
matA

##   A B C D
## M 1 3 5 7
## F 2 4 6 8

str(matA)

##  int [1:2, 1:4] 1 2 3 4 5 6 7 8
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:2] "M" "F"
##   ..$ : chr [1:4] "A" "B" "C" "D"

memberikan label pada baris dan kolom matriks

dimnames(matA) <- list(sex=c("M", "F"), group=LETTERS[1:4])
#bisa juga ditulis sebagai: names(dimnames(matA)) <- c("Sex", "Group")
matA

##    group
## sex A B C D
##   M 1 3 5 7
##   F 2 4 6 8

str(matA)

##  int [1:2, 1:4] 1 2 3 4 5 6 7 8
##  - attr(*, "dimnames")=List of 2
##   ..$ sex  : chr [1:2] "M" "F"
##   ..$ group: chr [1:4] "A" "B" "C" "D"

menggabungkan matriks

rbind(matA, c(10, 20))

##    A  B  C  D
## M  1  3  5  7
## F  2  4  6  8
##   10 20 10 20

cbind(matA, c(10, 20))

##   A B C D   
## M 1 3 5 7 10
## F 2 4 6 8 20

memutar matriks

t(matA)

##      sex
## group M F
##     A 1 2
##     B 3 4
##     C 5 6
##     D 7 8

komputasi dasar pada object matriks

2 * matA / 100

##    group
## sex    A    B    C    D
##   M 0.02 0.06 0.10 0.14
##   F 0.04 0.08 0.12 0.16

Arrays

dims <- c(2, 4, 2)

(arrayA <- array(1:16, dim=dims))

## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]    9   11   13   15
## [2,]   10   12   14   16

str(arrayA)

##  int [1:2, 1:4, 1:2] 1 2 3 4 5 6 7 8 9 10 ...

(arrayB <- array(1:16, dim = c(2,8)))

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,]    1    3    5    7    9   11   13   15
## [2,]    2    4    6    8   10   12   14   16

str(arrayB)

##  int [1:2, 1:8] 1 2 3 4 5 6 7 8 9 10 ...

memberikan label

dimnames(arrayA) <- list(sex=c("M", "F"),
                         group=letters[1:4],
                         time=c("Pre", "Post"))

arrayA

## , , time = Pre
## 
##    group
## sex a b c d
##   M 1 3 5 7
##   F 2 4 6 8
## 
## , , time = Post
## 
##    group
## sex  a  b  c  d
##   M  9 11 13 15
##   F 10 12 14 16

str(arrayA)

##  int [1:2, 1:4, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ sex  : chr [1:2] "M" "F"
##   ..$ group: chr [1:4] "a" "b" "c" "d"
##   ..$ time : chr [1:2] "Pre" "Post"

Data Frames

set.seed(12345)
n<-100
A<-factor(sample(c("a1", "a2"), n, replace=TRUE))
B<-factor(sample(c("b1", "b2"), n, replace=TRUE))
sex<-factor(sample(c("M", "F"), n, replace=TRUE))
age<-round(rnorm(n, mean=30, sd=5))
mydata<-data.frame(A, B, sex, age)
head(mydata,5)

##    A  B sex age
## 1 a2 b1   F  22
## 2 a1 b1   M  33
## 3 a2 b1   F  31
## 4 a2 b2   M  26
## 5 a2 b1   M  29

str(mydata)

## 'data.frame':    100 obs. of  4 variables:
##  $ A  : Factor w/ 2 levels "a1","a2": 2 1 2 2 2 2 2 1 1 2 ...
##  $ B  : Factor w/ 2 levels "b1","b2": 1 1 1 2 1 1 1 1 2 1 ...
##  $ sex: Factor w/ 2 levels "F","M": 1 2 1 2 2 2 2 1 2 2 ...
##  $ age: num  22 33 31 26 29 29 38 28 30 27 ...

mengekstrak data frame

mydata[1,2]

## [1] b1
## Levels: b1 b2

mydata$sex

##   [1] F M F M M M M F M M M M M M M M M F F M M M F F F M F M M M F M F F F F F
##  [38] M M M F F F M F M M M F F M F F M F M M F F F F F M F M M M M M F M M M M
##  [75] F M F F F M M F M F F F F F M M F M M M F F F F M F
## Levels: F M

Contoh-contoh Data

Bentuk Data Kasus

#install.packages("vcd")
data(Arthritis, package="vcd")

names(Arthritis)

## [1] "ID"        "Treatment" "Sex"       "Age"       "Improved"

str(Arthritis)

## 'data.frame':    84 obs. of  5 variables:
##  $ ID       : int  57 46 77 17 36 23 75 39 33 55 ...
##  $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Sex      : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Age      : int  27 29 30 32 46 58 59 59 63 63 ...
##  $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...

head(Arthritis, 5)

##   ID Treatment  Sex Age Improved
## 1 57   Treated Male  27     Some
## 2 46   Treated Male  29     None
## 3 77   Treated Male  30     None
## 4 17   Treated Male  32   Marked
## 5 36   Treated Male  46   Marked

Bentuk Data Frekuensi

Contoh data General Social Survey

tmp <- expand.grid(sex=c("female", "male"),
                   party=c("dem", "indep", "rep")) 
tmp

##      sex party
## 1 female   dem
## 2   male   dem
## 3 female indep
## 4   male indep
## 5 female   rep
## 6   male   rep

GSS <- data.frame(tmp, count=c(279, 165, 73, 47, 225, 191))
GSS

##      sex party count
## 1 female   dem   279
## 2   male   dem   165
## 3 female indep    73
## 4   male indep    47
## 5 female   rep   225
## 6   male   rep   191

Bentuk Data Tabel

data("HairEyeColor", package="datasets")
str(HairEyeColor)

##  'table' num [1:4, 1:4, 1:2] 32 53 10 3 11 50 10 30 10 25 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
##   ..$ Eye : chr [1:4] "Brown" "Blue" "Hazel" "Green"
##   ..$ Sex : chr [1:2] "Male" "Female"

dim(HairEyeColor)

## [1] 4 4 2

dimnames(HairEyeColor)

## $Hair
## [1] "Black" "Brown" "Red"   "Blond"
## 
## $Eye
## [1] "Brown" "Blue"  "Hazel" "Green"
## 
## $Sex
## [1] "Male"   "Female"

sum(HairEyeColor) #banyaknya kasus

## [1] 592

Menggunakan fungsi table() dan xtabs()

GSS.tab <- matrix(c(279, 73, 225, 
                    165, 47, 191),
                  nrow=2, ncol=3, byrow=TRUE)
dimnames(GSS.tab) <- list(sex=c("female", "male"),
                          party=c("dem", "indep", "rep"))
GSS.tab

##         party
## sex      dem indep rep
##   female 279    73 225
##   male   165    47 191

Mengubah objek menjadi kelas table

GSS.tab <- as.table(GSS.tab)
str(GSS.tab)

##  'table' num [1:2, 1:3] 279 165 73 47 225 191
##  - attr(*, "dimnames")=List of 2
##   ..$ sex  : chr [1:2] "female" "male"
##   ..$ party: chr [1:3] "dem" "indep" "rep"

Menginput tabel secara manual, contoh data Job Satisfaction

JobSat <- matrix(c(1, 2, 1, 0,
                   3, 3, 6, 1, 
                   10, 10, 14, 9,
                   6, 7, 12, 11),
                 nrow=4, ncol=4)
dimnames(JobSat) <-
  list(income=c("< 15k", "15-25k", "25-40k", "> 40k"),
       satisfaction=c("VeryD", "LittleD", "ModerateS", "VeryS"))
JobSat <- as.table(JobSat)
JobSat

##         satisfaction
## income   VeryD LittleD ModerateS VeryS
##   < 15k      1       3        10     6
##   15-25k     2       3        10     7
##   25-40k     1       6        14    12
##   > 40k      0       1         9    11

Tabel yang berisi faktor terurut dan tidak terurut

Mengurutkan kolom dan baris pada data Job Satifsaction

dimnames(JobSat)$income <-
  paste(1:4, dimnames(JobSat)$income, sep=":")

dimnames(JobSat)$satisfaction <-
  paste(1:4, dimnames(JobSat)$satisfaction, sep=":")

JobSat

##           satisfaction
## income     1:VeryD 2:LittleD 3:ModerateS 4:VeryS
##   1:< 15k        1         3          10       6
##   2:15-25k       2         3          10       7
##   3:25-40k       1         6          14      12
##   4:> 40k        0         1           9      11

Data Hair Eye Color

data("HairEyeColor")

HEC <- HairEyeColor[,c(1, 3, 4, 2),] #menghubah urutan eye color
str(HEC)

##  'table' num [1:4, 1:4, 1:2] 32 53 10 3 10 25 7 5 3 15 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
##   ..$ Eye : chr [1:4] "Brown" "Hazel" "Green" "Blue"
##   ..$ Sex : chr [1:2] "Male" "Female"

Data UCBA Admission

str(UCBAdmissions)

##  'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Admit : chr [1:2] "Admitted" "Rejected"
##   ..$ Gender: chr [1:2] "Male" "Female"
##   ..$ Dept  : chr [1:6] "A" "B" "C" "D" ...

UCB <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(UCB)$Admit <- c("Yes", "No")
names(dimnames(UCB)) <- c("Sex", "Admitted", "Department")
str(UCB)

##  'table' num [1:2, 1:2, 1:6] 512 89 313 19 353 17 207 8 120 202 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Sex       : chr [1:2] "Male" "Female"
##   ..$ Admitted  : chr [1:2] "Yes" "No"
##   ..$ Department: chr [1:6] "A" "B" "C" "D" ...

Menyusun tabel menggunakan fungsi `table()` dan `xtabs()`

fungsi `table()`

table(mydata$A, mydata$B)

##     
##      b1 b2
##   a1 25 21
##   a2 22 32

(mytab <- table(mydata[,1:2]))

##     B
## A    b1 b2
##   a1 25 21
##   a2 22 32

margin.table(mytab)

## [1] 100

margin.table(mytab,1)

## A
## a1 a2 
## 46 54

margin.table(mytab,2)

## B
## b1 b2 
## 47 53

addmargins(mytab)

##      B
## A      b1  b2 Sum
##   a1   25  21  46
##   a2   22  32  54
##   Sum  47  53 100

prop.table(mytab)

##     B
## A      b1   b2
##   a1 0.25 0.21
##   a2 0.22 0.32

prop.table(mytab,1)

##     B
## A           b1        b2
##   a1 0.5434783 0.4565217
##   a2 0.4074074 0.5925926

prop.table(mytab,2)

##     B
## A           b1        b2
##   a1 0.5319149 0.3962264
##   a2 0.4680851 0.6037736

mytab<-table(mydata[,c("A","B","sex")])
ftable(mytab)

##       sex  F  M
## A  B           
## a1 b1     14 11
##    b2      8 13
## a2 b1      7 15
##    b2     18 14

fungsi `xtabs()`

mytable<-xtabs(~A+B+sex,data=mydata)
ftable(mytable)

##       sex  F  M
## A  B           
## a1 b1     14 11
##    b2      8 13
## a2 b1      7 15
##    b2     18 14

summary(mytable)

## Call: xtabs(formula = ~A + B + sex, data = mydata)
## Number of cases in table: 100 
## Number of factors: 3 
## Test for independence of all factors:
##  Chisq = 6.356, df = 4, p-value = 0.1741

(GSStab<-xtabs(count~sex+party,data=GSS))

##         party
## sex      dem indep rep
##   female 279    73 225
##   male   165    47 191

summary(GSStab)

## Call: xtabs(formula = count ~ sex + party, data = GSS)
## Number of cases in table: 980 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 7.01, df = 2, p-value = 0.03005

Mencetak Tabel dengan fungsi `structable()` dan `ftable()`

ftable(UCB) #default

##                 Department   A   B   C   D   E   F
## Sex    Admitted                                   
## Male   Yes                 512 353 120 138  53  22
##        No                  313 207 205 279 138 351
## Female Yes                  89  17 202 131  94  24
##        No                   19   8 391 244 299 317

#ftable(Admit+Gender~Dept,data=UCB) #formula method

library(vcd)

## Warning: package 'vcd' was built under R version 4.3.3

## Loading required package: grid

structable(HairEyeColor)

##              Eye Brown Blue Hazel Green
## Hair  Sex                              
## Black Male          32   11    10     3
##       Female        36    9     5     2
## Brown Male          53   50    25    15
##       Female        66   34    29    14
## Red   Male          10   10     7     7
##       Female        16    7     7     7
## Blond Male           3   30     5     8
##       Female         4   64     5     8

structable(Hair+Sex~Eye,HairEyeColor)#specifycol~rowvariables

##       Hair Black        Brown         Red        Blond       
##       Sex   Male Female  Male Female Male Female  Male Female
## Eye                                                          
## Brown         32     36    53     66   10     16     3      4
## Blue          11      9    50     34   10      7    30     64
## Hazel         10      5    25     29    7      7     5      5
## Green          3      2    15     14    7      7     8      8

Subsetting Data

Subsetting tables

HairEyeColor[,,"Female"]

##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8

apply(HairEyeColor, 3, sum)

##   Male Female 
##    279    313

HairEyeColor[c("Black", "Brown"), c("Hazel", "Green"),]

## , , Sex = Male
## 
##        Eye
## Hair    Hazel Green
##   Black    10     3
##   Brown    25    15
## 
## , , Sex = Female
## 
##        Eye
## Hair    Hazel Green
##   Black     5     2
##   Brown    29    14

Subsetting Structables

hec<-structable(Eye~Sex+Hair,data=HairEyeColor)
hec

##              Eye Brown Blue Hazel Green
## Sex    Hair                            
## Male   Black        32   11    10     3
##        Brown        53   50    25    15
##        Red          10   10     7     7
##        Blond         3   30     5     8
## Female Black        36    9     5     2
##        Brown        66   34    29    14
##        Red          16    7     7     7
##        Blond         4   64     5     8

hec[[c("Male","Brown"),]]

## Eye Brown Blue Hazel Green
##                           
##        53   50    25    15

Bentuk Kasus menjadi Bentuk Tabel Frekuensi

Art.tab <- table(Arthritis[,c("Treatment", "Sex", "Improved")])
str(Art.tab)

##  'table' int [1:2, 1:2, 1:3] 19 6 10 7 7 5 0 2 6 16 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Treatment: chr [1:2] "Placebo" "Treated"
##   ..$ Sex      : chr [1:2] "Female" "Male"
##   ..$ Improved : chr [1:3] "None" "Some" "Marked"

ftable(Art.tab)

##                  Improved None Some Marked
## Treatment Sex                             
## Placebo   Female            19    7      6
##           Male              10    0      1
## Treated   Female             6    5     16
##           Male               7    2      5

Mengembalikan bentuk tabel frekuensi menjadi bentuk kasus

#install.packages("vcdExtra")
library(vcdExtra)

## Warning: package 'vcdExtra' was built under R version 4.3.3

## Loading required package: gnm

## Warning: package 'gnm' was built under R version 4.3.3

## 
## Attaching package: 'vcdExtra'

## The following object is masked _by_ '.GlobalEnv':
## 
##     JobSat

Art.df <- expand.dft(Art.tab)
str(Art.df)

## 'data.frame':    84 obs. of  3 variables:
##  $ Treatment: chr  "Placebo" "Placebo" "Placebo" "Placebo" ...
##  $ Sex      : chr  "Female" "Female" "Female" "Female" ...
##  $ Improved : chr  "None" "None" "None" "None" ...

Mempublish tabel ke dalam LatEx atau HTML

data("HorseKicks", package = "vcd")
HorseKicks

## nDeaths
##   0   1   2   3   4 
## 109  65  22   3   1

library(xtable)
xtable(HorseKicks)

## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
##   \hline
##  & nDeaths \\ 
##   \hline
## 0 & 109 \\ 
##   1 &  65 \\ 
##   2 &  22 \\ 
##   3 &   3 \\ 
##   4 &   1 \\ 
##    \hline
## \end{tabular}
## \end{table}

xtable(HorseKicks)

## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
##   \hline
##  & nDeaths \\ 
##   \hline
## 0 & 109 \\ 
##   1 &  65 \\ 
##   2 &  22 \\ 
##   3 &   3 \\ 
##   4 &   1 \\ 
##    \hline
## \end{tabular}
## \end{table}

tab <- as.data.frame(HorseKicks)
colnames(tab) <- c("nDeaths", "Freq")
print(xtable(tab), include.rownames = FALSE,
      include.colnames = TRUE)

## % latex table generated in R 4.3.2 by xtable 1.8-4 package
## % Mon Aug 19 19:26:34 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{lr}
##   \hline
## nDeaths & Freq \\ 
##   \hline
## 0 & 109 \\ 
##   1 &  65 \\ 
##   2 &  22 \\ 
##   3 &   3 \\ 
##   4 &   1 \\ 
##    \hline
## \end{tabular}
## \end{table}

Pengenalan R untuk Analisis Data Observasional

2024-08-19

Bekerja dengan Data menggunakan R

Data Vektor

Data Matriks

Arrays

Data Frames

Contoh-contoh Data

Bentuk Data Kasus

Bentuk Data Frekuensi

Bentuk Data Tabel

Tabel yang berisi faktor terurut dan tidak terurut

Menyusun tabel menggunakan fungsi `table()` dan `xtabs()`

fungsi `table()`

fungsi `xtabs()`

Mencetak Tabel dengan fungsi `structable()` dan `ftable()`

Subsetting Data

Subsetting tables

Subsetting Structables

Bentuk Kasus menjadi Bentuk Tabel Frekuensi

Mengembalikan bentuk tabel frekuensi menjadi bentuk kasus

Mempublish tabel ke dalam LatEx atau HTML

Pengenalan R untuk Analisis Data Observasional

2024-08-19

Bekerja dengan Data menggunakan R

Data Vektor

Data Matriks

Arrays

Data Frames

Contoh-contoh Data

Bentuk Data Kasus

Bentuk Data Frekuensi

Bentuk Data Tabel

Tabel yang berisi faktor terurut dan tidak terurut

Menyusun tabel menggunakan fungsi table() dan xtabs()

fungsi table()

fungsi xtabs()

Mencetak Tabel dengan fungsi structable() dan ftable()

Subsetting Data

Subsetting tables

Subsetting Structables

Bentuk Kasus menjadi Bentuk Tabel Frekuensi

Mengembalikan bentuk tabel frekuensi menjadi bentuk kasus

Mempublish tabel ke dalam LatEx atau HTML

Menyusun tabel menggunakan fungsi `table()` dan `xtabs()`

fungsi `table()`

fungsi `xtabs()`

Mencetak Tabel dengan fungsi `structable()` dan `ftable()`