# Attach the vcd and vcdExtra packages. Lines starting with `##` throughout
# this transcript are the console output recorded when the code was run.
library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 3.3.3
## Loading required package: gnm
## Warning: package 'gnm' was built under R version 3.3.3
# Inspect the structure of UCBAdmissions: a three-way table of frequencies
# with dimensions Admit (2) x Gender (2) x Dept (6).
str(UCBAdmissions)
## table [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
## - attr(*, "dimnames")=List of 3
## ..$ Admit : chr [1:2] "Admitted" "Rejected"
## ..$ Gender: chr [1:2] "Male" "Female"
## ..$ Dept : chr [1:6] "A" "B" "C" "D" ...
# Draw the default plot for the table object.
plot(UCBAdmissions)
# Number of levels along each of the three dimensions.
dim(UCBAdmissions)
## [1] 2 2 6
# Level labels for each dimension.
dimnames(UCBAdmissions)
## $Admit
## [1] "Admitted" "Rejected"
##
## $Gender
## [1] "Male" "Female"
##
## $Dept
## [1] "A" "B" "C" "D" "E" "F"
# Grand total of all cell counts in the table.
sum(UCBAdmissions)
## [1] 4526
```
Let’s simulate a data frame of discrete factor variables (using set.seed() for reproducibility and sample() to draw the values), and then build frequency tables from it with the table() function:
# Simulate n cases with three categorical variables and collect them in a
# data frame. The seed is fixed so the random draws are repeatable.
# NOTE: the output below was recorded under R 3.3.3; R >= 3.6.0 changed the
# default sample() algorithm, so the exact draws may differ on newer versions.
set.seed(123456)
n <- 100
# The three sample() calls must stay in this order: each one advances the
# random-number stream that the next one reads from.
Admit <- factor(sample(c("Admitted", "Rejected"), size = n, replace = TRUE))
Gender <- factor(sample(c("Male", "Female"), size = n, replace = TRUE))
Dept <- factor(sample(LETTERS[1:6], size = n, replace = TRUE))
DFdata <- data.frame(Admit = Admit, Gender = Gender, Dept = Dept)
# First six rows of the simulated data.
head(DFdata, n = 6)
## Admit Gender Dept
## 1 Rejected Male B
## 2 Rejected Female A
## 3 Admitted Male F
## 4 Admitted Male B
## 5 Admitted Female A
## 6 Admitted Female C
# Column types: all three variables are factors (levels sorted alphabetically).
str(DFdata)
## 'data.frame': 100 obs. of 3 variables:
## $ Admit : Factor w/ 2 levels "Admitted","Rejected": 2 2 1 1 1 1 2 1 2 1 ...
## $ Gender: Factor w/ 2 levels "Female","Male": 2 1 2 2 1 1 1 2 1 1 ...
## $ Dept : Factor w/ 6 levels "A","B","C","D",..: 2 1 6 2 1 3 5 1 5 2 ...
# Default plot method for the data frame.
plot(DFdata)
Rows, columns, and individual values of a data frame can be accessed in the same way as those of a matrix, using [row, column] subscripting. For example, the value in row 2, column 3:
# Matrix-style indexing: the value in row 2, column 3 (the Dept column).
# The result is still a factor, so its levels are printed as well.
DFdata[2, 3]
## [1] A
## Levels: A B C D E F
Variables (columns) can also be extracted by name using the $ operator, for example the Dept column:
# Extract the Dept column as a factor vector.
DFdata$Dept
## [1] B A F B A C E A E B F B A F C D B B F D B F D C C F B D C A C D B E F
## [36] B C C C D F B A C A C A D B F E F C B D B A F D D D E A E E F E D A A
## [71] E C C D F F D C B E F F D B C B F A F F B D E E A D C C D B
## Levels: A B C D E F
# One-way frequency table of Dept: build it once, keep it, then print it.
DFtab <- table(DFdata$Dept)
DFtab
##
## A B C D E F
## 14 19 18 18 12 19
# Convert the counts to proportions of the grand total.
prop.table(DFtab)
##
## A B C D E F
## 0.14 0.19 0.18 0.18 0.12 0.19
# Two-way frequency table: Dept in rows, Gender in columns.
# Despite its name, Protab holds the counts; the proportions are derived below.
Protab <- table(DFdata$Dept, DFdata$Gender)
Protab
##
## Female Male
## A 5 9
## B 6 13
## C 10 8
## D 13 5
## E 9 3
## F 9 10
# Cell proportions relative to the grand total (no margin given).
prop.table(Protab)
##
## Female Male
## A 0.05 0.09
## B 0.06 0.13
## C 0.10 0.08
## D 0.13 0.05
## E 0.09 0.03
## F 0.09 0.10
# Load the UKSoccer data set from the vcd package.
# The argument name is lower-case `package`; a misspelt `Package = "vcd"` is
# swallowed by `...` and treated as a second data set name, which triggers the
# warning "data set 'vcd' not found".
data("UKSoccer", package = "vcd")
# Flat (ftable) display of the two-way Home x Away table.
ftable(UKSoccer)
## Away 0 1 2 3 4
## Home
## 0 27 29 10 8 2
## 1 59 53 14 12 4
## 2 28 32 14 12 4
## 3 19 14 7 4 1
## 4 7 8 10 2 0
# Default plot for the table object.
plot(UKSoccer)
# Grand total of all cell counts.
sum(UKSoccer)
## [1] 380
# Structure: a 5 x 5 table with dimensions Home and Away.
str(UKSoccer)
## table [1:5, 1:5] 27 59 28 19 7 29 53 32 14 8 ...
## - attr(*, "dimnames")=List of 2
## ..$ Home: chr [1:5] "0" "1" "2" "3" ...
## ..$ Away: chr [1:5] "0" "1" "2" "3" ...
# Keep all rows and columns 1 to 5 (every Away column of the 5 x 5 table) and
# store the result as a flat table.
mytab <- ftable(UKSoccer[, 1:5])
# With no margin argument, margin.table() returns the grand total.
margin.table(mytab)
## [1] 380
# Append row and column sums. As the recorded output shows, the Home/Away
# labels are not printed when the margins are added to the flat table.
addmargins(mytab)
## Sum
## 27 29 10 8 2 76
## 59 53 14 12 4 142
## 28 32 14 12 4 90
## 19 14 7 4 1 45
## 7 8 10 2 0 27
## Sum 140 136 55 38 11 380
# Cell proportions relative to the grand total.
prop.table(mytab)
## Away 0 1 2 3 4
## Home
## 0 0.071052632 0.076315789 0.026315789 0.021052632 0.005263158
## 1 0.155263158 0.139473684 0.036842105 0.031578947 0.010526316
## 2 0.073684211 0.084210526 0.036842105 0.031578947 0.010526316
## 3 0.050000000 0.036842105 0.018421053 0.010526316 0.002631579
## 4 0.018421053 0.021052632 0.026315789 0.005263158 0.000000000