Homework #1 is worth 100 points and each question is worth 6.5 points.
Submission Instructions: Save the .HTML file as ‘Familiar_ Categorical_Data_Assignmentyourlastname.HTML’ and upload it to the assignment entitled ‘Getting Familiar with Categorical Data in R’ on Canvas by 11:59 p.m. EST on Wednesday, November 13, 2019. No late assignments are accepted.
Run the code chunk below.
library(vcd)
## Warning: package 'vcd' was built under R version 3.5.3
## Loading required package: grid
library(grid)
library(gnm)
## Warning: package 'gnm' was built under R version 3.5.3
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 3.5.3
# Catalogue the datasets shipped with vcd and vcdExtra, then preview the
# structure of the resulting data frame (two values shown per column).
ds <- vcdExtra::datasets(package = c("vcd", "vcdExtra"))
str(ds, vec.len = 2)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" ...
## $ class : chr "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" ...
# Open the spreadsheet-style viewer only in interactive sessions: View() needs
# a GUI and may fail when the script is run where no display is available.
if (interactive()) {
  View(ds)
  View(UCBAdmissions)
}
# Print the dimensions and dimnames of the UCBAdmissions table.
str(UCBAdmissions)
## 'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
## - attr(*, "dimnames")=List of 3
## ..$ Admit : chr [1:2] "Admitted" "Rejected"
## ..$ Gender: chr [1:2] "Male" "Female"
## ..$ Dept : chr [1:6] "A" "B" "C" "D" ...
# Number of catalogued datasets overall, then the count for each package.
nrow(ds)
## [1] 76
sum(ds$Package == "vcd")
## [1] 33
sum(ds$Package == "vcdExtra")
## [1] 43
There are 76 datasets altogether, where 33 are in ‘vcd’ and 43 are in ‘vcdExtra’.
# Cross-tabulate the package of each dataset against the class of its object.
table(ds[["Package"]], ds[["class"]])
##
## array data.frame matrix table
## vcd 1 17 0 15
## vcdExtra 3 24 1 15
# Search the help system for the Arthritis data (search term spelled to match
# the dataset name), then run the examples from its help page.
??Arthritis
## starting httpd help server ... done
example("Arthritis")
##
## Arthrt> data("Arthritis")
##
## Arthrt> art <- xtabs(~ Treatment + Improved, data = Arthritis, subset = Sex == "Female")
##
## Arthrt> art
## Improved
## Treatment None Some Marked
## Placebo 19 7 6
## Treated 6 5 16
##
## Arthrt> mosaic(art, gp = shading_Friendly)
##
## Arthrt> mosaic(art, gp = shading_max)
# Search the help system for the Hitters data and replay its documented example.
help.search("Hitters")
example(topic = "Hitters")
##
## Hittrs> data("Hitters")
##
## Hittrs> attach(Hitters)
##
## Hittrs> colors <- c("black","red","green","blue","red","black","blue")
##
## Hittrs> pch <- substr(levels(Positions), 1, 1)
##
## Hittrs> ternaryplot(Hitters[,2:4],
## Hittrs+ pch = as.character(Positions),
## Hittrs+ col = colors[as.numeric(Positions)],
## Hittrs+ main = "Baseball Hitters Data")
##
## Hittrs> grid_legend(0.8, 0.9, pch, colors, levels(Positions),
## Hittrs+ title = "POSITION(S)")
##
## Hittrs> detach(Hitters)
# Total number of cases summed over every cell of the table.
sum(as.vector(UCBAdmissions))
## [1] 4526
# Number of cases per department (third dimension, Dept).
margin.table(UCBAdmissions, margin = 3)
## Dept
## A B C D E F
## 933 585 918 792 584 714
# Collapse over Gender by adding the Male and Female slices. Despite its name,
# `admitted` is the full Admit x Dept table of counts (Admitted and Rejected).
admitted <- UCBAdmissions[, "Male", ] + UCBAdmissions[, "Female", ]
# Column-wise proportions: share admitted / rejected within each department.
prop.table(admitted, margin = 2)
## Dept
## Admit A B C D E
## Admitted 0.64415863 0.63247863 0.35076253 0.33964646 0.25171233
## Rejected 0.35584137 0.36752137 0.64923747 0.66035354 0.74828767
## Dept
## Admit F
## Admitted 0.06442577
## Rejected 0.93557423
# Reorder the dimensions to Dept x Gender x Admit so departments form the rows
# and genders the columns of each printed slice.
dep_gender <- aperm(UCBAdmissions, perm = c("Dept", "Gender", "Admit"))
# Each cell as a share of ALL cases (the whole array sums to 1).
# NOTE(review): if the intent is the admission rate within each Dept/Gender
# cell, the proportions would need margins c(1, 2) instead -- confirm against
# the question being answered.
dep_gender / sum(dep_gender)
## , , Admit = Admitted
##
## Gender
## Dept Male Female
## A 0.113124171 0.019664163
## B 0.077993814 0.003756076
## C 0.026513478 0.044631021
## D 0.030490499 0.028943880
## E 0.011710119 0.020768891
## F 0.004860804 0.005302696
##
## , , Admit = Rejected
##
## Gender
## Dept Male Female
## A 0.069155988 0.004197967
## B 0.045735749 0.001767565
## C 0.045293858 0.086389748
## D 0.061643836 0.053910738
## E 0.030490499 0.066062749
## F 0.077551922 0.070039770
# Total number of cases: the Freq column holds the count for each row.
sum(DanishWelfare[["Freq"]])
## [1] 5144
# Turn the frequency-form data frame into a multi-way table, using every
# column other than Freq as a classifying factor, and inspect the result.
DanishWelfare_tab <- xtabs(Freq ~ ., data = DanishWelfare)
str(DanishWelfare_tab)
## 'xtabs' num [1:3, 1:4, 1:3, 1:5] 1 3 2 8 1 3 2 5 2 42 ...
## - attr(*, "dimnames")=List of 4
## ..$ Alcohol: chr [1:3] "<1" "1-2" ">2"
## ..$ Income : chr [1:4] "0-50" "50-100" "100-150" ">150"
## ..$ Status : chr [1:3] "Widow" "Married" "Unmarried"
## ..$ Urban : chr [1:5] "Copenhagen" "SubCopenhagen" "LargeCity" "City" ...
## - attr(*, "call")= language xtabs(formula = Freq ~ ., data = DanishWelfare)
# Flat display of the table built above from the DanishWelfare frequencies.
ftable(DanishWelfare_tab)
## Urban Copenhagen SubCopenhagen LargeCity City Country
## Alcohol Income Status
## <1 0-50 Widow 1 4 1 8 6
## Married 14 8 41 100 175
## Unmarried 6 1 2 6 9
## 50-100 Widow 8 2 7 14 5
## Married 42 51 62 234 255
## Unmarried 7 5 9 20 27
## 100-150 Widow 2 3 1 5 2
## Married 21 30 23 87 77
## Unmarried 3 2 1 12 4
## >150 Widow 42 29 17 95 46
## Married 24 30 50 167 232
## Unmarried 33 24 15 64 68
## 1-2 0-50 Widow 3 0 1 4 2
## Married 15 7 15 25 48
## Unmarried 2 3 9 9 7
## 50-100 Widow 1 1 3 8 4
## Married 39 59 68 172 143
## Unmarried 12 3 11 20 23
## 100-150 Widow 5 4 1 9 4
## Married 32 68 43 128 86
## Unmarried 6 10 5 21 15
## >150 Widow 26 34 14 48 24
## Married 43 76 70 198 136
## Unmarried 36 23 48 89 64
## >2 0-50 Widow 2 0 2 1 0
## Married 1 2 2 7 7
## Unmarried 3 0 1 5 1
## 50-100 Widow 3 0 2 1 3
## Married 14 21 14 38 35
## Unmarried 2 0 3 12 13
## 100-150 Widow 2 1 1 1 0
## Married 20 31 10 36 21
## Unmarried 0 2 3 9 7
## >150 Widow 21 13 5 20 8
## Married 23 47 21 53 36
## Unmarried 38 20 13 39 26
# Code from the text: load the UKSoccer Home-by-Away table from vcd.
data(UKSoccer, package = "vcd")
ftable(UKSoccer)
## Away 0 1 2 3 4
## Home
## 0 27 29 10 8 2
## 1 59 53 14 12 4
## 2 28 32 14 12 4
## 3 19 14 7 4 1
## 4 7 8 10 2 0
# Total count over all cells.
sum(UKSoccer)
## [1] 380
# The same counts with row and column totals appended.
addmargins(UKSoccer, margin = c(1, 2))
## Away
## Home 0 1 2 3 4 Sum
## 0 27 29 10 8 2 76
## 1 59 53 14 12 4 142
## 2 28 32 14 12 4 90
## 3 19 14 7 4 1 45
## 4 7 8 10 2 0 27
## Sum 140 136 55 38 11 380
# Convert the counts to proportions first and only then append the margins, so
# every cell is relative to the 380 matches and the grand total is 1. Calling
# prop.table() on a table that already contains its Sum row and column divides
# by four times the match count (cells + row sums + column sums + grand total).
addmargins(prop.table(UKSoccer))
## Away
## Home 0 1 2 3 4 Sum
## 0 0.071052632 0.076315789 0.026315789 0.021052632 0.005263158 0.200000000
## 1 0.155263158 0.139473684 0.036842105 0.031578947 0.010526316 0.373684211
## 2 0.073684211 0.084210526 0.036842105 0.031578947 0.010526316 0.236842105
## 3 0.050000000 0.036842105 0.018421053 0.010526316 0.002631579 0.118421053
## 4 0.018421053 0.021052632 0.026315789 0.005263158 0.000000000 0.071052632
## Sum 0.368421053 0.357894737 0.144736842 0.100000000 0.028947368 1.000000000
library(vcd)
library(vcdExtra)
# Rebuild the catalogue of vcd / vcdExtra datasets and print its structure
# with str()'s default settings.
ds <- vcdExtra::datasets(package = c("vcd", "vcdExtra"))
str(ds)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" "BrokenMarriage" "Bundesliga" ...
## $ class : chr "data.frame" "data.frame" "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" "20x4" "14018x7" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" "Broken Marriage Data" "Ergebnisse der Fussball-Bundesliga" ...
# Open the spreadsheet-style viewer only in interactive sessions: View() needs
# a GUI and may fail when the script is run where no display is available.
if (interactive()) {
  View(ds)
}
# Flat contingency table of the SpaceShuttle data via the formula interface:
# Damage (left-hand side) forms the columns, Fail and nFailures the rows.
structable(Damage ~ Fail + nFailures, data = SpaceShuttle)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1
# The same cross-classification built with ftable() instead of structable().
ftable(Damage ~ Fail + nFailures, data = SpaceShuttle)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1