Workshop #1 is worth 100 points, and each question is worth 6.5 points.
Submission Instructions: save the .HTML file as ‘Familiar_ Categorical_Data_Assignmentyourlastname.HTML’ and upload the HTML file to the assignment entitled ‘Getting Familiar with Categorical Data in R’ on Canvas on or before the due date.
Good luck!
Run the code chunk below.
library(vcd)
## Loading required package: grid
library(grid)
library(gnm)
library(vcdExtra)
# Build a catalogue of every dataset shipped with the vcd and vcdExtra
# packages, then print a compact structure summary (two sample values per
# column).
ds <- datasets(package = c("vcd", "vcdExtra"))
str(ds, vec.len = 2)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" ...
## $ class : chr "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" ...
# View() invokes a spreadsheet-style data viewer, which only makes sense when
# a person is driving the session. Skip it when the document is rendered
# non-interactively so knitting never depends on a display being available.
if (interactive()) {
  View(ds)
  View(UCBAdmissions)
}
# Inspect the layout of the 3-way admissions table (Admit x Gender x Dept).
str(UCBAdmissions)
## 'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
## - attr(*, "dimnames")=List of 3
## ..$ Admit : chr [1:2] "Admitted" "Rejected"
## ..$ Gender: chr [1:2] "Male" "Female"
## ..$ Dept : chr [1:6] "A" "B" "C" "D" ...
# Total number of datasets listed in the catalogue.
dim(ds)[1]
## [1] 76
# Number of catalogue rows that belong to vcd.
dim(ds[ds$Package == "vcd", ])[1]
## [1] 33
# Number of catalogue rows that belong to vcdExtra.
dim(ds[ds$Package == "vcdExtra", ])[1]
## [1] 43
Conclusion: There are 76 datasets altogether, of which 33 are in ‘vcd’ and 43 are in ‘vcdExtra’.
# Cross-tabulate the source package (rows) against the R class of each
# dataset (columns).
table(ds[["Package"]], ds[["class"]])
##
## array data.frame matrix table
## vcd 1 17 0 15
## vcdExtra 3 24 1 15
# Open the help page for the Arthritis data, then run its documented example.
help("Arthritis")
## starting httpd help server ... done
example(Arthritis)
##
## Arthrt> data("Arthritis")
##
## Arthrt> art <- xtabs(~ Treatment + Improved, data = Arthritis, subset = Sex == "Female")
##
## Arthrt> art
## Improved
## Treatment None Some Marked
## Placebo 19 7 6
## Treated 6 5 16
##
## Arthrt> mosaic(art, gp = shading_Friendly)
##
## Arthrt> mosaic(art, gp = shading_max)
# Same two steps for the Trucks data: help page first, documented example next.
help("Trucks")
example(Trucks)
##
## Trucks> library(MASS)
##
## Trucks> data("Trucks")
##
## Trucks> tab <- xtabs(Freq ~ period + collision + light + parked, data = Trucks)
##
## Trucks> loglm(~ (collision + period) * parked * light, data = tab)
## Call:
## loglm(formula = ~(collision + period) * parked * light, data = tab)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 6.853404 6 0.3346175
## Pearson 6.972248 6 0.3234213
##
## Trucks> doubledecker(collision ~ parked + light + period, data = tab)
##
## Trucks> cotabplot(tab, panel = cotab_coindep)
# Overall case count and chi-squared test of mutual independence.
summary(UCBAdmissions)
## Number of cases in table: 4526
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 2000.3, df = 16, p-value = 0
# Sum over the first dimension (Admit), leaving counts by Gender and Dept.
colSums(UCBAdmissions, na.rm = TRUE, dims = 1L)
## Dept
## Gender A B C D E F
## Male 825 560 325 417 191 373
## Female 108 25 593 375 393 341
# With no margin given, every cell is expressed as a share of the grand total.
prop.table(UCBAdmissions, margin = NULL)
## , , Dept = A
##
## Gender
## Admit Male Female
## Admitted 0.113124171 0.019664163
## Rejected 0.069155988 0.004197967
##
## , , Dept = B
##
## Gender
## Admit Male Female
## Admitted 0.077993814 0.003756076
## Rejected 0.045735749 0.001767565
##
## , , Dept = C
##
## Gender
## Admit Male Female
## Admitted 0.026513478 0.044631021
## Rejected 0.045293858 0.086389748
##
## , , Dept = D
##
## Gender
## Admit Male Female
## Admitted 0.030490499 0.028943880
## Rejected 0.061643836 0.053910738
##
## , , Dept = E
##
## Gender
## Admit Male Female
## Admitted 0.011710119 0.020768891
## Rejected 0.030490499 0.066062749
##
## , , Dept = F
##
## Gender
## Admit Male Female
## Admitted 0.004860804 0.005302696
## Rejected 0.077551922 0.070039770
# Condition on Dept and Gender (dimensions 3 and 2) so that, within each
# department/gender pair, the admitted and rejected shares add up to 1; then
# flatten the 3-way result for display.
ftable(
  prop.table(UCBAdmissions, margin = c(3, 2))
)
## Dept A B C D E F
## Admit Gender
## Admitted Male 0.62060606 0.63035714 0.36923077 0.33093525 0.27748691 0.05898123
## Female 0.82407407 0.68000000 0.34064081 0.34933333 0.23918575 0.07038123
## Rejected Male 0.37939394 0.36964286 0.63076923 0.66906475 0.72251309 0.94101877
## Female 0.17592593 0.32000000 0.65935919 0.65066667 0.76081425 0.92961877
# Load the frequency-form DanishWelfare data and summarise its columns.
data(DanishWelfare)
summary(DanishWelfare)
## Freq Alcohol Income Status Urban
## Min. : 0.00 <1 :60 0-50 :45 Widow :60 Copenhagen :36
## 1st Qu.: 3.00 1-2:60 50-100 :45 Married :60 SubCopenhagen:36
## Median : 12.00 >2 :60 100-150:45 Unmarried:60 LargeCity :36
## Mean : 28.58 >150 :45 City :36
## 3rd Qu.: 35.25 Country :36
## Max. :255.00
# Total number of cases: the Freq column holds the count for each cell.
with(DanishWelfare, sum(Freq))
## [1] 5144
# Convert from frequency form to a 4-way contingency table, using Freq as the
# cell count and every remaining column as a classifying factor.
DanishWelfare.tab <- xtabs(Freq ~ ., data = DanishWelfare)
summary(DanishWelfare.tab)
## Call: xtabs(formula = Freq ~ ., data = DanishWelfare)
## Number of cases in table: 5144
## Number of factors: 4
## Test for independence of all factors:
## Chisq = 1733.5, df = 168, p-value = 7.514e-258
## Chi-squared approximation may be incorrect
str(DanishWelfare.tab)
## 'xtabs' num [1:3, 1:4, 1:3, 1:5] 1 3 2 8 1 3 2 5 2 42 ...
## - attr(*, "dimnames")=List of 4
## ..$ Alcohol: chr [1:3] "<1" "1-2" ">2"
## ..$ Income : chr [1:4] "0-50" "50-100" "100-150" ">150"
## ..$ Status : chr [1:3] "Widow" "Married" "Unmarried"
## ..$ Urban : chr [1:5] "Copenhagen" "SubCopenhagen" "LargeCity" "City" ...
## - attr(*, "call")= language xtabs(formula = Freq ~ ., data = DanishWelfare)
# Flatten the table built above for display instead of recomputing it.
ftable(DanishWelfare.tab)
## Urban Copenhagen SubCopenhagen LargeCity City Country
## Alcohol Income Status
## <1 0-50 Widow 1 4 1 8 6
## Married 14 8 41 100 175
## Unmarried 6 1 2 6 9
## 50-100 Widow 8 2 7 14 5
## Married 42 51 62 234 255
## Unmarried 7 5 9 20 27
## 100-150 Widow 2 3 1 5 2
## Married 21 30 23 87 77
## Unmarried 3 2 1 12 4
## >150 Widow 42 29 17 95 46
## Married 24 30 50 167 232
## Unmarried 33 24 15 64 68
## 1-2 0-50 Widow 3 0 1 4 2
## Married 15 7 15 25 48
## Unmarried 2 3 9 9 7
## 50-100 Widow 1 1 3 8 4
## Married 39 59 68 172 143
## Unmarried 12 3 11 20 23
## 100-150 Widow 5 4 1 9 4
## Married 32 68 43 128 86
## Unmarried 6 10 5 21 15
## >150 Widow 26 34 14 48 24
## Married 43 76 70 198 136
## Unmarried 36 23 48 89 64
## >2 0-50 Widow 2 0 2 1 0
## Married 1 2 2 7 7
## Unmarried 3 0 1 5 1
## 50-100 Widow 3 0 2 1 3
## Married 14 21 14 38 35
## Unmarried 2 0 3 12 13
## 100-150 Widow 2 1 1 1 0
## Married 20 31 10 36 21
## Unmarried 0 2 3 9 7
## >150 Widow 21 13 5 20 8
## Married 23 47 21 53 36
## Unmarried 38 20 13 39 26
# code from text
# Load the Home x Away goals table from vcd and show it in flat form.
data("UKSoccer", package = "vcd")
ftable(UKSoccer)
## Away 0 1 2 3 4
## Home
## 0 27 29 10 8 2
## 1 59 53 14 12 4
## 2 28 32 14 12 4
## 3 19 14 7 4 1
## 4 7 8 10 2 0
# Total number of games in the table.
sum(UKSoccer)
## [1] 380
# Counts with row and column totals appended.
addmargins(UKSoccer)
## Away
## Home 0 1 2 3 4 Sum
## 0 27 29 10 8 2 76
## 1 59 53 14 12 4 142
## 2 28 32 14 12 4 90
## 3 19 14 7 4 1 45
## 4 7 8 10 2 0 27
## Sum 140 136 55 38 11 380
# Convert to proportions FIRST, then append the margins. Doing it the other
# way round (prop.table(addmargins(x))) divides by a total that already
# includes the row sums, column sums and grand total (4 * 380), so every
# proportion comes out at a quarter of its true value and the corner cell
# reads 0.25 instead of 1.
addmargins(prop.table(UKSoccer))
## Away
## Home 0 1 2 3 4 Sum
## 0 0.071052632 0.076315789 0.026315789 0.021052632 0.005263158 0.200000000
## 1 0.155263158 0.139473684 0.036842105 0.031578947 0.010526316 0.373684211
## 2 0.073684211 0.084210526 0.036842105 0.031578947 0.010526316 0.236842105
## 3 0.050000000 0.036842105 0.018421053 0.010526316 0.002631579 0.118421053
## 4 0.018421053 0.021052632 0.026315789 0.005263158 0.000000000 0.071052632
## Sum 0.368421053 0.357894737 0.144736842 0.100000000 0.028947368 1.000000000
library(vcd)
library(vcdExtra)
# Rebuild the catalogue of datasets in vcd and vcdExtra and print its
# structure with str()'s default number of sample values per column.
ds <- datasets(package = c("vcd", "vcdExtra"))
str(ds)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" "BrokenMarriage" "Bundesliga" ...
## $ class : chr "data.frame" "data.frame" "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" "20x4" "14018x7" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" "Broken Marriage Data" "Ergebnisse der Fussball-Bundesliga" ...
# View() invokes a spreadsheet-style data viewer; only call it when a person
# is driving the session so that non-interactive rendering does not depend on
# a display being available.
if (interactive()) {
  View(ds)
}
# Flat contingency table of SpaceShuttle via vcd's structable(): Damage forms
# the columns, Fail and nFailures the nested row variables.
structable(
  Damage ~ Fail + nFailures,
  data = SpaceShuttle
)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1
# The same formula passed to ftable() yields the same layout.
ftable(
  Damage ~ Fail + nFailures,
  data = SpaceShuttle
)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1