Homework #1 is worth 100 points and each question is worth 6.5 points each.
Submission Instructions: save the .HTML file as ‘Familiar_ Categorical_Data_Assignmentyourlastname.HTML’ and upload the HTML file to the assignment entitled ‘Getting Familiar with Categorical Data in R’ on Canvas on or before Wednesday November 13, 2019 by 11:59p.m. EST. No late assignments are accepted.
Run the code chunk below.
library(vcd)
## Loading required package: grid
library(grid)
library(gnm)
library(vcdExtra)
ds <- datasets(package = c("vcd", "vcdExtra"))
str(ds, vec.len=2)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" ...
## $ class : chr "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" ...
View(ds)
View(UCBAdmissions)
str(UCBAdmissions)
## 'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
## - attr(*, "dimnames")=List of 3
## ..$ Admit : chr [1:2] "Admitted" "Rejected"
## ..$ Gender: chr [1:2] "Male" "Female"
## ..$ Dept : chr [1:6] "A" "B" "C" "D" ...
nrow(ds)
## [1] 76
ds1 = datasets(package = "vcd")
nrow(ds1)
## [1] 33
ds2 = datasets(package = "vcdExtra")
nrow(ds2)
## [1] 43
table(ds$Package, ds$class)
##
## array data.frame matrix table
## vcd 1 17 0 15
## vcdExtra 3 24 1 15
help(Arthritis)
## starting httpd help server ... done
example(Arthritis)
##
## Arthrt> data("Arthritis")
##
## Arthrt> art <- xtabs(~ Treatment + Improved, data = Arthritis, subset = Sex == "Female")
##
## Arthrt> art
## Improved
## Treatment None Some Marked
## Placebo 19 7 6
## Treated 6 5 16
##
## Arthrt> mosaic(art, gp = shading_Friendly)
##
## Arthrt> mosaic(art, gp = shading_max)
help(TV)
example(TV)
##
## TV> data(TV)
##
## TV> structable(TV)
## Time 8:00 8:15 8:30 8:45 9:00 9:15 9:30 9:45 10:00 10:15 10:30
## Day Network
## Monday ABC 146 151 156 83 325 350 386 340 352 280 278
## CBS 337 293 304 233 311 251 241 164 252 265 272
## NBC 263 219 236 140 226 235 239 246 279 263 283
## Tuesday ABC 244 181 231 205 385 283 345 192 329 351 364
## CBS 173 180 184 109 218 235 256 250 274 263 261
## NBC 315 254 280 241 370 214 195 111 188 190 210
## Wednesday ABC 233 161 194 156 339 264 279 140 237 228 203
## CBS 158 126 207 59 98 103 122 86 109 105 110
## NBC 134 146 166 66 194 230 264 143 274 289 306
## Thursday ABC 174 183 197 181 187 198 211 86 110 122 117
## CBS 196 185 195 104 106 116 116 47 102 84 84
## NBC 515 463 472 477 590 473 446 349 649 705 747
## Friday ABC 294 281 305 239 278 246 245 138 246 232 233
## CBS 130 144 154 81 129 153 136 126 138 136 152
## NBC 195 220 248 160 172 164 169 85 183 198 204
##
## TV> doubledecker(TV)
##
## TV> # reduce number of levels of Time
## TV> TV.df <- as.data.frame.table(TV)
##
## TV> levels(TV.df$Time) <- rep(c("8:00-8:59", "9:00-9:59", "10:00-10:44"), c(4, 4, 3))
##
## TV> TV2 <- xtabs(Freq ~ Day + Time + Network, TV.df)
##
## TV> # re-label for mosaic display
## TV> levels(TV.df$Time) <- c("8", "9", "10")
##
## TV> # fit mode of joint independence, showing association of Network with Day*Time
## TV> mosaic(~ Day + Network + Time, data = TV.df, expected = ~ Day:Time + Network, legend = FALSE)
##
## TV> # with doubledecker arrangement
## TV> mosaic(~ Day + Network + Time, data = TV.df, expected = ~ Day:Time + Network,
## TV+ split = c(TRUE, TRUE, FALSE), spacing = spacing_highlighting, legend = FALSE)
summary(UCBAdmissions)
## Number of cases in table: 4526
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 2000.3, df = 16, p-value = 0
margin.table(UCBAdmissions,3)
## Dept
## A B C D E F
## 933 585 918 792 584 714
data1 = UCBAdmissions[,1,]+UCBAdmissions[,2,]
prop.table(data1,2)
## Dept
## Admit A B C D E
## Admitted 0.64415863 0.63247863 0.35076253 0.33964646 0.25171233
## Rejected 0.35584137 0.36752137 0.64923747 0.66035354 0.74828767
## Dept
## Admit F
## Admitted 0.06442577
## Rejected 0.93557423
data2 = aperm(UCBAdmissions, c(3,2,1))
prop.table(data2)
## , , Admit = Admitted
##
## Gender
## Dept Male Female
## A 0.113124171 0.019664163
## B 0.077993814 0.003756076
## C 0.026513478 0.044631021
## D 0.030490499 0.028943880
## E 0.011710119 0.020768891
## F 0.004860804 0.005302696
##
## , , Admit = Rejected
##
## Gender
## Dept Male Female
## A 0.069155988 0.004197967
## B 0.045735749 0.001767565
## C 0.045293858 0.086389748
## D 0.061643836 0.053910738
## E 0.030490499 0.066062749
## F 0.077551922 0.070039770
sum(DanishWelfare$Freq)
## [1] 5144
DanishWelfare_tab <- xtabs(Freq ~., data = DanishWelfare)
str(DanishWelfare_tab)
## 'xtabs' num [1:3, 1:4, 1:3, 1:5] 1 3 2 8 1 3 2 5 2 42 ...
## - attr(*, "dimnames")=List of 4
## ..$ Alcohol: chr [1:3] "<1" "1-2" ">2"
## ..$ Income : chr [1:4] "0-50" "50-100" "100-150" ">150"
## ..$ Status : chr [1:3] "Widow" "Married" "Unmarried"
## ..$ Urban : chr [1:5] "Copenhagen" "SubCopenhagen" "LargeCity" "City" ...
## - attr(*, "call")= language xtabs(formula = Freq ~ ., data = DanishWelfare)
ftable(xtabs(Freq ~., data = DanishWelfare))
## Urban Copenhagen SubCopenhagen LargeCity City Country
## Alcohol Income Status
## <1 0-50 Widow 1 4 1 8 6
## Married 14 8 41 100 175
## Unmarried 6 1 2 6 9
## 50-100 Widow 8 2 7 14 5
## Married 42 51 62 234 255
## Unmarried 7 5 9 20 27
## 100-150 Widow 2 3 1 5 2
## Married 21 30 23 87 77
## Unmarried 3 2 1 12 4
## >150 Widow 42 29 17 95 46
## Married 24 30 50 167 232
## Unmarried 33 24 15 64 68
## 1-2 0-50 Widow 3 0 1 4 2
## Married 15 7 15 25 48
## Unmarried 2 3 9 9 7
## 50-100 Widow 1 1 3 8 4
## Married 39 59 68 172 143
## Unmarried 12 3 11 20 23
## 100-150 Widow 5 4 1 9 4
## Married 32 68 43 128 86
## Unmarried 6 10 5 21 15
## >150 Widow 26 34 14 48 24
## Married 43 76 70 198 136
## Unmarried 36 23 48 89 64
## >2 0-50 Widow 2 0 2 1 0
## Married 1 2 2 7 7
## Unmarried 3 0 1 5 1
## 50-100 Widow 3 0 2 1 3
## Married 14 21 14 38 35
## Unmarried 2 0 3 12 13
## 100-150 Widow 2 1 1 1 0
## Married 20 31 10 36 21
## Unmarried 0 2 3 9 7
## >150 Widow 21 13 5 20 8
## Married 23 47 21 53 36
## Unmarried 38 20 13 39 26
#code from text
data("UKSoccer", package = "vcd")
ftable(UKSoccer)
## Away 0 1 2 3 4
## Home
## 0 27 29 10 8 2
## 1 59 53 14 12 4
## 2 28 32 14 12 4
## 3 19 14 7 4 1
## 4 7 8 10 2 0
sum(UKSoccer)
## [1] 380
margin.table(UKSoccer,1)
## Home
## 0 1 2 3 4
## 76 142 90 45 27
margin.table(UKSoccer,2)
## Away
## 0 1 2 3 4
## 140 136 55 38 11
prop.table(margin.table(UKSoccer,1))
## Home
## 0 1 2 3 4
## 0.20000000 0.37368421 0.23684211 0.11842105 0.07105263
prop.table(margin.table(UKSoccer,2))
## Away
## 0 1 2 3 4
## 0.36842105 0.35789474 0.14473684 0.10000000 0.02894737
library(vcd)
library(vcdExtra)
ds <- datasets(package = c("vcd", "vcdExtra"))
str(ds)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" "BrokenMarriage" "Bundesliga" ...
## $ class : chr "data.frame" "data.frame" "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" "20x4" "14018x7" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" "Broken Marriage Data" "Ergebnisse der Fussball-Bundesliga" ...
View(ds)
structable(Damage ~ Fail + nFailures, data = SpaceShuttle)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1
ftable(Damage ~ Fail + nFailures, data = SpaceShuttle)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1