Homework #1 is worth 100 points, and each question is worth 6.5 points.
Submission instructions: Save the .HTML file as ‘Familiar_Categorical_Data_Assignmentyourlastname.HTML’ and upload it to the assignment entitled ‘Getting Familiar with Categorical Data in R’ on Canvas by 11:59 p.m. EST on Tuesday, November 12, 2019. No late assignments are accepted.
Run the code chunk below.
# Attach the packages used in this assignment.
library(vcd)
## Loading required package: grid
library(grid)
library(gnm)
library(vcdExtra)
# Build a catalogue of the data sets shipped with vcd and vcdExtra. The result
# is a data frame with one row per data set (Package, Item, class, dim, Title).
ds <- datasets(package = c("vcd", "vcdExtra"))
# Compact structure listing; vec.len=2 shortens the preview of each column.
str(ds, vec.len=2)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" ...
## $ class : chr "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" ...
# View() opens a spreadsheet-style data viewer, which needs an interactive
# session. Skip it when the file is knitted or run through Rscript so that
# rendering does not depend on a viewer/display being available.
if (interactive()) {
  View(ds)
  View(UCBAdmissions)
}
# Structure of the UCBAdmissions table: a 2 x 2 x 6 array of counts
# classified by Admit, Gender and Dept.
str(UCBAdmissions)
## 'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
## - attr(*, "dimnames")=List of 3
## ..$ Admit : chr [1:2] "Admitted" "Rejected"
## ..$ Gender: chr [1:2] "Male" "Female"
## ..$ Dept : chr [1:6] "A" "B" "C" "D" ...
# Total number of data sets listed in the catalogue.
nrow(ds)
## [1] 76
# Number of data sets contributed by each package.
table(ds$Package)
##
## vcd vcdExtra
## 33 43
# Cross-tabulate package against the R class of each data set.
table(ds$Package, ds$class)
##
## array data.frame matrix table
## vcd 1 17 0 15
## vcdExtra 3 24 1 15
# Open the help page for the Arthritis data set.
?Arthritis
## starting httpd help server ... done
# Run the examples from that help page; the echoed commands and their
# results follow below.
example("Arthritis")
##
## Arthrt> data("Arthritis")
##
## Arthrt> art <- xtabs(~ Treatment + Improved, data = Arthritis, subset = Sex == "Female")
##
## Arthrt> art
## Improved
## Treatment None Some Marked
## Placebo 19 7 6
## Treated 6 5 16
##
## Arthrt> mosaic(art, gp = shading_Friendly)
##
## Arthrt> mosaic(art, gp = shading_max)
# Open the help page for the Trucks data set, then run its examples; the
# echoed commands and their results follow below.
?Trucks
example("Trucks")
##
## Trucks> library(MASS)
##
## Trucks> data("Trucks")
##
## Trucks> tab <- xtabs(Freq ~ period + collision + light + parked, data = Trucks)
##
## Trucks> loglm(~ (collision + period) * parked * light, data = tab)
## Call:
## loglm(formula = ~(collision + period) * parked * light, data = tab)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 6.853404 6 0.3346175
## Pearson 6.972248 6 0.3234213
##
## Trucks> doubledecker(collision ~ parked + light + period, data = tab)
##
## Trucks> cotabplot(tab, panel = cotab_coindep)
# Overall summary of the 3-way table: total cases, number of factors, and a
# chi-squared test of independence of all factors.
summary(UCBAdmissions)
## Number of cases in table: 4526
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 2000.3, df = 16, p-value = 0
# Sum over the first dimension (Admit), leaving the number of applicants for
# each Gender x Dept combination.
colSums(UCBAdmissions, na.rm = TRUE)
## Dept
## Gender A B C D E F
## Male 825 560 325 417 191 373
## Female 108 25 593 375 393 341
# With no margin given, every cell is expressed as a proportion of the grand
# total, so all cells together sum to 1.
prop.table(UCBAdmissions)
## , , Dept = A
##
## Gender
## Admit Male Female
## Admitted 0.113124171 0.019664163
## Rejected 0.069155988 0.004197967
##
## , , Dept = B
##
## Gender
## Admit Male Female
## Admitted 0.077993814 0.003756076
## Rejected 0.045735749 0.001767565
##
## , , Dept = C
##
## Gender
## Admit Male Female
## Admitted 0.026513478 0.044631021
## Rejected 0.045293858 0.086389748
##
## , , Dept = D
##
## Gender
## Admit Male Female
## Admitted 0.030490499 0.028943880
## Rejected 0.061643836 0.053910738
##
## , , Dept = E
##
## Gender
## Admit Male Female
## Admitted 0.011710119 0.020768891
## Rejected 0.030490499 0.066062749
##
## , , Dept = F
##
## Gender
## Admit Male Female
## Admitted 0.004860804 0.005302696
## Rejected 0.077551922 0.070039770
# Proportions conditioned on Dept and Gender (margins 3 and 2): within each
# Dept x Gender group the Admitted and Rejected shares sum to 1.
# ftable() flattens the 3-way result into a single 2-D display.
ftable(prop.table(UCBAdmissions, c(3,2)))
## Dept A B C D E F
## Admit Gender
## Admitted Male 0.62060606 0.63035714 0.36923077 0.33093525 0.27748691 0.05898123
## Female 0.82407407 0.68000000 0.34064081 0.34933333 0.23918575 0.07038123
## Rejected Male 0.37939394 0.36964286 0.63076923 0.66906475 0.72251309 0.94101877
## Female 0.17592593 0.32000000 0.65935919 0.65066667 0.76081425 0.92961877
# Load the DanishWelfare data frame and summarise its columns: a numeric Freq
# column plus the factors Alcohol, Income, Status and Urban.
data("DanishWelfare")
summary(DanishWelfare)
## Freq Alcohol Income Status Urban
## Min. : 0.00 <1 :60 0-50 :45 Widow :60 Copenhagen :36
## 1st Qu.: 3.00 1-2:60 50-100 :45 Married :60 SubCopenhagen:36
## Median : 12.00 >2 :60 100-150:45 Unmarried:60 LargeCity :36
## Mean : 28.58 >150 :45 City :36
## 3rd Qu.: 35.25 Country :36
## Max. :255.00
# Total count represented by the data: the sum of the Freq column.
sum(DanishWelfare$Freq)
## [1] 5144
# Convert the frequency-form data frame into a 4-way contingency table, using
# Freq as the cell counts and every other column as a classifying factor.
DanishWelfare.tab <- xtabs(Freq ~., data = DanishWelfare)
# For a table, summary() reports the case total and a chi-squared test of
# independence of all factors.
summary(DanishWelfare.tab)
## Call: xtabs(formula = Freq ~ ., data = DanishWelfare)
## Number of cases in table: 5144
## Number of factors: 4
## Test for independence of all factors:
## Chisq = 1733.5, df = 168, p-value = 7.514e-258
## Chi-squared approximation may be incorrect
# Inspect the dimensions (3 x 4 x 3 x 5) and dimnames of the table.
str(DanishWelfare.tab)
## 'xtabs' num [1:3, 1:4, 1:3, 1:5] 1 3 2 8 1 3 2 5 2 42 ...
## - attr(*, "dimnames")=List of 4
## ..$ Alcohol: chr [1:3] "<1" "1-2" ">2"
## ..$ Income : chr [1:4] "0-50" "50-100" "100-150" ">150"
## ..$ Status : chr [1:3] "Widow" "Married" "Unmarried"
## ..$ Urban : chr [1:5] "Copenhagen" "SubCopenhagen" "LargeCity" "City" ...
## - attr(*, "call")= language xtabs(formula = Freq ~ ., data = DanishWelfare)
# Flat display of the 4-way table. Reuses DanishWelfare.tab, which was built
# earlier with xtabs(Freq ~ ., data = DanishWelfare), instead of rebuilding it.
ftable(DanishWelfare.tab)
## Urban Copenhagen SubCopenhagen LargeCity City Country
## Alcohol Income Status
## <1 0-50 Widow 1 4 1 8 6
## Married 14 8 41 100 175
## Unmarried 6 1 2 6 9
## 50-100 Widow 8 2 7 14 5
## Married 42 51 62 234 255
## Unmarried 7 5 9 20 27
## 100-150 Widow 2 3 1 5 2
## Married 21 30 23 87 77
## Unmarried 3 2 1 12 4
## >150 Widow 42 29 17 95 46
## Married 24 30 50 167 232
## Unmarried 33 24 15 64 68
## 1-2 0-50 Widow 3 0 1 4 2
## Married 15 7 15 25 48
## Unmarried 2 3 9 9 7
## 50-100 Widow 1 1 3 8 4
## Married 39 59 68 172 143
## Unmarried 12 3 11 20 23
## 100-150 Widow 5 4 1 9 4
## Married 32 68 43 128 86
## Unmarried 6 10 5 21 15
## >150 Widow 26 34 14 48 24
## Married 43 76 70 198 136
## Unmarried 36 23 48 89 64
## >2 0-50 Widow 2 0 2 1 0
## Married 1 2 2 7 7
## Unmarried 3 0 1 5 1
## 50-100 Widow 3 0 2 1 3
## Married 14 21 14 38 35
## Unmarried 2 0 3 12 13
## 100-150 Widow 2 1 1 1 0
## Married 20 31 10 36 21
## Unmarried 0 2 3 9 7
## >150 Widow 21 13 5 20 8
## Married 23 47 21 53 36
## Unmarried 38 20 13 39 26
# Code from the text.
# Load the UKSoccer table from the vcd package and print it as a flat table
# with Home as rows and Away as columns.
data("UKSoccer", package = "vcd")
ftable(UKSoccer)
## Away 0 1 2 3 4
## Home
## 0 27 29 10 8 2
## 1 59 53 14 12 4
## 2 28 32 14 12 4
## 3 19 14 7 4 1
## 4 7 8 10 2 0
# Grand total of the table.
sum(UKSoccer)
## [1] 380
# Marginal totals over dimension 1 (Home).
margin.table(UKSoccer,1)
## Home
## 0 1 2 3 4
## 76 142 90 45 27
# Marginal totals over dimension 2 (Away).
margin.table(UKSoccer,2)
## Away
## 0 1 2 3 4
## 140 136 55 38 11
# Marginal distribution of Home: each marginal total divided by the grand total.
prop.table(margin.table(UKSoccer, 1))
## Home
## 0 1 2 3 4
## 0.20000000 0.37368421 0.23684211 0.11842105 0.07105263
# Marginal distribution of Away, computed the same way.
prop.table(margin.table(UKSoccer, 2))
## Away
## 0 1 2 3 4
## 0.36842105 0.35789474 0.14473684 0.10000000 0.02894737
# Repeats the setup from the start of the file: attach vcd and vcdExtra and
# rebuild the catalogue of their data sets.
library(vcd)
library(vcdExtra)
ds <- datasets(package = c("vcd", "vcdExtra"))
# Structure listing with the default preview length for each column.
str(ds)
## 'data.frame': 76 obs. of 5 variables:
## $ Package: chr "vcd" "vcd" "vcd" "vcd" ...
## $ Item : chr "Arthritis" "Baseball" "BrokenMarriage" "Bundesliga" ...
## $ class : chr "data.frame" "data.frame" "data.frame" "data.frame" ...
## $ dim : chr "84x5" "322x25" "20x4" "14018x7" ...
## $ Title : chr "Arthritis Treatment Data" "Baseball Data" "Broken Marriage Data" "Ergebnisse der Fussball-Bundesliga" ...
# View() opens a spreadsheet-style data viewer, which needs an interactive
# session. Skip it when the file is knitted or run through Rscript so that
# rendering does not depend on a viewer/display being available.
if (interactive()) {
  View(ds)
}
# Open the help page for the SpaceShuttle data set, then summarise its columns.
?SpaceShuttle
summary(SpaceShuttle)
## FlightNumber Temperature Pressure Fail nFailures
## 1 : 1 Min. :53.00 Min. : 50.0 no :16 Min. :0.0000
## 2 : 1 1st Qu.:67.00 1st Qu.: 50.0 yes : 7 1st Qu.:0.0000
## 3 : 1 Median :70.00 Median :200.0 NA's: 1 Median :0.0000
## 4 : 1 Mean :70.00 Mean :141.7 Mean :0.3913
## 41B : 1 3rd Qu.:75.25 3rd Qu.:200.0 3rd Qu.:1.0000
## 41C : 1 Max. :81.00 Max. :200.0 Max. :2.0000
## (Other):18 NA's :1
## Damage
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 1.609
## 3rd Qu.: 4.000
## Max. :11.000
## NA's :1
# The same formula passed to three tabulation functions, for comparison.
# structable(): counts of cases for each Fail x nFailures row and Damage column.
structable(Damage ~ Fail + nFailures, data = SpaceShuttle)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1
# xtabs() with a left-hand side does NOT count cases: it sums the Damage
# values within each Fail x nFailures cell (e.g. yes/1 = 2 + 4*4 = 18 and
# yes/2 = 4 + 11 = 15, matching the counts shown above).
xtabs(Damage ~ Fail + nFailures, data = SpaceShuttle)
## nFailures
## Fail 0 1 2
## no 4 0 0
## yes 0 18 15
# ftable() with a formula: counts of cases again, with Damage as the column
# variable and Fail x nFailures as the row variables.
ftable(Damage ~ Fail + nFailures, data = SpaceShuttle)
## Damage 0 2 4 11
## Fail nFailures
## no 0 15 0 1 0
## 1 0 0 0 0
## 2 0 0 0 0
## yes 0 0 0 0 0
## 1 0 1 4 0
## 2 0 0 1 1