hw4

# The data set criminal in the package logmult gives the 4 × 5 table below of the number 
# of men aged 15-19 charged with a criminal case for whom charges were dropped in Denmark 
# from 1955-1958.

library(logmult)

## Loading required package: gnm

## 
## Attaching package: 'gnm'

## The following object is masked from 'package:lattice':
## 
##     barley

## 
## Attaching package: 'logmult'

## The following object is masked from 'package:gnm':
## 
##     se

library(vcd)

## Loading required package: grid

## 
## Attaching package: 'vcd'

## The following object is masked from 'package:logmult':
## 
##     assoc

library(MASS)
library(vcdExtra)

data("criminal", package = "logmult")
head(criminal)

##       Age
## Year    15  16  17  18  19
##   1955 141 285 320 441 427
##   1956 144 292 342 441 396
##   1957 196 380 424 462 427
##   1958 212 424 399 442 430

# Use loglm() to test whether there is an association between Year and Age. 
# Is there evidence that dropping of charges in relation to age changed over the years 
# recorded here?
loglm(~Year + Age, data = criminal)

## Call:
## loglm(formula = ~Year + Age, data = criminal)
## 
## Statistics:
##                       X^2 df     P(> X^2)
## Likelihood Ratio 38.24466 12 0.0001400372
## Pearson          38.41033 12 0.0001315495

# Use mosaic() with the option shade=TRUE to display the pattern of signs and magnitudes of 
# the residuals. Compare this with the result of mosaic() using "Friendly shading," 
# from the option gp=shading_Friendly. Describe verbally what you see in each regarding 
# the pattern of association in this table.

 mosaic(criminal, shade = TRUE,labeling = labeling_residuals)

# Use mosaic() with the option shade=TRUE to display the pattern of signs and magnitudes of 
# the residuals. Compare this with the result of mosaic() using "Friendly shading," 
# from the option gp=shading_Friendly. Describe verbally what you see in each regarding 
# the pattern of association in this table.


mosaic(criminal, gp = shading_Friendly,labeling = labeling_residuals)

The most related ones are age 19 in year 1955 and age 16 in year 1958.

# Bertin (1983, pp. 30-31) used a 4-way table of frequencies of traffic accident victims 
# in France in 1958 to illustrate his scheme for classifying data sets by numerous variables,
# each of which could have various types and could be assigned to various visual attributes. 
# His data are contained in Accident in vcdExtra, a frequency data frame representing his 
# 5 × 2 × 4 × 2 table of the variables age, result (died or injured), mode of transportation, 
# and gender.
  data("Accident",package="vcdExtra")
str(Accident)

## 'data.frame':    80 obs. of  5 variables:
##  $ age   : Ord.factor w/ 5 levels "0-9"<"10-19"<..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ result: Factor w/ 2 levels "Died","Injured": 1 1 1 1 1 1 1 1 2 2 ...
##  $ mode  : Factor w/ 4 levels "4-Wheeled","Bicycle",..: 4 4 2 2 3 3 1 1 4 4 ...
##  $ gender: Factor w/ 2 levels "Female","Male": 2 1 2 1 2 1 2 1 2 1 ...
##  $ Freq  : int  704 378 396 56 742 78 513 253 5206 5449 ...

#Use loglm() to fit the model of mutual independence, Freq ~ age+mode+gender+result to this data set.
loglm(Freq~ age + mode + gender + result, data=Accident)

## Call:
## loglm(formula = Freq ~ age + mode + gender + result, data = Accident)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 60320.05 70        0
## Pearson          76865.31 70        0

#Use loglm() to fit the model of mutual independence, Freq ~ age+mode+gender+result to this data set.
loglm(Freq~ age + mode + gender + result, data=Accident)

## Call:
## loglm(formula = Freq ~ age + mode + gender + result, data = Accident)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 60320.05 70        0
## Pearson          76865.31 70        0

# Use mosaic() to produce an interpretable mosaic plot of the associations among all variables under the model of mutual independence. Try different orders of the variables in the mosaic. (Hint: the abbreviate component of the labeling_args argument to mosaic() will be useful to avoid some overlap of the category labels.)
loglm(Freq~ age + mode + gender + result, data=Accident)

## Call:
## loglm(formula = Freq ~ age + mode + gender + result, data = Accident)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 60320.05 70        0
## Pearson          76865.31 70        0

Accident$mode <- ordered(Accident$mode, levels=levels(Accident$mode)[c(1,3,2,4)])
mosaic(loglm(Freq ~ age + mode + gender + result, data = Accident), labeling_args = list(abbreviate=c(mode=3,gender=1,result=3)))

# Treat result ("Died" vs. "Injured") as the response variable, and fit the model Freq ~ age X mode X gender + result that asserts independence of result from all others jointly.
loglm(Freq ~ age * mode * gender + result, data = Accident)

## Call:
## loglm(formula = Freq ~ age * mode * gender + result, data = Accident)
## 
## Statistics:
##                      X^2 df P(> X^2)
## Likelihood Ratio 2217.72 39        0
## Pearson          2347.60 39        0

# Construct a mosaic display for the residual associations in this model. Which combinations of the predictor factors are more likely to result in death?
mosaic(loglm(Freq ~ age * mode * gender + result, data = Accident), gp = shading_Friendly, labeling = labeling_residuals)

hw4

rdb

September 2, 2018