#Group Members:
# 1. Pallavi Saitu
# 2. Damodar Arji
# 3. Harsh Dhabhai
# 4. Venkat Sai Katepalli
# 5. Vinay Kumar Anumula
# Exercise 6.2 The data set criminal in the package logmult gives the 4 × 5 table
# below of the number of men aged 15–19 charged with a criminal case for whom
# charges were dropped in Denmark from 1955–1958.
library(logmult)
## Warning: package 'logmult' was built under R version 3.5.2
## Loading required package: gnm
##
## Attaching package: 'logmult'
## The following object is masked from 'package:gnm':
##
## se
library(ca)
# Load the year-by-age table of dropped criminal charges that ships with
# logmult, then print it to check the counts against the exercise text.
data("criminal",package="logmult")
criminal
## Age
## Year 15 16 17 18 19
## 1955 141 285 320 441 427
## 1956 144 292 342 441 396
## 1957 196 380 424 462 427
## 1958 212 424 399 442 430
# (a) What percentages of the Pearson χ2 for association are explained by
# the various dimensions?
# Fit the correspondence analysis of the criminal table. The "%" column in the
# principal-inertia listing printed by summary() is the answer to part (a).
criminal.ca <- ca(criminal)
summary(criminal.ca)
##
## Principal inertias (eigenvalues):
##
## dim value % cum% scree plot
## 1 0.004939 90.3 90.3 ***********************
## 2 0.000491 9.0 99.3 **
## 3 3.8e-050 0.7 100.0
## -------- -----
## Total: 0.005468 100.0
##
##
## Rows:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | 1955 | 230 996 347 | 88 939 361 | -22 58 223 |
## 2 | 1956 | 230 978 157 | 58 908 157 | 16 71 124 |
## 3 | 1957 | 269 984 111 | -39 669 82 | 27 315 391 |
## 4 | 1958 | 271 999 385 | -85 938 399 | -22 61 262 |
##
## Columns:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | 15 | 99 998 185 | -101 992 203 | -7 5 11 |
## 2 | 16 | 197 996 312 | -91 959 331 | -18 37 128 |
## 3 | 17 | 211 991 75 | -23 281 23 | 37 710 594 |
## 4 | 18 | 254 989 235 | 70 980 255 | 7 9 24 |
## 5 | 19 | 239 990 194 | 62 877 188 | -22 112 243 |
#(b) Plot the 2D correspondence analysis solution. Describe the pattern of
# association between year and age.
# Draw the fitted CA solution for part (b) using plot()'s default settings.
plot(criminal.ca)

# Age 15, 16 are associated with year 1958, age 17 is associated with year 1957,
# age 18 is associated with year 1956 and age 19 is associated with year 1955.
# Overall, younger ages are associated with the later years.
# Exercise 6.11 The data set Vietnam in vcdExtra gives a 2 × 5 × 4 contingency
# table in frequency form reflecting a survey of student opinion on the Vietnam
# War at the University of North Carolina in May 1967. The table variables are sex,
# year in school, and response, which has categories:
# (A) Defeat North Vietnam by widespread bombing and land invasion;
# (B) Maintain the present policy;
# (C) De-escalate military activity, stop bombing and begin negotiations;
# (D) Withdraw military forces immediately.
# install.packages("vcdExtra")
library(vcdExtra)
## Loading required package: vcd
## Loading required package: grid
##
## Attaching package: 'vcd'
## The following object is masked from 'package:logmult':
##
## assoc
# Load the Vietnam survey data (frequency form: one row per
# sex/year/response cell with its count in Freq) from vcdExtra and inspect
# its structure.
data("Vietnam", package="vcdExtra")
str(Vietnam)
## 'data.frame': 40 obs. of 4 variables:
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1 1 1 1 2 2 2 2 3 3 ...
## $ response: Factor w/ 4 levels "A","B","C","D": 1 2 3 4 1 2 3 4 1 2 ...
## $ Freq : int 13 19 40 5 5 9 33 3 22 29 ...
# Print all 40 rows of the frequency table.
Vietnam
## sex year response Freq
## 1 Female 1 A 13
## 2 Female 1 B 19
## 3 Female 1 C 40
## 4 Female 1 D 5
## 5 Female 2 A 5
## 6 Female 2 B 9
## 7 Female 2 C 33
## 8 Female 2 D 3
## 9 Female 3 A 22
## 10 Female 3 B 29
## 11 Female 3 C 110
## 12 Female 3 D 6
## 13 Female 4 A 12
## 14 Female 4 B 21
## 15 Female 4 C 58
## 16 Female 4 D 10
## 17 Female 5 A 19
## 18 Female 5 B 27
## 19 Female 5 C 128
## 20 Female 5 D 13
## 21 Male 1 A 175
## 22 Male 1 B 116
## 23 Male 1 C 131
## 24 Male 1 D 17
## 25 Male 2 A 160
## 26 Male 2 B 126
## 27 Male 2 C 135
## 28 Male 2 D 21
## 29 Male 3 A 132
## 30 Male 3 B 120
## 31 Male 3 C 154
## 32 Male 3 D 29
## 33 Male 4 A 145
## 34 Male 4 B 95
## 35 Male 4 C 185
## 36 Male 4 D 44
## 37 Male 5 A 118
## 38 Male 5 B 176
## 39 Male 5 C 345
## 40 Male 5 D 141
# (a). Using the stacking approach, carry out a correspondence analysis corresponding
# to the loglinear model (R)(YS), which asserts that the response is independent of
# the combinations of year and sex.
# Stack year and sex into a single label per row: the year, a space, and the
# upper-cased first letter of sex (e.g. "1 F", "5 M"). The new column is
# appended after Freq.
Vietnam$year_sex <- with(
  Vietnam,
  paste(year, toupper(substr(sex, 1, 1)))
)
Vietnam
## sex year response Freq year_sex
## 1 Female 1 A 13 1 F
## 2 Female 1 B 19 1 F
## 3 Female 1 C 40 1 F
## 4 Female 1 D 5 1 F
## 5 Female 2 A 5 2 F
## 6 Female 2 B 9 2 F
## 7 Female 2 C 33 2 F
## 8 Female 2 D 3 2 F
## 9 Female 3 A 22 3 F
## 10 Female 3 B 29 3 F
## 11 Female 3 C 110 3 F
## 12 Female 3 D 6 3 F
## 13 Female 4 A 12 4 F
## 14 Female 4 B 21 4 F
## 15 Female 4 C 58 4 F
## 16 Female 4 D 10 4 F
## 17 Female 5 A 19 5 F
## 18 Female 5 B 27 5 F
## 19 Female 5 C 128 5 F
## 20 Female 5 D 13 5 F
## 21 Male 1 A 175 1 M
## 22 Male 1 B 116 1 M
## 23 Male 1 C 131 1 M
## 24 Male 1 D 17 1 M
## 25 Male 2 A 160 2 M
## 26 Male 2 B 126 2 M
## 27 Male 2 C 135 2 M
## 28 Male 2 D 21 2 M
## 29 Male 3 A 132 3 M
## 30 Male 3 B 120 3 M
## 31 Male 3 C 154 3 M
## 32 Male 3 D 29 3 M
## 33 Male 4 A 145 4 M
## 34 Male 4 B 95 4 M
## 35 Male 4 C 185 4 M
## 36 Male 4 D 44 4 M
## 37 Male 5 A 118 5 M
## 38 Male 5 B 176 5 M
## 39 Male 5 C 345 5 M
## 40 Male 5 D 141 5 M
# Cross-tabulate the counts in Freq with the stacked year_sex categories as
# rows and the four response categories as columns. This two-way table is the
# input to ca() for the (R)(YS) analysis.
Vietnam.tab <- xtabs(Freq ~ year_sex + response, data=Vietnam)
Vietnam.tab
## response
## year_sex A B C D
## 1 F 13 19 40 5
## 1 M 175 116 131 17
## 2 F 5 9 33 3
## 2 M 160 126 135 21
## 3 F 22 29 110 6
## 3 M 132 120 154 29
## 4 F 12 21 58 10
## 4 M 145 95 185 44
## 5 F 19 27 128 13
## 5 M 118 176 345 141
library(ca)
# Correspondence analysis of the stacked (year_sex x response) table, i.e. the
# analysis requested in part (a). summary() prints the principal inertias
# followed by per-row and per-column statistics.
Vietnam.ca <- ca(Vietnam.tab)
summary(Vietnam.ca)
##
## Principal inertias (eigenvalues):
##
## dim value % cum% scree plot
## 1 0.085680 73.6 73.6 ******************
## 2 0.027881 23.9 97.5 ******
## 3 0.002854 2.5 100.0 *
## -------- -----
## Total: 0.116415 100.0
##
##
## Rows:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | 1F | 24 818 13 | -167 452 8 | -150 367 20 |
## 2 | 1M | 139 997 181 | 386 986 242 | -41 11 8 |
## 3 | 2F | 16 995 35 | -407 647 31 | -299 349 51 |
## 4 | 2M | 140 984 131 | 326 982 175 | -15 2 1 |
## 5 | 3F | 53 999 112 | -334 453 69 | -367 547 256 |
## 6 | 3M | 138 904 40 | 175 904 49 | -4 0 0 |
## 7 | 4F | 32 982 37 | -344 887 44 | -113 95 15 |
## 8 | 4M | 149 383 23 | 81 372 11 | 14 11 1 |
## 9 | 5F | 59 994 153 | -453 686 143 | -304 309 197 |
## 10 | 5M | 248 1000 276 | -281 608 228 | 225 391 451 |
##
## Columns:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | A | 255 985 381 | 414 985 509 | -1 0 0 |
## 2 | B | 235 720 60 | 135 608 50 | 58 112 28 |
## 3 | C | 419 999 283 | -247 773 298 | -133 226 267 |
## 4 | D | 92 995 276 | -366 383 143 | 463 612 705 |
# (b). Construct an informative 2D plot of the solution, and interpret in terms
# of how the response varies with the year for males
# Plot the stacked CA solution with plot()'s defaults. For part (b), read the
# row points whose labels end in "M" (the male year groups).
plot(Vietnam.ca)

# The plot results could be interpreted as:
# Option A : selected by males in year 1 or 2.
# Option B : selected by males in year 3 or 4.
# Option C : selected by females regardless of the year.
# Option D: selected by males in year 5.
# (c). Use mjca() to carry out an MCA on the three-way table.
# Make a useful plot of the solution and interpret in terms of the
# relationship of the response to year and sex.
# Part (c) asks for an MCA of the full three-way table (sex x year x response).
# Passing the stacked two-way table Vietnam.tab to mjca() only repeats the
# simple CA from part (a): it yields the same principal inertias
# (0.085680, 0.027881, 0.002854) and keeps year and sex fused in one factor.
# Build the three-way table from the frequency-form data and analyse that.
# NOTE: the summary output pasted below was produced by the earlier call on
# Vietnam.tab and must be regenerated (and the interpretation revisited)
# after re-running this code.
Vietnam.tab3 <- xtabs(Freq ~ sex + year + response, data = Vietnam)
Vietnam.mca <- mjca(Vietnam.tab3)
summary(Vietnam.mca)
##
## Principal inertias (eigenvalues):
##
## dim value % cum% scree plot
## 1 0.085680 73.6 73.6 ******************
## 2 0.027881 23.9 97.5 ******
## 3 0.002854 2.5 100.0 *
## 4 00000000 0.0 100.0
## 5 00000000 0.0 100.0
## 6 00000000 0.0 100.0
## 7 00000000 0.0 100.0
## -------- -----
## Total: 0.116415
##
##
## Columns:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | year_sex:1 F | 12 818 80 | 167 452 4 | -150 367 10 |
## 2 | year_sex:1 M | 70 997 72 | -386 986 121 | -41 11 4 |
## 3 | year_sex:2 F | 8 995 81 | 407 647 15 | -299 349 25 |
## 4 | year_sex:2 M | 70 984 72 | -326 982 87 | -15 2 1 |
## 5 | year_sex:3 F | 27 999 78 | 334 453 34 | -367 547 128 |
## 6 | year_sex:3 M | 69 904 71 | -175 904 25 | -4 0 0 |
## 7 | year_sex:4 F | 16 982 79 | 344 887 22 | -113 95 7 |
## 8 | year_sex:4 M | 75 383 70 | -81 372 6 | 14 11 1 |
## 9 | year_sex:5 F | 30 994 78 | 453 686 71 | -304 309 99 |
## 10 | year_sex:5 M | 124 1000 64 | 281 608 114 | 225 391 225 |
## 11 | response:A | 127 985 65 | -414 985 255 | -1 0 0 |
## 12 | response:B | 117 720 63 | -135 608 25 | 58 112 14 |
## 13 | response:C | 210 999 50 | 247 773 149 | -133 226 134 |
## 14 | response:D | 46 995 77 | 366 383 72 | 463 612 352 |
# Plot the mjca() solution for part (c) using plot()'s default settings.
plot(Vietnam.mca)

# The plot results could be interpreted as:
# Option A : selected by males in year 1 or 2.
# Option B : selected by males in year 3 or 4.
# Option C : selected by females regardless of the year.
# Option D: selected by males in year 5.