#Group Members:
#  1. Pallavi Saitu
#  2. Damodar Arji
#  3. Harsh Dhabhai
#  4. Venkat Sai Katepalli
#  5. Vinay Kumar Anumula 
# Exercise 6.2 The data set criminal in the package logmult gives the 4 × 5 table 
# below of the number of men aged 15–19 charged with a criminal case for whom 
# charges were dropped in Denmark from 1955–1958.

library(logmult)
## Warning: package 'logmult' was built under R version 3.5.2
## Loading required package: gnm
## 
## Attaching package: 'logmult'
## The following object is masked from 'package:gnm':
## 
##     se
library(ca)
# Load the Year-by-Age table of dropped charges from logmult and echo it
# so the counts appear in the transcript.
data(criminal, package = "logmult")
print(criminal)
##       Age
## Year    15  16  17  18  19
##   1955 141 285 320 441 427
##   1956 144 292 342 441 396
##   1957 196 380 424 462 427
##   1958 212 424 399 442 430
# (a) What percentages of the Pearson χ2 for association are explained by 
#     the various dimensions?
# Simple correspondence analysis of the Year x Age table. The summary lists
# each dimension's principal inertia and its share of the total, which is
# what part (a) asks for.
criminal.ca <- ca(obj = criminal)
print(summary(criminal.ca))
## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.004939  90.3  90.3  ***********************  
##  2      0.000491   9.0  99.3  **                       
##  3      3.8e-050   0.7 100.0                           
##         -------- -----                                 
##  Total: 0.005468 100.0                                 
## 
## 
## Rows:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 | 1955 |  230  996  347 |   88 939 361 |  -22  58 223 |
## 2 | 1956 |  230  978  157 |   58 908 157 |   16  71 124 |
## 3 | 1957 |  269  984  111 |  -39 669  82 |   27 315 391 |
## 4 | 1958 |  271  999  385 |  -85 938 399 |  -22  61 262 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |   15 |   99  998  185 | -101 992 203 |   -7   5  11 |
## 2 |   16 |  197  996  312 |  -91 959 331 |  -18  37 128 |
## 3 |   17 |  211  991   75 |  -23 281  23 |   37 710 594 |
## 4 |   18 |  254  989  235 |   70 980 255 |    7   9  24 |
## 5 |   19 |  239  990  194 |   62 877 188 |  -22 112 243 |
#(b) Plot the 2D correspondence analysis solution. Describe the pattern of 
#    association between year and age.
# Symmetric map of the first two dimensions (plot.ca's defaults, written
# out so the choice of map is explicit).
plot(criminal.ca, dim = c(1, 2), map = "symmetric")

# Ages 15 and 16 are associated with year 1958, age 17 with year 1957, 
# age 18 with year 1956, and age 19 with year 1955. 
# In general, younger ages are associated with later years.


# Exercise 6.11 The data set Vietnam in vcdExtra gives a 2 × 5 × 4 contingency 
# table in frequency form reflecting a survey of student opinion on the Vietnam 
# War at the University of North Carolina in May 1967. The table variables are sex, 
# year in school, and response, which has categories: 
#   (A) Defeat North Vietnam by widespread bombing and land invasion; 
#   (B) Maintain the present policy; 
#   (C) De-escalate military activity, stop bombing and begin negotiations; 
#   (D) Withdraw military forces immediately.
  
# install.packages("vcdExtra")
library(vcdExtra)
## Loading required package: vcd
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:logmult':
## 
##     assoc
# Load the Vietnam survey data (frequency form) from vcdExtra, show its
# structure, then echo every row.
data(Vietnam, package = "vcdExtra")
str(object = Vietnam)
## 'data.frame':    40 obs. of  4 variables:
##  $ sex     : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
##  $ year    : int  1 1 1 1 2 2 2 2 3 3 ...
##  $ response: Factor w/ 4 levels "A","B","C","D": 1 2 3 4 1 2 3 4 1 2 ...
##  $ Freq    : int  13 19 40 5 5 9 33 3 22 29 ...
print(Vietnam)
##       sex year response Freq
## 1  Female    1        A   13
## 2  Female    1        B   19
## 3  Female    1        C   40
## 4  Female    1        D    5
## 5  Female    2        A    5
## 6  Female    2        B    9
## 7  Female    2        C   33
## 8  Female    2        D    3
## 9  Female    3        A   22
## 10 Female    3        B   29
## 11 Female    3        C  110
## 12 Female    3        D    6
## 13 Female    4        A   12
## 14 Female    4        B   21
## 15 Female    4        C   58
## 16 Female    4        D   10
## 17 Female    5        A   19
## 18 Female    5        B   27
## 19 Female    5        C  128
## 20 Female    5        D   13
## 21   Male    1        A  175
## 22   Male    1        B  116
## 23   Male    1        C  131
## 24   Male    1        D   17
## 25   Male    2        A  160
## 26   Male    2        B  126
## 27   Male    2        C  135
## 28   Male    2        D   21
## 29   Male    3        A  132
## 30   Male    3        B  120
## 31   Male    3        C  154
## 32   Male    3        D   29
## 33   Male    4        A  145
## 34   Male    4        B   95
## 35   Male    4        C  185
## 36   Male    4        D   44
## 37   Male    5        A  118
## 38   Male    5        B  176
## 39   Male    5        C  345
## 40   Male    5        D  141
# (a). Using the stacking approach, carry out a correspondence analysis corresponding 
#      to the loglinear model (R)(YS), which asserts that the response is independent of 
#      the combinations of year and sex.

# Stacking variable: year followed by the upper-cased first letter of sex,
# e.g. "1 F" or "5 M". Appended as a new last column of Vietnam.
sex_initial <- toupper(substr(Vietnam$sex, 1, 1))
Vietnam$year_sex <- paste(Vietnam$year, sex_initial)
rm(sex_initial)
print(Vietnam)
##       sex year response Freq year_sex
## 1  Female    1        A   13      1 F
## 2  Female    1        B   19      1 F
## 3  Female    1        C   40      1 F
## 4  Female    1        D    5      1 F
## 5  Female    2        A    5      2 F
## 6  Female    2        B    9      2 F
## 7  Female    2        C   33      2 F
## 8  Female    2        D    3      2 F
## 9  Female    3        A   22      3 F
## 10 Female    3        B   29      3 F
## 11 Female    3        C  110      3 F
## 12 Female    3        D    6      3 F
## 13 Female    4        A   12      4 F
## 14 Female    4        B   21      4 F
## 15 Female    4        C   58      4 F
## 16 Female    4        D   10      4 F
## 17 Female    5        A   19      5 F
## 18 Female    5        B   27      5 F
## 19 Female    5        C  128      5 F
## 20 Female    5        D   13      5 F
## 21   Male    1        A  175      1 M
## 22   Male    1        B  116      1 M
## 23   Male    1        C  131      1 M
## 24   Male    1        D   17      1 M
## 25   Male    2        A  160      2 M
## 26   Male    2        B  126      2 M
## 27   Male    2        C  135      2 M
## 28   Male    2        D   21      2 M
## 29   Male    3        A  132      3 M
## 30   Male    3        B  120      3 M
## 31   Male    3        C  154      3 M
## 32   Male    3        D   29      3 M
## 33   Male    4        A  145      4 M
## 34   Male    4        B   95      4 M
## 35   Male    4        C  185      4 M
## 36   Male    4        D   44      4 M
## 37   Male    5        A  118      5 M
## 38   Male    5        B  176      5 M
## 39   Male    5        C  345      5 M
## 40   Male    5        D  141      5 M
# Cross-tabulate the frequencies into the stacked two-way table:
# rows are the year_sex combinations, columns are the responses.
Vietnam.tab <- xtabs(formula = Freq ~ year_sex + response, data = Vietnam)
print(Vietnam.tab)
##         response
## year_sex   A   B   C   D
##      1 F  13  19  40   5
##      1 M 175 116 131  17
##      2 F   5   9  33   3
##      2 M 160 126 135  21
##      3 F  22  29 110   6
##      3 M 132 120 154  29
##      4 F  12  21  58  10
##      4 M 145  95 185  44
##      5 F  19  27 128  13
##      5 M 118 176 345 141
library(ca)
# Correspondence analysis of the stacked year_sex x response table; the
# summary reports the inertia decomposition and the row/column coordinates.
Vietnam.ca <- ca(obj = Vietnam.tab)
print(summary(Vietnam.ca))
## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.085680  73.6  73.6  ******************       
##  2      0.027881  23.9  97.5  ******                   
##  3      0.002854   2.5 100.0  *                        
##         -------- -----                                 
##  Total: 0.116415 100.0                                 
## 
## 
## Rows:
##      name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1  |   1F |   24  818   13 | -167 452   8 | -150 367  20 |
## 2  |   1M |  139  997  181 |  386 986 242 |  -41  11   8 |
## 3  |   2F |   16  995   35 | -407 647  31 | -299 349  51 |
## 4  |   2M |  140  984  131 |  326 982 175 |  -15   2   1 |
## 5  |   3F |   53  999  112 | -334 453  69 | -367 547 256 |
## 6  |   3M |  138  904   40 |  175 904  49 |   -4   0   0 |
## 7  |   4F |   32  982   37 | -344 887  44 | -113  95  15 |
## 8  |   4M |  149  383   23 |   81 372  11 |   14  11   1 |
## 9  |   5F |   59  994  153 | -453 686 143 | -304 309 197 |
## 10 |   5M |  248 1000  276 | -281 608 228 |  225 391 451 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |    A |  255  985  381 |  414 985 509 |   -1   0   0 |
## 2 |    B |  235  720   60 |  135 608  50 |   58 112  28 |
## 3 |    C |  419  999  283 | -247 773 298 | -133 226 267 |
## 4 |    D |   92  995  276 | -366 383 143 |  463 612 705 |
# (b). Construct an informative 2D plot of the solution, and interpret in terms 
#      of how the response varies with the year for males

# Keep the coordinates that plot.ca() returns invisibly so the year points
# can be joined into one trajectory per sex. This makes the change in
# response across years directly visible, which is what part (b) asks for.
Vietnam.ca.coords <- plot(Vietnam.ca)

# Row labels of Vietnam.tab end in "M" or "F"; rows are ordered by year, so
# each subset runs from year 1 to year 5.
is_male <- endsWith(rownames(Vietnam.tab), "M")
lines(Vietnam.ca.coords$rows[is_male, , drop = FALSE],
      col = "blue", lty = 1)
lines(Vietnam.ca.coords$rows[!is_male, , drop = FALSE],
      col = "red", lty = 2)
legend("topleft", legend = c("Male", "Female"),
       col = c("blue", "red"), lty = c(1, 2), bty = "n")

# The plot can be interpreted as follows; for males the preferred response
# shifts from A through B toward D as year in school increases:
    # Option A: selected by males in year 1 or 2.
    # Option B: selected by males in year 3 or 4.
    # Option C: selected by females regardless of the year.
    # Option D: selected by males in year 5.

# (c). Use mjca() to carry out an MCA on the three-way table. 
#     Make a useful plot of the solution and interpret in terms of the 
#     relationship of the response to year and sex.
# MCA must be run on the full three-way table (sex x year x response).
# Passing the stacked year_sex-by-response table to mjca() only reproduces
# the simple CA above (identical principal inertias), so it cannot show how
# response relates to year and to sex as separate variables.
Vietnam.tab3 <- xtabs(Freq ~ sex + year + response, data = Vietnam)
Vietnam.mca <- mjca(Vietnam.tab3)
summary(Vietnam.mca)
## NOTE(review): the summary transcript previously pasted here came from the
## stacked two-way table; re-run this chunk and paste the new output.
plot(Vietnam.mca)

# NOTE(review): the interpretation below was written from the stacked
# (year_sex) analysis; confirm it against the three-way MCA plot.
# The plot results could be interpreted as:
    # Option A : selected by males in year 1 or 2.
    # Option B : selected by males in year 3 or 4.
    # Option C : selected by females regardless of the year.
    # Option D:  selected by males in year 5.