library(foreign)
library(psych)
library(MASS)
library(biotools)
## ---
## biotools version 3.0
library(candisc)
library(klaR)
# Import the SPSS data set as a plain data frame.
df <- read.spss(
  "/home/ekaterina/Downloads/discrim.sav",
  to.data.frame = TRUE
)
## Warning in read.spss("/home/ekaterina/Downloads/discrim.sav", to.data.frame
## = TRUE): /home/ekaterina/Downloads/discrim.sav: Unrecognized record type 7,
## subtype 18 encountered in system file

# Discard the fifth column; the four that remain are listed by summary() below.
df <- df[, -5]

# Scatter-plot matrix of the retained columns, drawn with linear fits.
pairs.panels(df, lm = TRUE)

# Per-column descriptive statistics.
summary(df)
##     OUTDOOR          SOCIAL       CONSERVATIVE        JOB       
##  Min.   : 0.00   Min.   : 7.00   Min.   : 0.00   Min.   :1.000  
##  1st Qu.:13.00   1st Qu.:17.00   1st Qu.: 8.00   1st Qu.:1.000  
##  Median :16.00   Median :21.00   Median :11.00   Median :2.000  
##  Mean   :15.64   Mean   :20.68   Mean   :10.59   Mean   :1.922  
##  3rd Qu.:19.00   3rd Qu.:25.00   3rd Qu.:13.00   3rd Qu.:3.000  
##  Max.   :28.00   Max.   :35.00   Max.   :20.00   Max.   :3.000

Признаки OUTDOOR, SOCIAL и CONSERVATIVE — результаты тестирования работников аэропорта. JOB — категориальная переменная, обозначающая тип работы:

1 - customer service personnel

2 - mechanics

3 - dispatchers

# Split the data: column 4 (JOB) is the grouping variable, the remaining
# columns are the test scores analysed below.
# NOTE(review): summary(df) reports numeric statistics for JOB, so `job` holds
# numeric codes at this point rather than a factor.
job <- df[[4]]
Y <- df[, -4]

Гомоскедастичность

# Box's M-test for homogeneity of the covariance matrices of Y across the
# groups given by `job` (see the test title in the output below).
# NOTE(review): boxM is expected to come from biotools, loaded at the top of
# the script; heplots, which may be attached together with candisc, exports a
# function with the same name — confirm which one is on the search path.
boxM(Y,job)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 25.642, df = 12, p-value = 0.01206

MANOVA

# One-way MANOVA: do the three test scores differ jointly between job types?
# `job` is read in as numeric codes (1, 2, 3). Left numeric, manova() fits a
# single linear trend in the code (1 Df) instead of comparing three groups
# (2 Df), so it is converted to a factor first. as.factor() leaves an existing
# factor unchanged, so re-running this chunk is harmless.
job <- as.factor(job)
model <- manova(as.matrix(Y) ~ job)
summary(model, test = "Wilks")
## NOTE: re-run this chunk to regenerate the printed table. The table shown
## here previously (job Df = 1, Wilks = 0.48065, approx F = 86.441 on 3 and
## 240 Df) was produced with `job` treated as a numeric covariate. With `job`
## as a three-level factor the term should have Df = 2, and the Wilks value
## should agree with the three-variable Wilks.lambda reported by greedy.wilks.

Canonical Discriminant Analysis

# Canonical discriminant analysis based on the fitted MANOVA model; the result
# is stored in `cda` and printed (squared canonical correlations, eigenvalues
# and the likelihood-ratio test of the canonical dimensions).
# NOTE(review): the output below reports a single canonical dimension, which is
# consistent with a 1-Df `job` term in the model. If `job` is modelled as a
# three-level factor, up to two dimensions are expected and this output has to
# be regenerated.
cda <- candisc(model)
print(cda)
## 
## Canonical Discriminant Analysis for job:
## 
##    CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.51935     1.0805                100        100
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F numDF denDF   Pr(> F)    
## 1      0.48065   86.441     3   240 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

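Одна значимая каноническая переменная. В приведённом выводе она единственная, поскольку JOB вошла в модель как числовая переменная (Df = 1); если задать JOB фактором с тремя уровнями, канонических переменных может быть до двух.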

# Standardized canonical coefficients of the three test scores.
cda[["coeffs.std"]]
##                    Can1
## OUTDOOR      -0.4260213
## SOCIAL        0.8403255
## CONSERVATIVE -0.5233380

# Structure coefficients: correlations of each score with the canonical variate.
cda[["structure"]]
##                    Can1
## OUTDOOR      -0.3927924
## SOCIAL        0.8584773
## CONSERVATIVE -0.6020502

# Default plot of the candisc result.
plot(cda)

# Stepwise variable selection on Wilks' lambda with JOB as the grouping
# variable and all other columns of df as candidates. The output lists the
# variables in order of inclusion with the lambda reached at each step.
greedy.wilks(JOB ~ ., data = df)
## Formula containing included variables: 
## 
## JOB ~ SOCIAL + OUTDOOR + CONSERVATIVE
## <environment: 0x90cb980>
## 
## 
## Values calculated in each step of the selection procedure: 
## 
##           vars Wilks.lambda F.statistics.overall p.value.overall
## 1       SOCIAL    0.6039814             79.00945    4.105260e-27
## 2      OUTDOOR    0.4326715             62.43246    1.814539e-42
## 3 CONSERVATIVE    0.3639880             52.38173    1.634919e-49
##   F.statistics.diff p.value.diff
## 1          79.00945 4.105260e-27
## 2          47.51225 0.000000e+00
## 3          22.54931 1.061537e-09