library(foreign)
library(psych)
library(MASS)
library(biotools)
## ---
## biotools version 3.0
library(candisc)
library(klaR)
# Load the SPSS system file into a data frame.
# NOTE(review): the absolute path is specific to one machine; adjust it before
# running this analysis elsewhere.
df <- read.spss("/home/ekaterina/Downloads/discrim.sav", to.data.frame=TRUE)
## Warning in read.spss("/home/ekaterina/Downloads/discrim.sav", to.data.frame
## = TRUE): /home/ekaterina/Downloads/discrim.sav: Unrecognized record type 7,
## subtype 18 encountered in system file
# Drop the fifth column; the four remaining columns are OUTDOOR, SOCIAL,
# CONSERVATIVE and JOB (see the summary output).
df <- df[, -5]
# Scatterplot matrix of all retained columns, drawn with linear fits.
pairs.panels(df, lm = TRUE)
# Per-column descriptive statistics.
summary(df)
## OUTDOOR SOCIAL CONSERVATIVE JOB
## Min. : 0.00 Min. : 7.00 Min. : 0.00 Min. :1.000
## 1st Qu.:13.00 1st Qu.:17.00 1st Qu.: 8.00 1st Qu.:1.000
## Median :16.00 Median :21.00 Median :11.00 Median :2.000
## Mean :15.64 Mean :20.68 Mean :10.59 Mean :1.922
## 3rd Qu.:19.00 3rd Qu.:25.00 3rd Qu.:13.00 3rd Qu.:3.000
## Max. :28.00 Max. :35.00 Max. :20.00 Max. :3.000
Признаки OUTDOOR, SOCIAL и CONSERVATIVE — результаты тестирования работников аэропорта. JOB — категориальная переменная, кодирующая тип работы:
1 - customer service personnel
2 - mechanics
3 - dispatchers
# Grouping variable. JOB is stored as numeric codes 1/2/3, so it has to be
# converted to a factor: left numeric, manova() treats it as a single-df
# covariate (a regression on the code) rather than comparing the three job
# groups, and candisc() can then extract only one canonical dimension.
# NOTE(review): the manova()/candisc() outputs recorded further down were
# produced with the numeric coding (Df = 1) and must be regenerated.
job <- factor(
  df[["JOB"]],
  levels = 1:3,
  labels = c("customer service personnel", "mechanics", "dispatchers")
)
# Discriminating variables: the three test scores, without the grouping column.
Y <- df[, c("OUTDOOR", "SOCIAL", "CONSERVATIVE")]
# Box's M test for homogeneity of the covariance matrices across job groups.
boxM(Y, job)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y
## Chi-Sq (approx.) = 25.642, df = 12, p-value = 0.01206
# MANOVA of the three test scores on job type.
# NOTE(review): for a comparison of job groups, job must be a factor; the
# recorded output below reports Df = 1 for job, i.e. it was fitted as a
# numeric covariate when that output was produced.
model <- manova(as.matrix(Y) ~ job)
# Multivariate test using Wilks' lambda.
summary(model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## job 1 0.48065 86.441 3 240 < 2.2e-16 ***
## Residuals 242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Canonical discriminant analysis based on the fitted MANOVA model.
# NOTE(review): the number of canonical dimensions is limited by the degrees
# of freedom of the job term in the model.
cda <- candisc(model)
# Canonical R^2, eigenvalues and the sequential likelihood-ratio tests.
print(cda)
##
## Canonical Discriminant Analysis for job:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.51935 1.0805 100 100
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## LR test stat approx F numDF denDF Pr(> F)
## 1 0.48065 86.441 3 240 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Одна значимая каноническая переменная
# Standardized canonical coefficients of the discriminating variables.
cda$coeffs.std
## Can1
## OUTDOOR -0.4260213
## SOCIAL 0.8403255
## CONSERVATIVE -0.5233380
# Structure coefficients: correlations between the original variables and the
# canonical scores.
cda$structure
## Can1
## OUTDOOR -0.3927924
## SOCIAL 0.8584773
## CONSERVATIVE -0.6020502
# Plot the canonical discriminant analysis result.
plot(cda)
# Stepwise forward variable selection based on Wilks' lambda, using JOB as the
# grouping variable and all other columns of df as candidates.
greedy.wilks(JOB ~ ., data = df)
## Formula containing included variables:
##
## JOB ~ SOCIAL + OUTDOOR + CONSERVATIVE
## <environment: 0x90cb980>
##
##
## Values calculated in each step of the selection procedure:
##
## vars Wilks.lambda F.statistics.overall p.value.overall
## 1 SOCIAL 0.6039814 79.00945 4.105260e-27
## 2 OUTDOOR 0.4326715 62.43246 1.814539e-42
## 3 CONSERVATIVE 0.3639880 52.38173 1.634919e-49
## F.statistics.diff p.value.diff
## 1 79.00945 4.105260e-27
## 2 47.51225 0.000000e+00
## 3 22.54931 1.061537e-09