CCA

典型相关分析,计算向量之间的相关系数

library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --

## √ ggplot2 3.2.1     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   1.0.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0

## Warning: package 'ggplot2' was built under R version 3.6.1

## Warning: package 'tibble' was built under R version 3.6.1

## Warning: package 'tidyr' was built under R version 3.6.1

## Warning: package 'purrr' was built under R version 3.6.1

## Warning: package 'dplyr' was built under R version 3.6.1

## -- Conflicts ------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(CCA)#做典型相关分析的包
## Warning: package 'CCA' was built under R version 3.6.1

## Loading required package: fda

## Warning: package 'fda' was built under R version 3.6.1

## Loading required package: splines

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## 
## Attaching package: 'fda'

## The following object is masked from 'package:graphics':
## 
##     matplot

## Loading required package: fields

## Warning: package 'fields' was built under R version 3.6.1

## Loading required package: spam

## Warning: package 'spam' was built under R version 3.6.1

## Loading required package: dotCall64

## Warning: package 'dotCall64' was built under R version 3.6.1

## Loading required package: grid

## Spam version 2.4-0 (2019-11-01) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.

## 
## Attaching package: 'spam'

## The following object is masked from 'package:Matrix':
## 
##     det

## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve

## Loading required package: maps

## Warning: package 'maps' was built under R version 3.6.1

## 
## Attaching package: 'maps'

## The following object is masked from 'package:purrr':
## 
##     map

## See https://github.com/NCAR/Fields for
##  an extensive vignette, other supplements and source code
fit1 <- read_csv("../例8-1.csv")
## Parsed with column specification:
## cols(
##   nu = col_double(),
##   weight = col_double(),
##   waist = col_double(),
##   pulse = col_double(),
##   chins = col_double(),
##   situps = col_double(),
##   jumps = col_double()
## )
fit <- fit1 %>%
  select(-nu)
PHY <- fit[,1:3]#把1到3这三个变量看为一组,为生理指标
EXER <- fit[,4:6]#把4到6这三个变量看为一组,为运动指标
matcor(PHY,EXER )#计算PHY 与EXER的自相关矩阵,及之间的相关矩阵
## $Xcor
##            weight      waist      pulse
## weight  1.0000000  0.8702435 -0.3657620
## waist   0.8702435  1.0000000 -0.3528921
## pulse  -0.3657620 -0.3528921  1.0000000
## 
## $Ycor
##            chins    situps     jumps
## chins  1.0000000 0.6957274 0.4957602
## situps 0.6957274 1.0000000 0.6692061
## jumps  0.4957602 0.6692061 1.0000000
## 
## $XYcor
##            weight      waist       pulse      chins     situps       jumps
## weight  1.0000000  0.8702435 -0.36576203 -0.3896937 -0.4930836 -0.22629556
## waist   0.8702435  1.0000000 -0.35289213 -0.5522321 -0.6455980 -0.19149937
## pulse  -0.3657620 -0.3528921  1.00000000  0.1506480  0.2250381  0.03493306
## chins  -0.3896937 -0.5522321  0.15064802  1.0000000  0.6957274  0.49576018
## situps -0.4930836 -0.6455980  0.22503808  0.6957274  1.0000000  0.66920608
## jumps  -0.2262956 -0.1914994  0.03493306  0.4957602  0.6692061  1.00000000

典型系数

ccl <- cc(PHY,EXER )#进行典型相关分析
ccl[1]#输出典型相关系数,就是主成分后经过拉格朗日乘子计算出来的lambda即为相关系数的那些lambda(第一个为第一个典型相关系数,就是最大的那个主成分计算出来的典型相关系数)
## $cor
## [1] 0.79560815 0.20055604 0.07257029
ccl[3:4]#$xcoef的第一列就是第一个生理指标的典型相关系数计算出来的系数,就是第一列的a,$ycoef的第一列就是第一个运动指标典型相关系数计算出来的系数,就是第一列的b
## $xcoef
##                [,1]        [,2]         [,3]
## weight  0.031404688  0.07631951 -0.007735047
## waist  -0.493241676 -0.36872299  0.158033647
## pulse   0.008199315  0.03205199  0.145732242
## 
## $ycoef
##               [,1]         [,2]         [,3]
## chins   0.06611399  0.071041211 -0.245275347
## situps  0.01684623 -0.001973745  0.019767637
## jumps  -0.01397157 -0.020714106 -0.008167472
ccl[5]#典型变量的得分及典型变量与原始变量的相关系数矩阵(计算出来的典型系数生理指标、运动指标分别与原始的U、V的相关系数)
## $scores
## $scores$xscores
##               [,1]        [,2]        [,3]
##  [1,]  0.043457300  0.52961092 -0.89006107
##  [2,] -0.814622152  0.20121991  0.57639407
##  [3,]  0.441092338  0.61748692 -1.61555359
##  [4,] -0.265736402  1.51086703  0.14569875
##  [5,]  2.235378930  1.99768113  1.93337021
##  [6,]  0.339037518 -0.41197224 -1.03595733
##  [7,] -0.017242384 -1.10803692  1.12031975
##  [8,]  0.073469414  0.55404196 -1.48846013
##  [9,] -0.888057432 -0.85569669 -0.03307275
## [10,]  0.456815513 -0.90719486 -0.51050641
## [11,] -0.496195120  0.07235291 -0.42509284
## [12,] -0.275645187 -0.93030784  0.92500854
## [13,] -0.189988998  0.03504733  0.05394781
## [14,]  0.358221297 -0.24409131  0.43683518
## [15,]  0.410404769 -0.99572988 -0.20357183
## [16,]  0.754463761 -0.20810378 -0.87932136
## [17,] -3.130296936  1.11627338  0.25711279
## [18,] -0.005941024  1.38502643  0.93167397
## [19,]  0.965063246 -0.52625635 -0.96773958
## [20,]  0.006321548 -1.83221805  1.66897582
## 
## $scores$yscores
##              [,1]         [,2]        [,3]
##  [1,]  0.12682042 -0.135246206  1.50077789
##  [2,] -1.01083608 -0.366837618 -1.75684178
##  [3,]  0.56575183  0.488327913 -0.58346340
##  [4,] -0.39508319  0.653986234 -0.26118963
##  [5,]  1.70754843  0.914445706 -0.03745591
##  [6,]  0.52002108  1.255855971 -2.09308265
##  [7,]  0.98597593 -0.532618594 -0.02655171
##  [8,] -0.95174333  0.718088662 -0.32626338
##  [9,] -1.16860420  0.720028331  0.01561577
## [10,]  1.66764243  0.181536564  0.18720843
## [11,] -0.94752555 -0.245735080  1.20868680
## [12,] -0.04927075  0.950970203 -1.15505300
## [13,] -0.71542541  0.286964964  0.68724187
## [14,] -0.15094476  0.423105710  0.68744942
## [15,] -0.23509529 -3.394095205 -1.23502632
## [16,]  0.69591510 -0.800932141  0.03821072
## [17,] -1.88469769  0.008789495  0.34957863
## [18,]  0.55994327 -0.975543884  0.24264871
## [19,]  1.38961665 -0.257495751  1.20997570
## [20,] -0.71000887  0.106404727  1.34753383
## 
## $scores$corr.X.xscores
##              [,1]       [,2]        [,3]
## weight -0.6206424  0.7723919 -0.13495886
## waist  -0.9254249  0.3776614 -0.03099486
## pulse   0.3328481 -0.0414842  0.94206752
## 
## $scores$corr.Y.xscores
##             [,1]       [,2]        [,3]
## chins  0.5789047 -0.0475222 -0.04671717
## situps 0.6505914 -0.1149232  0.00395139
## jumps  0.1290401 -0.1922586 -0.01697689
## 
## $scores$corr.X.yscores
##              [,1]         [,2]         [,3]
## weight -0.4937881  0.154907853 -0.009794003
## waist  -0.7362756  0.075742277 -0.002249306
## pulse   0.2648166 -0.008319907  0.068366110
## 
## $scores$corr.Y.yscores
##             [,1]       [,2]        [,3]
## chins  0.7276254 -0.2369522 -0.64375064
## situps 0.8177285 -0.5730231  0.05444915
## jumps  0.1621905 -0.9586280 -0.23393722
sdx <- sapply(PHY,function(x) sd(x))#生理指标的标准差
s1 <- diag(sdx)#把那个行向量标准差写成以sdx各元素为对角线的对角矩阵
sdx
##    weight     waist     pulse 
## 24.690505  3.201973  7.210373
s1
##          [,1]     [,2]     [,3]
## [1,] 24.69051 0.000000 0.000000
## [2,]  0.00000 3.201973 0.000000
## [3,]  0.00000 0.000000 7.210373
ccl$xcoef
##                [,1]        [,2]         [,3]
## weight  0.031404688  0.07631951 -0.007735047
## waist  -0.493241676 -0.36872299  0.158033647
## pulse   0.008199315  0.03205199  0.145732242
s1 %*% ccl$xcoef#相当于给每个计算出来的典型系数加了个权,权重为每个系数对应指标的标准差
##             [,1]       [,2]       [,3]
## [1,]  0.77539761  1.8843672 -0.1909822
## [2,] -1.57934657 -1.1806411  0.5060195
## [3,]  0.05912012  0.2311068  1.0507838

加了权重的第一典型变量表达式

[ \begin{gather} u = \delta_1 a_{11} x_1 + \delta_2 a_{12} x_2+ ...+ \delta_p a_{1p} x_p\ u =24.69051* 0.031404688 x_1+ 3.201973*-0.493241676 x_2+ 7.210373*0.008199315x_2 \end{gather} ]

显著性检验

ev<-ccl$cor^2   #cc1$cor是典型相关系数,其平方即为典型根
ev2<-1-ev
n<-dim(PHY)[1]    #样本量赋值给n
p<-length(PHY)    #PHY所含变量的个数赋给p
q<-length(EXER)   #EXER所含变量的个数赋给q
l<-length(ev)
m<-n -1 - (p+q+1)/2  
w<-cbind(NULL)  #定义w以保存中间计算值
for (i in 1:l){
  w<-cbind(w,prod(ev2[i:l]))
}

d<-cbind(NULL)
Q<-cbind(NULL)

for (i in 1:l){
  Q<-cbind(Q,-(m-(i-1))*log(w[i]))
  d<-cbind(d,(p-i+1)*(q-i+1))
}  

pvalue<-pchisq(Q,d,lower.tail=FALSE)    #计算卡方统计量对应的概率
bat<-cbind(t(Q),t(d),t(pvalue))
colnames(bat)<-c("Chi-Squared","df","pvalue")
rownames(bat)<-c(seq(1:l))
bat 
##   Chi-Squared df     pvalue
## 1  16.2549575  9 0.06174456
## 2   0.6718487  4 0.95475464
## 3   0.0712849  1 0.78947507

$$ $$