The data are measurements done on middle-aged men in a health fitness club (Dr. A. C. Linnerud, NC State University). There are two sets of data about the men. The physiological data: Weight, Waist, Pulse. The exercises data: Chins, Situps, Jumps.
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'psych' was built under R version 4.4.3
## Warning: package 'CCA' was built under R version 4.4.3
## Loading required package: fda
## Warning: package 'fda' was built under R version 4.4.3
## Loading required package: splines
## Loading required package: fds
## Warning: package 'fds' was built under R version 4.4.3
## Loading required package: rainbow
## Warning: package 'rainbow' was built under R version 4.4.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: pcaPP
## Warning: package 'pcaPP' was built under R version 4.4.3
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 4.4.3
## Loading required package: deSolve
## Warning: package 'deSolve' was built under R version 4.4.3
##
## Attaching package: 'fda'
## The following object is masked from 'package:graphics':
##
## matplot
## Loading required package: fields
## Warning: package 'fields' was built under R version 4.4.3
## Loading required package: spam
## Warning: package 'spam' was built under R version 4.4.3
## Spam version 2.11-1 (2025-01-20) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
## Loading required package: viridisLite
## Loading required package: RColorBrewer
##
## Try help(fields) to get started.
##
## Attaching package: 'fields'
## The following object is masked from 'package:psych':
##
## describe
## Warning: package 'GGally' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
dat2 <- read_xlsx(path = "D:/UNY/MySta/SEM 5/StatMul/dataset StatMul/fitness.xlsx", sheet = 1)
dat2 <- dplyr::select(dat2, -ID)
str(dat2) ## tibble [20 × 6] (S3: tbl_df/tbl/data.frame)
## $ Weight: num [1:20] 86.6 85.7 87.5 73.5 85.7 ...
## $ Waist : num [1:20] 36 37 38 35 35 36 38 34 31 33 ...
## $ Pulse : num [1:20] 50 52 58 62 46 56 56 60 74 56 ...
## $ Chins : num [1:20] 5 2 12 12 13 4 8 6 15 17 ...
## $ Situps: num [1:20] 162 110 101 105 155 101 101 125 200 251 ...
## $ Jumps : num [1:20] 60 60 101 37 58 42 38 40 40 250 ...
## tibble [20 × 6] (S3: tbl_df/tbl/data.frame)
## $ Weight: num [1:20] 86.6 85.7 87.5 73.5 85.7 ...
## $ Waist : num [1:20] 36 37 38 35 35 36 38 34 31 33 ...
## $ Pulse : num [1:20] 50 52 58 62 46 56 56 60 74 56 ...
## $ Chins : num [1:20] 5 2 12 12 13 4 8 6 15 17 ...
## $ Situps: num [1:20] 162 110 101 105 155 101 101 125 200 251 ...
## $ Jumps : num [1:20] 60 60 101 37 58 42 38 40 40 250 ...
# Exploratory pairwise relationships
pairs.panels(dat2,
method = "pearson",
hist.col = "#00AFBB",
density = TRUE,
ellipses = FALSE)# Define variable sets and scale them
physio_raw <- dat2[, c("Weight", "Waist")] # physiological variables
exercise_raw <- dat2[, c("Chins", "Situps", "Jumps")] # exercise performance variables
# Standardize (mean = 0, sd = 1)
physio <- scale(physio_raw)
exercise <- scale(exercise_raw)
# Perform Canonical Correlation Analysis
cc_fit <- cc(physio, exercise)
# Display canonical correlations
cat("\nCanonical correlations:\n")##
## Canonical correlations:
## [1] 0.7944229 0.1964931
n <- nrow(dat2)
p <- ncol(physio)
q <- ncol(exercise)
cat("\nSignificance test using Wilks’ Lambda:\n")##
## Significance test using Wilks’ Lambda:
## Wilks' Lambda, using F-approximation (Rao's F):
## stat approx df1 df2 p.value
## 1 to 2: 0.3546495 3.3959599 6 30 0.0112200
## 2 to 2: 0.9613905 0.3212809 2 16 0.7297909
## $id
## [1] "Wilks"
##
## $stat
## [1] 0.3546495 0.9613905
##
## $approx
## [1] 3.3959599 0.3212809
##
## $df1
## [1] 6 2
##
## $df2
## [1] 30 16
##
## $p.value
## [1] 0.0112200 0.7297909
##
## Squared canonical correlations:
## [1] 0.63110778 0.03860955
##
## Interpretation:
cat("At α = 0.05, the first canonical correlation is borderline significant (p =",
round(wilks_test$p.value[1], 4),
"). Others are not significant.\n\n")## At α = 0.05, the first canonical correlation is borderline significant (p = 0.0112 ). Others are not significant.
# Extract canonical variate scores (U and V)
xs <- as.data.frame(scale(physio) %*% cc_fit$xcoef) # U variates
ys <- as.data.frame(scale(exercise) %*% cc_fit$ycoef) # V variates
# Name the columns
colnames(xs) <- paste0("U", 1:ncol(xs))
colnames(ys) <- paste0("V", 1:ncol(ys))
# Combine into one data frame
newdat <- data.frame(xs, ys)
head(newdat)## U1 U2 V1 V2
## 1 -0.08436379 0.70849414 -0.11991692 -0.2535522
## 2 0.47363126 0.16165387 0.95263778 -0.3434656
## 3 0.84670242 0.07213638 1.00108971 -0.2290561
## 4 0.31308127 -1.10749996 0.04661993 1.0394822
## 5 -0.51907626 0.95045256 -0.56705591 0.5346354
## 6 0.19302205 0.02250997 0.71967015 0.2295648
# Visualization of canonical variate pairs
library(ggplot2)
library(gridExtra)
p1 <- ggplot(newdat, aes(x = U1, y = V1)) +
geom_point(color = "#0072B2") +
geom_smooth(method = "lm", se = FALSE, color = "black") +
theme_bw() +
ggtitle("Canonical Variate Pair 1 (U1 vs V1)")
p2 <- ggplot(newdat, aes(x = U2, y = V2)) +
geom_point(color = "#E69F00") +
geom_smooth(method = "lm", se = FALSE, color = "black") +
theme_bw() +
ggtitle("Canonical Variate Pair 2 (U2 vs V2)")
grid.arrange(p1, p2, ncol = 2)## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Correlation between original variables and their canonical variates (loadings)
corr.X.U <- cor(physio_raw, xs)
corr.Y.V <- cor(exercise_raw, ys)
cat("\nCanonical loadings (X set – physiological variables):\n")##
## Canonical loadings (X set – physiological variables):
## U1 U2
## Weight 0.622 0.783
## Waist 0.927 0.375
##
## Canonical loadings (Y set – exercise variables):
## V1 V2
## Chins -0.732 -0.183
## Situps -0.819 -0.573
## Jumps -0.166 -0.937
corr.X.V <- cor(physio_raw, ys)
corr.Y.U <- cor(exercise_raw, xs)
cat("\nCross-loadings (X variables with Y canonical variates):\n")##
## Cross-loadings (X variables with Y canonical variates):
## V1 V2
## Weight 0.494 0.154
## Waist 0.736 0.074
##
## Cross-loadings (Y variables with X canonical variates):
## U1 U2
## Chins -0.581 -0.036
## Situps -0.651 -0.113
## Jumps -0.132 -0.184