knitr::opts_chunk$set(echo = TRUE, comment = "", message = FALSE)

Lesson 1: Measures of Central Tendency, Dispersion and Association

Warning: package 'readxl' was built under R version 4.3.3
Warning: package 'dplyr' was built under R version 4.3.3
# A tibble: 737 × 5
   calcium  iron protein `vitamin A` `vitamin C`
     <dbl> <dbl>   <dbl>       <dbl>       <dbl>
 1    522. 10.2     42.6       349.        54.1 
 2    343.  4.11    67.8       267.        24.8 
 3    858. 13.7     59.9       668.       155.  
 4    576. 13.2     42.2       792.       225.  
 5   1928. 18.9    111.        740.        81.0 
 6    608.  6.8     45.8       166.        13.0 
 7   1046. 18.4    116.       1120.       159.  
 8    181. 12.8     64.2        78.5       26.9 
 9    327.  8.69    49.2       568.        50.0 
10    383. 13.7    104.       1029.         8.40
# ℹ 727 more rows

1.4

colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")
calculate_summary <- function(x) {
  data.frame(
    N = sum(!is.na(x)),
    Mean = mean(x, na.rm = TRUE),
    SD = sd(x, na.rm = TRUE),
    Minimum = min(x, na.rm = TRUE),
    Maximum = max(x, na.rm = TRUE)
  )
}

summary_stats <- lapply(nutrient, calculate_summary)
summary_table <- do.call(rbind, summary_stats)
kable(summary_table, caption = "Summary Statistics", format = "html", col.names = c("Variable", "N", "Mean", "SD", "Minimum", "Maximum")) %>%
  kable_styling()
Summary Statistics
Variable N Mean SD Minimum Maximum
calcium 737 624.04925 397.277540 7.44 2866.440
iron 737 11.12990 5.984191 0.00 58.668
protein 737 65.80344 30.575756 0.00 251.012
vitamin A 737 839.63535 1633.539828 0.00 34434.270
vitamin C 737 78.92845 73.595272 0.00 433.339
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")


calculate_summary <- function(x) {
  data.frame(
    Mean = sprintf("%.1f", mean(x, na.rm = TRUE)),
    Standard_Deviation = sprintf("%.1f", sd(x, na.rm = TRUE))
  )
}

summary_stats <- lapply(nutrient, calculate_summary)

summary_table <- do.call(rbind, summary_stats)

kable(summary_table, caption = "Summary Statistics", format = "html", col.names = c("Variable", "Mean", "Standard Deviation")) %>%
  kable_styling()
Summary Statistics
Variable Mean Standard Deviation
calcium 624.0 397.3
iron 11.1 6.0
protein 65.8 30.6
vitamin A 839.6 1633.5
vitamin C 78.9 73.6
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")

summary_table <- summary(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
summary_table <- as.data.frame(t(summary_table))  
kable(summary_table, caption = "Descriptive Statistics", format = "html", col.names = NA) %>%
  kable_styling()
Descriptive Statistics
Var1 Var2 Freq
calcium Min. : 7.44
iron Min. : 0.00
protein Min. : 0.00
vitamin A Min. : 0.0
vitamin C Min. : 0.00
calcium 1st Qu.: 326.54
iron 1st Qu.: 7.46
protein 1st Qu.: 45.52
vitamin A 1st Qu.: 276.3
vitamin C 1st Qu.: 24.94
calcium Median : 548.29
iron Median :10.03
protein Median : 61.07
vitamin A Median : 524.0
vitamin C Median : 53.59
calcium Mean : 624.05
iron Mean :11.13
protein Mean : 65.80
vitamin A Mean : 839.6
vitamin C Mean : 78.93
calcium 3rd Qu.: 826.51
iron 3rd Qu.:13.71
protein 3rd Qu.: 80.77
vitamin A 3rd Qu.: 943.2
vitamin C 3rd Qu.:108.67
calcium Max. :2866.44
iron Max. :58.67
protein Max. :251.01
vitamin A Max. :34434.3
vitamin C Max. :433.34
cor_matrix <- cor(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], method = "pearson")
kable(cor_matrix, caption = "Pearson Correlation Matrix", format = "html", col.names = NA) %>%
  kable_styling()
Pearson Correlation Matrix
calcium iron protein vitamin A vitamin C
calcium 1.0000000 0.3954301 0.5001882 0.1578060 0.2292111
iron 0.3954301 1.0000000 0.6233662 0.2437905 0.3126009
protein 0.5001882 0.6233662 1.0000000 0.1467574 0.2120670
vitamin A 0.1578060 0.2437905 0.1467574 1.0000000 0.1835227
vitamin C 0.2292111 0.3126009 0.2120670 0.1835227 1.0000000
cov_matrix <- cov(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
kable(cov_matrix, caption = "The variance-covariance matrix is also copied into the matrix below.", format = "html", col.names = NA) %>%
  kable_styling()
The variance-covariance matrix is also copied into the matrix below.
calcium iron protein vitamin A vitamin C
calcium 157829.4439 940.08944 6075.8163 102411.127 6701.6160
iron 940.0894 35.81054 114.0580 2383.153 137.6720
protein 6075.8163 114.05803 934.8769 7330.052 477.1998
vitamin A 102411.1266 2383.15341 7330.0515 2668452.371 22063.2486
vitamin C 6701.6160 137.67199 477.1998 22063.249 5416.2641
sample_cor_matrix <- cor(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], method = "pearson", use = "complete.obs")
sample_cor_matrix <- round(sample_cor_matrix, 3)  # Limit to 3 decimal places
sample_cor_matrix <- data.frame(sample_cor_matrix)


kable(sample_cor_matrix, caption = "Sample Correlation Matrix", format = "html", col.names = NA) %>%
  kable_styling()
Sample Correlation Matrix
calcium iron protein vitamin.A vitamin.C
calcium 1.000 0.395 0.500 0.158 0.229
iron 0.395 1.000 0.623 0.244 0.313
protein 0.500 0.623 1.000 0.147 0.212
vitamin A 0.158 0.244 0.147 1.000 0.184
vitamin C 0.229 0.313 0.212 0.184 1.000

1.5

variances <- apply(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], 2, var)

total_variance <- sum(variances)

variance_table <- data.frame(
  Variable = names(variances),
  Variance = round(variances, 3)
)

table <- rbind(variance_table, c("Total", round(total_variance, 3)))
table
           Variable    Variance
calcium     calcium  157829.444
iron           iron      35.811
protein     protein     934.877
vitamin A vitamin A 2668452.371
vitamin C vitamin C    5416.264
6             Total 2832668.766

1.6

cov_matrix <- cov(nutrient)
generalized_variance <- det(cov_matrix)
cat("Sample Variance-Covariance Matrix:\n")
Sample Variance-Covariance Matrix:
print(round(cov_matrix, 5))
              calcium       iron   protein   vitamin A  vitamin C
calcium   157829.4439  940.08944 6075.8162  102411.127  6701.6160
iron         940.0894   35.81054  114.0580    2383.153   137.6720
protein     6075.8162  114.05803  934.8769    7330.052   477.1998
vitamin A 102411.1266 2383.15341 7330.0515 2668452.371 22063.2486
vitamin C   6701.6160  137.67199  477.1998   22063.249  5416.2641
cat("\nGeneralized Variance:\n")

Generalized Variance:
print(round(generalized_variance, 3))
[1] 2.831042e+19

Lesson 2: Linear Combinations of Random Variables

Section 2.2

colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")


calculate_summary <- function(x) {
  data.frame(
    Mean = sprintf("%.1f", mean(x, na.rm = TRUE))
  )
}

summary_stats <- lapply(nutrient, calculate_summary)

summary_table <- do.call(rbind, summary_stats)

kable(summary_table, caption = "Population Mean", format = "html", col.names = c("Variable", "Mean")) %>%
  kable_styling()
Population Mean
Variable Mean
calcium 624.0
iron 11.1
protein 65.8
vitamin A 839.6
vitamin C 78.9

Example 2.3

colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")

summary_table <- summary(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
summary_table <- as.data.frame(t(summary_table))  
kable(summary_table, caption = "Descriptive Statistics", format = "html", col.names = NA) %>%
  kable_styling()
Descriptive Statistics
Var1 Var2 Freq
calcium Min. : 7.44
iron Min. : 0.00
protein Min. : 0.00
vitamin A Min. : 0.0
vitamin C Min. : 0.00
calcium 1st Qu.: 326.54
iron 1st Qu.: 7.46
protein 1st Qu.: 45.52
vitamin A 1st Qu.: 276.3
vitamin C 1st Qu.: 24.94
calcium Median : 548.29
iron Median :10.03
protein Median : 61.07
vitamin A Median : 524.0
vitamin C Median : 53.59
calcium Mean : 624.05
iron Mean :11.13
protein Mean : 65.80
vitamin A Mean : 839.6
vitamin C Mean : 78.93
calcium 3rd Qu.: 826.51
iron 3rd Qu.:13.71
protein 3rd Qu.: 80.77
vitamin A 3rd Qu.: 943.2
vitamin C 3rd Qu.:108.67
calcium Max. :2866.44
iron Max. :58.67
protein Max. :251.01
vitamin A Max. :34434.3
vitamin C Max. :433.34
cor_matrix <- cor(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], method = "pearson")
kable(cor_matrix, caption = "Pearson Correlation Matrix", format = "html", col.names = NA) %>%
  kable_styling()
Pearson Correlation Matrix
calcium iron protein vitamin A vitamin C
calcium 1.0000000 0.3954301 0.5001882 0.1578060 0.2292111
iron 0.3954301 1.0000000 0.6233662 0.2437905 0.3126009
protein 0.5001882 0.6233662 1.0000000 0.1467574 0.2120670
vitamin A 0.1578060 0.2437905 0.1467574 1.0000000 0.1835227
vitamin C 0.2292111 0.3126009 0.2120670 0.1835227 1.0000000
cov_matrix <- cov(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
kable(cov_matrix, caption = "The variance-covariance matrix is also copied into the matrix below.", format = "html", col.names = NA) %>%
  kable_styling()
The variance-covariance matrix is also copied into the matrix below.
calcium iron protein vitamin A vitamin C
calcium 157829.4439 940.08944 6075.8163 102411.127 6701.6160
iron 940.0894 35.81054 114.0580 2383.153 137.6720
protein 6075.8163 114.05803 934.8769 7330.052 477.1998
vitamin A 102411.1266 2383.15341 7330.0515 2668452.371 22063.2486
vitamin C 6701.6160 137.67199 477.1998 22063.249 5416.2641

Lesson 3: Graphical Display of Multivariate Data

Section 3.1

options(width = 78)

nutrient$L_calciu <- log(nutrient$calcium)
nutrient$S_calciu <- sqrt(nutrient$calcium)
nutrient$S_S_calc <- nutrient$calcium^0.25
nutrient$L_iron <- log(nutrient$iron)
nutrient$S_S_iron <- nutrient$iron^0.25
nutrient$L_prot <- log(nutrient$protein)
nutrient$S_S_prot <- nutrient$protein^0.25
nutrient$S_S_a <- nutrient$`vitamin A`^0.25
nutrient$S_S_c <- nutrient$`vitamin C`^0.25

summary_stats <- summary(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C", "L_calciu", "S_calciu", "S_S_calc", "L_iron", "S_S_iron", "L_prot", "S_S_prot", "S_S_a", "S_S_c")])
print(summary_stats)
    calcium             iron          protein         vitamin A      
 Min.   :   7.44   Min.   : 0.00   Min.   :  0.00   Min.   :    0.0  
 1st Qu.: 326.54   1st Qu.: 7.46   1st Qu.: 45.52   1st Qu.:  276.3  
 Median : 548.29   Median :10.03   Median : 61.07   Median :  524.0  
 Mean   : 624.05   Mean   :11.13   Mean   : 65.80   Mean   :  839.6  
 3rd Qu.: 826.51   3rd Qu.:13.71   3rd Qu.: 80.77   3rd Qu.:  943.2  
 Max.   :2866.44   Max.   :58.67   Max.   :251.01   Max.   :34434.3  
   vitamin C         L_calciu        S_calciu         S_S_calc    
 Min.   :  0.00   Min.   :2.007   Min.   : 2.728   Min.   :1.652  
 1st Qu.: 24.94   1st Qu.:5.789   1st Qu.:18.070   1st Qu.:4.251  
 Median : 53.59   Median :6.307   Median :23.416   Median :4.839  
 Mean   : 78.93   Mean   :6.222   Mean   :23.766   Mean   :4.809  
 3rd Qu.:108.67   3rd Qu.:6.717   3rd Qu.:28.749   3rd Qu.:5.362  
 Max.   :433.34   Max.   :7.961   Max.   :53.539   Max.   :7.317  
     L_iron         S_S_iron         L_prot         S_S_prot    
 Min.   : -Inf   Min.   :0.000   Min.   : -Inf   Min.   :0.000  
 1st Qu.:2.010   1st Qu.:1.653   1st Qu.:3.818   1st Qu.:2.597  
 Median :2.306   Median :1.780   Median :4.112   Median :2.796  
 Mean   : -Inf   Mean   :1.781   Mean   : -Inf   Mean   :2.786  
 3rd Qu.:2.618   3rd Qu.:1.924   3rd Qu.:4.392   3rd Qu.:2.998  
 Max.   :4.072   Max.   :2.768   Max.   :5.526   Max.   :3.980  
     S_S_a            S_S_c      
 Min.   : 0.000   Min.   :0.000  
 1st Qu.: 4.077   1st Qu.:2.235  
 Median : 4.785   Median :2.706  
 Mean   : 4.858   Mean   :2.735  
 3rd Qu.: 5.542   3rd Qu.:3.229  
 Max.   :13.622   Max.   :4.563  
par(mfrow = c(1, 1), mar = c(5, 5, 4, 2) + 0.1)
hist(nutrient$calcium, main = "Histogram for Calcium", col = "skyblue", xlab = "Calcium", ylab = "Frequency", cex.lab = 1.2, cex.axis = 1.2)

diff_x <- nutrient$iron - 500
diff_y <- nutrient$calcium - 10

plot(nutrient$iron, nutrient$calcium, main = "Scatterplot of calcium vs iron", 
     xlab = "iron", ylab = "calcium", col = "darkblue", pch = 16)

diff_x <- nutrient$S_S_iron - 0.5
diff_y <- nutrient$S_S_calc - 1

plot(diff_x, diff_y, main = "Scatterplot of Difference in S_S_calc vs Difference in S_S_iron", 
     xlab = "S_S_iron", ylab = "S_S_calc", 
     col = "darkblue", pch = 16)

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

pairs(
  nutrient[, c("S_S_calc", "S_S_iron", "S_S_prot", "S_S_a", "S_S_c")],
  main = "Scatterplot Matrix for Nutrition Data",
  col= "darkblue")

Lesson 4: Multivariate Normal Distribution

Section 4.2

options(width = 78)

r <- 0.9

x1 <- seq(-4, 4, by = 0.1)
x2 <- seq(-4, 4, by = 0.1)
grid <- expand.grid(x1 = x1, x2 = x2)


grid$phi <- exp(-(grid$x1^2 - 2 * r * grid$x1 * grid$x2 + grid$x2^2) / (2 * (1 - r^2))) / (2 * pi * sqrt(1 - r^2))
matrix_data <- matrix(grid$phi, nrow = length(x1), ncol = length(x2), byrow = TRUE)

persp(
  x = x1, y = x2, z = matrix_data,
  main = "Bivariate Normal Density",
  xlab = "x1", ylab = "x2", zlab = "Density",
  theta = -20,
  col = "lightgreen"
)

#### Section 4.3

boards <- read.csv("D:/stat/boardstiffness.csv", skip = 1, header = TRUE, sep = ",")
pca_result <- princomp(boards)
mahal <- data.frame(dist2 = rowSums(pca_result$scores^2))
mahal
         dist2
1   893935.032
2   391392.446
3   278375.446
4    68326.618
5    75628.273
6    64510.308
7   110144.273
8  3796596.860
9   111029.722
10   68173.480
11   88038.722
12   20206.136
13    8433.101
14   27164.963
15  461749.480
16 1232904.756
17  621252.998
18   30311.446
19  101990.377
20  478555.791
21   87228.584
22  222617.825
23  154558.067
24  129191.273
25  285655.067
26  138544.549
27   94044.963
28 1048160.101
29  590113.411

Section 4.7

library(MASS)

wechsler <- read.csv("D:/stat/wechsler.csv")
print(wechsler)
   id info sim arith pict
1   1    7   5     9    8
2   2    8   8     5    6
3   3   16  18    11    9
4   4    8   3     7    9
5   5    6   3    13    9
6   6   11   8    10   10
7   7   12   7     9    8
8   8    8  11     9    3
9   9   14  12    11    4
10 10   13  13    13    6
11 11   13   9     9    9
12 12   13  10    15    7
13 13   14  11    12    8
14 14   15  11    11   10
15 15   13  10    15    9
16 16   10   5     8    6
17 17   10   3     7    7
18 18   17  13    13    7
19 19   10   6    10    7
20 20   10  10    15    8
21 21   14   7    11    5
22 22   16  11    12   11
23 23   10   7    14    6
24 24   10  10     9    6
25 25   10   7    10   10
26 26    7   6     5    9
27 27   15  12    10    6
28 28   17  15    15    8
29 29   16  13    16    9
30 30   13  10    17    8
31 31   13  10    17   10
32 32   19  12    16   10
33 33   19  15    17   11
34 34   13  10     7    8
35 35   15  11    12    8
36 36   16   9    11   11
37 37   14  13    14    9
pca_result <- princomp(wechsler[, -1], cor = FALSE)
summary(pca_result)
Importance of components:
                          Comp.1    Comp.2    Comp.3     Comp.4
Standard deviation     5.0533104 2.4670432 1.9558360 1.36381119
Proportion of Variance 0.6844717 0.1631387 0.1025341 0.04985539
Cumulative Proportion  0.6844717 0.8476105 0.9501446 1.00000000
variable_means <- colMeans(wechsler[, -1])
cat("Variable Means:\n")
Variable Means:
print(variable_means)
     info       sim     arith      pict 
12.567568  9.567568 11.486486  7.972973 
variable_means_df <- data.frame(Variable = names(variable_means), Mean = variable_means)
variable_means_table <- knitr::kable(variable_means_df, align = c("l", "r"), col.names = c("Variable", "Mean"), digits = 2)
kableExtra::kable_styling(variable_means_table, "striped", full_width = FALSE)
Variable Mean
info info 12.57
sim sim 9.57
arith arith 11.49
pict pict 7.97
cov_matrix <- cov(wechsler[, -1])
cov_table <- knitr::kable(cov_matrix, digits = 4)
kableExtra::kable_styling(cov_table, "striped", full_width = FALSE)
info sim arith pict
info 11.4745 9.0856 6.3829 2.0713
sim 9.0856 12.0856 5.9384 0.5435
arith 6.3829 5.9384 11.0901 1.7913
pict 2.0713 0.5435 1.7913 3.6937

Section 4.8

library(MASS)
mu <- c(0, 0)  # Mean vector
sigma <- matrix(c(1.0000, 0.5000, 0.5000, 2.0000), nrow = 2)  # Covariance matrix


lambda <- eigen(sigma)$values
e <- eigen(sigma)$vectors
theta <- seq(0, 2 * pi, length.out = 201)
z <- matrix(c(cos(theta), sin(theta)), ncol = 2)
d <- diag(sqrt(lambda))
ellipse_coords <- t(z %*% t(e) %*% d * sqrt(5.99))
x <- ellipse_coords[, 1] + mu[1]
y <- ellipse_coords[, 2] + mu[2]

plot(x, y, type = "l", xlab = "X", ylab = "Y", main = "95% prediction ellipse", xlim = c(-5, 5), ylim = c(-5, 5))

Lesson 6: Multivariate Conditional Distribution and Partial Correlation

Section 6.2

options(width = 78)

title <- "Partial Correlations - Wechsler Data"
wechsler <- read.csv("D:/stat/wechsler.csv", header = TRUE)
model <- lm(cbind(info, sim) ~ arith + pict, data = wechsler)
anova_table <- anova(model)

print(anova_table)
Analysis of Variance Table

            Df  Pillai approx F num Df den Df    Pr(>F)    
(Intercept)  1 0.95595   358.05      2     33 < 2.2e-16 ***
arith        1 0.33836     8.44      2     33  0.001097 ** 
pict         1 0.12496     2.36      2     33  0.110531    
Residuals   34                                             
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Lesson 7: Inferences Regarding Multivariate Population Mean

Section 7.1.4

library(MASS)
library(dplyr)


x <- as.matrix(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])


hotel <- function(x, mu0) {
  one <- rep(1, nrow(x))
  ident <- diag(nrow(x))
  ybar <- t(x) %*% one / nrow(x)
  s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1)
  
  t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
  f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
  df1 <- ncol(x)
  df2 <- nrow(x) - ncol(x)
  p <- 1 - pf(f, df1, df2)
  
  result <- data.frame(
    T2 = t2,
    F = f,
    df1 = df1,
    df2 = df2,
    p_value = p
  )
  
  return(result)
}


mu0 <- c(1000, 15, 60, 800, 75)
result <- hotel(x, mu0)


print(result)
        T2        F df1 df2 p_value
1 1758.541 349.7968   5 732       0
arranged_result <- arrange(result, T2)
print(arranged_result)
        T2        F df1 df2 p_value
1 1758.541 349.7968   5 732       0

Section 7.1.9

library(MASS)

hotel <- function(x) {
  mu0 <- rep(0, ncol(x))
  one <- rep(1, nrow(x))
  ident <- diag(nrow(x))
  ybar <- t(x) %*% one / nrow(x)
  s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1)
  
  cat("mu0:\n")
  print(mu0)
  cat("\nSample Means (ybar):\n")
  print(ybar)
  cat("\nSample Covariance Matrix (S):\n")
  print(s)
  
  t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
  f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
  df1 <- ncol(x)
  df2 <- nrow(x) - ncol(x)
  p <- 1 - pf(f, df1, df2)
  
  # Print test statistics
  cat("\nHotelling T2 Statistic:\n")
  print(t2)
  
  cat("\nF-Statistic:\n")
  print(f)
  
  cat("\nDegrees of Freedom:\n")
  cat("df1:", df1, "\n")
  cat("df2:", df2, "\n")
  
  cat("\nP-Value:\n")
  print(p)
}

spouse <- read.csv("D:/stat/spouse.csv", header = TRUE)
d1 <- spouse$h1 - spouse$w1
d2 <- spouse$h2 - spouse$w2
d3 <- spouse$h3 - spouse$w3
d4 <- spouse$h4 - spouse$w4
diffs <- data.frame(d1, d2, d3, d4)

# Extract the columns d1, d2, d3, and d4 into a matrix x
x <- as.matrix(diffs)

# Apply the Hotelling function to the data
hotel(x)
mu0:
[1] 0 0 0 0

Sample Means (ybar):
          [,1]
d1  0.06666667
d2 -0.13333333
d3 -0.30000000
d4 -0.13333333

Sample Covariance Matrix (S):
            d1          d2         d3          d4
d1  0.82298851  0.07816092 -0.0137931 -0.05977011
d2  0.07816092  0.80919540 -0.2137931 -0.15632184
d3 -0.01379310 -0.21379310  0.5620690  0.51034483
d4 -0.05977011 -0.15632184  0.5103448  0.60229885

Hotelling T2 Statistic:
         [,1]
[1,] 13.12784

F-Statistic:
         [,1]
[1,] 2.942447

Degrees of Freedom:
df1: 4 
df2: 26 

P-Value:
           [,1]
[1,] 0.03936914

Error in (ident - one %% t(one)/nrow(x)) %% x : requires numeric/complex matrix/vector arguments

Section 7.1.10

hotel <- function(x) {
  mu0 <- rep(0, ncol(x))
  one <- rep(1, nrow(x))
  ident <- diag(nrow(x))
  ybar <- t(x) %*% one / nrow(x)
  s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1)

  means <- colMeans(x)
  variances <- apply(x, 2, var)

  print(mu0)
  print(ybar)
  print(s)
  
  t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
  f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
  df1 <- ncol(x)
  df2 <- nrow(x) - ncol(x)
  p <- 1 - pf(f, df1, df2)

  print(t2)
  print(f)
  print(df1)
  print(df2)
  print(p)

  return(list(means = means, variances = variances, t2 = t2, f = f, df1 = df1, df2 = df2, p = p))
}

d1 <- spouse$h1 - spouse$w1
d2 <- spouse$h2 - spouse$w2
d3 <- spouse$h3 - spouse$w3
d4 <- spouse$h4 - spouse$w4
diffs <- data.frame(d1, d2, d3, d4)

x <- as.matrix(diffs)
result <- hotel(x)
[1] 0 0 0 0
          [,1]
d1  0.06666667
d2 -0.13333333
d3 -0.30000000
d4 -0.13333333
            d1          d2         d3          d4
d1  0.82298851  0.07816092 -0.0137931 -0.05977011
d2  0.07816092  0.80919540 -0.2137931 -0.15632184
d3 -0.01379310 -0.21379310  0.5620690  0.51034483
d4 -0.05977011 -0.15632184  0.5103448  0.60229885
         [,1]
[1,] 13.12784
         [,1]
[1,] 2.942447
[1] 4
[1] 26
           [,1]
[1,] 0.03936914
print("Means:")
[1] "Means:"
print(result$means)
         d1          d2          d3          d4 
 0.06666667 -0.13333333 -0.30000000 -0.13333333 
print("Variances:")
[1] "Variances:"
print(result$variances)
       d1        d2        d3        d4 
0.8229885 0.8091954 0.5620690 0.6022989 

Section 7.1.11

spouse$d1 <- spouse$h1 - spouse$w2
spouse$d2 <- spouse$h2 - spouse$w1
spouse$d3 <- spouse$h3 - spouse$w4
spouse$d4 <- spouse$h4 - spouse$w3


print(spouse)
   h1 h2 h3 h4 w1 w2 w3 w4 d1 d2 d3 d4
1   2  3  5  5  4  4  5  5 -2 -1  0  0
2   5  5  4  4  4  5  5  5  0  1 -1 -1
3   4  5  5  5  4  4  5  5  0  1  0  0
4   4  3  4  4  4  5  5  5 -1 -1 -1 -1
5   3  3  5  5  4  4  5  5 -1 -1  0  0
6   3  3  4  5  3  3  4  4  0  0  0  1
7   3  4  4  4  4  3  5  4  0  0  0 -1
8   4  4  5  5  3  4  5  5  0  1  0  0
9   4  5  5  5  4  4  5  4  0  1  1  0
10  4  4  3  3  3  4  4  4  0  1 -1 -1
11  4  4  5  5  4  5  5  5 -1  0  0  0
12  5  5  4  4  5  5  5  5  0  0 -1 -1
13  4  4  4  4  4  4  5  5  0  0 -1 -1
14  4  3  5  5  4  4  4  4  0 -1  1  1
15  4  4  5  5  4  4  5  5  0  0  0  0
16  3  3  4  5  3  4  4  4 -1  0  0  1
17  4  5  4  4  5  5  5  5 -1  0 -1 -1
18  5  5  5  5  4  5  4  4  0  1  1  1
19  5  5  4  4  3  4  4  4  1  2  0  0
20  4  4  4  4  5  3  4  4  1 -1  0  0
21  4  4  4  4  5  3  4  4  1 -1  0  0
22  4  4  4  4  4  5  4  4 -1  0  0  0
23  3  4  5  5  2  5  5  5 -2  2  0  0
24  5  3  5  5  3  4  5  5  1  0  0  0
25  5  5  3  3  4  3  5  5  2  1 -2 -2
26  3  3  4  4  4  4  4  4 -1 -1  0  0
27  4  4  4  4  4  4  5  5  0  0 -1 -1
28  3  3  5  5  3  4  4  4 -1  0  1  1
29  4  4  3  3  4  4  5  4  0  0 -1 -2
30  4  4  5  5  4  4  5  5  0  0  0  0
hotel <- function(x) {
  mu0 <- c(0, 0, 0, 0)
  one <- rep(1, nrow(x))
  ident <- diag(nrow(x))
  
  ybar <- t(x) %*% one / nrow(x)
  s <- t(x) %*% (ident - one %*% t(one) / nrow(x)) %*% x / (nrow(x) - 1)
  
  print(mu0)
  print(ybar)
  print(s)
  
  t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
  f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
  df1 <- ncol(x)
  df2 <- nrow(x) - ncol(x)
  p_value <- 1 - pf(f, df1, df2)
  
  print(t2)
  print(f)
  print(df1)
  print(df2)
  print(p_value)
}

x <- as.matrix(spouse[, c("d1", "d2", "d3", "d4")])
hotel(x)
[1] 0 0 0 0
         [,1]
d1 -0.2000000
d2  0.1333333
d3 -0.2000000
d4 -0.2333333
           d1          d2          d3          d4
d1  0.7862069  0.13103448 -0.14482759 -0.18620690
d2  0.1310345  0.74022989 -0.04137931 -0.07126437
d3 -0.1448276 -0.04137931  0.51034483  0.50344828
d4 -0.1862069 -0.07126437  0.50344828  0.66781609
         [,1]
[1,] 6.426621
        [,1]
[1,] 1.44045
[1] 4
[1] 26
          [,1]
[1,] 0.2488886

Section 7.1.15

library(MASS)
hotel2 <- function(x1, x2) {
  n1 <- nrow(x1)
  n2 <- nrow(x2)
  k <- ncol(x1)
  one1 <- matrix(1, n1, 1)
  one2 <- matrix(1, n2, 1)
  ident1 <- diag(n1)
  ident2 <- diag(n2)
  
  ybar1 <- t(x1) %*% one1 / n1
  s1 <- t(x1) %*% ((ident1 - one1 %*% t(one1) / n1) %*% x1) / (n1 - 1.0)
  
  
  print(round(n1, 3))
  print(round(ybar1, 3))
  print(round(s1, 3))
  
  ybar2 <- t(x2) %*% one2 / n2
  s2 <- t(x2) %*% ((ident2 - one2 %*% t(one2) / n2) %*% x2) / (n2 - 1.0)
  
  print(round(n2, 3))
  print(round(ybar2, 3))
  print(round(s2, 3))
  
  spool <- ((n1 - 1.0) * s1 + (n2 - 1.0) * s2) / (n1 + n2 - 2.0)
  
  print(round(spool, 3))
  
  t2 <- t(ybar1 - ybar2) %*% solve(spool * (1/n1 + 1/n2)) %*% (ybar1 - ybar2)
  f <- (n1 + n2 - k - 1) * t2 / k / (n1 + n2 - 2)
  df1 <- k
  df2 <- n1 + n2 - k - 1
  p <- 1 - pf(f, df1, df2)
  

  print(round(t2, 3))
  print(round(f, 3))
  print(round(df1, 3))
  print(round(df2, 3))
  print(round(p, 3))
}

swiss <- read.csv("D:/stat/swiss3.csv", header = TRUE)
x1 <- subset(swiss, type == "real")[, c("length", "left", "right", "bottom", "top", "diag")]
x2 <- subset(swiss, type == "fake")[, c("length", "left", "right", "bottom", "top", "diag")]

hotel2(as.matrix(x1), as.matrix(x2))
[1] 100
          [,1]
length 214.969
left   129.943
right  129.720
bottom   8.305
top     10.168
diag   141.517
       length   left  right bottom    top   diag
length  0.150  0.058  0.057  0.057  0.014  0.005
left    0.058  0.133  0.086  0.057  0.049 -0.043
right   0.057  0.086  0.126  0.058  0.031 -0.024
bottom  0.057  0.057  0.058  0.413 -0.263  0.000
top     0.014  0.049  0.031 -0.263  0.421 -0.075
diag    0.005 -0.043 -0.024  0.000 -0.075  0.200
[1] 100
          [,1]
length 214.823
left   130.300
right  130.193
bottom  10.530
top     11.133
diag   139.450
       length   left  right bottom    top   diag
length  0.124  0.032  0.024 -0.101  0.019  0.012
left    0.032  0.065  0.047 -0.024 -0.012 -0.005
right   0.024  0.047  0.089 -0.019  0.000  0.034
bottom -0.101 -0.024 -0.019  1.281 -0.490  0.238
top     0.019 -0.012  0.000 -0.490  0.404 -0.022
diag    0.012 -0.005  0.034  0.238 -0.022  0.311
       length   left right bottom    top   diag
length  0.137  0.045 0.041 -0.022  0.017  0.009
left    0.045  0.099 0.066  0.016  0.019 -0.024
right   0.041  0.066 0.108  0.020  0.015  0.005
bottom -0.022  0.016 0.020  0.847 -0.377  0.119
top     0.017  0.019 0.015 -0.377  0.413 -0.049
diag    0.009 -0.024 0.005  0.119 -0.049  0.256
         [,1]
[1,] 2412.451
        [,1]
[1,] 391.922
[1] 6
[1] 193
     [,1]
[1,]    0

Section 7.2.1

library(MASS)

hotel <- function(x) {
  mu0 <- rep(0, ncol(x))
  one <- rep(1, nrow(x))
  ident <- diag(nrow(x))
  ybar <- t(x) %*% one / nrow(x)
  s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1.0)

  print(mu0)
  print(ybar)
  print(s)
  
  t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
  f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
  df1 <- ncol(x)
  df2 <- nrow(x) - ncol(x)
  p <- 1 - pf(f, df1, df2)
  
  print(t2)
  print(f)
  print(df1)
  print(df2)
  print(p)
}

nutrient <- read.csv("D:/Stat/nutrient1.csv", header = TRUE)

nutrient$calcium <- nutrient$calcium / 1000
nutrient$iron <- nutrient$iron / 15
nutrient$protein <- nutrient$protein / 60
nutrient$a <- nutrient$vitamin.A / 800
nutrient$c <- nutrient$vitamin.C / 75

nutrient$diff1 <- nutrient$iron - nutrient$calcium
nutrient$diff2 <- nutrient$protein - nutrient$iron
nutrient$diff3 <- nutrient$a - nutrient$protein
nutrient$diff4 <- nutrient$c - nutrient$a

x <- as.matrix(nutrient[, c("diff1", "diff2", "diff3", "diff4")])

hotel(x)
[1] 0 0 0 0
              [,1]
diff1  0.117944052
diff2  0.354730710
diff3 -0.047179834
diff4  0.002835103
            diff1       diff2       diff3       diff4
diff1  0.19164212 -0.07101776  0.04511467 -0.03756199
diff2 -0.07101776  0.16538367 -0.17884359  0.02955601
diff3  0.04511467 -0.17884359  4.12372604 -3.75507101
diff4 -0.03756199  0.02955601 -3.75507101  4.39690660
         [,1]
[1,] 1030.795
         [,1]
[1,] 256.6484
[1] 4
[1] 733
     [,1]
[1,]    0
library(MASS)

hotel <- function(x) {
  mu0 <- rep(0, ncol(x))
  one <- rep(1, nrow(x))
  ident <- diag(nrow(x))
  ybar <- t(x) %*% one / nrow(x)
  s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1.0)
  
  t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
  f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
  df1 <- ncol(x)
  df2 <- nrow(x) - ncol(x)
  p <- 1 - pf(f, df1, df2)
  
  results <- data.frame(
    mu0 = mu0,
    ybar = ybar)
  
  cat("S matrix:\n")
  print(s)
  
  cat("Test Statistics:\n")
  print(data.frame(
    t2 = t2,
    f = f,
    df1 = df1,
    df2 = df2,
    p = p
  ))
  
  return(results)
}

nutrient <- read.csv("D:/Stat/nutrient1.csv", header = TRUE)

nutrient$calcium <- nutrient$calcium / 1000
nutrient$iron <- nutrient$iron / 15
nutrient$protein <- nutrient$protein / 60
nutrient$a <- nutrient$vitamin.A / 800
nutrient$c <- nutrient$vitamin.C / 75

nutrient$diff1 <- nutrient$iron - nutrient$calcium
nutrient$diff2 <- nutrient$protein - nutrient$iron
nutrient$diff3 <- nutrient$a - nutrient$protein
nutrient$diff4 <- nutrient$c - nutrient$a

x <- as.matrix(nutrient[, c("diff1", "diff2", "diff3", "diff4")])

results <- hotel(x)
S matrix:
            diff1       diff2       diff3       diff4
diff1  0.19164212 -0.07101776  0.04511467 -0.03756199
diff2 -0.07101776  0.16538367 -0.17884359  0.02955601
diff3  0.04511467 -0.17884359  4.12372604 -3.75507101
diff4 -0.03756199  0.02955601 -3.75507101  4.39690660
Test Statistics:
        t2        f df1 df2 p
1 1030.795 256.6484   4 733 0
print(results)
      mu0         ybar
diff1   0  0.117944052
diff2   0  0.354730710
diff3   0 -0.047179834
diff4   0  0.002835103

Section 7.2.3

swiss <- read.csv("D:/stat/swiss3.csv")

real <- subset(swiss, type == "real")
fake <- subset(swiss, type == "fake")

compute_stats <- function(data) {
  data_summary <- lapply(data, function(x) c(length(x), mean(x), var(x)))
  data_summary <- do.call(rbind, data_summary)
  colnames(data_summary) <- c("n", "mean", "var")
  return(data_summary)}

pop1 <- compute_stats(real[, -1])
pop2 <- compute_stats(fake[, -1])

combine <- merge(pop1, pop2, by = "row.names", suffixes = c(".real", ".fake"))
rownames(combine) <- combine$Row.names
combine$Row.names <- NULL

p <- 6
df1 <- ncol(pop1) - 1
df2 <- sum(pop1[, 1]) + sum(pop2[, 1]) - ncol(pop1) - 1

combine$F <- qf(0.95, df1, df2)
combine$T <- qt(1 - 0.025/6, df2)
combine$sp <- ((combine$n.real - 1) * combine$var.real + (combine$n.fake - 1) * combine$var.fake) / (combine$n.real + combine$n.fake - 2)
combine$losim <- (combine$mean.real - combine$mean.fake) - sqrt(p * (combine$n.real + combine$n.fake - 2) * combine$F * (1/combine$n.real + 1/combine$n.fake) * combine$sp / (combine$n.real + combine$n.fake - p - 1))
combine$upsim <- (combine$mean.real - combine$mean.fake) + sqrt(p * (combine$n.real + combine$n.fake - 2) * combine$F * (1/combine$n.real + 1/combine$n.fake) * combine$sp / (combine$n.real + combine$n.fake - p - 1))
combine$lobon <- (combine$mean.real - combine$mean.fake) - combine$T * sqrt((1/combine$n.real + 1/combine$n.fake) * combine$sp)
combine$upbon <- (combine$mean.real - combine$mean.fake) + combine$T * sqrt((1/combine$n.real + 1/combine$n.fake) * combine$sp)

print(combine)
       n.real mean.real  var.real n.fake mean.fake   var.fake        F
bottom    100     8.305 0.4132071    100    10.530 1.28131313 3.003249
diag      100   141.517 0.1998091    100   139.450 0.31121212 3.003249
left      100   129.943 0.1325768    100   130.300 0.06505051 3.003249
length    100   214.969 0.1502414    100   214.823 0.12401111 3.003249
right     100   129.720 0.1262626    100   130.193 0.08894040 3.003249
top       100    10.168 0.4211879    100    11.133 0.40445556 3.003249
              T         sp       losim      upsim        lobon      upbon
bottom 2.642654 0.84726010 -2.78469133 -1.6653087 -2.569004162 -1.8809958
diag   2.642654 0.25551061  1.75964190  2.3743581  1.878087896  2.2559121
left   2.642654 0.09881364 -0.54813871 -0.1658613 -0.474479952 -0.2395200
length 2.642654 0.13712626 -0.07916481  0.3711648  0.007606517  0.2843935
right  2.642654 0.10760152 -0.67245705 -0.2735429 -0.595592672 -0.3504073
top    2.642654 0.41282172 -1.35568026 -0.5743197 -1.205124563 -0.7248754

Section 7.2.6

library(biotools)
Warning: package 'biotools' was built under R version 4.3.3
result <- boxM(data = swiss[, -1], grouping = swiss$type)
result

    Box's M-test for Homogeneity of Covariance Matrices

data:  swiss[, -1]
Chi-Sq (approx.) = 121.9, df = 21, p-value = 3.198e-16

LESSON 8

Section 8.4

           ral          rfe        rmg          rca         rna
1   1.83571429  0.627857143 -0.5264286 -0.052142857  0.25928571
2   1.23571429  0.707857143 -1.3964286 -0.082142857 -0.08071429
3   2.03571429  0.717857143 -0.9464286 -0.072142857 -0.05071429
4  -1.06428571 -0.002142857  0.8135714 -0.042142857 -0.11071429
5   1.23571429  0.687857143  0.5135714 -0.002142857 -0.05071429
6  -1.66428571 -0.112142857 -1.3564286 -0.032142857 -0.03071429
7  -2.46428571 -2.112142857 -0.5664286 -0.002142857 -0.07071429
8  -0.96428571 -0.592142857  1.0835714 -0.022142857 -0.09071429
9  -1.46428571 -0.882142857 -0.3064286  0.087857143  0.04928571
10  0.83571429  0.547857143  2.4035714  0.077857143 -0.05071429
11 -0.16428571 -0.242142857  0.8635714  0.017857143  0.28928571
12  0.53571429  0.267857143  0.6835714  0.107857143 -0.01071429
13  0.13571429  0.317857143 -0.3764286 -0.002142857 -0.03071429
14 -0.06428571  0.067857143 -0.8864286  0.017857143 -0.02071429
15  0.10000000  0.025000000  0.0850000  0.005000000 -0.01000000
16 -0.10000000 -0.025000000 -0.0850000 -0.005000000  0.01000000
17  0.12000000 -0.432000000 -0.0040000  0.004000000 -0.02400000
18 -2.38000000  0.678000000 -0.0440000 -0.016000000 -0.01400000
19 -0.18000000 -0.212000000 -0.0040000 -0.016000000  0.00600000
20 -0.18000000  0.168000000  0.0060000 -0.016000000 -0.01400000
21  2.62000000 -0.202000000  0.0460000  0.044000000  0.04600000
22  0.38000000 -0.392000000 -0.0460000  0.008000000  0.01200000
23  0.98000000 -0.372000000  0.0640000  0.008000000  0.00200000
24 -0.62000000 -0.592000000 -0.0760000 -0.042000000  0.00200000
25 -2.52000000  1.228000000  0.0640000 -0.022000000  0.00200000
26  1.78000000  0.128000000 -0.0060000  0.048000000 -0.01800000
for(variable in c("al", "fe", "mg", "ca", "na")) {

  hist(pottery[[variable]], 
       main = paste("Histogram of", variable), 
       xlab = variable, 
       col = "yellow", 
       border = "black")
}

p <- 5  
g <- length(unique(pottery$site))  
df <- 0.5 * p * (p + 1) * (g - 1)

cov_matrices <- list()
for (site in unique(pottery$site)) {
  cov_matrices[[site]] <- cov(subset(pottery, site == site)[, c("al", "fe", "mg", "ca", "na")])
}
l_statistic <- 0
for (i in 1:(g - 1)) {
  for (j in (i + 1):g) {
    l_statistic <- l_statistic + (sum(diag((cov_matrices[[i]] + cov_matrices[[j]]) %*% solve(cov_matrices[[i]] + cov_matrices[[j]]))) - p)
  }
}
l_statistic <- l_statistic * 2


alpha <- 0.05  
critical_value <- qchisq(1 - alpha, df)

cat("Test statistic:", l_statistic, "\n")
Test statistic: -2.131628e-14 
cat("Degrees of freedom:", df, "\n")
Degrees of freedom: 45 
cat("Critical value (", 1 - alpha, "):", critical_value, "\n")
Critical value ( 0.95 ): 61.65623 
if (l_statistic > critical_value) {
  cat("Reject the null hypothesis: Covariance matrices are not homogeneous.\n")
} else {
  cat("Fail to reject the null hypothesis: Covariance matrices are homogeneous.\n")
}
Fail to reject the null hypothesis: Covariance matrices are homogeneous.

Section 8.5

outcome <- cbind(pottery$al, pottery$fe, pottery$mg, pottery$ca, pottery$na)
model<- manova(outcome ~ site, data = pottery)
summary(model, intercept = TRUE, test = "Wilks")
            Df     Wilks approx F num Df den Df    Pr(>F)    
(Intercept)  1 0.0068354   523.07      5 18.000 < 2.2e-16 ***
site         3 0.0123009    13.09     15 50.091  1.84e-12 ***
Residuals   22                                               
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(pottery)
# A tibble: 26 × 6
   site     al    fe    mg    ca    na
   <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 L      14.4  7     4.3   0.15  0.51
 2 L      13.8  7.08  3.43  0.12  0.17
 3 L      14.6  7.09  3.88  0.13  0.2 
 4 L      11.5  6.37  5.64  0.16  0.14
 5 L      13.8  7.06  5.34  0.2   0.2 
 6 L      10.9  6.26  3.47  0.17  0.22
 7 L      10.1  4.26  4.26  0.2   0.18
 8 L      11.6  5.78  5.91  0.18  0.16
 9 L      11.1  5.49  4.52  0.29  0.3 
10 L      13.4  6.92  7.23  0.28  0.2 
# ℹ 16 more rows
model <- manova(cbind(al, ca, fe, mg, na) ~ site, data = pottery)
summary(model)
          Df Pillai approx F num Df den Df    Pr(>F)    
site       3 1.5539   4.2984     15     60 2.413e-05 ***
Residuals 22                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(reshape2)
Warning: package 'reshape2' was built under R version 4.3.3
pottery_melted <- melt(pottery, id.vars = "site", variable.name = "chemical", value.name = "amount")


mean_data <- aggregate(amount ~ site + chemical, data = pottery_melted, FUN = mean)

# Plot
ggplot(mean_data, aes(x = chemical, y = amount, group = site, color = site)) +
  geom_line() +
  geom_point() +
  labs(title = "Profile Plot for Pottery Data", x = "Chemical", y = "Mean Amount") +
  theme_minimal()

manova_result <- manova(cbind(al, fe, mg, ca, na) ~ site, data = pottery)
summary(manova_result)
          Df Pillai approx F num Df den Df    Pr(>F)    
site       3 1.5539   4.2984     15     60 2.413e-05 ***
Residuals 22                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Section 8.7

library(car)

pottery$site <- factor(pottery$site)

site_contrasts <- matrix(c(8, -2, 8, -14,    # C+L-A-I
                            1, 0, -1, 0,      # A vs I
                            0, 1, 0, -1),     # C vs L
                          nrow = 3, byrow = TRUE)

rownames(site_contrasts) <- c("C+L-A-I", "A vs I", "C vs L")
colnames(site_contrasts) <- c("C", "L", "A", "I")

print(site_contrasts)
        C  L  A   I
C+L-A-I 8 -2  8 -14
A vs I  1  0 -1   0
C vs L  0  1  0  -1

Section 8.11

library(readr)
library(car)
library(emmeans)
Warning: package 'emmeans' was built under R version 4.3.3
rice <- read_csv("D:/stat/rice.csv")

rice$block <- factor(rice$block)
rice$variety <- factor(rice$variety)
manova_result <- manova(cbind(height, tillers) ~ block + variety, data = rice)
summary(manova_result)
          Df  Pillai approx F num Df den Df  Pr(>F)  
block      4 0.23044  0.39067      8     24 0.91486  
variety    3 0.76192  2.46162      6     24 0.05349 .
Residuals 12                                         
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_result <- emmeans(manova_result, "variety")
print(lsmeans_result)
 variety emmean   SE df lower.CL upper.CL
 a         32.1 0.78 12     30.4     33.8
 b         28.1 0.78 12     26.4     29.8
 c         30.6 0.78 12     28.9     32.3
 d         28.8 0.78 12     27.1     30.5

Results are averaged over the levels of: block, rep.meas 
Confidence level used: 0.95 
print(summary.aov(manova_result))
 Response height :
            Df Sum Sq Mean Sq F value Pr(>F)  
block        4   31.7   7.925  0.6038 0.6673  
variety      3  165.0  55.000  4.1905 0.0303 *
Residuals   12  157.5  13.125                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response tillers :
            Df Sum Sq Mean Sq F value Pr(>F)
block        4    1.0  0.2500  0.1648 0.9522
variety      3    5.8  1.9333  1.2747 0.3273
Residuals   12   18.2  1.5167               
summary(aov(height ~ block + variety, data = rice))
            Df Sum Sq Mean Sq F value Pr(>F)  
block        4   31.7    7.93   0.604 0.6673  
variety      3  165.0   55.00   4.190 0.0303 *
Residuals   12  157.5   13.12                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(tillers ~ block + variety, data = rice))
            Df Sum Sq Mean Sq F value Pr(>F)
block        4    1.0   0.250   0.165  0.952
variety      3    5.8   1.933   1.275  0.327
Residuals   12   18.2   1.517