Lesson 1: Measures of Central Tendency, Dispersion and
Association
Warning: package 'readxl' was built under R version 4.3.3
Warning: package 'dplyr' was built under R version 4.3.3
# A tibble: 737 × 5
calcium iron protein `vitamin A` `vitamin C`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 522. 10.2 42.6 349. 54.1
2 343. 4.11 67.8 267. 24.8
3 858. 13.7 59.9 668. 155.
4 576. 13.2 42.2 792. 225.
5 1928. 18.9 111. 740. 81.0
6 608. 6.8 45.8 166. 13.0
7 1046. 18.4 116. 1120. 159.
8 181. 12.8 64.2 78.5 26.9
9 327. 8.69 49.2 568. 50.0
10 383. 13.7 104. 1029. 8.40
# ℹ 727 more rows
1.4
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")
calculate_summary <- function(x) {
data.frame(
N = sum(!is.na(x)),
Mean = mean(x, na.rm = TRUE),
SD = sd(x, na.rm = TRUE),
Minimum = min(x, na.rm = TRUE),
Maximum = max(x, na.rm = TRUE)
)
}
summary_stats <- lapply(nutrient, calculate_summary)
summary_table <- do.call(rbind, summary_stats)
kable(summary_table, caption = "Summary Statistics", format = "html", col.names = c("Variable", "N", "Mean", "SD", "Minimum", "Maximum")) %>%
kable_styling()
Summary Statistics
|
Variable
|
N
|
Mean
|
SD
|
Minimum
|
Maximum
|
|
calcium
|
737
|
624.04925
|
397.277540
|
7.44
|
2866.440
|
|
iron
|
737
|
11.12990
|
5.984191
|
0.00
|
58.668
|
|
protein
|
737
|
65.80344
|
30.575756
|
0.00
|
251.012
|
|
vitamin A
|
737
|
839.63535
|
1633.539828
|
0.00
|
34434.270
|
|
vitamin C
|
737
|
78.92845
|
73.595272
|
0.00
|
433.339
|
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")
calculate_summary <- function(x) {
data.frame(
Mean = sprintf("%.1f", mean(x, na.rm = TRUE)),
Standard_Deviation = sprintf("%.1f", sd(x, na.rm = TRUE))
)
}
summary_stats <- lapply(nutrient, calculate_summary)
summary_table <- do.call(rbind, summary_stats)
kable(summary_table, caption = "Summary Statistics", format = "html", col.names = c("Variable", "Mean", "Standard Deviation")) %>%
kable_styling()
Summary Statistics
|
Variable
|
Mean
|
Standard Deviation
|
|
calcium
|
624.0
|
397.3
|
|
iron
|
11.1
|
6.0
|
|
protein
|
65.8
|
30.6
|
|
vitamin A
|
839.6
|
1633.5
|
|
vitamin C
|
78.9
|
73.6
|
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")
summary_table <- summary(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
summary_table <- as.data.frame(t(summary_table))
kable(summary_table, caption = "Descriptive Statistics", format = "html", col.names = NA) %>%
kable_styling()
Descriptive Statistics
|
Var1
|
Var2
|
Freq
|
|
calcium
|
|
Min. : 7.44
|
|
iron
|
|
Min. : 0.00
|
|
protein
|
|
Min. : 0.00
|
|
vitamin A
|
|
Min. : 0.0
|
|
vitamin C
|
|
Min. : 0.00
|
|
calcium
|
|
1st Qu.: 326.54
|
|
iron
|
|
1st Qu.: 7.46
|
|
protein
|
|
1st Qu.: 45.52
|
|
vitamin A
|
|
1st Qu.: 276.3
|
|
vitamin C
|
|
1st Qu.: 24.94
|
|
calcium
|
|
Median : 548.29
|
|
iron
|
|
Median :10.03
|
|
protein
|
|
Median : 61.07
|
|
vitamin A
|
|
Median : 524.0
|
|
vitamin C
|
|
Median : 53.59
|
|
calcium
|
|
Mean : 624.05
|
|
iron
|
|
Mean :11.13
|
|
protein
|
|
Mean : 65.80
|
|
vitamin A
|
|
Mean : 839.6
|
|
vitamin C
|
|
Mean : 78.93
|
|
calcium
|
|
3rd Qu.: 826.51
|
|
iron
|
|
3rd Qu.:13.71
|
|
protein
|
|
3rd Qu.: 80.77
|
|
vitamin A
|
|
3rd Qu.: 943.2
|
|
vitamin C
|
|
3rd Qu.:108.67
|
|
calcium
|
|
Max. :2866.44
|
|
iron
|
|
Max. :58.67
|
|
protein
|
|
Max. :251.01
|
|
vitamin A
|
|
Max. :34434.3
|
|
vitamin C
|
|
Max. :433.34
|
cor_matrix <- cor(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], method = "pearson")
kable(cor_matrix, caption = "Pearson Correlation Matrix", format = "html", col.names = NA) %>%
kable_styling()
Pearson Correlation Matrix
|
|
calcium
|
iron
|
protein
|
vitamin A
|
vitamin C
|
|
calcium
|
1.0000000
|
0.3954301
|
0.5001882
|
0.1578060
|
0.2292111
|
|
iron
|
0.3954301
|
1.0000000
|
0.6233662
|
0.2437905
|
0.3126009
|
|
protein
|
0.5001882
|
0.6233662
|
1.0000000
|
0.1467574
|
0.2120670
|
|
vitamin A
|
0.1578060
|
0.2437905
|
0.1467574
|
1.0000000
|
0.1835227
|
|
vitamin C
|
0.2292111
|
0.3126009
|
0.2120670
|
0.1835227
|
1.0000000
|
cov_matrix <- cov(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
kable(cov_matrix, caption = "The variance-covariance matrix is also copied into the matrix below.", format = "html", col.names = NA) %>%
kable_styling()
The variance-covariance matrix is also copied into the matrix below.
|
|
calcium
|
iron
|
protein
|
vitamin A
|
vitamin C
|
|
calcium
|
157829.4439
|
940.08944
|
6075.8163
|
102411.127
|
6701.6160
|
|
iron
|
940.0894
|
35.81054
|
114.0580
|
2383.153
|
137.6720
|
|
protein
|
6075.8163
|
114.05803
|
934.8769
|
7330.052
|
477.1998
|
|
vitamin A
|
102411.1266
|
2383.15341
|
7330.0515
|
2668452.371
|
22063.2486
|
|
vitamin C
|
6701.6160
|
137.67199
|
477.1998
|
22063.249
|
5416.2641
|
sample_cor_matrix <- cor(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], method = "pearson", use = "complete.obs")
sample_cor_matrix <- round(sample_cor_matrix, 3) # Limit to 3 decimal places
sample_cor_matrix <- data.frame(sample_cor_matrix)
kable(sample_cor_matrix, caption = "Sample Correlation Matrix", format = "html", col.names = NA) %>%
kable_styling()
Sample Correlation Matrix
|
|
calcium
|
iron
|
protein
|
vitamin.A
|
vitamin.C
|
|
calcium
|
1.000
|
0.395
|
0.500
|
0.158
|
0.229
|
|
iron
|
0.395
|
1.000
|
0.623
|
0.244
|
0.313
|
|
protein
|
0.500
|
0.623
|
1.000
|
0.147
|
0.212
|
|
vitamin A
|
0.158
|
0.244
|
0.147
|
1.000
|
0.184
|
|
vitamin C
|
0.229
|
0.313
|
0.212
|
0.184
|
1.000
|
1.5
variances <- apply(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], 2, var)
total_variance <- sum(variances)
variance_table <- data.frame(
Variable = names(variances),
Variance = round(variances, 3)
)
table <- rbind(variance_table, c("Total", round(total_variance, 3)))
table
Variable Variance
calcium calcium 157829.444
iron iron 35.811
protein protein 934.877
vitamin A vitamin A 2668452.371
vitamin C vitamin C 5416.264
6 Total 2832668.766
1.6
cov_matrix <- cov(nutrient)
generalized_variance <- det(cov_matrix)
cat("Sample Variance-Covariance Matrix:\n")
Sample Variance-Covariance Matrix:
print(round(cov_matrix, 5))
calcium iron protein vitamin A vitamin C
calcium 157829.4439 940.08944 6075.8162 102411.127 6701.6160
iron 940.0894 35.81054 114.0580 2383.153 137.6720
protein 6075.8162 114.05803 934.8769 7330.052 477.1998
vitamin A 102411.1266 2383.15341 7330.0515 2668452.371 22063.2486
vitamin C 6701.6160 137.67199 477.1998 22063.249 5416.2641
cat("\nGeneralized Variance:\n")
Generalized Variance:
print(round(generalized_variance, 3))
[1] 2.831042e+19
Lesson 2: Linear Combinations of Random Variables
Section 2.2
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")
calculate_summary <- function(x) {
data.frame(
Mean = sprintf("%.1f", mean(x, na.rm = TRUE))
)
}
summary_stats <- lapply(nutrient, calculate_summary)
summary_table <- do.call(rbind, summary_stats)
kable(summary_table, caption = "Population Mean", format = "html", col.names = c("Variable", "Mean")) %>%
kable_styling()
Population Mean
|
Variable
|
Mean
|
|
calcium
|
624.0
|
|
iron
|
11.1
|
|
protein
|
65.8
|
|
vitamin A
|
839.6
|
|
vitamin C
|
78.9
|
Example 2.3
colnames(nutrient) <- c("calcium", "iron", "protein", "vitamin A", "vitamin C")
summary_table <- summary(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
summary_table <- as.data.frame(t(summary_table))
kable(summary_table, caption = "Descriptive Statistics", format = "html", col.names = NA) %>%
kable_styling()
Descriptive Statistics
|
Var1
|
Var2
|
Freq
|
|
calcium
|
|
Min. : 7.44
|
|
iron
|
|
Min. : 0.00
|
|
protein
|
|
Min. : 0.00
|
|
vitamin A
|
|
Min. : 0.0
|
|
vitamin C
|
|
Min. : 0.00
|
|
calcium
|
|
1st Qu.: 326.54
|
|
iron
|
|
1st Qu.: 7.46
|
|
protein
|
|
1st Qu.: 45.52
|
|
vitamin A
|
|
1st Qu.: 276.3
|
|
vitamin C
|
|
1st Qu.: 24.94
|
|
calcium
|
|
Median : 548.29
|
|
iron
|
|
Median :10.03
|
|
protein
|
|
Median : 61.07
|
|
vitamin A
|
|
Median : 524.0
|
|
vitamin C
|
|
Median : 53.59
|
|
calcium
|
|
Mean : 624.05
|
|
iron
|
|
Mean :11.13
|
|
protein
|
|
Mean : 65.80
|
|
vitamin A
|
|
Mean : 839.6
|
|
vitamin C
|
|
Mean : 78.93
|
|
calcium
|
|
3rd Qu.: 826.51
|
|
iron
|
|
3rd Qu.:13.71
|
|
protein
|
|
3rd Qu.: 80.77
|
|
vitamin A
|
|
3rd Qu.: 943.2
|
|
vitamin C
|
|
3rd Qu.:108.67
|
|
calcium
|
|
Max. :2866.44
|
|
iron
|
|
Max. :58.67
|
|
protein
|
|
Max. :251.01
|
|
vitamin A
|
|
Max. :34434.3
|
|
vitamin C
|
|
Max. :433.34
|
cor_matrix <- cor(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")], method = "pearson")
kable(cor_matrix, caption = "Pearson Correlation Matrix", format = "html", col.names = NA) %>%
kable_styling()
Pearson Correlation Matrix
|
|
calcium
|
iron
|
protein
|
vitamin A
|
vitamin C
|
|
calcium
|
1.0000000
|
0.3954301
|
0.5001882
|
0.1578060
|
0.2292111
|
|
iron
|
0.3954301
|
1.0000000
|
0.6233662
|
0.2437905
|
0.3126009
|
|
protein
|
0.5001882
|
0.6233662
|
1.0000000
|
0.1467574
|
0.2120670
|
|
vitamin A
|
0.1578060
|
0.2437905
|
0.1467574
|
1.0000000
|
0.1835227
|
|
vitamin C
|
0.2292111
|
0.3126009
|
0.2120670
|
0.1835227
|
1.0000000
|
cov_matrix <- cov(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
kable(cov_matrix, caption = "The variance-covariance matrix is also copied into the matrix below.", format = "html", col.names = NA) %>%
kable_styling()
The variance-covariance matrix is also copied into the matrix below.
|
|
calcium
|
iron
|
protein
|
vitamin A
|
vitamin C
|
|
calcium
|
157829.4439
|
940.08944
|
6075.8163
|
102411.127
|
6701.6160
|
|
iron
|
940.0894
|
35.81054
|
114.0580
|
2383.153
|
137.6720
|
|
protein
|
6075.8163
|
114.05803
|
934.8769
|
7330.052
|
477.1998
|
|
vitamin A
|
102411.1266
|
2383.15341
|
7330.0515
|
2668452.371
|
22063.2486
|
|
vitamin C
|
6701.6160
|
137.67199
|
477.1998
|
22063.249
|
5416.2641
|
Lesson 3: Graphical Display of Multivariate Data
Section 3.1
options(width = 78)
nutrient$L_calciu <- log(nutrient$calcium)
nutrient$S_calciu <- sqrt(nutrient$calcium)
nutrient$S_S_calc <- nutrient$calcium^0.25
nutrient$L_iron <- log(nutrient$iron)
nutrient$S_S_iron <- nutrient$iron^0.25
nutrient$L_prot <- log(nutrient$protein)
nutrient$S_S_prot <- nutrient$protein^0.25
nutrient$S_S_a <- nutrient$`vitamin A`^0.25
nutrient$S_S_c <- nutrient$`vitamin C`^0.25
summary_stats <- summary(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C", "L_calciu", "S_calciu", "S_S_calc", "L_iron", "S_S_iron", "L_prot", "S_S_prot", "S_S_a", "S_S_c")])
print(summary_stats)
calcium iron protein vitamin A
Min. : 7.44 Min. : 0.00 Min. : 0.00 Min. : 0.0
1st Qu.: 326.54 1st Qu.: 7.46 1st Qu.: 45.52 1st Qu.: 276.3
Median : 548.29 Median :10.03 Median : 61.07 Median : 524.0
Mean : 624.05 Mean :11.13 Mean : 65.80 Mean : 839.6
3rd Qu.: 826.51 3rd Qu.:13.71 3rd Qu.: 80.77 3rd Qu.: 943.2
Max. :2866.44 Max. :58.67 Max. :251.01 Max. :34434.3
vitamin C L_calciu S_calciu S_S_calc
Min. : 0.00 Min. :2.007 Min. : 2.728 Min. :1.652
1st Qu.: 24.94 1st Qu.:5.789 1st Qu.:18.070 1st Qu.:4.251
Median : 53.59 Median :6.307 Median :23.416 Median :4.839
Mean : 78.93 Mean :6.222 Mean :23.766 Mean :4.809
3rd Qu.:108.67 3rd Qu.:6.717 3rd Qu.:28.749 3rd Qu.:5.362
Max. :433.34 Max. :7.961 Max. :53.539 Max. :7.317
L_iron S_S_iron L_prot S_S_prot
Min. : -Inf Min. :0.000 Min. : -Inf Min. :0.000
1st Qu.:2.010 1st Qu.:1.653 1st Qu.:3.818 1st Qu.:2.597
Median :2.306 Median :1.780 Median :4.112 Median :2.796
Mean : -Inf Mean :1.781 Mean : -Inf Mean :2.786
3rd Qu.:2.618 3rd Qu.:1.924 3rd Qu.:4.392 3rd Qu.:2.998
Max. :4.072 Max. :2.768 Max. :5.526 Max. :3.980
S_S_a S_S_c
Min. : 0.000 Min. :0.000
1st Qu.: 4.077 1st Qu.:2.235
Median : 4.785 Median :2.706
Mean : 4.858 Mean :2.735
3rd Qu.: 5.542 3rd Qu.:3.229
Max. :13.622 Max. :4.563
par(mfrow = c(1, 1), mar = c(5, 5, 4, 2) + 0.1)
hist(nutrient$calcium, main = "Histogram for Calcium", col = "skyblue", xlab = "Calcium", ylab = "Frequency", cex.lab = 1.2, cex.axis = 1.2)

diff_x <- nutrient$iron - 500
diff_y <- nutrient$calcium - 10
plot(nutrient$iron, nutrient$calcium, main = "Scatterplot of calcium vs iron",
xlab = "iron", ylab = "calcium", col = "darkblue", pch = 16)

diff_x <- nutrient$S_S_iron - 0.5
diff_y <- nutrient$S_S_calc - 1
plot(diff_x, diff_y, main = "Scatterplot of Difference in S_S_calc vs Difference in S_S_iron",
xlab = "S_S_iron", ylab = "S_S_calc",
col = "darkblue", pch = 16)

if (!requireNamespace("ggplot2", quietly = TRUE)) {
install.packages("ggplot2")
}
library(ggplot2)
pairs(
nutrient[, c("S_S_calc", "S_S_iron", "S_S_prot", "S_S_a", "S_S_c")],
main = "Scatterplot Matrix for Nutrition Data",
col= "darkblue")

Lesson 4: Multivariate Normal Distribution
Section 4.2
options(width = 78)
r <- 0.9
x1 <- seq(-4, 4, by = 0.1)
x2 <- seq(-4, 4, by = 0.1)
grid <- expand.grid(x1 = x1, x2 = x2)
grid$phi <- exp(-(grid$x1^2 - 2 * r * grid$x1 * grid$x2 + grid$x2^2) / (2 * (1 - r^2))) / (2 * pi * sqrt(1 - r^2))
matrix_data <- matrix(grid$phi, nrow = length(x1), ncol = length(x2), byrow = TRUE)
persp(
x = x1, y = x2, z = matrix_data,
main = "Bivariate Normal Density",
xlab = "x1", ylab = "x2", zlab = "Density",
theta = -20,
col = "lightgreen"
)
#### Section 4.3
boards <- read.csv("D:/stat/boardstiffness.csv", skip = 1, header = TRUE, sep = ",")
pca_result <- princomp(boards)
mahal <- data.frame(dist2 = rowSums(pca_result$scores^2))
mahal
dist2
1 893935.032
2 391392.446
3 278375.446
4 68326.618
5 75628.273
6 64510.308
7 110144.273
8 3796596.860
9 111029.722
10 68173.480
11 88038.722
12 20206.136
13 8433.101
14 27164.963
15 461749.480
16 1232904.756
17 621252.998
18 30311.446
19 101990.377
20 478555.791
21 87228.584
22 222617.825
23 154558.067
24 129191.273
25 285655.067
26 138544.549
27 94044.963
28 1048160.101
29 590113.411
Section 4.7
library(MASS)
wechsler <- read.csv("D:/stat/wechsler.csv")
print(wechsler)
id info sim arith pict
1 1 7 5 9 8
2 2 8 8 5 6
3 3 16 18 11 9
4 4 8 3 7 9
5 5 6 3 13 9
6 6 11 8 10 10
7 7 12 7 9 8
8 8 8 11 9 3
9 9 14 12 11 4
10 10 13 13 13 6
11 11 13 9 9 9
12 12 13 10 15 7
13 13 14 11 12 8
14 14 15 11 11 10
15 15 13 10 15 9
16 16 10 5 8 6
17 17 10 3 7 7
18 18 17 13 13 7
19 19 10 6 10 7
20 20 10 10 15 8
21 21 14 7 11 5
22 22 16 11 12 11
23 23 10 7 14 6
24 24 10 10 9 6
25 25 10 7 10 10
26 26 7 6 5 9
27 27 15 12 10 6
28 28 17 15 15 8
29 29 16 13 16 9
30 30 13 10 17 8
31 31 13 10 17 10
32 32 19 12 16 10
33 33 19 15 17 11
34 34 13 10 7 8
35 35 15 11 12 8
36 36 16 9 11 11
37 37 14 13 14 9
pca_result <- princomp(wechsler[, -1], cor = FALSE)
summary(pca_result)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4
Standard deviation 5.0533104 2.4670432 1.9558360 1.36381119
Proportion of Variance 0.6844717 0.1631387 0.1025341 0.04985539
Cumulative Proportion 0.6844717 0.8476105 0.9501446 1.00000000
variable_means <- colMeans(wechsler[, -1])
cat("Variable Means:\n")
Variable Means:
print(variable_means)
info sim arith pict
12.567568 9.567568 11.486486 7.972973
variable_means_df <- data.frame(Variable = names(variable_means), Mean = variable_means)
variable_means_table <- knitr::kable(variable_means_df, align = c("l", "r"), col.names = c("Variable", "Mean"), digits = 2)
kableExtra::kable_styling(variable_means_table, "striped", full_width = FALSE)
|
|
Variable
|
Mean
|
|
info
|
info
|
12.57
|
|
sim
|
sim
|
9.57
|
|
arith
|
arith
|
11.49
|
|
pict
|
pict
|
7.97
|
cov_matrix <- cov(wechsler[, -1])
cov_table <- knitr::kable(cov_matrix, digits = 4)
kableExtra::kable_styling(cov_table, "striped", full_width = FALSE)
|
|
info
|
sim
|
arith
|
pict
|
|
info
|
11.4745
|
9.0856
|
6.3829
|
2.0713
|
|
sim
|
9.0856
|
12.0856
|
5.9384
|
0.5435
|
|
arith
|
6.3829
|
5.9384
|
11.0901
|
1.7913
|
|
pict
|
2.0713
|
0.5435
|
1.7913
|
3.6937
|
Section 4.8
library(MASS)
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1.0000, 0.5000, 0.5000, 2.0000), nrow = 2) # Covariance matrix
lambda <- eigen(sigma)$values
e <- eigen(sigma)$vectors
theta <- seq(0, 2 * pi, length.out = 201)
z <- matrix(c(cos(theta), sin(theta)), ncol = 2)
d <- diag(sqrt(lambda))
ellipse_coords <- t(z %*% t(e) %*% d * sqrt(5.99))
x <- ellipse_coords[, 1] + mu[1]
y <- ellipse_coords[, 2] + mu[2]
plot(x, y, type = "l", xlab = "X", ylab = "Y", main = "95% prediction ellipse", xlim = c(-5, 5), ylim = c(-5, 5))

Lesson 7: Inferences Regarding Multivariate Population Mean
Section 7.1.4
library(MASS)
library(dplyr)
x <- as.matrix(nutrient[, c("calcium", "iron", "protein", "vitamin A", "vitamin C")])
hotel <- function(x, mu0) {
one <- rep(1, nrow(x))
ident <- diag(nrow(x))
ybar <- t(x) %*% one / nrow(x)
s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1)
t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
df1 <- ncol(x)
df2 <- nrow(x) - ncol(x)
p <- 1 - pf(f, df1, df2)
result <- data.frame(
T2 = t2,
F = f,
df1 = df1,
df2 = df2,
p_value = p
)
return(result)
}
mu0 <- c(1000, 15, 60, 800, 75)
result <- hotel(x, mu0)
print(result)
T2 F df1 df2 p_value
1 1758.541 349.7968 5 732 0
arranged_result <- arrange(result, T2)
print(arranged_result)
T2 F df1 df2 p_value
1 1758.541 349.7968 5 732 0
Section 7.1.9
library(MASS)
hotel <- function(x) {
mu0 <- rep(0, ncol(x))
one <- rep(1, nrow(x))
ident <- diag(nrow(x))
ybar <- t(x) %*% one / nrow(x)
s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1)
cat("mu0:\n")
print(mu0)
cat("\nSample Means (ybar):\n")
print(ybar)
cat("\nSample Covariance Matrix (S):\n")
print(s)
t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
df1 <- ncol(x)
df2 <- nrow(x) - ncol(x)
p <- 1 - pf(f, df1, df2)
# Print test statistics
cat("\nHotelling T2 Statistic:\n")
print(t2)
cat("\nF-Statistic:\n")
print(f)
cat("\nDegrees of Freedom:\n")
cat("df1:", df1, "\n")
cat("df2:", df2, "\n")
cat("\nP-Value:\n")
print(p)
}
spouse <- read.csv("D:/stat/spouse.csv", header = TRUE)
d1 <- spouse$h1 - spouse$w1
d2 <- spouse$h2 - spouse$w2
d3 <- spouse$h3 - spouse$w3
d4 <- spouse$h4 - spouse$w4
diffs <- data.frame(d1, d2, d3, d4)
# Extract the columns d1, d2, d3, and d4 into a matrix x
x <- as.matrix(diffs)
# Apply the Hotelling function to the data
hotel(x)
mu0:
[1] 0 0 0 0
Sample Means (ybar):
[,1]
d1 0.06666667
d2 -0.13333333
d3 -0.30000000
d4 -0.13333333
Sample Covariance Matrix (S):
d1 d2 d3 d4
d1 0.82298851 0.07816092 -0.0137931 -0.05977011
d2 0.07816092 0.80919540 -0.2137931 -0.15632184
d3 -0.01379310 -0.21379310 0.5620690 0.51034483
d4 -0.05977011 -0.15632184 0.5103448 0.60229885
Hotelling T2 Statistic:
[,1]
[1,] 13.12784
F-Statistic:
[,1]
[1,] 2.942447
Degrees of Freedom:
df1: 4
df2: 26
P-Value:
[,1]
[1,] 0.03936914
Error in (ident - one %% t(one)/nrow(x)) %% x : requires
numeric/complex matrix/vector arguments
Section 7.1.10
hotel <- function(x) {
mu0 <- rep(0, ncol(x))
one <- rep(1, nrow(x))
ident <- diag(nrow(x))
ybar <- t(x) %*% one / nrow(x)
s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1)
means <- colMeans(x)
variances <- apply(x, 2, var)
print(mu0)
print(ybar)
print(s)
t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
df1 <- ncol(x)
df2 <- nrow(x) - ncol(x)
p <- 1 - pf(f, df1, df2)
print(t2)
print(f)
print(df1)
print(df2)
print(p)
return(list(means = means, variances = variances, t2 = t2, f = f, df1 = df1, df2 = df2, p = p))
}
d1 <- spouse$h1 - spouse$w1
d2 <- spouse$h2 - spouse$w2
d3 <- spouse$h3 - spouse$w3
d4 <- spouse$h4 - spouse$w4
diffs <- data.frame(d1, d2, d3, d4)
x <- as.matrix(diffs)
result <- hotel(x)
[1] 0 0 0 0
[,1]
d1 0.06666667
d2 -0.13333333
d3 -0.30000000
d4 -0.13333333
d1 d2 d3 d4
d1 0.82298851 0.07816092 -0.0137931 -0.05977011
d2 0.07816092 0.80919540 -0.2137931 -0.15632184
d3 -0.01379310 -0.21379310 0.5620690 0.51034483
d4 -0.05977011 -0.15632184 0.5103448 0.60229885
[,1]
[1,] 13.12784
[,1]
[1,] 2.942447
[1] 4
[1] 26
[,1]
[1,] 0.03936914
print("Means:")
[1] "Means:"
print(result$means)
d1 d2 d3 d4
0.06666667 -0.13333333 -0.30000000 -0.13333333
print("Variances:")
[1] "Variances:"
print(result$variances)
d1 d2 d3 d4
0.8229885 0.8091954 0.5620690 0.6022989
Section 7.1.11
spouse$d1 <- spouse$h1 - spouse$w2
spouse$d2 <- spouse$h2 - spouse$w1
spouse$d3 <- spouse$h3 - spouse$w4
spouse$d4 <- spouse$h4 - spouse$w3
print(spouse)
h1 h2 h3 h4 w1 w2 w3 w4 d1 d2 d3 d4
1 2 3 5 5 4 4 5 5 -2 -1 0 0
2 5 5 4 4 4 5 5 5 0 1 -1 -1
3 4 5 5 5 4 4 5 5 0 1 0 0
4 4 3 4 4 4 5 5 5 -1 -1 -1 -1
5 3 3 5 5 4 4 5 5 -1 -1 0 0
6 3 3 4 5 3 3 4 4 0 0 0 1
7 3 4 4 4 4 3 5 4 0 0 0 -1
8 4 4 5 5 3 4 5 5 0 1 0 0
9 4 5 5 5 4 4 5 4 0 1 1 0
10 4 4 3 3 3 4 4 4 0 1 -1 -1
11 4 4 5 5 4 5 5 5 -1 0 0 0
12 5 5 4 4 5 5 5 5 0 0 -1 -1
13 4 4 4 4 4 4 5 5 0 0 -1 -1
14 4 3 5 5 4 4 4 4 0 -1 1 1
15 4 4 5 5 4 4 5 5 0 0 0 0
16 3 3 4 5 3 4 4 4 -1 0 0 1
17 4 5 4 4 5 5 5 5 -1 0 -1 -1
18 5 5 5 5 4 5 4 4 0 1 1 1
19 5 5 4 4 3 4 4 4 1 2 0 0
20 4 4 4 4 5 3 4 4 1 -1 0 0
21 4 4 4 4 5 3 4 4 1 -1 0 0
22 4 4 4 4 4 5 4 4 -1 0 0 0
23 3 4 5 5 2 5 5 5 -2 2 0 0
24 5 3 5 5 3 4 5 5 1 0 0 0
25 5 5 3 3 4 3 5 5 2 1 -2 -2
26 3 3 4 4 4 4 4 4 -1 -1 0 0
27 4 4 4 4 4 4 5 5 0 0 -1 -1
28 3 3 5 5 3 4 4 4 -1 0 1 1
29 4 4 3 3 4 4 5 4 0 0 -1 -2
30 4 4 5 5 4 4 5 5 0 0 0 0
hotel <- function(x) {
mu0 <- c(0, 0, 0, 0)
one <- rep(1, nrow(x))
ident <- diag(nrow(x))
ybar <- t(x) %*% one / nrow(x)
s <- t(x) %*% (ident - one %*% t(one) / nrow(x)) %*% x / (nrow(x) - 1)
print(mu0)
print(ybar)
print(s)
t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
df1 <- ncol(x)
df2 <- nrow(x) - ncol(x)
p_value <- 1 - pf(f, df1, df2)
print(t2)
print(f)
print(df1)
print(df2)
print(p_value)
}
x <- as.matrix(spouse[, c("d1", "d2", "d3", "d4")])
hotel(x)
[1] 0 0 0 0
[,1]
d1 -0.2000000
d2 0.1333333
d3 -0.2000000
d4 -0.2333333
d1 d2 d3 d4
d1 0.7862069 0.13103448 -0.14482759 -0.18620690
d2 0.1310345 0.74022989 -0.04137931 -0.07126437
d3 -0.1448276 -0.04137931 0.51034483 0.50344828
d4 -0.1862069 -0.07126437 0.50344828 0.66781609
[,1]
[1,] 6.426621
[,1]
[1,] 1.44045
[1] 4
[1] 26
[,1]
[1,] 0.2488886
Section 7.1.15
library(MASS)
hotel2 <- function(x1, x2) {
n1 <- nrow(x1)
n2 <- nrow(x2)
k <- ncol(x1)
one1 <- matrix(1, n1, 1)
one2 <- matrix(1, n2, 1)
ident1 <- diag(n1)
ident2 <- diag(n2)
ybar1 <- t(x1) %*% one1 / n1
s1 <- t(x1) %*% ((ident1 - one1 %*% t(one1) / n1) %*% x1) / (n1 - 1.0)
print(round(n1, 3))
print(round(ybar1, 3))
print(round(s1, 3))
ybar2 <- t(x2) %*% one2 / n2
s2 <- t(x2) %*% ((ident2 - one2 %*% t(one2) / n2) %*% x2) / (n2 - 1.0)
print(round(n2, 3))
print(round(ybar2, 3))
print(round(s2, 3))
spool <- ((n1 - 1.0) * s1 + (n2 - 1.0) * s2) / (n1 + n2 - 2.0)
print(round(spool, 3))
t2 <- t(ybar1 - ybar2) %*% solve(spool * (1/n1 + 1/n2)) %*% (ybar1 - ybar2)
f <- (n1 + n2 - k - 1) * t2 / k / (n1 + n2 - 2)
df1 <- k
df2 <- n1 + n2 - k - 1
p <- 1 - pf(f, df1, df2)
print(round(t2, 3))
print(round(f, 3))
print(round(df1, 3))
print(round(df2, 3))
print(round(p, 3))
}
swiss <- read.csv("D:/stat/swiss3.csv", header = TRUE)
x1 <- subset(swiss, type == "real")[, c("length", "left", "right", "bottom", "top", "diag")]
x2 <- subset(swiss, type == "fake")[, c("length", "left", "right", "bottom", "top", "diag")]
hotel2(as.matrix(x1), as.matrix(x2))
[1] 100
[,1]
length 214.969
left 129.943
right 129.720
bottom 8.305
top 10.168
diag 141.517
length left right bottom top diag
length 0.150 0.058 0.057 0.057 0.014 0.005
left 0.058 0.133 0.086 0.057 0.049 -0.043
right 0.057 0.086 0.126 0.058 0.031 -0.024
bottom 0.057 0.057 0.058 0.413 -0.263 0.000
top 0.014 0.049 0.031 -0.263 0.421 -0.075
diag 0.005 -0.043 -0.024 0.000 -0.075 0.200
[1] 100
[,1]
length 214.823
left 130.300
right 130.193
bottom 10.530
top 11.133
diag 139.450
length left right bottom top diag
length 0.124 0.032 0.024 -0.101 0.019 0.012
left 0.032 0.065 0.047 -0.024 -0.012 -0.005
right 0.024 0.047 0.089 -0.019 0.000 0.034
bottom -0.101 -0.024 -0.019 1.281 -0.490 0.238
top 0.019 -0.012 0.000 -0.490 0.404 -0.022
diag 0.012 -0.005 0.034 0.238 -0.022 0.311
length left right bottom top diag
length 0.137 0.045 0.041 -0.022 0.017 0.009
left 0.045 0.099 0.066 0.016 0.019 -0.024
right 0.041 0.066 0.108 0.020 0.015 0.005
bottom -0.022 0.016 0.020 0.847 -0.377 0.119
top 0.017 0.019 0.015 -0.377 0.413 -0.049
diag 0.009 -0.024 0.005 0.119 -0.049 0.256
[,1]
[1,] 2412.451
[,1]
[1,] 391.922
[1] 6
[1] 193
[,1]
[1,] 0
Section 7.2.1
library(MASS)
hotel <- function(x) {
mu0 <- rep(0, ncol(x))
one <- rep(1, nrow(x))
ident <- diag(nrow(x))
ybar <- t(x) %*% one / nrow(x)
s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1.0)
print(mu0)
print(ybar)
print(s)
t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
df1 <- ncol(x)
df2 <- nrow(x) - ncol(x)
p <- 1 - pf(f, df1, df2)
print(t2)
print(f)
print(df1)
print(df2)
print(p)
}
nutrient <- read.csv("D:/Stat/nutrient1.csv", header = TRUE)
nutrient$calcium <- nutrient$calcium / 1000
nutrient$iron <- nutrient$iron / 15
nutrient$protein <- nutrient$protein / 60
nutrient$a <- nutrient$vitamin.A / 800
nutrient$c <- nutrient$vitamin.C / 75
nutrient$diff1 <- nutrient$iron - nutrient$calcium
nutrient$diff2 <- nutrient$protein - nutrient$iron
nutrient$diff3 <- nutrient$a - nutrient$protein
nutrient$diff4 <- nutrient$c - nutrient$a
x <- as.matrix(nutrient[, c("diff1", "diff2", "diff3", "diff4")])
hotel(x)
[1] 0 0 0 0
[,1]
diff1 0.117944052
diff2 0.354730710
diff3 -0.047179834
diff4 0.002835103
diff1 diff2 diff3 diff4
diff1 0.19164212 -0.07101776 0.04511467 -0.03756199
diff2 -0.07101776 0.16538367 -0.17884359 0.02955601
diff3 0.04511467 -0.17884359 4.12372604 -3.75507101
diff4 -0.03756199 0.02955601 -3.75507101 4.39690660
[,1]
[1,] 1030.795
[,1]
[1,] 256.6484
[1] 4
[1] 733
[,1]
[1,] 0
library(MASS)
hotel <- function(x) {
mu0 <- rep(0, ncol(x))
one <- rep(1, nrow(x))
ident <- diag(nrow(x))
ybar <- t(x) %*% one / nrow(x)
s <- t(x) %*% ((ident - one %*% t(one) / nrow(x)) %*% x) / (nrow(x) - 1.0)
t2 <- nrow(x) * t(ybar - mu0) %*% solve(s) %*% (ybar - mu0)
f <- (nrow(x) - ncol(x)) * t2 / ncol(x) / (nrow(x) - 1)
df1 <- ncol(x)
df2 <- nrow(x) - ncol(x)
p <- 1 - pf(f, df1, df2)
results <- data.frame(
mu0 = mu0,
ybar = ybar)
cat("S matrix:\n")
print(s)
cat("Test Statistics:\n")
print(data.frame(
t2 = t2,
f = f,
df1 = df1,
df2 = df2,
p = p
))
return(results)
}
nutrient <- read.csv("D:/Stat/nutrient1.csv", header = TRUE)
nutrient$calcium <- nutrient$calcium / 1000
nutrient$iron <- nutrient$iron / 15
nutrient$protein <- nutrient$protein / 60
nutrient$a <- nutrient$vitamin.A / 800
nutrient$c <- nutrient$vitamin.C / 75
nutrient$diff1 <- nutrient$iron - nutrient$calcium
nutrient$diff2 <- nutrient$protein - nutrient$iron
nutrient$diff3 <- nutrient$a - nutrient$protein
nutrient$diff4 <- nutrient$c - nutrient$a
x <- as.matrix(nutrient[, c("diff1", "diff2", "diff3", "diff4")])
results <- hotel(x)
S matrix:
diff1 diff2 diff3 diff4
diff1 0.19164212 -0.07101776 0.04511467 -0.03756199
diff2 -0.07101776 0.16538367 -0.17884359 0.02955601
diff3 0.04511467 -0.17884359 4.12372604 -3.75507101
diff4 -0.03756199 0.02955601 -3.75507101 4.39690660
Test Statistics:
t2 f df1 df2 p
1 1030.795 256.6484 4 733 0
print(results)
mu0 ybar
diff1 0 0.117944052
diff2 0 0.354730710
diff3 0 -0.047179834
diff4 0 0.002835103
Section 7.2.3
swiss <- read.csv("D:/stat/swiss3.csv")
real <- subset(swiss, type == "real")
fake <- subset(swiss, type == "fake")
compute_stats <- function(data) {
data_summary <- lapply(data, function(x) c(length(x), mean(x), var(x)))
data_summary <- do.call(rbind, data_summary)
colnames(data_summary) <- c("n", "mean", "var")
return(data_summary)}
pop1 <- compute_stats(real[, -1])
pop2 <- compute_stats(fake[, -1])
combine <- merge(pop1, pop2, by = "row.names", suffixes = c(".real", ".fake"))
rownames(combine) <- combine$Row.names
combine$Row.names <- NULL
p <- 6
df1 <- ncol(pop1) - 1
df2 <- sum(pop1[, 1]) + sum(pop2[, 1]) - ncol(pop1) - 1
combine$F <- qf(0.95, df1, df2)
combine$T <- qt(1 - 0.025/6, df2)
combine$sp <- ((combine$n.real - 1) * combine$var.real + (combine$n.fake - 1) * combine$var.fake) / (combine$n.real + combine$n.fake - 2)
combine$losim <- (combine$mean.real - combine$mean.fake) - sqrt(p * (combine$n.real + combine$n.fake - 2) * combine$F * (1/combine$n.real + 1/combine$n.fake) * combine$sp / (combine$n.real + combine$n.fake - p - 1))
combine$upsim <- (combine$mean.real - combine$mean.fake) + sqrt(p * (combine$n.real + combine$n.fake - 2) * combine$F * (1/combine$n.real + 1/combine$n.fake) * combine$sp / (combine$n.real + combine$n.fake - p - 1))
combine$lobon <- (combine$mean.real - combine$mean.fake) - combine$T * sqrt((1/combine$n.real + 1/combine$n.fake) * combine$sp)
combine$upbon <- (combine$mean.real - combine$mean.fake) + combine$T * sqrt((1/combine$n.real + 1/combine$n.fake) * combine$sp)
print(combine)
n.real mean.real var.real n.fake mean.fake var.fake F
bottom 100 8.305 0.4132071 100 10.530 1.28131313 3.003249
diag 100 141.517 0.1998091 100 139.450 0.31121212 3.003249
left 100 129.943 0.1325768 100 130.300 0.06505051 3.003249
length 100 214.969 0.1502414 100 214.823 0.12401111 3.003249
right 100 129.720 0.1262626 100 130.193 0.08894040 3.003249
top 100 10.168 0.4211879 100 11.133 0.40445556 3.003249
T sp losim upsim lobon upbon
bottom 2.642654 0.84726010 -2.78469133 -1.6653087 -2.569004162 -1.8809958
diag 2.642654 0.25551061 1.75964190 2.3743581 1.878087896 2.2559121
left 2.642654 0.09881364 -0.54813871 -0.1658613 -0.474479952 -0.2395200
length 2.642654 0.13712626 -0.07916481 0.3711648 0.007606517 0.2843935
right 2.642654 0.10760152 -0.67245705 -0.2735429 -0.595592672 -0.3504073
top 2.642654 0.41282172 -1.35568026 -0.5743197 -1.205124563 -0.7248754
Section 7.2.6
library(biotools)
Warning: package 'biotools' was built under R version 4.3.3
result <- boxM(data = swiss[, -1], grouping = swiss$type)
result
Box's M-test for Homogeneity of Covariance Matrices
data: swiss[, -1]
Chi-Sq (approx.) = 121.9, df = 21, p-value = 3.198e-16
LESSON 8
Section 8.4
ral rfe rmg rca rna
1 1.83571429 0.627857143 -0.5264286 -0.052142857 0.25928571
2 1.23571429 0.707857143 -1.3964286 -0.082142857 -0.08071429
3 2.03571429 0.717857143 -0.9464286 -0.072142857 -0.05071429
4 -1.06428571 -0.002142857 0.8135714 -0.042142857 -0.11071429
5 1.23571429 0.687857143 0.5135714 -0.002142857 -0.05071429
6 -1.66428571 -0.112142857 -1.3564286 -0.032142857 -0.03071429
7 -2.46428571 -2.112142857 -0.5664286 -0.002142857 -0.07071429
8 -0.96428571 -0.592142857 1.0835714 -0.022142857 -0.09071429
9 -1.46428571 -0.882142857 -0.3064286 0.087857143 0.04928571
10 0.83571429 0.547857143 2.4035714 0.077857143 -0.05071429
11 -0.16428571 -0.242142857 0.8635714 0.017857143 0.28928571
12 0.53571429 0.267857143 0.6835714 0.107857143 -0.01071429
13 0.13571429 0.317857143 -0.3764286 -0.002142857 -0.03071429
14 -0.06428571 0.067857143 -0.8864286 0.017857143 -0.02071429
15 0.10000000 0.025000000 0.0850000 0.005000000 -0.01000000
16 -0.10000000 -0.025000000 -0.0850000 -0.005000000 0.01000000
17 0.12000000 -0.432000000 -0.0040000 0.004000000 -0.02400000
18 -2.38000000 0.678000000 -0.0440000 -0.016000000 -0.01400000
19 -0.18000000 -0.212000000 -0.0040000 -0.016000000 0.00600000
20 -0.18000000 0.168000000 0.0060000 -0.016000000 -0.01400000
21 2.62000000 -0.202000000 0.0460000 0.044000000 0.04600000
22 0.38000000 -0.392000000 -0.0460000 0.008000000 0.01200000
23 0.98000000 -0.372000000 0.0640000 0.008000000 0.00200000
24 -0.62000000 -0.592000000 -0.0760000 -0.042000000 0.00200000
25 -2.52000000 1.228000000 0.0640000 -0.022000000 0.00200000
26 1.78000000 0.128000000 -0.0060000 0.048000000 -0.01800000
for(variable in c("al", "fe", "mg", "ca", "na")) {
hist(pottery[[variable]],
main = paste("Histogram of", variable),
xlab = variable,
col = "yellow",
border = "black")
}





p <- 5
g <- length(unique(pottery$site))
df <- 0.5 * p * (p + 1) * (g - 1)
cov_matrices <- list()
for (site in unique(pottery$site)) {
cov_matrices[[site]] <- cov(subset(pottery, site == site)[, c("al", "fe", "mg", "ca", "na")])
}
l_statistic <- 0
for (i in 1:(g - 1)) {
for (j in (i + 1):g) {
l_statistic <- l_statistic + (sum(diag((cov_matrices[[i]] + cov_matrices[[j]]) %*% solve(cov_matrices[[i]] + cov_matrices[[j]]))) - p)
}
}
l_statistic <- l_statistic * 2
alpha <- 0.05
critical_value <- qchisq(1 - alpha, df)
cat("Test statistic:", l_statistic, "\n")
Test statistic: -2.131628e-14
cat("Degrees of freedom:", df, "\n")
Degrees of freedom: 45
cat("Critical value (", 1 - alpha, "):", critical_value, "\n")
Critical value ( 0.95 ): 61.65623
if (l_statistic > critical_value) {
cat("Reject the null hypothesis: Covariance matrices are not homogeneous.\n")
} else {
cat("Fail to reject the null hypothesis: Covariance matrices are homogeneous.\n")
}
Fail to reject the null hypothesis: Covariance matrices are homogeneous.
Section 8.5
outcome <- cbind(pottery$al, pottery$fe, pottery$mg, pottery$ca, pottery$na)
model<- manova(outcome ~ site, data = pottery)
summary(model, intercept = TRUE, test = "Wilks")
Df Wilks approx F num Df den Df Pr(>F)
(Intercept) 1 0.0068354 523.07 5 18.000 < 2.2e-16 ***
site 3 0.0123009 13.09 15 50.091 1.84e-12 ***
Residuals 22
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(pottery)
# A tibble: 26 × 6
site al fe mg ca na
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 L 14.4 7 4.3 0.15 0.51
2 L 13.8 7.08 3.43 0.12 0.17
3 L 14.6 7.09 3.88 0.13 0.2
4 L 11.5 6.37 5.64 0.16 0.14
5 L 13.8 7.06 5.34 0.2 0.2
6 L 10.9 6.26 3.47 0.17 0.22
7 L 10.1 4.26 4.26 0.2 0.18
8 L 11.6 5.78 5.91 0.18 0.16
9 L 11.1 5.49 4.52 0.29 0.3
10 L 13.4 6.92 7.23 0.28 0.2
# ℹ 16 more rows
model <- manova(cbind(al, ca, fe, mg, na) ~ site, data = pottery)
summary(model)
Df Pillai approx F num Df den Df Pr(>F)
site 3 1.5539 4.2984 15 60 2.413e-05 ***
Residuals 22
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(reshape2)
Warning: package 'reshape2' was built under R version 4.3.3
pottery_melted <- melt(pottery, id.vars = "site", variable.name = "chemical", value.name = "amount")
mean_data <- aggregate(amount ~ site + chemical, data = pottery_melted, FUN = mean)
# Plot
ggplot(mean_data, aes(x = chemical, y = amount, group = site, color = site)) +
geom_line() +
geom_point() +
labs(title = "Profile Plot for Pottery Data", x = "Chemical", y = "Mean Amount") +
theme_minimal()

manova_result <- manova(cbind(al, fe, mg, ca, na) ~ site, data = pottery)
summary(manova_result)
Df Pillai approx F num Df den Df Pr(>F)
site 3 1.5539 4.2984 15 60 2.413e-05 ***
Residuals 22
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Section 8.7
library(car)
pottery$site <- factor(pottery$site)
site_contrasts <- matrix(c(8, -2, 8, -14, # C+L-A-I
1, 0, -1, 0, # A vs I
0, 1, 0, -1), # C vs L
nrow = 3, byrow = TRUE)
rownames(site_contrasts) <- c("C+L-A-I", "A vs I", "C vs L")
colnames(site_contrasts) <- c("C", "L", "A", "I")
print(site_contrasts)
C L A I
C+L-A-I 8 -2 8 -14
A vs I 1 0 -1 0
C vs L 0 1 0 -1
Section 8.11
library(readr)
library(car)
library(emmeans)
Warning: package 'emmeans' was built under R version 4.3.3
rice <- read_csv("D:/stat/rice.csv")
rice$block <- factor(rice$block)
rice$variety <- factor(rice$variety)
manova_result <- manova(cbind(height, tillers) ~ block + variety, data = rice)
summary(manova_result)
Df Pillai approx F num Df den Df Pr(>F)
block 4 0.23044 0.39067 8 24 0.91486
variety 3 0.76192 2.46162 6 24 0.05349 .
Residuals 12
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_result <- emmeans(manova_result, "variety")
print(lsmeans_result)
variety emmean SE df lower.CL upper.CL
a 32.1 0.78 12 30.4 33.8
b 28.1 0.78 12 26.4 29.8
c 30.6 0.78 12 28.9 32.3
d 28.8 0.78 12 27.1 30.5
Results are averaged over the levels of: block, rep.meas
Confidence level used: 0.95
print(summary.aov(manova_result))
Response height :
Df Sum Sq Mean Sq F value Pr(>F)
block 4 31.7 7.925 0.6038 0.6673
variety 3 165.0 55.000 4.1905 0.0303 *
Residuals 12 157.5 13.125
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Response tillers :
Df Sum Sq Mean Sq F value Pr(>F)
block 4 1.0 0.2500 0.1648 0.9522
variety 3 5.8 1.9333 1.2747 0.3273
Residuals 12 18.2 1.5167
summary(aov(height ~ block + variety, data = rice))
Df Sum Sq Mean Sq F value Pr(>F)
block 4 31.7 7.93 0.604 0.6673
variety 3 165.0 55.00 4.190 0.0303 *
Residuals 12 157.5 13.12
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(tillers ~ block + variety, data = rice))
Df Sum Sq Mean Sq F value Pr(>F)
block 4 1.0 0.250 0.165 0.952
variety 3 5.8 1.933 1.275 0.327
Residuals 12 18.2 1.517