Да се покаже дека \((\boldsymbol{x}-\bar{x}\boldsymbol{j})'(\boldsymbol{y}-\bar{y}\boldsymbol{j})=\sum_i (x_i-\bar{x})(y_i-\bar{y})\)
\[ (\boldsymbol{x}-\bar{x}\boldsymbol{j})'(\boldsymbol{y}-\bar{y}\boldsymbol{j})= \begin{pmatrix}x_1-\bar{x} & \dots & x_n-\bar{x}\end{pmatrix} \begin{pmatrix}y_1-\bar{y} \\ \vdots \\ y_n-\bar{y}\end{pmatrix}= \sum_i (x_i-\bar{x})(y_i-\bar{y}) \]
Да се покаже дека (за 3 димензии, иако решението е исто за кој било број димензии):
\[ \frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'= \begin{pmatrix} s_{11} & s_{12} & s_{13} \\ s_{21} & s_{22} & s_{23} \\ s_{31} & s_{32} & s_{33} \end{pmatrix} \]
Нека \(n\) е бројот на набљудувања, а \(p\) бројот на променливи (овде \(p=3\)).
\[ \begin{gathered} \frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'=\frac{1}{n-1}\sum_{i=1}^{n}\boldsymbol{a}_i\boldsymbol{a}_i', \qquad \boldsymbol{a}_{i}=\boldsymbol{y}_i-\bar{\boldsymbol{y}}=\begin{pmatrix} y_{i1}-\bar{y}_1\\ \vdots \\ y_{ip}-\bar{y}_p \end{pmatrix} \\ \boldsymbol{a}_{i}\boldsymbol{a}_{i}'=\begin{pmatrix} (y_{i1}-\bar{y}_1)^2 & \dots & (y_{i1}-\bar{y}_1)(y_{ip}-\bar{y}_p)\\ \vdots & \ddots & \vdots\\ (y_{ip}-\bar{y}_p)(y_{i1}-\bar{y}_1) & \dots & (y_{ip}-\bar{y}_p)^2 \end{pmatrix}\\ \frac{1}{n-1}\sum_{i=1}^{n}\boldsymbol{a}_i\boldsymbol{a}_i'= \begin{pmatrix} \frac{\sum_{i=1}^n(y_{i1}-\bar{y}_1)^2}{n-1} & \dots & \frac{\sum_{i=1}^n(y_{i1}-\bar{y}_1)(y_{ip}-\bar{y}_p)}{n-1}\\ \vdots & \ddots & \vdots\\ \frac{\sum_{i=1}^n(y_{ip}-\bar{y}_p)(y_{i1}-\bar{y}_1)}{n-1} & \dots & \frac{\sum_{i=1}^n(y_{ip}-\bar{y}_p)^2}{n-1} \end{pmatrix}= \begin{pmatrix} s_{11} & \dots & s_{1p} \\ \vdots & \ddots & \vdots \\ s_{p1} & \dots & s_{pp} \end{pmatrix}, \\ \text{бидејќи по дефиниција } s_{jk}=\frac{1}{n-1}\sum_{i=1}^{n}(y_{ij}-\bar{y}_j)(y_{ik}-\bar{y}_k). \end{gathered} \]
Да се покаже дека \(tr(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}')=\sum_{i=1}^k\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_i\)
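Нека \(\boldsymbol{a}'_i\) е \(i\)-тата редица на \(\boldsymbol{A}\). Тогаш елементот \((i,j)\) на \(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}'\) е скаларот \(\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_j\), а трагата е збирот на дијагоналните елементи:
\[ \boldsymbol{A}\boldsymbol{S}\boldsymbol{A}'=\begin{pmatrix} \boldsymbol{a}'_1\boldsymbol{S}\boldsymbol{a}_1 & \dots & \boldsymbol{a}'_1\boldsymbol{S}\boldsymbol{a}_k\\ \vdots & \ddots & \vdots\\ \boldsymbol{a}'_k\boldsymbol{S}\boldsymbol{a}_1 & \dots & \boldsymbol{a}'_k\boldsymbol{S}\boldsymbol{a}_k \end{pmatrix}\implies tr(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}')=\sum_{i=1}^k\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_i \]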
Податоци:
# Raw measurements as one flat numeric vector: 25 rows of 4 values each,
# written row by row. The four positions within a row are extracted below
# as y1, y2, x1 and x2, in that order.
# NOTE(review): the source table and the meaning of the four columns are
# not stated in this file — confirm against the textbook.
data = c(
191, 155, 179, 145,
195, 149, 201, 152,
181, 148, 185, 149,
183, 153, 188, 149,
176, 144, 171, 142,
208, 157, 192, 152,
189, 150, 190, 149,
197, 159, 189, 152,
188, 152, 197, 159,
192, 150, 187, 151,
179, 158, 186, 148,
183, 147, 174, 147,
174, 150, 185, 152,
190, 159, 195, 157,
188, 151, 187, 158,
163, 137, 161, 130,
195, 155, 183, 158,
186, 153, 173, 148,
181, 145, 182, 146,
175, 140, 165, 137,
192, 154, 185, 152,
174, 143, 178, 147,
176, 139, 176, 143,
197, 167, 200, 158,
190, 163, 187, 150
)
# De-interleave the flat vector: every 4th value, starting at offsets 1..4,
# forms one column.
y1 <- data[seq(from = 1, to = length(data), by = 4)]
y2 <- data[seq(from = 2, to = length(data), by = 4)]
x1 <- data[seq(from = 3, to = length(data), by = 4)]
x2 <- data[seq(from = 4, to = length(data), by = 4)]
# Show the four columns side by side.
data.frame(y1, y2, x1, x2)
Да се најде \(\bar{\boldsymbol{y}}\)
#' Sum the elements of a numeric vector with an explicit loop.
#'
#' Hand-written counterpart of `sum()`: the running total starts at 0 and the
#' elements are added left to right.
#'
#' @param x A numeric vector (may be empty).
#' @return The total as a length-one numeric; 0 for an empty vector.
custom_sum <- function(x) {
  total <- 0
  # seq_along() gives an empty sequence for empty input, whereas
  # 1:length(x) would iterate over c(1, 0) and index out of range.
  for (k in seq_along(x)) {
    total <- total + x[k]
  }
  total
}
#' Arithmetic mean built on `custom_sum()`.
#'
#' @param x A numeric vector.
#' @return `custom_sum(x)` divided by `length(x)`.
custom_mean <- function(x) {
  count <- length(x)
  total <- custom_sum(x)
  total / count
}
# Component-wise sample means of y1 and y2, collected into the mean vector.
y1_mean <- custom_mean(y1)
y2_mean <- custom_mean(y2)
y_mean <- c(y1_mean, y2_mean)
# Auto-print the mean vector.
y_mean
[1] 185.72 151.12
Да се најде \(\boldsymbol{S}\)
#' Sample covariance of two equally long numeric vectors.
#'
#' Uses the computational formula
#' (sum(x * y) - n * mean(x) * mean(y)) / (n - 1),
#' with the cross-product sum accumulated by an explicit loop and the means
#' obtained from `custom_mean()`.
#'
#' @param x A numeric vector.
#' @param y A numeric vector of the same length as `x`.
#' @return The sample covariance as a length-one numeric; `NA_real_` when
#'   fewer than two observations are supplied (the n - 1 divisor is then
#'   zero or negative).
#' @details Stops with an error when `x` and `y` differ in length.
custom_cov <- function(x, y) {
  if (length(x) != length(y)) {
    stop("x and y must have the same length", call. = FALSE)
  }
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  mean_x <- custom_mean(x)
  mean_y <- custom_mean(y)
  cross_sum <- 0
  # seq_len() rather than 1:n so the loop is well-defined for every n.
  for (k in seq_len(n)) {
    cross_sum <- cross_sum + x[k] * y[k]
  }
  (cross_sum - n * mean_x * mean_y) / (n - 1)
}
# Sample covariance matrix of the four variables, filled entry by entry.
# Rows and columns of S follow the order of `vars`: x1, x2, y1, y2.
vars <- list(x1, x2, y1, y2)
S <- matrix(0, nrow = 4, ncol = 4)
for (k in seq_along(vars)) {
  for (i in seq_along(vars)) {
    S[k, i] <- custom_cov(vars[[k]], vars[[i]])
  }
}
# Auto-print the covariance matrix.
S
[,1] [,2] [,3] [,4]
[1,] 100.80667 56.54000 69.66167 51.31167
[2,] 56.54000 45.02333 46.11167 35.05333
[3,] 69.66167 46.11167 95.29333 52.86833
[4,] 51.31167 35.05333 52.86833 54.36000
Да се најде \(\boldsymbol{R}\)
# Correlation matrix: every covariance S[k, i] is divided by
# sqrt(S[k, k] * S[i, i]); outer() builds all the diagonal products at once.
R <- S / sqrt(outer(diag(S), diag(S)))
# Auto-print the correlation matrix.
R
[,1] [,2] [,3] [,4]
[1,] 1.0000000 0.8392519 0.7107518 0.6931573
[2,] 0.8392519 1.0000000 0.7039807 0.7085504
[3,] 0.7107518 0.7039807 1.0000000 0.7345555
[4,] 0.6931573 0.7085504 0.7345555 1.0000000
Да се најде \(|\boldsymbol{S}|\)
#' Determinant of a 2 x 2 matrix.
#'
#' @param x A matrix; only the entries [1, 1], [1, 2], [2, 1] and [2, 2]
#'   are read.
#' @return Product of the main diagonal minus product of the anti-diagonal.
custom_determinant_2_2 <- function(x) {
  main_diag <- x[1, 1] * x[2, 2]
  anti_diag <- x[1, 2] * x[2, 1]
  main_diag - anti_diag
}
#' Determinant of a square matrix by cofactor expansion along the first row.
#'
#' Recurses on the minors obtained by deleting row 1 and column k; 2 x 2
#' matrices are delegated to `custom_determinant_2_2()`.
#'
#' @param x A square numeric matrix.
#' @return The determinant as a length-one numeric. A 1 x 1 matrix returns
#'   its single entry and a 0 x 0 matrix returns 1 (the empty product).
#' @details Stops with an error when `x` is not a square matrix.
custom_determinant <- function(x) {
  if (!is.matrix(x) || nrow(x) != ncol(x)) {
    stop("x must be a square matrix", call. = FALSE)
  }
  n <- nrow(x)
  # Base cases; without the first two the expansion below would index
  # rows that do not exist.
  if (n == 0) {
    return(1)
  }
  if (n == 1) {
    return(x[1, 1])
  }
  if (n == 2) {
    return(custom_determinant_2_2(x))
  }
  res <- 0
  for (k in seq_len(n)) {
    # Minor of entry (1, k); drop = FALSE keeps it a matrix at every size.
    minor <- x[-1, -k, drop = FALSE]
    term <- x[1, k] * custom_determinant(minor)
    # Cofactor signs alternate along the first row: +, -, +, ...
    if (k %% 2 == 1) {
      res <- res + term
    } else {
      res <- res - term
    }
  }
  res
}
# Determinant of the covariance matrix S (auto-printed).
custom_determinant(S)
[1] 1207109
Да се најде \(tr(\boldsymbol{S})\)
#' Trace of a square matrix: the sum of its diagonal entries.
#'
#' @param x A square numeric matrix.
#' @return The trace as a length-one numeric; 0 for a 0 x 0 matrix.
#' @details Stops with an error when `x` is not a square matrix.
custom_tr <- function(x) {
  if (!is.matrix(x) || nrow(x) != ncol(x)) {
    stop("x must be a square matrix", call. = FALSE)
  }
  total <- 0
  # seq_len() keeps the loop empty for a 0 x 0 matrix.
  for (k in seq_len(nrow(x))) {
    total <- total + x[k, k]
  }
  total
}
# Trace of the covariance matrix S (auto-printed).
custom_tr(S)
[1] 295.4833
Податоците од табелата 3.6 се:
# Flat numeric vector: 20 rows of 5 values each, written row by row.
# The first value of every row is a running index 1..20 that the extraction
# below skips; positions 2-5 become y1, y2, y3 and y4.
data = c(
1, 47.8, 48.8, 49.0, 49.7,
2, 46.4, 47.3, 47.7, 48.4,
3, 46.3, 46.8, 47.8, 48.5,
4, 45.1, 45.3, 46.1, 47.2,
5, 47.6, 48.5, 48.9, 49.3,
6, 52.5, 53.2, 53.3, 53.7,
7, 51.2, 53.0, 54.3, 54.5,
8, 49.8, 50.0, 50.3, 52.7,
9, 48.1, 50.8, 52.3, 54.4,
10, 45.0, 47.0, 47.3, 48.3,
11, 51.2, 51.4, 51.6, 51.9,
12, 48.5, 49.2, 53.0, 55.5,
13, 52.1, 52.8, 53.7, 55.0,
14, 48.2, 48.9, 49.3, 49.8,
15, 49.6, 50.4, 51.2, 51.8,
16, 50.7, 51.7, 52.7, 53.3,
17, 47.2, 47.7, 48.4, 49.5,
18, 53.3, 54.6, 55.1, 55.3,
19, 46.2, 47.5, 48.1, 48.4,
20, 46.3, 47.6, 51.3, 51.8
)
# De-interleave the flat vector with stride 5; position 1 of each row (the
# running index) is skipped, positions 2..5 become y1..y4.
y1 <- data[seq(from = 2, to = length(data), by = 5)]
y2 <- data[seq(from = 3, to = length(data), by = 5)]
y3 <- data[seq(from = 4, to = length(data), by = 5)]
y4 <- data[seq(from = 5, to = length(data), by = 5)]
# Show the four columns side by side.
data.frame(y1, y2, y3, y4)
Новите податоци се:
# Three linear combinations of y1..y4, evaluated element-wise per observation.
z1 <- 2 * y1 + 3 * y2 - y3 + 4 * y4
z2 <- -2 * y1 - y2 + 4 * y3 - 2 * y4
z3 <- 3 * y1 - 2 * y2 - y3 + 3 * y4
# Show the transformed variables side by side.
data.frame(z1, z2, z3)
Матрицата \(\boldsymbol{A}\) со која се помножени \(\boldsymbol{y}\)-ците е:
# Coefficient matrix of the linear combinations: row j holds the four
# coefficients applied to y1..y4. The entries are listed row by row, hence
# byrow = TRUE (spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word).
A <- matrix(
  data = c(
    2, 3, -1, 4,
    -2, -1, 4, -2,
    3, -2, -1, 3
  ),
  nrow = 3,
  ncol = 4,
  byrow = TRUE
)
# Auto-print the matrix.
A
[,1] [,2] [,3] [,4]
[1,] 2 3 -1 4
[2,] -2 -1 4 -2
[3,] 3 -2 -1 3
Да се најде \(\bar{\boldsymbol{z}}\)
# Mean vector of y1..y4, then the mean of z obtained as A times that vector.
vars <- list(y1, y2, y3, y4)
y_mean <- vapply(vars, custom_mean, numeric(1))
z_mean <- A %*% y_mean
# Auto-print the 3 x 1 result.
z_mean
[,1]
[1,] 401.415
[2,] -47.555
[3,] 150.495
Да се најде \(\boldsymbol{S}_z\)
# Covariance matrix of y1..y4 (the entries of `vars`), filled entry by entry.
S <- matrix(0, nrow = 4, ncol = 4)
for (k in seq_len(4)) {
  for (i in seq_len(4)) {
    S[k, i] <- custom_cov(vars[[k]], vars[[i]])
  }
}
# Covariance matrix of the transformed variables: A S A'.
S_z <- A %*% S %*% t(A)
# Auto-print the 3 x 3 result.
S_z
[,1] [,2] [,3]
[1,] 399.32029 -44.58439 148.85166
[2,] -44.58439 12.35103 -16.95450
[3,] 148.85166 -16.95450 59.65839
Да се најде \(\boldsymbol{R}_z\)
# Diagonal matrix holding the square roots of the diagonal of S_z.
D_z <- matrix(0, nrow = 3, ncol = 3)
for (k in seq_len(3)) {
  D_z[k, k] <- sqrt(S_z[k, k])
}
# Correlation matrix of z: pre- and post-multiply S_z by the inverse of D_z.
D_z_inv <- solve(D_z)
R_z <- D_z_inv %*% S_z %*% D_z_inv
# Auto-print the 3 x 3 result.
R_z
[,1] [,2] [,3]
[1,] 1.0000000 -0.6348493 0.9644000
[2,] -0.6348493 1.0000000 -0.6245938
[3,] 0.9644000 -0.6245938 1.0000000