[3.4]

Да се покаже дека \((\boldsymbol{x}-\bar{x}\boldsymbol{j})'(\boldsymbol{y}-\bar{y}\boldsymbol{j})=\sum_i (x_i-\bar{x})(y_i-\bar{y})\)

\[ (\boldsymbol{x}-\bar{x}\boldsymbol{j})'(\boldsymbol{y}-\bar{y}\boldsymbol{j})= \begin{pmatrix}x_1-\bar{x} & \dots & x_n-\bar{x}\end{pmatrix} \begin{pmatrix}y_1-\bar{y} \\ \vdots \\ y_n-\bar{y}\end{pmatrix}= \sum_i (x_i-\bar{x})(y_i-\bar{y}) \]

[3.5]

Да се покаже дека (за 3 димензии, иако решението е исто за кој било број димензии):

\[ \frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'= \begin{pmatrix} s_{11} & s_{12} & s_{13} \\ s_{21} & s_{22} & s_{23} \\ s_{31} & s_{32} & s_{33} \end{pmatrix} \]

Нека \(p\) е бројот на променливи (димензии), а \(n\) бројот на набљудувања.

\[ \frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'=\frac{1}{n-1}\sum_{i=1}^{n}\boldsymbol{a}_i\boldsymbol{a}_i'\\ \boldsymbol{a}_{i}=\begin{pmatrix} y_{i1}-\bar{y}_1\\ \vdots \\ y_{ip}-\bar{y}_p \end{pmatrix} \\ \boldsymbol{a}_{i}\boldsymbol{a}_{i}'=\begin{pmatrix} (y_{i1}-\bar{y}_1)^2 & \dots & (y_{i1}-\bar{y}_1)(y_{ip}-\bar{y}_p)\\ \vdots & \ddots & \vdots\\ (y_{ip}-\bar{y}_p)(y_{i1}-\bar{y}_1) & \dots & (y_{ip}-\bar{y}_p)^2 \end{pmatrix}\\ \frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'=\frac{1}{n-1}\sum_{i=1}^{n}\boldsymbol{a}_i\boldsymbol{a}_i'=\\ \begin{pmatrix} \frac{\sum_{i=1}^n(y_{i1}-\bar{y}_1)^2}{n-1} & \dots & \frac{\sum_{i=1}^n(y_{i1}-\bar{y}_1)(y_{ip}-\bar{y}_p)}{n-1}\\ \vdots & \ddots & \vdots\\ \frac{\sum_{i=1}^n(y_{ip}-\bar{y}_p)(y_{i1}-\bar{y}_1)}{n-1} & \dots & \frac{\sum_{i=1}^n(y_{ip}-\bar{y}_p)^2}{n-1} \end{pmatrix}=\\ \begin{pmatrix} s_{11} & \dots & s_{1p} \\ \vdots & \ddots & \vdots \\ s_{p1} & \dots & s_{pp} \end{pmatrix} \]

[3.8]

Да се покаже дека \(tr(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}')=\sum_{i=1}^k\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_i\)

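Нека \(\boldsymbol{a}'_i\) е \(i\)-тата редица на \(\boldsymbol{A}\). Тогаш елементот \((i,j)\) на \(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}'\) е \(\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_j\):

\[ \boldsymbol{A}=\begin{pmatrix} \boldsymbol{a}'_1\\ \vdots\\ \boldsymbol{a}'_k \end{pmatrix},\quad \boldsymbol{A}\boldsymbol{S}\boldsymbol{A}'=\begin{pmatrix} \boldsymbol{a}'_1\boldsymbol{S}\boldsymbol{a}_1 & \dots & \boldsymbol{a}'_1\boldsymbol{S}\boldsymbol{a}_k\\ \vdots & \ddots & \vdots\\ \boldsymbol{a}'_k\boldsymbol{S}\boldsymbol{a}_1 & \dots & \boldsymbol{a}'_k\boldsymbol{S}\boldsymbol{a}_k \end{pmatrix}\implies tr(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}')=\sum_{i=1}^k\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_i \]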

[3.18]

Податоци:

# Raw values for exercise 3.18, stored row by row: each row of four numbers
# holds (y1, y2, x1, x2) for one observation.
data <- c(
  191, 155, 179, 145,
  195, 149, 201, 152,
  181, 148, 185, 149,
  183, 153, 188, 149,
  176, 144, 171, 142,
  208, 157, 192, 152,
  189, 150, 190, 149,
  197, 159, 189, 152,
  188, 152, 197, 159,
  192, 150, 187, 151,
  179, 158, 186, 148,
  183, 147, 174, 147,
  174, 150, 185, 152,
  190, 159, 195, 157,
  188, 151, 187, 158,
  163, 137, 161, 130,
  195, 155, 183, 158,
  186, 153, 173, 148,
  181, 145, 182, 146,
  175, 140, 165, 137,
  192, 154, 185, 152,
  174, 143, 178, 147,
  176, 139, 176, 143,
  197, 167, 200, 158,
  190, 163, 187, 150
)
# Every fourth value belongs to the same variable, so each variable is picked
# out of the flat vector by its position modulo 4.
y1 <- data[seq_along(data) %% 4 == 1]
y2 <- data[seq_along(data) %% 4 == 2]
x1 <- data[seq_along(data) %% 4 == 3]
x2 <- data[seq_along(data) %% 4 == 0]
data.frame(y1, y2, x1, x2)

(a)

Да се најде \(\bar{\boldsymbol{y}}\)

# Sum the elements of a vector with an explicit accumulation loop.
#
# x: numeric vector (may be empty).
# Returns the sum of the elements of `x`; 0 for an empty vector.
custom_sum <- function(x) {
  total <- 0
  # seq_along() gives an empty sequence for empty input, whereas
  # 1:length(x) would iterate over c(1, 0) and index past the data.
  for (idx in seq_along(x)) {
    total <- total + x[idx]
  }
  total
}

# Arithmetic mean of a vector: the element total from custom_sum() divided by
# the number of elements.
custom_mean <- function(x) {
  count <- length(x)
  total <- custom_sum(x)
  total / count
}

# Sample means of the two y variables, collected into one mean vector.
y1_mean <- custom_mean(y1)
y2_mean <- custom_mean(y2)
y_mean <- c(y1_mean, y2_mean)
y_mean
[1] 185.72 151.12

Да се најде \(\boldsymbol{S}\)

# Sample covariance of two equally long numeric vectors (divisor n - 1).
#
# x, y: numeric vectors with the same number of elements.
# Returns sum((x - mean_x) * (y - mean_y)) / (n - 1).
# Stops with an error when the two lengths differ.
custom_cov <- function(x, y) {
  n <- length(x)
  if (length(y) != n) {
    stop("x and y must have the same length", call. = FALSE)
  }
  mean_x <- custom_mean(x)
  mean_y <- custom_mean(y)
  # Accumulate products of centred values. The shortcut
  # (sum(x * y) - n * mean_x * mean_y) subtracts two nearly equal large
  # numbers and loses precision when the means are large compared with the
  # spread of the data.
  cross_sum <- 0
  for (idx in seq_len(n)) {
    cross_sum <- cross_sum + (x[idx] - mean_x) * (y[idx] - mean_y)
  }
  cross_sum / (n - 1)
}

# Covariance matrix of the four variables. Rows and columns follow the order
# of `vars`, i.e. x1, x2, y1, y2.
vars <- list(x1, x2, y1, y2)
# Built column by column: the inner vapply() gives the covariances of every
# variable with `col_var`, the outer one binds those columns into a matrix.
S <- vapply(
  vars,
  function(col_var) {
    vapply(vars, function(row_var) custom_cov(row_var, col_var), numeric(1))
  },
  numeric(length(vars))
)
S
          [,1]     [,2]     [,3]     [,4]
[1,] 100.80667 56.54000 69.66167 51.31167
[2,]  56.54000 45.02333 46.11167 35.05333
[3,]  69.66167 46.11167 95.29333 52.86833
[4,]  51.31167 35.05333 52.86833 54.36000

Да се најде \(\boldsymbol{R}\)

# Correlation matrix: every covariance in S divided by the square root of the
# product of the two matching variances (the diagonal entries of S).
R <- S / sqrt(outer(diag(S), diag(S)))
R
          [,1]      [,2]      [,3]      [,4]
[1,] 1.0000000 0.8392519 0.7107518 0.6931573
[2,] 0.8392519 1.0000000 0.7039807 0.7085504
[3,] 0.7107518 0.7039807 1.0000000 0.7345555
[4,] 0.6931573 0.7085504 0.7345555 1.0000000

(b)

Да се најде \(|\boldsymbol{S}|\)

# Determinant of a 2 x 2 matrix: product of the main-diagonal entries minus
# the product of the off-diagonal entries.
custom_determinant_2_2 <- function(x) {
  main_diagonal <- x[1, 1] * x[2, 2]
  off_diagonal <- x[1, 2] * x[2, 1]
  main_diagonal - off_diagonal
}

# Determinant of a square matrix by cofactor (Laplace) expansion along the
# first row.
#
# x: square numeric matrix.
# Returns the determinant as a single number; an empty (0 x 0) matrix gives 1,
# the value of the empty product.
# Stops with an error when `x` is not a square matrix.
#
# The recursion expands every minor in turn, so the work grows factorially
# with the matrix size; it is meant for small matrices only.
custom_determinant <- function(x) {
  if (!is.matrix(x) || nrow(x) != ncol(x)) {
    stop("x must be a square matrix", call. = FALSE)
  }
  n <- nrow(x)
  if (n == 0) {
    return(1)
  }
  if (n == 1) {
    return(x[1, 1])
  }
  res <- 0
  cofactor_sign <- 1
  for (k in seq_len(n)) {
    # Minor: remove the first row and column k. drop = FALSE keeps a 1 x 1
    # result as a matrix so the recursive call can still index it.
    minor <- x[-1, -k, drop = FALSE]
    res <- res + cofactor_sign * x[1, k] * custom_determinant(minor)
    # Signs alternate +, -, +, ... along the first row.
    cofactor_sign <- -cofactor_sign
  }
  res
}

# Determinant of the covariance matrix S computed above.
custom_determinant(S)
[1] 1207109

Да се најде \(tr(\boldsymbol{S})\)

# Trace of a square matrix: the sum of its main-diagonal entries.
#
# x: square numeric matrix.
# Returns the trace; 0 for an empty (0 x 0) matrix.
# Stops with an error when `x` is not a square matrix.
custom_tr <- function(x) {
  if (!is.matrix(x) || nrow(x) != ncol(x)) {
    stop("x must be a square matrix", call. = FALSE)
  }
  total <- 0
  # seq_len() yields no iterations for a 0 x 0 matrix, unlike 1:n.
  for (k in seq_len(nrow(x))) {
    total <- total + x[k, k]
  }
  total
}

# Trace of the covariance matrix S computed above.
custom_tr(S)
[1] 295.4833

[3.20]

Податоците од табелата 3.6 се:

# Values from table 3.6, stored row by row: each row of five numbers starts
# with the row number (1-20), followed by y1, y2, y3, y4.
data <- c(
  1, 47.8, 48.8, 49.0, 49.7,
  2, 46.4, 47.3, 47.7, 48.4,
  3, 46.3, 46.8, 47.8, 48.5,
  4, 45.1, 45.3, 46.1, 47.2,
  5, 47.6, 48.5, 48.9, 49.3,
  6, 52.5, 53.2, 53.3, 53.7,
  7, 51.2, 53.0, 54.3, 54.5,
  8, 49.8, 50.0, 50.3, 52.7,
  9, 48.1, 50.8, 52.3, 54.4,
  10, 45.0, 47.0, 47.3, 48.3,
  11, 51.2, 51.4, 51.6, 51.9,
  12, 48.5, 49.2, 53.0, 55.5,
  13, 52.1, 52.8, 53.7, 55.0,
  14, 48.2, 48.9, 49.3, 49.8,
  15, 49.6, 50.4, 51.2, 51.8,
  16, 50.7, 51.7, 52.7, 53.3,
  17, 47.2, 47.7, 48.4, 49.5,
  18, 53.3, 54.6, 55.1, 55.3,
  19, 46.2, 47.5, 48.1, 48.4,
  20, 46.3, 47.6, 51.3, 51.8
)
# Every fifth value belongs to the same column; the leading row number
# (position 1 modulo 5) is skipped.
y1 <- data[seq_along(data) %% 5 == 2]
y2 <- data[seq_along(data) %% 5 == 3]
y3 <- data[seq_along(data) %% 5 == 4]
y4 <- data[seq_along(data) %% 5 == 0]
data.frame(y1, y2, y3, y4)

Новите податоци се:

# Three linear combinations of y1..y4, evaluated element-wise for every
# observation.
z1 <- 2 * y1 + 3 * y2 - y3 + 4 * y4
z2 <- -2 * y1 - y2 + 4 * y3 - 2 * y4
z3 <- 3 * y1 - 2 * y2 - y3 + 3 * y4
data.frame(z1, z2, z3)

Матрицата \(\boldsymbol{A}\) со која се помножени \(\boldsymbol{y}\)-ците е:

# Coefficient matrix of the transformation: row j holds the weights applied to
# (y1, y2, y3, y4) to obtain z_j.
A <- matrix(
  data = c(
    2, 3, -1, 4,
    -2, -1, 4, -2,
    3, -2, -1, 3
  ),
  nrow = 3,
  ncol = 4,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  byrow = TRUE
)
A
     [,1] [,2] [,3] [,4]
[1,]    2    3   -1    4
[2,]   -2   -1    4   -2
[3,]    3   -2   -1    3

Да се најде \(\bar{\boldsymbol{z}}\)

# Mean vector of the original variables y1..y4.
vars <- list(y1, y2, y3, y4)
y_mean <- vapply(vars, custom_mean, numeric(1))
# Mean of the transformed variables: A times the mean vector of y.
z_mean <- A %*% y_mean
z_mean
        [,1]
[1,] 401.415
[2,] -47.555
[3,] 150.495

Да се најде \(\boldsymbol{S}_z\)

# Covariance matrix of y1..y4 (rows and columns in the order of `vars`),
# built column by column from the pairwise covariances.
S <- vapply(
  vars,
  function(col_var) {
    vapply(vars, function(row_var) custom_cov(row_var, col_var), numeric(1))
  },
  numeric(length(vars))
)
# Covariance matrix of the transformed variables: A S A'.
S_z <- A %*% S %*% t(A)
S_z
          [,1]      [,2]      [,3]
[1,] 399.32029 -44.58439 148.85166
[2,] -44.58439  12.35103 -16.95450
[3,] 148.85166 -16.95450  59.65839

Да се најде \(\boldsymbol{R}_z\)

# Diagonal matrix holding the standard deviations of z1..z3 (square roots of
# the diagonal of S_z).
D_z <- diag(sqrt(diag(S_z)))
D_z_inv <- solve(D_z)
# Correlation matrix of z: S_z scaled on both sides by the inverse of D_z.
R_z <- D_z_inv %*% S_z %*% D_z_inv
R_z
           [,1]       [,2]       [,3]
[1,]  1.0000000 -0.6348493  0.9644000
[2,] -0.6348493  1.0000000 -0.6245938
[3,]  0.9644000 -0.6245938  1.0000000
---
title: "Глава 3: Characterizing and Displaying Multivariate Data"
output: html_notebook
---

# [3.4]

Да се покаже дека $(\boldsymbol{x}-\bar{x}\boldsymbol{j})'(\boldsymbol{y}-\bar{y}\boldsymbol{j})=\sum_i (x_i-\bar{x})(y_i-\bar{y})$

$$
(\boldsymbol{x}-\bar{x}\boldsymbol{j})'(\boldsymbol{y}-\bar{y}\boldsymbol{j})=
\begin{pmatrix}x_1-\bar{x} & \dots & x_n-\bar{x}\end{pmatrix}
\begin{pmatrix}y_1-\bar{y} \\ \vdots \\ y_n-\bar{y}\end{pmatrix}=
\sum_i (x_i-\bar{x})(y_i-\bar{y})
$$

# [3.5]

Да се покаже дека (за 3 димензии, иако решението е исто за кој било број димензии):

$$
\frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'=
\begin{pmatrix}
s_{11} & s_{12} & s_{13} \\
s_{21} & s_{22} & s_{23} \\
s_{31} & s_{32} & s_{33}
\end{pmatrix}
$$

Нека $p$ е бројот на променливи (димензии), а $n$ бројот на набљудувања.

$$
\frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'=\frac{1}{n-1}\sum_{i=1}^{n}\boldsymbol{a}_i\boldsymbol{a}_i'\\
\boldsymbol{a}_{i}=\begin{pmatrix}
y_{i1}-\bar{y}_1\\ \vdots \\ y_{ip}-\bar{y}_p
\end{pmatrix} \\
\boldsymbol{a}_{i}\boldsymbol{a}_{i}'=\begin{pmatrix}
(y_{i1}-\bar{y}_1)^2 & \dots & (y_{i1}-\bar{y}_1)(y_{ip}-\bar{y}_p)\\
\vdots & \ddots & \vdots\\
(y_{ip}-\bar{y}_p)(y_{i1}-\bar{y}_1) & \dots & (y_{ip}-\bar{y}_p)^2
\end{pmatrix}\\
\frac{1}{n-1}\sum_{i=1}^{n}(\boldsymbol{y}_i-\bar{\boldsymbol{y}})(\boldsymbol{y}_i-\bar{\boldsymbol{y}})'=\frac{1}{n-1}\sum_{i=1}^{n}\boldsymbol{a}_i\boldsymbol{a}_i'=\\
\begin{pmatrix}
\frac{\sum_{i=1}^n(y_{i1}-\bar{y}_1)^2}{n-1} & \dots & \frac{\sum_{i=1}^n(y_{i1}-\bar{y}_1)(y_{ip}-\bar{y}_p)}{n-1}\\
\vdots & \ddots & \vdots\\
\frac{\sum_{i=1}^n(y_{ip}-\bar{y}_p)(y_{i1}-\bar{y}_1)}{n-1} & \dots & \frac{\sum_{i=1}^n(y_{ip}-\bar{y}_p)^2}{n-1}
\end{pmatrix}=\\
\begin{pmatrix}
s_{11} & \dots & s_{1p} \\
\vdots & \ddots & \vdots \\
s_{p1} & \dots & s_{pp}
\end{pmatrix}
$$

# [3.8]

Да се покаже дека $tr(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}')=\sum_{i=1}^k\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_i$

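Нека $\boldsymbol{a}'_i$ е $i$-тата редица на $\boldsymbol{A}$. Тогаш елементот $(i,j)$ на $\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}'$ е $\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_j$:

$$
\boldsymbol{A}=\begin{pmatrix}
\boldsymbol{a}'_1\\ \vdots\\ \boldsymbol{a}'_k
\end{pmatrix},\quad
\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}'=\begin{pmatrix}
\boldsymbol{a}'_1\boldsymbol{S}\boldsymbol{a}_1 & \dots & \boldsymbol{a}'_1\boldsymbol{S}\boldsymbol{a}_k\\
\vdots & \ddots & \vdots\\
\boldsymbol{a}'_k\boldsymbol{S}\boldsymbol{a}_1 & \dots & \boldsymbol{a}'_k\boldsymbol{S}\boldsymbol{a}_k
\end{pmatrix}\implies tr(\boldsymbol{A}\boldsymbol{S}\boldsymbol{A}')=\sum_{i=1}^k\boldsymbol{a}'_i\boldsymbol{S}\boldsymbol{a}_i
$$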

# [3.18]

Податоци:

```{r}
# Raw values for exercise 3.18, stored row by row: each row of four numbers
# holds (y1, y2, x1, x2) for one observation.
data <- c(
  191, 155, 179, 145,
  195, 149, 201, 152,
  181, 148, 185, 149,
  183, 153, 188, 149,
  176, 144, 171, 142,
  208, 157, 192, 152,
  189, 150, 190, 149,
  197, 159, 189, 152,
  188, 152, 197, 159,
  192, 150, 187, 151,
  179, 158, 186, 148,
  183, 147, 174, 147,
  174, 150, 185, 152,
  190, 159, 195, 157,
  188, 151, 187, 158,
  163, 137, 161, 130,
  195, 155, 183, 158,
  186, 153, 173, 148,
  181, 145, 182, 146,
  175, 140, 165, 137,
  192, 154, 185, 152,
  174, 143, 178, 147,
  176, 139, 176, 143,
  197, 167, 200, 158,
  190, 163, 187, 150
)
# Every fourth value belongs to the same variable, so each variable is picked
# out of the flat vector by its position modulo 4.
y1 <- data[seq_along(data) %% 4 == 1]
y2 <- data[seq_along(data) %% 4 == 2]
x1 <- data[seq_along(data) %% 4 == 3]
x2 <- data[seq_along(data) %% 4 == 0]
data.frame(y1, y2, x1, x2)
```

### (a)

Да се најде $\bar{\boldsymbol{y}}$

```{r}
# Sum the elements of a vector with an explicit accumulation loop.
#
# x: numeric vector (may be empty).
# Returns the sum of the elements of `x`; 0 for an empty vector.
custom_sum <- function(x) {
  total <- 0
  # seq_along() gives an empty sequence for empty input, whereas
  # 1:length(x) would iterate over c(1, 0) and index past the data.
  for (idx in seq_along(x)) {
    total <- total + x[idx]
  }
  total
}

# Arithmetic mean of a vector: the element total from custom_sum() divided by
# the number of elements.
custom_mean <- function(x) {
  count <- length(x)
  total <- custom_sum(x)
  total / count
}

# Sample means of the two y variables, collected into one mean vector.
y1_mean <- custom_mean(y1)
y2_mean <- custom_mean(y2)
y_mean <- c(y1_mean, y2_mean)
y_mean
```

Да се најде $\boldsymbol{S}$

```{r}
# Sample covariance of two equally long numeric vectors (divisor n - 1).
#
# x, y: numeric vectors with the same number of elements.
# Returns sum((x - mean_x) * (y - mean_y)) / (n - 1).
# Stops with an error when the two lengths differ.
custom_cov <- function(x, y) {
  n <- length(x)
  if (length(y) != n) {
    stop("x and y must have the same length", call. = FALSE)
  }
  mean_x <- custom_mean(x)
  mean_y <- custom_mean(y)
  # Accumulate products of centred values. The shortcut
  # (sum(x * y) - n * mean_x * mean_y) subtracts two nearly equal large
  # numbers and loses precision when the means are large compared with the
  # spread of the data.
  cross_sum <- 0
  for (idx in seq_len(n)) {
    cross_sum <- cross_sum + (x[idx] - mean_x) * (y[idx] - mean_y)
  }
  cross_sum / (n - 1)
}

# Covariance matrix of the four variables. Rows and columns follow the order
# of `vars`, i.e. x1, x2, y1, y2.
vars <- list(x1, x2, y1, y2)
# Built column by column: the inner vapply() gives the covariances of every
# variable with `col_var`, the outer one binds those columns into a matrix.
S <- vapply(
  vars,
  function(col_var) {
    vapply(vars, function(row_var) custom_cov(row_var, col_var), numeric(1))
  },
  numeric(length(vars))
)
S
```

Да се најде $\boldsymbol{R}$

```{r}
# Correlation matrix: every covariance in S divided by the square root of the
# product of the two matching variances (the diagonal entries of S).
R <- S / sqrt(outer(diag(S), diag(S)))
R
```

### (b)

Да се најде $|\boldsymbol{S}|$

```{r}
# Determinant of a 2 x 2 matrix: product of the main-diagonal entries minus
# the product of the off-diagonal entries.
custom_determinant_2_2 <- function(x) {
  main_diagonal <- x[1, 1] * x[2, 2]
  off_diagonal <- x[1, 2] * x[2, 1]
  main_diagonal - off_diagonal
}

# Determinant of a square matrix by cofactor (Laplace) expansion along the
# first row.
#
# x: square numeric matrix.
# Returns the determinant as a single number; an empty (0 x 0) matrix gives 1,
# the value of the empty product.
# Stops with an error when `x` is not a square matrix.
#
# The recursion expands every minor in turn, so the work grows factorially
# with the matrix size; it is meant for small matrices only.
custom_determinant <- function(x) {
  if (!is.matrix(x) || nrow(x) != ncol(x)) {
    stop("x must be a square matrix", call. = FALSE)
  }
  n <- nrow(x)
  if (n == 0) {
    return(1)
  }
  if (n == 1) {
    return(x[1, 1])
  }
  res <- 0
  cofactor_sign <- 1
  for (k in seq_len(n)) {
    # Minor: remove the first row and column k. drop = FALSE keeps a 1 x 1
    # result as a matrix so the recursive call can still index it.
    minor <- x[-1, -k, drop = FALSE]
    res <- res + cofactor_sign * x[1, k] * custom_determinant(minor)
    # Signs alternate +, -, +, ... along the first row.
    cofactor_sign <- -cofactor_sign
  }
  res
}

# Determinant of the covariance matrix S computed above.
custom_determinant(S)
```

Да се најде $tr(\boldsymbol{S})$

```{r}
# Trace of a square matrix: the sum of its main-diagonal entries.
#
# x: square numeric matrix.
# Returns the trace; 0 for an empty (0 x 0) matrix.
# Stops with an error when `x` is not a square matrix.
custom_tr <- function(x) {
  if (!is.matrix(x) || nrow(x) != ncol(x)) {
    stop("x must be a square matrix", call. = FALSE)
  }
  total <- 0
  # seq_len() yields no iterations for a 0 x 0 matrix, unlike 1:n.
  for (k in seq_len(nrow(x))) {
    total <- total + x[k, k]
  }
  total
}

# Trace of the covariance matrix S computed above.
custom_tr(S)
```

# [3.20]

Податоците од табелата 3.6 се:

```{r}
# Values from table 3.6, stored row by row: each row of five numbers starts
# with the row number (1-20), followed by y1, y2, y3, y4.
data <- c(
  1, 47.8, 48.8, 49.0, 49.7,
  2, 46.4, 47.3, 47.7, 48.4,
  3, 46.3, 46.8, 47.8, 48.5,
  4, 45.1, 45.3, 46.1, 47.2,
  5, 47.6, 48.5, 48.9, 49.3,
  6, 52.5, 53.2, 53.3, 53.7,
  7, 51.2, 53.0, 54.3, 54.5,
  8, 49.8, 50.0, 50.3, 52.7,
  9, 48.1, 50.8, 52.3, 54.4,
  10, 45.0, 47.0, 47.3, 48.3,
  11, 51.2, 51.4, 51.6, 51.9,
  12, 48.5, 49.2, 53.0, 55.5,
  13, 52.1, 52.8, 53.7, 55.0,
  14, 48.2, 48.9, 49.3, 49.8,
  15, 49.6, 50.4, 51.2, 51.8,
  16, 50.7, 51.7, 52.7, 53.3,
  17, 47.2, 47.7, 48.4, 49.5,
  18, 53.3, 54.6, 55.1, 55.3,
  19, 46.2, 47.5, 48.1, 48.4,
  20, 46.3, 47.6, 51.3, 51.8
)
# Every fifth value belongs to the same column; the leading row number
# (position 1 modulo 5) is skipped.
y1 <- data[seq_along(data) %% 5 == 2]
y2 <- data[seq_along(data) %% 5 == 3]
y3 <- data[seq_along(data) %% 5 == 4]
y4 <- data[seq_along(data) %% 5 == 0]
data.frame(y1, y2, y3, y4)
```

Новите податоци се:

```{r}
# Three linear combinations of y1..y4, evaluated element-wise for every
# observation.
z1 <- 2 * y1 + 3 * y2 - y3 + 4 * y4
z2 <- -2 * y1 - y2 + 4 * y3 - 2 * y4
z3 <- 3 * y1 - 2 * y2 - y3 + 3 * y4
data.frame(z1, z2, z3)
```

Матрицата $\boldsymbol{A}$ со која се помножени $\boldsymbol{y}$-ците е:

```{r}
# Coefficient matrix of the transformation: row j holds the weights applied to
# (y1, y2, y3, y4) to obtain z_j.
A <- matrix(
  data = c(
    2, 3, -1, 4,
    -2, -1, 4, -2,
    3, -2, -1, 3
  ),
  nrow = 3,
  ncol = 4,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  byrow = TRUE
)
A
```


Да се најде $\bar{\boldsymbol{z}}$

```{r}
# Mean vector of the original variables y1..y4.
vars <- list(y1, y2, y3, y4)
y_mean <- vapply(vars, custom_mean, numeric(1))
# Mean of the transformed variables: A times the mean vector of y.
z_mean <- A %*% y_mean
z_mean
```

Да се најде $\boldsymbol{S}_z$

```{r}
# Covariance matrix of y1..y4 (rows and columns in the order of `vars`),
# built column by column from the pairwise covariances.
S <- vapply(
  vars,
  function(col_var) {
    vapply(vars, function(row_var) custom_cov(row_var, col_var), numeric(1))
  },
  numeric(length(vars))
)
# Covariance matrix of the transformed variables: A S A'.
S_z <- A %*% S %*% t(A)
S_z
```

Да се најде $\boldsymbol{R}_z$

```{r}
# Diagonal matrix holding the standard deviations of z1..z3 (square roots of
# the diagonal of S_z).
D_z <- diag(sqrt(diag(S_z)))
D_z_inv <- solve(D_z)
# Correlation matrix of z: S_z scaled on both sides by the inverse of D_z.
R_z <- D_z_inv %*% S_z %*% D_z_inv
R_z
```
