Practice Worksheet - 1

Question 2

# Question 2 data: ten paired observations on X1 and X2.
X1 <- c(1, 2, 3, 3, 4, 5, 6, 8, 9, 11)
X2 <- c(
  18.95, 19, 17.95, 15.54, 14,
  12.95, 8.94, 7.49, 6, 3.99
)

# Scatterplot of X2 (vertical axis) against X1 (horizontal axis),
# drawn with solid points.
plot(x = X1, y = X2, pch = 19)
plot of chunk unnamed-chunk-1
# Dot plot of X1 (marginal distribution of the first variable)
dotchart(X1, pch=19)
plot of chunk unnamed-chunk-1
# Dot plot of X2 (marginal distribution of the second variable)
dotchart(X2, pch=19)
plot of chunk unnamed-chunk-1

The sign of the sample covariance is negative: in the scatterplot, X2 tends to decrease as X1 increases.

# Sample mean of X1 (arithmetic average of the ten observations)
mean(X1)
[1] 5.2
# Sample mean of X2 (arithmetic average of the ten observations)
mean(X2)
[1] 12.481
# Sample variance of X1 (var() divides by n - 1, not n)
var(X1)
[1] 10.62222
# Sample variance of X2 (var() divides by n - 1, not n)
var(X2)
[1] 30.85437
# Sample covariance of X1 and X2 (cov() divides by n - 1, not n)
cov(X1, X2)
[1] -17.71022
# Sample (Pearson) correlation of X1 and X2,
# i.e. cov(X1, X2) / (sd(X1) * sd(X2))
cor(X1, X2)
[1] -0.9782684

X1 and X2 have a strong negative correlation (r ≈ -0.98). That is, large values of X1 tend to occur with small values of X2, and vice versa.

# Data matrix: one row per observation, one column per variable (X1, X2).
# NOTE(review): the name `data` masks utils::data() for the rest of the
# session; harmless here, but a more specific name would be clearer.
data <- cbind(X1, X2)

# Sample mean vector (column-wise means of the data matrix)
colMeans(data)
    X1     X2 
 5.200 12.481 
# Sample variance-covariance matrix
# (variances on the diagonal, covariances off the diagonal)
cov(data)
          X1        X2
X1  10.62222 -17.71022
X2 -17.71022  30.85437
# Sample correlation matrix
# (ones on the diagonal, pairwise correlations off the diagonal)
cor(data)
           X1         X2
X1  1.0000000 -0.9782684
X2 -0.9782684  1.0000000

Question 4

# Question 4 data: eight paired measurements on X1 and X2.
X1 <- c(
  -6, -3, -2, 1,
  2, 5, 6, 8
)
X2 <- c(
  -2, -3, 1, -1,
  2, 1, 5, 3
)

# Scatterplot of X2 (vertical axis) against X1 (horizontal axis),
# drawn with solid points.
plot(x = X1, y = X2, pch = 19)
plot of chunk unnamed-chunk-4
# Data matrix: rows are the eight measurements, columns are X1 and X2
data <- cbind(X1, X2)

# Euclidean distance between each pair of measurements (rows of the data
# matrix); dist() prints the lower triangle of the distance matrix
dist(data, method="euclidean")
          1         2         3         4         5         6         7
2  3.162278                                                            
3  5.000000  4.123106                                                  
4  7.071068  4.472136  3.605551                                        
5  8.944272  7.071068  4.123106  3.162278                              
6 11.401754  8.944272  7.000000  4.472136  3.162278                    
7 13.892444 12.041595  8.944272  7.810250  5.000000  4.123106          
8 14.866069 12.529964 10.198039  8.062258  6.082763  3.605551  2.828427
# Mahalanobis distance between each measurement and the mean vector of the data,
# using the sample covariance matrix of the data.
# mahalanobis() returns squared distances, so the square root is taken.
sqrt(mahalanobis(data, center=colMeans(data), cov=cov(data)))
[1] 1.5604666 1.4653827 1.3153760 1.0130664 0.6336689 1.1458979 1.6962688
[8] 1.4384485
# Mahalanobis distance between (-2,1) and (6,5),
# i.e. rows 3 and 7 of the data matrix
P <- data[3, ]
Q <- data[7, ]
# Q is passed as the "center", so the distance is measured from P to Q using
# the sample covariance of the data; mahalanobis() returns the squared
# distance, hence the sqrt().
sqrt(mahalanobis(P, center=Q, cov=cov(data)))
[1] 1.677988

Question 5

# Question 5 data: five measurements on three variables.
X1 <- c(9, 2, 6, 5, 8)
X2 <- c(12, 8, 6, 4, 10)
X3 <- c(3, 4, 0, 2, 1)

# Data matrix: one row per measurement, one named column per variable.
data <- cbind(
  X1 = X1,
  X2 = X2,
  X3 = X3
)

# Sample mean vector (column-wise means of the data matrix)
colMeans(data)
X1 X2 X3 
 6  8  2 
# Sample variance-covariance matrix
# (variances on the diagonal, covariances off the diagonal)
cov(data)
      X1   X2    X3
X1  7.50  5.0 -1.75
X2  5.00 10.0  1.50
X3 -1.75  1.5  2.50
# Sample correlation matrix
# (ones on the diagonal, pairwise correlations off the diagonal)
cor(data)
           X1        X2         X3
X1  1.0000000 0.5773503 -0.4041452
X2  0.5773503 1.0000000  0.3000000
X3 -0.4041452 0.3000000  1.0000000
# Euclidean distance between each pair of measurements (rows of the data
# matrix); dist() prints the lower triangle of the distance matrix
dist(data, method="euclidean")
         1        2        3        4
2 8.124038                           
3 7.348469 6.000000                  
4 9.000000 5.385165 3.000000         
5 3.000000 7.000000 4.582576 6.782330
# Mahalanobis distance between each measurement and the mean vector of the data,
# using the sample covariance matrix of the data.
# mahalanobis() returns squared distances, so the square root is taken.
sqrt(mahalanobis(data, center=colMeans(data), cov=cov(data)))
[1] 1.609423 1.788854 1.415937 1.726409 1.106522
# Mahalanobis distance between (9,12,3) and (5,4,2),
# i.e. rows 1 and 4 of the data matrix
P <- data[1, ]
Q <- data[4, ]
# Q is passed as the "center", so the distance is measured from P to Q using
# the sample covariance of the data; mahalanobis() returns the squared
# distance, hence the sqrt().
sqrt(mahalanobis(P, center=Q, cov=cov(data)))
[1] 2.537523