Multivariate Statistical Analysis

Introduction

Intro

################################################### The following is the code for Introdution to R ##
2 + 5

[1] 7

3/5

[1] 0.6

[1] 35

1 - 3 * 4

[1] -11

(1 - 3) * 4

[1] -8

# Function-name(arguments)
sqrt(40)  # the square root function

[1] 6.324555

# squareroot(40) # R does not recognize a function named ``squareroot'
exp(1)  # the exp function: exp(x)= e^x

[1] 2.718282

log(14)  # the log base-e function (natural log, or ln)

[1] 2.639057

# Log(14) # R is case-sensitive
abs(-0.43)  # absolute value

[1] 0.43

floor(3.4)  # round down

[1] 3

ceiling(3.4)  # round up

[1] 4

x <- 10  # the value 10 is assigned to an object named x.
x  # call the value of x.

[1] 10

y = 2 * x  # assign a new object name y with twice value of x.
y

[1] 20

x = x + 5  # replace  x  with the value x+5.
x

[1] 15

x = c(5, 3, 6, 4, 6, 7, 1)
x

[1] 5 3 6 4 6 7 1

x[2]  # Extract the 2nd element

[1] 3

2:4  # Create an integer sequence from 2 to 4

[1] 2 3 4

x[2:4]  # Extract the 2nd through 4th elements

[1] 3 6 4

x[c(2, 4, 6)]  # Extract the 2nd, 4th, 6th elements

[1] 3 4 7

x[-2]  # Extract all but the 2nd elements

[1] 5 6 4 6 7 1

-2:-4

[1] -2 -3 -4

x[-2:-4]  # Extract all but the 2nd through 4th elements

[1] 5 6 7 1

x[2] = 10  # Assign 10 to the 2nd element
x

[1]  5 10  6  4  6  7  1

x[c(1, 3, 6)] = c(10, 30, 60)  # How many elements does x have?
x

[1] 10 10 30  4  6 60  1

x = 3:6
y = 2 * x  # Functions apply to each element of a vector at the same time.
y

[1]  6  8 10 12

seq(from = 0, to = 6, by = 2)  # a function has multiple arguments.

[1] 0 2 4 6

# What are the first three argument in the seq() function?
seq(0, 6, 2)  # R expects a certain order for function arguments

[1] 0 2 4 6

# seq(6,0,2) # R could not read our mind.
seq(to = 6, from = 0, by = 2)

[1] 0 2 4 6

z = seq(from = 0, to = 6, by = 2)
x * z  # Entry-wise multiply

[1]  0  8 20 36

x^z  # Entry-wise exponetial

[1]     1    16   625 46656

x %*% y  # Dot (inner) product of two vectors.

     [,1]
[1,]  172

1:100  # gives an answer that is several lines long

  [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
 [18]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34
 [35]  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
 [52]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68
 [69]  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
 [86]  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100

gpa.a = c(3.13, 3.55, 2.92, 2.73, 3, 3.18, 2.66, 3.76)
gpa.b = c(3.14, 3.13, 3.25, 2.93, 3.73, 3.5, 2.7)
all.gpa = c(gpa.a, gpa.b)  # Combine (stack) data vectors
all.gpa

 [1] 3.13 3.55 2.92 2.73 3.00 3.18 2.66 3.76 3.14 3.13 3.25 2.93 3.73 3.50
[15] 2.70

sum(all.gpa)  # Returns the sum of vector elements

[1] 47.31

length(all.gpa)  # Returns the vector length (i.e., n)

[1] 15

mean(all.gpa)  # Returns the vector mean

[1] 3.154

var(all.gpa)  # Returns the sample variance

[1] 0.1246686

sd(all.gpa)  # Returns the sample standard deviation

[1] 0.3530844

min(all.gpa)  # Returns the minimum value

[1] 2.66

max(all.gpa)  # Returns the maximum value

[1] 3.76

sort(all.gpa)  # Returns the sorted vector elements

 [1] 2.66 2.70 2.73 2.92 2.93 3.00 3.13 3.13 3.14 3.18 3.25 3.50 3.55 3.73
[15] 3.76

summary(all.gpa)  # Returns  the five number summary and mean

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  2.660   2.925   3.130   3.154   3.375   3.760

Grade = c("freshman", "sophomore", "junior", "senior")
Grade

[1] "freshman"  "sophomore" "junior"    "senior"

grade = c("1", "2", "3", "4")  # R is case-sensitive.
grade

[1] "1" "2" "3" "4"

Grade1 = c("freshman", "sophomore", "junior", 4)
Grade1

[1] "freshman"  "sophomore" "junior"    "4"

video = c(47, 63, 58, 53, 53, 63, 53, 39, 58, 50)
video > 55

 [1] FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE

video == 55

 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

sum(video > 55)

[1] 4

which(video > 55)

[1] 2 3 6 9

video[video > 55]

[1] 63 58 63 58

mean(video[video > 55])

[1] 60.5

iris

    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
1            5.1         3.5          1.4         0.2     setosa
2            4.9         3.0          1.4         0.2     setosa
3            4.7         3.2          1.3         0.2     setosa
4            4.6         3.1          1.5         0.2     setosa
5            5.0         3.6          1.4         0.2     setosa
6            5.4         3.9          1.7         0.4     setosa
7            4.6         3.4          1.4         0.3     setosa
8            5.0         3.4          1.5         0.2     setosa
9            4.4         2.9          1.4         0.2     setosa
10           4.9         3.1          1.5         0.1     setosa
11           5.4         3.7          1.5         0.2     setosa
12           4.8         3.4          1.6         0.2     setosa
13           4.8         3.0          1.4         0.1     setosa
14           4.3         3.0          1.1         0.1     setosa
15           5.8         4.0          1.2         0.2     setosa
16           5.7         4.4          1.5         0.4     setosa
17           5.4         3.9          1.3         0.4     setosa
18           5.1         3.5          1.4         0.3     setosa
19           5.7         3.8          1.7         0.3     setosa
20           5.1         3.8          1.5         0.3     setosa
21           5.4         3.4          1.7         0.2     setosa
22           5.1         3.7          1.5         0.4     setosa
23           4.6         3.6          1.0         0.2     setosa
24           5.1         3.3          1.7         0.5     setosa
25           4.8         3.4          1.9         0.2     setosa
26           5.0         3.0          1.6         0.2     setosa
27           5.0         3.4          1.6         0.4     setosa
28           5.2         3.5          1.5         0.2     setosa
29           5.2         3.4          1.4         0.2     setosa
30           4.7         3.2          1.6         0.2     setosa
31           4.8         3.1          1.6         0.2     setosa
32           5.4         3.4          1.5         0.4     setosa
33           5.2         4.1          1.5         0.1     setosa
34           5.5         4.2          1.4         0.2     setosa
35           4.9         3.1          1.5         0.2     setosa
36           5.0         3.2          1.2         0.2     setosa
37           5.5         3.5          1.3         0.2     setosa
38           4.9         3.6          1.4         0.1     setosa
39           4.4         3.0          1.3         0.2     setosa
40           5.1         3.4          1.5         0.2     setosa
41           5.0         3.5          1.3         0.3     setosa
42           4.5         2.3          1.3         0.3     setosa
43           4.4         3.2          1.3         0.2     setosa
44           5.0         3.5          1.6         0.6     setosa
45           5.1         3.8          1.9         0.4     setosa
46           4.8         3.0          1.4         0.3     setosa
47           5.1         3.8          1.6         0.2     setosa
48           4.6         3.2          1.4         0.2     setosa
49           5.3         3.7          1.5         0.2     setosa
50           5.0         3.3          1.4         0.2     setosa
51           7.0         3.2          4.7         1.4 versicolor
52           6.4         3.2          4.5         1.5 versicolor
53           6.9         3.1          4.9         1.5 versicolor
54           5.5         2.3          4.0         1.3 versicolor
55           6.5         2.8          4.6         1.5 versicolor
56           5.7         2.8          4.5         1.3 versicolor
57           6.3         3.3          4.7         1.6 versicolor
58           4.9         2.4          3.3         1.0 versicolor
59           6.6         2.9          4.6         1.3 versicolor
60           5.2         2.7          3.9         1.4 versicolor
61           5.0         2.0          3.5         1.0 versicolor
62           5.9         3.0          4.2         1.5 versicolor
63           6.0         2.2          4.0         1.0 versicolor
64           6.1         2.9          4.7         1.4 versicolor
65           5.6         2.9          3.6         1.3 versicolor
66           6.7         3.1          4.4         1.4 versicolor
67           5.6         3.0          4.5         1.5 versicolor
68           5.8         2.7          4.1         1.0 versicolor
69           6.2         2.2          4.5         1.5 versicolor
70           5.6         2.5          3.9         1.1 versicolor
71           5.9         3.2          4.8         1.8 versicolor
72           6.1         2.8          4.0         1.3 versicolor
73           6.3         2.5          4.9         1.5 versicolor
74           6.1         2.8          4.7         1.2 versicolor
75           6.4         2.9          4.3         1.3 versicolor
76           6.6         3.0          4.4         1.4 versicolor
77           6.8         2.8          4.8         1.4 versicolor
78           6.7         3.0          5.0         1.7 versicolor
79           6.0         2.9          4.5         1.5 versicolor
80           5.7         2.6          3.5         1.0 versicolor
81           5.5         2.4          3.8         1.1 versicolor
82           5.5         2.4          3.7         1.0 versicolor
83           5.8         2.7          3.9         1.2 versicolor
84           6.0         2.7          5.1         1.6 versicolor
85           5.4         3.0          4.5         1.5 versicolor
86           6.0         3.4          4.5         1.6 versicolor
87           6.7         3.1          4.7         1.5 versicolor
88           6.3         2.3          4.4         1.3 versicolor
89           5.6         3.0          4.1         1.3 versicolor
90           5.5         2.5          4.0         1.3 versicolor
91           5.5         2.6          4.4         1.2 versicolor
92           6.1         3.0          4.6         1.4 versicolor
93           5.8         2.6          4.0         1.2 versicolor
94           5.0         2.3          3.3         1.0 versicolor
95           5.6         2.7          4.2         1.3 versicolor
96           5.7         3.0          4.2         1.2 versicolor
97           5.7         2.9          4.2         1.3 versicolor
98           6.2         2.9          4.3         1.3 versicolor
99           5.1         2.5          3.0         1.1 versicolor
100          5.7         2.8          4.1         1.3 versicolor
101          6.3         3.3          6.0         2.5  virginica
102          5.8         2.7          5.1         1.9  virginica
103          7.1         3.0          5.9         2.1  virginica
104          6.3         2.9          5.6         1.8  virginica
105          6.5         3.0          5.8         2.2  virginica
106          7.6         3.0          6.6         2.1  virginica
107          4.9         2.5          4.5         1.7  virginica
108          7.3         2.9          6.3         1.8  virginica
109          6.7         2.5          5.8         1.8  virginica
110          7.2         3.6          6.1         2.5  virginica
111          6.5         3.2          5.1         2.0  virginica
112          6.4         2.7          5.3         1.9  virginica
113          6.8         3.0          5.5         2.1  virginica
114          5.7         2.5          5.0         2.0  virginica
115          5.8         2.8          5.1         2.4  virginica
116          6.4         3.2          5.3         2.3  virginica
117          6.5         3.0          5.5         1.8  virginica
118          7.7         3.8          6.7         2.2  virginica
119          7.7         2.6          6.9         2.3  virginica
120          6.0         2.2          5.0         1.5  virginica
121          6.9         3.2          5.7         2.3  virginica
122          5.6         2.8          4.9         2.0  virginica
123          7.7         2.8          6.7         2.0  virginica
124          6.3         2.7          4.9         1.8  virginica
125          6.7         3.3          5.7         2.1  virginica
126          7.2         3.2          6.0         1.8  virginica
127          6.2         2.8          4.8         1.8  virginica
128          6.1         3.0          4.9         1.8  virginica
129          6.4         2.8          5.6         2.1  virginica
130          7.2         3.0          5.8         1.6  virginica
131          7.4         2.8          6.1         1.9  virginica
132          7.9         3.8          6.4         2.0  virginica
133          6.4         2.8          5.6         2.2  virginica
134          6.3         2.8          5.1         1.5  virginica
135          6.1         2.6          5.6         1.4  virginica
136          7.7         3.0          6.1         2.3  virginica
137          6.3         3.4          5.6         2.4  virginica
138          6.4         3.1          5.5         1.8  virginica
139          6.0         3.0          4.8         1.8  virginica
140          6.9         3.1          5.4         2.1  virginica
141          6.7         3.1          5.6         2.4  virginica
142          6.9         3.1          5.1         2.3  virginica
143          5.8         2.7          5.1         1.9  virginica
144          6.8         3.2          5.9         2.3  virginica
145          6.7         3.3          5.7         2.5  virginica
146          6.7         3.0          5.2         2.3  virginica
147          6.3         2.5          5.0         1.9  virginica
148          6.5         3.0          5.2         2.0  virginica
149          6.2         3.4          5.4         2.3  virginica
150          5.9         3.0          5.1         1.8  virginica

ncol(iris)  # number of variables (column)

[1] 5

nrow(iris)  # number of objects (row)

[1] 150

head(iris)  # first 6 objects.

  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa

# data()

# home laptop loading ice =
# read.table('C:/Users/poster/Desktop/data/icecream.txt', header=TRUE,
# sep='') work pc loading
ice = read.table("C:/Users/poster/Desktop/data/icecream.txt")
head(ice)

   id female ice_cream video puzzle
1  70      0         2    47     57
2 121      1         1    63     61
3  86      0         3    58     31
4 141      0         3    53     56
5 172      0         1    53     61
6 113      0         1    63     61

colnames(ice)

[1] "id"        "female"    "ice_cream" "video"     "puzzle"

write.csv(ice, "icecream.csv", row.names = FALSE)
ice1 = read.csv("icecream.csv")
head(ice1)

   id female ice_cream video puzzle
1  70      0         2    47     57
2 121      1         1    63     61
3  86      0         3    58     31
4 141      0         3    53     56
5 172      0         1    53     61
6 113      0         1    63     61

dim(ice)  # Check dimensions of the data frame ice

[1] 200   5

names(ice)  # List the name of the variables in the data frame

[1] "id"        "female"    "ice_cream" "video"     "puzzle"

ice[2, ]  # select the second row only

   id female ice_cream video puzzle
2 121      1         1    63     61

ice[, 4]  # select the fourth column only

  [1] 47 63 58 53 53 63 53 39 58 50 53 63 61 55 31 50 50 58 55 53 66 72 55
 [24] 61 39 39 61 58 39 55 47 64 66 72 61 61 66 66 36 39 42 58 55 50 63 69
 [47] 49 63 53 47 57 47 50 55 69 26 33 56 58 44 58 69 34 36 36 50 55 42 65
 [70] 44 39 58 63 74 58 45 49 63 39 42 55 61 66 63 44 63 53 42 34 61 47 66
 [93] 69 44 47 63 66 69 39 61 69 66 33 50 61 42 50 51 50 58 61 39 46 59 55
[116] 42 55 58 58 39 50 50 39 48 34 58 44 50 47 29 50 54 50 47 44 67 58 44
[139] 42 44 44 50 39 44 53 48 55 44 40 34 42 58 50 53 58 55 54 47 42 61 53
[162] 51 63 61 55 40 61 47 55 53 50 47 31 61 35 54 55 53 58 56 50 39 63 50
[185] 66 58 53 42 55 53 42 50 55 34 50 42 36 55 58 53

ice[63, 4]  # selects the entry in the 63rd row, 4th column

[1] 34

ice[c(1, 7), ]  # selects rows 1 and 7

  id female ice_cream video puzzle
1 70      0         2    47     57
7 50      0         1    53     61

ice[80:85, ]  # selects rows 80 through 85

    id female ice_cream video puzzle
80  14      0         3    42     56
81 127      0         3    55     56
82 165      0         2    61     36
83 174      0         1    66     56
84   3      0         2    63     56
85  58      0         1    44     41

ice[, -c(1, 2)]  # select all data except columns 1 and 2

    ice_cream video puzzle
1           2    47     57
2           1    63     61
3           3    58     31
4           3    53     56
5           1    53     61
6           1    63     61
7           1    53     61
8           1    39     36
9           1    58     51
10          1    50     51
11          1    53     61
12          1    63     61
13          3    61     71
14          3    55     46
15          2    31     56
16          2    50     56
17          3    50     56
18          1    58     56
19          3    55     61
20          1    53     46
21          1    66     41
22          1    72     66
23          1    55     56
24          3    61     61
25          1    39     46
26          1    39     31
27          3    61     66
28          1    58     46
29          3    39     46
30          2    55     41
31          1    47     51
32          3    64     61
33          3    66     71
34          1    72     31
35          3    61     61
36          3    61     66
37          1    66     66
38          3    66     66
39          2    36     36
40          1    39     51
41          1    42     51
42          1    58     51
43          1    55     51
44          2    50     41
45          3    63     66
46          2    69     46
47          3    49     47
48          1    63     51
49          1    53     46
50          1    47     51
51          1    57     56
52          3    47     41
53          1    50     46
54          1    55     71
55          1    69     66
56          3    26     42
57          2    33     32
58          1    56     46
59          1    58     41
60          1    44     51
61          1    58     61
62          3    69     66
63          2    34     46
64          1    36     36
65          3    36     61
66          1    50     26
67          1    55     66
68          2    42     26
69          2    65     44
70          1    44     36
71          1    39     51
72          3    58     61
73          1    63     66
74          1    74     66
75          1    58     51
76          2    45     31
77          3    49     61
78          3    63     66
79          1    39     46
80          3    42     56
81          3    55     56
82          2    61     36
83          1    66     56
84          2    63     56
85          1    44     41
86          3    63     66
87          3    53     56
88          3    42     56
89          1    34     31
90          3    61     56
91          1    47     46
92          2    66     46
93          3    69     61
94          2    44     48
95          2    47     51
96          2    63     51
97          1    66     56
98          3    69     71
99          2    39     41
100         3    61     61
101         3    69     66
102         3    66     61
103         2    33     41
104         3    50     51
105         1    61     51
106         1    42     56
107         3    50     56
108         2    51     33
109         2    50     56
110         3    58     71
111         1    61     56
112         2    39     51
113         3    46     66
114         2    59     56
115         3    55     66
116         1    42     41
117         3    55     46
118         3    58     66
119         2    58     56
120         2    39     51
121         2    50     51
122         1    50     56
123         1    39     56
124         1    48     46
125         2    34     46
126         3    58     61
127         1    44     56
128         1    50     41
129         3    47     46
130         2    29     26
131         1    50     56
132         2    54     56
133         1    50     51
134         1    47     46
135         2    44     66
136         3    67     66
137         1    58     46
138         1    44     56
139         1    42     41
140         1    44     61
141         2    44     51
142         3    50     52
143         1    39     51
144         1    44     41
145         1    53     66
146         3    48     61
147         1    55     31
148         1    44     51
149         2    40     41
150         2    34     41
151         2    42     46
152         3    58     56
153         1    50     51
154         1    53     61
155         1    58     66
156         1    55     71
157         1    54     61
158         1    47     61
159         1    42     41
160         3    61     66
161         2    53     61
162         1    51     58
163         3    63     31
164         2    61     61
165         1    55     61
166         2    40     31
167         1    61     61
168         2    47     36
169         1    55     41
170         2    53     37
171         2    50     43
172         1    47     61
173         3    31     39
174         3    61     51
175         2    35     51
176         2    54     66
177         1    55     71
178         1    53     41
179         3    58     36
180         2    56     51
181         1    50     51
182         1    39     51
183         3    63     61
184         1    50     61
185         3    66     56
186         2    58     71
187         1    53     51
188         1    42     36
189         3    55     61
190         2    53     66
191         2    42     41
192         3    50     41
193         1    55     56
194         3    34     51
195         1    50     56
196         1    42     56
197         1    36     46
198         1    55     52
199         1    58     61
200         3    53     61

puzzlevec = ice$puzzle
ice[puzzlevec > 61, ]

     id female ice_cream video puzzle
13   95      0         3    61     71
22  143      0         1    72     66
27  154      0         3    61     66
33  192      0         3    66     71
36  144      0         3    61     66
37  200      0         1    66     66
38   80      0         3    66     66
45   62      0         3    63     66
54  183      0         1    55     71
55  132      0         1    69     66
62  170      0         3    69     66
67  171      0         1    55     66
73   68      0         1    63     66
74  157      0         1    74     66
78  123      0         3    63     66
86  146      0         3    63     66
98  100      1         3    69     71
101  88      1         3    69     66
110 180      1         3    58     71
113 131      1         3    46     66
115  34      1         3    55     66
118  93      1         3    58     66
135  77      1         2    44     66
136  61      1         3    67     66
145 122      1         1    53     66
155  71      1         1    58     66
156 139      1         1    55     71
160  39      1         3    61     66
176 135      1         2    54     66
177  59      1         1    55     71
186  23      1         2    58     71
190  52      1         2    53     66

Good = ice[puzzlevec > 61, ]
Good

     id female ice_cream video puzzle
13   95      0         3    61     71
22  143      0         1    72     66
27  154      0         3    61     66
33  192      0         3    66     71
36  144      0         3    61     66
37  200      0         1    66     66
38   80      0         3    66     66
45   62      0         3    63     66
54  183      0         1    55     71
55  132      0         1    69     66
62  170      0         3    69     66
67  171      0         1    55     66
73   68      0         1    63     66
74  157      0         1    74     66
78  123      0         3    63     66
86  146      0         3    63     66
98  100      1         3    69     71
101  88      1         3    69     66
110 180      1         3    58     71
113 131      1         3    46     66
115  34      1         3    55     66
118  93      1         3    58     66
135  77      1         2    44     66
136  61      1         3    67     66
145 122      1         1    53     66
155  71      1         1    58     66
156 139      1         1    55     71
160  39      1         3    61     66
176 135      1         2    54     66
177  59      1         1    55     71
186  23      1         2    58     71
190  52      1         2    53     66

Lec 1

# Multivariate data Two-dimensional scatter plot matrix
head(iris)

  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa

hist(iris$Sepal.Length)
rug(iris$Sepal.Length)

colors = brewer.pal(nlevels(iris$Species), "Dark2")
scatterplotMatrix(iris[1:4], groups = iris[, 5], smoother = FALSE, reg.line = FALSE, 
    legend.plot = FALSE, oma = c(2, 2, 7, 2), col = colors)

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter

Warning in plot.window(...): "smoother" is not a graphical parameter

Warning in plot.window(...): "reg.line" is not a graphical parameter

Warning in plot.window(...): "legend.plot" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter

Warning in title(...): "smoother" is not a graphical parameter

Warning in title(...): "reg.line" is not a graphical parameter

Warning in title(...): "legend.plot" is not a graphical parameter

legend("top", legend = levels(iris[, 5]), pch = 1:3, col = colors, horiz = TRUE, 
    bty = "n", cex = 1.5, xpd = TRUE)

# pairs(iris[1:4], main = 'Anderson's Iris Data -- 3 species', pch = 21, bg
# = c('red', 'green3', 'blue')[unclass(iris$Species)])

# 3-D plot

with(iris, scatterplot3d(Sepal.Width, Sepal.Length, Petal.Length, color = colors[Species], 
    type = "h", pch = 20))

# Star plot
stars(iris[-5], col.lines = colors[iris$Species], locations = 0:1, key.loc = 0:1)

irs2 = iris[c(1:10, 51:60, 101:110), ]
# stars(irs2[-5],col.lines=colors[irs2$Species],locations=0:1,key.loc=0:1)

# Face plot

data = irs2
faces(data[1:30, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")], 
    face.type = 1, scale = TRUE, labels = data$Species, plot.faces = TRUE, nrow.plot = 5, 
    ncol.plot = 5)

effect of variables:
 modified item       Var           
 "height of face   " "Sepal.Length"
 "width of face    " "Sepal.Width" 
 "structure of face" "Petal.Length"
 "height of mouth  " "Petal.Width" 
 "width of mouth   " "Sepal.Length"
 "smiling          " "Sepal.Width" 
 "height of eyes   " "Petal.Length"
 "width of eyes    " "Petal.Width" 
 "height of hair   " "Sepal.Length"
 "width of hair   "  "Sepal.Width" 
 "style of hair   "  "Petal.Length"
 "height of nose  "  "Petal.Width" 
 "width of nose   "  "Sepal.Length"
 "width of ear    "  "Sepal.Width" 
 "height of ear   "  "Petal.Length"

# classification tree ctree method

# required package by partykit

tptree = ctree(iris$Species ~ ., data = iris)
table(predict(tptree), iris$Species)

            
             setosa versicolor virginica
  setosa         50          0         0
  versicolor      0         49         5
  virginica       0          1        45

plot(tptree)

# rpart method(pretty plots)library(grid)

gtree_rpart = rpart(iris$Species ~ ., data = iris)
gtree_rpart_cp = rpart(iris$Species ~ ., data = iris, control = rpart.control(cp = 0.005))
print(gtree_rpart)

n= 150 

node), split, n, loss, yval, (yprob)
      * denotes terminal node

1) root 150 100 setosa (0.33333333 0.33333333 0.33333333)  
  2) Petal.Length< 2.45 50   0 setosa (1.00000000 0.00000000 0.00000000) *
  3) Petal.Length>=2.45 100  50 versicolor (0.00000000 0.50000000 0.50000000)  
    6) Petal.Width< 1.75 54   5 versicolor (0.00000000 0.90740741 0.09259259) *
    7) Petal.Width>=1.75 46   1 virginica (0.00000000 0.02173913 0.97826087) *

rpart.plot(gtree_rpart, compress = TRUE)

Lec 3

########### Density function and Its Contours
X <- seq(-1, 5, length.out = 100)
Y <- seq(2, 8, length.out = 100)
dens <- matrix(dmvnorm(expand.grid(X, Y), mean = c(2, 5), sigma = rbind(c(1, 
    0.5), c(0.5, 1))), ncol = 100)

persp(X, Y, dens, main = "Density Surface", box = FALSE)

s3d <- scatterplot3d(X, Y, seq(min(dens), max(dens), length = length(X)), type = "n", 
    grid = FALSE, angle = 70, zlab = "expression(f(x, y))", xlab = expression(X), 
    ylab = expression(Y), main = "Density Surface")
for (i in length(X):1) s3d$points3d(rep(X[i], length(Y)), Y, dens[i, ], type = "l")
for (i in length(Y):1) s3d$points3d(X, rep(Y[i], length(X)), dens[, i], type = "l")

contour(X, Y, dens)

########### Simulated Data
Xbvn <- rmvnorm(100, c(2, 5), matrix(c(1, 0.5, 0.5, 1), nrow = 2))
dataEllipse(Xbvn, xlim = c(-2, 6), ylim = c(2, 10), levels = c(0.5, 0.75, 0.95), 
    ellipse.label = c(0.5, 0.75, 0.95), pch = 20)

########### Example use sataset stiff
stiff <- read.table("C:/Users/poster/Desktop/data/t4-3.dat", header = F)[, 1:4]
head(stiff)

    V1   V2   V3   V4
1 1889 1651 1561 1778
2 2403 2048 2087 2197
3 2119 1700 1815 2222
4 1645 1627 1110 1533
5 1976 1916 1614 1883
6 1712 1712 1439 1546

colnames(stiff) <- c("x1", "x2", "x3", "x4")
attach(stiff)

########### Univariate normality
par(mfrow = c(2, 2))
hist(x1)
hist(x4)
qqnorm(x1)
qqline(x1)  # Q-Q plot of x1
qqnorm(x4)
qqline(x4)  # Q-Q plot of x4

shapiro.test(x1)


    Shapiro-Wilk normality test

data:  x1
W = 0.93068, p-value = 0.05118

shapiro.test(x2)


    Shapiro-Wilk normality test

data:  x2
W = 0.91274, p-value = 0.01746

shapiro.test(x3)


    Shapiro-Wilk normality test

data:  x3
W = 0.93258, p-value = 0.05751

shapiro.test(x4)


    Shapiro-Wilk normality test

data:  x4
W = 0.96127, p-value = 0.3337

########### Bivariate normality
pairs(stiff)  #Scatter Plot Matrix

# Bivariate Percentage
par(mfrow = c(1, 1))
plot(x1, x4, pch = 20)
quan_chisq = ellipse(cor(stiff[, c(1, 4)]), scale = c(sd(x1), sd(x4)), center = c(0, 
    0), level = 0.5, which = c(1, 2), npoints = 100)
lines(quan_chisq[, 1] + mean(x1), quan_chisq[, 2] + mean(x4), col = "blue", 
    lwd = 2)

m_dist14 <- mahalanobis(stiff[, c(1, 4)], colMeans(stiff[, c(1, 4)]), cov(stiff[, 
    c(1, 4)]))  # Mahalanobis distance
sum(m_dist14 <= qchisq(0.5, 2))/30  # 60% of Mahalanobis distance <= 50th quantile of chi-square

[1] 0.6

########### Chi-square plot
m_dist <- mahalanobis(stiff, colMeans(stiff), cov(stiff))  # Mahalanobis distance

sum(m_dist <= qchisq(0.05, 4))

[1] 3

par(mar = c(5, 5.5, 4, 2) + 0.1)  # by hand calculation
plot(sort(m_dist), qchisq(((1:30) - 0.5)/30, 4), xlab = expression(d[j]^2), 
    ylab = expression(paste(chi[4]^2, "( ", frac(j - 0.5, 30), " )")), pch = 20)
abline(a = 0, b = 1, lwd = 2)

# Directly by function mvn()
mvn(stiff, mvnTest = "royston", multivariatePlot = "qq")

$multivariateNormality
     Test        H     p value MVN
1 Royston 9.823858 0.009534665  NO

$univariateNormality
          Test  Variable Statistic   p value Normality
1 Shapiro-Wilk    x1        0.9307    0.0512    YES   
2 Shapiro-Wilk    x2        0.9127    0.0175    NO    
3 Shapiro-Wilk    x3        0.9326    0.0575    YES   
4 Shapiro-Wilk    x4        0.9613    0.3337    YES   

$Descriptives
    n     Mean  Std.Dev Median  Min  Max    25th    75th      Skew
x1 30 1906.100 324.9866 1863.0 1325 2983 1715.25 2057.25 1.0380842
x2 30 1749.533 318.6065 1680.0 1170 2794 1595.50 1888.75 1.1435912
x3 30 1509.133 303.1783 1466.0 1002 2412 1295.75 1623.75 0.9800274
x4 30 1724.967 322.8436 1674.5 1176 2581 1520.25 1880.75 0.5978431
      Kurtosis
x1  2.03586397
x2  1.94986381
x3  0.99683699
x4 -0.04626509

########### Outliers

# Standardized Value
print(data.frame(scale(x1), scale(x2), scale(x3), scale(x4)))

     scale.x1.  scale.x2.   scale.x3.   scale.x4.
1  -0.05261755 -0.3092634  0.17107644  0.16426945
2   1.52898605  0.9367877  1.90602908  1.46211166
3   0.65510390 -0.1554687  1.00886726  1.53954854
4  -0.80341770 -0.3845914 -1.31649701 -0.59461204
5   0.21508578  0.5224835  0.34589106  0.48950437
6  -0.59725537 -0.1178047 -0.23132702 -0.55434486
7   0.11354314 -0.2025487 -0.78545638 -0.16716042
8   0.60894816  0.2211714  0.68562513  0.46162709
9   3.31367493  3.2782337  2.97800552  2.65154223
10 -0.49571272 -0.4693354 -0.41273841 -0.67204892
11 -0.60340947 -0.4975834  0.02924572 -0.17955033
12  0.43047927  0.4942355  0.38877012  0.53596650
13 -0.20339299  0.2870835  0.28322167  0.04966286
14 -0.12031265 -0.2025487 -0.05321401 -0.14547810
15 -0.14492905 -0.3155407 -0.39624647 -0.03396898
16  0.14739069  1.2537931 -1.08560978 -1.37517585
17 -1.78807364 -1.8189625 -1.67272303 -1.70041077
18 -1.49883096 -1.1880903 -0.84812577 -1.29154401
19 -0.24031759 -0.3626207  0.30631040  0.09302751
20 -0.55725372 -0.4881674 -0.64692404 -0.24459731
21  1.13820072  1.3793398  0.12489900  1.19572877
22 -0.02184705 -0.4253941 -0.28739963 -0.76807066
23 -0.84034230 -0.7423995 -0.72278698 -0.64726912
24  0.47663501  0.3686888  0.45143951  0.96651559
25 -0.15416020 -0.8051729 -0.50509331 -0.59461204
26 -0.55109962 -1.0594050 -0.89430321 -0.79285046
27  0.80587934  0.4597102  0.63285091  0.33772807
28 -0.77264721 -0.2339354 -0.31378674 -0.39637361
29  1.29205321  1.7308706  1.83346452  1.57671825
30 -1.28036042 -1.1535650 -0.97346455 -1.36588342

# Generalized Squared Distance--Mahalanobis distance
rank(m_dist)

 [1]  3 25 27 24  8 12 22 10 29  4 11  2 16  1  6 30 19 20  7  9 28 23  5
[24] 14 21 18 13 17 26 15

cbind(1:30, m_dist)[order(m_dist), ]

             m_dist
 [1,] 14  0.1295714
 [2,] 12  0.4635159
 [3,]  1  0.6000129
 [4,] 10  0.7665400
 [5,] 23  0.7962096
 [6,] 15  1.0792484
 [7,] 19  1.3632124
 [8,]  5  1.3980776
 [9,] 20  1.4649908
[10,]  8  1.4876570
[11,] 11  1.9307771
[12,]  6  2.2191409
[13,] 27  2.3816050
[14,] 24  2.5385575
[15,] 30  2.5838186
[16,] 13  2.6959024
[17,] 28  2.9951752
[18,] 26  3.3979804
[19,] 17  3.5018290
[20,] 18  3.9900603
[21,] 25  4.5767867
[22,]  7  4.9883498
[23,] 22  5.0557446
[24,]  4  5.2076098
[25,]  2  5.4770196
[26,] 29  6.2837628
[27,]  3  7.6166439
[28,] 21  9.8980384
[29,]  9 12.2647550
[30,] 16 16.8474070

mvn(stiff[-c(16, 9), ], mvnTest = "royston", multivariatePlot = "qq")  # by function mvn()

$multivariateNormality
     Test        H   p value MVN
1 Royston 1.098338 0.6271166 YES

$univariateNormality
          Test  Variable Statistic   p value Normality
1 Shapiro-Wilk    x1        0.9847    0.9439    YES   
2 Shapiro-Wilk    x2        0.9664    0.4871    YES   
3 Shapiro-Wilk    x3        0.9672    0.5070    YES   
4 Shapiro-Wilk    x4        0.9660    0.4779    YES   

$Descriptives
    n     Mean  Std.Dev Median  Min  Max    25th    75th       Skew
x1 28 1865.929 262.1619 1857.5 1325 2403 1711.50 2049.75 0.08994538
x2 28 1697.964 244.8618 1663.0 1170 2301 1593.25 1847.50 0.39091767
x3 28 1488.643 253.1536 1466.0 1002 2087 1307.25 1617.25 0.49661284
x4 28 1710.250 277.9986 1674.5 1176 2234 1528.75 1876.25 0.25921958
     Kurtosis
x1 -0.5084972
x2  0.1961808
x3  0.0516768
x4 -0.6484407

########### Transformation

trans = powerTransform(stiff, family = "yjPower")  # yjPower' family (Yeo and Johnson(2000)),
# another modifiation of the Box-Cox family that allows a few negative
# values.
summary(trans)

yjPower Transformations to Multinormality 
   Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
x1    0.0939           1      -0.9911       1.1790
x2   -0.2802           0      -1.5129       0.9525
x3    0.1478           1      -0.9585       1.2542
x4    0.7546           1      -0.5292       2.0385

 Likelihood ratio test that all transformation parameters are equal to 0
                                 LRT df    pval
LR test, lambda = (0 0 0 0) 1.980807  4 0.73929

logstiff = log(stiff)

hist(log(x1))

plot(log(x1), log(x4), pch = 20)
quan_chisq1 = ellipse(cor(logstiff[, c(1, 4)]), scale = c(sd(log(x1)), sd(log(x4))), 
    center = c(0, 0), t = sqrt(qchisq(0.5, 2)), which = c(1, 2), npoints = 100)
lines(quan_chisq1[, 1] + mean(log(x1)), quan_chisq1[, 2] + mean(log(x4)), col = "blue", 
    lwd = 2)

mvn(logstiff, mvnTest = "royston", multivariatePlot = "qq")

$multivariateNormality
     Test        H   p value MVN
1 Royston 1.652091 0.5200565 YES

$univariateNormality
          Test  Variable Statistic   p value Normality
1 Shapiro-Wilk    x1        0.9742    0.6577    YES   
2 Shapiro-Wilk    x2        0.9626    0.3612    YES   
3 Shapiro-Wilk    x3        0.9795    0.8118    YES   
4 Shapiro-Wilk    x4        0.9831    0.9005    YES   

$Descriptives
    n     Mean   Std.Dev   Median      Min      Max     25th     75th
x1 30 7.539650 0.1631929 7.529941 7.189168 8.000685 7.447309 7.629120
x2 30 7.452334 0.1721702 7.426545 7.064759 7.935230 7.374941 7.543648
x3 30 7.301165 0.1910199 7.290123 6.909753 7.788212 7.166816 7.392488
x4 30 7.436553 0.1834145 7.423268 7.069874 7.855932 7.326618 7.539424
        Skew    Kurtosis
x1 0.3660786  0.69728520
x2 0.4814651  0.73170914
x3 0.4107699  0.07436358
x4 0.1581748 -0.44895116

MV Stats

Lec 4

############ Part I ## We want to see if perspiration in Latin American healthy women
############ in the age group 50--65 is the same as their counterparts in the North
############ America. Perspiration is quantified in terms of the following variables
############ X1: Sweat Rate X2: Sodium in Sweat X3: Potassium in Sweat

sweat = read.table("C:/Users/poster/Desktop/data/t5-1.dat")
head(sweat)

   V1   V2   V3
1 3.7 48.5  9.3
2 5.7 65.1  8.0
3 3.8 47.2 10.9
4 3.2 53.2 12.0
5 3.1 55.5  9.7
6 4.6 36.1  7.9

n = nrow(sweat)
n

[1] 20

p = ncol(sweat)
p

[1] 3

mu0 = matrix(c(4, 50, 10), ncol = 1)

xbar = as.matrix(colMeans(sweat), ncol = 1)
xbar

     [,1]
V1  4.640
V2 45.400
V3  9.965

S = cov(sweat)
S

          V1       V2        V3
V1  2.879368  10.0100 -1.809053
V2 10.010000 199.7884 -5.640000
V3 -1.809053  -5.6400  3.627658

Tsq <- n * t(xbar - mu0) %*% solve(S) %*% (xbar - mu0)
Tsq

         [,1]
[1,] 9.738773

## Use Mahalanobis distance
Tsq = n * mahalanobis(colMeans(sweat), mu0, cov(sweat))
Tsq  # Hotelling T^2

[1] 9.738773

# Critical point at alpha=0.05
cri = (n - 1) * p/(n - p) * qf(0.95, p, n - p)
cri

[1] 10.7186

# Tsq < cri Fail to reject H0

## Compute P value
Tsq.mod <- (n - p)/p/(n - 1) * Tsq
p.val <- 1 - pf(Tsq.mod, p, (n - p))
p.val

[1] 0.06492834

HotellingsT2(sweat, mu = mu0)


    Hotelling's one sample T2-test

data:  sweat
T.2 = 2.9045, df1 = 3, df2 = 17, p-value = 0.06493
alternative hypothesis: true location is not equal to c(4,50,10)

## One at a time intervals of mu
One = matrix(NA, 3, 2)
for (i in 1:3) {
    One[i, 1] = xbar[i] - qt(0.975, n - 1) * sqrt(S[i, i]/n)  #left
    One[i, 2] = xbar[i] + qt(0.975, n - 1) * sqrt(S[i, i]/n)  #right
}

## T^2 interval mu
Tsq = matrix(NA, 3, 2)
c = sqrt(p * (n - 1)/(n - p) * qf(0.95, p, n - p))
for (i in 1:3) {
    Tsq[i, 1] = xbar[i] - c * sqrt(S[i, i]/n)  #left
    Tsq[i, 2] = xbar[i] + c * sqrt(S[i, i]/n)  #right
}

## Bonferroni interval mu when m=3 (m is the number of CIs).
m = 3
Bonf = matrix(NA, 3, 2)
for (i in 1:m) {
    Bonf[i, 1] = xbar[i] - qt(1 - 0.05/(2 * m), n - 1) * sqrt(S[i, i]/n)  #left
    Bonf[i, 2] = xbar[i] + qt(1 - 0.05/(2 * m), n - 1) * sqrt(S[i, i]/n)  #right
}

colnames(One) = colnames(Tsq) = colnames(Bonf) = c("lower", "upper")
row.names(One) = row.names(Tsq) = row.names(Bonf) = c("SweatRate", "Sodium", 
    "Potassium")
list(One, Tsq, Bonf)

[[1]]
              lower    upper
SweatRate  3.845840  5.43416
Sodium    38.784779 52.01522
Potassium  9.073601 10.85640

[[2]]
              lower     upper
SweatRate  3.397768  5.882232
Sodium    35.052408 55.747592
Potassium  8.570664 11.359336

[[3]]
              lower     upper
SweatRate  3.643952  5.636048
Sodium    37.103078 53.696922
Potassium  8.846992 11.083008

# For large sample inference, only need to change the critical point or
# multiplier using Z (replace to t) and chi-square (replace to F).

############# Part II ##

############## Paired test Municipal wastewater treatment plants are required by law to
############## monitor their discharges into rivers and streams on a regular basis.
############## Concern about the reliability of data from one of these self-monitoring
############## program led to a study in which samples of effluent were divided and sent
############## to two laboratories.  Compare two lab's measurement by two variables: X1
############## and X3: Biochemical oxygen demand (BOD) from two labs X2 and X4: Suspended
############## solids (SS) from two labs

rm(list = ls())
ww = as.matrix(read.table("C:/Users/poster/Desktop/data/t6-1.dat"))
head(ww)

     V1 V2 V3 V4
[1,]  6 27 25 15
[2,]  6 23 28 13
[3,] 18 64 36 22
[4,]  8 44 35 29
[5,] 11 30 15 31
[6,] 34 75 44 64

n1 = nrow(ww)
n1

[1] 11

p1 = 2

D = ww[, 1:2] - ww[, 3:4]
dbar = as.matrix(colMeans(D), ncol = 1)
dbar  #est mu1-mu2

        [,1]
V1 -9.363636
V2 13.272727

Sd = cov(D)
Sd

          V1        V2
V1 199.25455  88.30909
V2  88.30909 418.61818

## Using matrix
C = cbind(diag(p1), -diag(p1))
t(C %*% t(ww))  #D

      [,1] [,2]
 [1,]  -19   12
 [2,]  -22   10
 [3,]  -18   42
 [4,]  -27   15
 [5,]   -4   -1
 [6,]  -10   11
 [7,]  -14   -4
 [8,]   17   60
 [9,]    9   -2
[10,]    4   10
[11,]  -19   -7

n1 * t(C %*% colMeans(ww)) %*% solve(C %*% cov(ww) %*% t(C)) %*% (C %*% colMeans(ww))  #Tsq1

         [,1]
[1,] 13.63931

## Using D
Tsq1 <- n1 * t(dbar) %*% solve(Sd) %*% (dbar)
Tsq1

         [,1]
[1,] 13.63931

cri1 = (n1 - 1) * p1/(n1 - p1) * qf(0.95, p1, n1 - p1)
cri1

[1] 9.458877

pval1 = 1 - pf(Tsq1 * (n1 - p1)/p1/(n1 - 1), p1, n1 - p1)
pval1

           [,1]
[1,] 0.02082779

## Confidence region for delta
plot(D)
CR = ellipse(x = cor(D), scale = sqrt(diag(Sd)/n1), centre = as.vector(dbar), 
    t = sqrt(cri1))
# n1*mahalanobis(as.vector(dbar),as.vector(CR[3,]),cov(D))
lines(CR, type = "l", xlab = expression({
    delta
}[1]), ylab = expression({
    delta
}[2]), main = expression("95% confidence ellipse"))
points(0, 0, pch = 20)

## One at a time intervals of for delta1 and delta2
One1 = matrix(NA, 2, 2)
for (i in 1:2) {
    One1[i, 1] = dbar[i] - qt(0.975, n1 - 1) * sqrt(Sd[i, i]/n1)  #left
    One1[i, 2] = dbar[i] + qt(0.975, n1 - 1) * sqrt(Sd[i, i]/n1)  #right
}

## T^2 interval for delta1 and delta2
Tsq1 = matrix(NA, 2, 2)
c1 = sqrt(p1 * (n1 - 1)/(n1 - p1) * qf(0.95, p1, n1 - p1))
for (i in 1:2) {
    Tsq1[i, 1] = dbar[i] - c1 * sqrt(Sd[i, i]/n1)  #left
    Tsq1[i, 2] = dbar[i] + c1 * sqrt(Sd[i, i]/n1)  #right
}

## Bonferroni interval for delta1 and delta2 when m=2 (m is the number of
## CIs).
m = 2
Bonf1 = matrix(NA, 2, 2)
for (i in 1:m) {
    Bonf1[i, 1] = dbar[i] - qt(1 - 0.05/(2 * m), n1 - 1) * sqrt(Sd[i, i]/n1)  #left
    Bonf1[i, 2] = dbar[i] + qt(1 - 0.05/(2 * m), n1 - 1) * sqrt(Sd[i, i]/n1)  #right
}

colnames(One1) = colnames(Tsq1) = colnames(Bonf1) = c("lower", "upper")
row.names(One1) = row.names(Tsq1) = row.names(Bonf1) = c("BOD", "SS")
list(One1, Tsq1, Bonf1)

[[1]]
          lower     upper
BOD -18.8467298  0.119457
SS   -0.4725958 27.018050

[[2]]
         lower    upper
BOD -22.453272  3.72600
SS   -5.700119 32.24557

[[3]]
         lower     upper
BOD -20.573107  1.845835
SS   -2.974903 29.520358

####################### Two Sample Independent Measurements on the carapaces of 24 female and 24
####################### male painted turtles X1: Length X2: Width X3 Height X4 Sex (female and
####################### male)

rm(list = ls())
X <- read.table("C:/Users/poster/Desktop/data/T6-9.dat", header = F, col.names = c("Length", 
    "Width", "Height", "Sex"))

# log transformation improves Normality.
X[, 1:3] <- log(X[, 1:3])

X_1 = X[X$Sex == "male", 1:3]
X_2 = X[X$Sex == "female", 1:3]

n_1 <- dim(X_1)[1]
n_2 <- dim(X_2)[1]
p <- dim(X_1)[2]

# Testing H0:mu1-mu2=delta0
delta0 = rep(0, p)
xbar_1 = colMeans(X_1)
xbar_1

  Length    Width   Height 
4.725444 4.477574 3.703186

xbar_2 = colMeans(X_2)
xbar_2

  Length    Width   Height 
4.900659 4.622909 3.940286

S_1 = var(X_1)
S_1

            Length       Width      Height
Length 0.011072004 0.008019142 0.008159648
Width  0.008019142 0.006416726 0.006005271
Height 0.008159648 0.006005271 0.006772758

S_2 = var(X_2)
S_2

           Length      Width     Height
Length 0.02640563 0.02011195 0.02491758
Width  0.02011195 0.01619045 0.01942430
Height 0.02491758 0.01942430 0.02493980

S_p = ((n_1 - 1) * S_1 + (n_2 - 1) * S_2)/(n_1 + n_2 - 2)
S_p

           Length      Width     Height
Length 0.01873882 0.01406555 0.01653862
Width  0.01406555 0.01130359 0.01271478
Height 0.01653862 0.01271478 0.01585628

(T2 = (n_1 * n_2/(n_1 + n_2)) * t(xbar_1 - xbar_2 - delta0) %*% solve(S_p) %*% 
    (xbar_1 - xbar_2 - delta0))

       [,1]
[1,] 85.052

(crit = (n_1 + n_2 - 2) * p/(n_1 + n_2 - p - 1) * qf(0.95, p, n_1 + n_2 - p - 
    1))

[1] 8.833461

# Linear combination of the mean components most responsiblefor rejection of
# H_0
(a <- solve(S_p) %*% (xbar_1 - xbar_2))

             [,1]
Length  43.726770
Width    8.710687
Height -67.546415

A = diag(p)

# T^2-intervals
Clower = t(A) %*% (xbar_1 - xbar_2) - sqrt(crit) * sqrt(diag((t(A) %*% S_p %*% 
    A) * (1/n_1 + 1/n_2)))
Cupper = t(A) %*% (xbar_1 - xbar_2) + sqrt(crit) * sqrt(diag((t(A) %*% S_p %*% 
    A) * (1/n_1 + 1/n_2)))
cbind(Clower, Cupper)

           [,1]        [,2]
[1,] -0.2926638 -0.05776762
[2,] -0.2365537 -0.05411666
[3,] -0.3451377 -0.12906223

alpha <- 0.05
# Bonferroni intervlas
crit.b = abs(qt(alpha/(2 * p), (n_1 + n_2 - 2)))
Clower.b = t(A) %*% (xbar_1 - xbar_2) - crit.b * sqrt(diag((t(A) %*% S_p %*% 
    A) * (1/n_1 + 1/n_2)))
Cupper.b = t(A) %*% (xbar_1 - xbar_2) + crit.b * sqrt(diag((t(A) %*% S_p %*% 
    A) * (1/n_1 + 1/n_2)))
cbind(Clower.b, Cupper.b)

           [,1]        [,2]
[1,] -0.2734025 -0.07702893
[2,] -0.2215940 -0.06907636
[3,] -0.3274197 -0.14678026

# Assessing the normality and equal covariance assumptions
par(mfrow = c(1, 3))

# 3-d Scatterplot: Ellipsoidal? are the covariances the same?

sp <- scatterplot3d(X_1, xlim = c(4.4, 5.3), ylim = c(4.2, 5), zlim = c(3.4, 
    4.4), main = "Red=FEMALE")
sp$points3d(X_2, col = "red")

# Visually compare the sample variances and covariances between the two
# groups
cov(X_1)

            Length       Width      Height
Length 0.011072004 0.008019142 0.008159648
Width  0.008019142 0.006416726 0.006005271
Height 0.008159648 0.006005271 0.006772758

cov(X_2)

           Length      Width     Height
Length 0.02640563 0.02011195 0.02491758
Width  0.02011195 0.01619045 0.01942430
Height 0.02491758 0.01942430 0.02493980

# Chi-square plot for males and females separately.
distances <- function(D) {
    dsq = c()
    M <- colMeans(D)
    S <- cov(D)
    n <- dim(D)[1]
    p <- dim(D)[2]
    for (i in 1:n) dsq <- c(dsq, as.matrix(D[i, ] - M) %*% solve(S) %*% as.matrix(t(D[i, 
        ] - M)))
    as.numeric(dsq)
}

# Chi-square plot for males
qc <- qchisq(((1:n_1) - 1/2)/n_1, df = p)
par(mar = c(4, 4, 1, 1))
plot(sort(distances(X_1)), qc, xlab = expression(d[j]^2), main = "Male")
abline(a = 0, b = 1)

# Chi-square plot for females
qc <- qchisq(((1:n_2) - 1/2)/n_2, df = p)
par(mar = c(4, 4, 1, 1))
plot(sort(distances(X_2)), qc, xlab = expression(d[j]^2), main = "Female")
abline(a = 0, b = 1)

########## Test for additional information

X_11 = X[X$Sex == "male", 1:2]
X_12 = X[X$Sex == "female", 1:2]


# Testing H0:mu1-mu2=delta0
xbar_11 = colMeans(X_11)
xbar_11

  Length    Width 
4.725444 4.477574

xbar_12 = colMeans(X_12)
xbar_12

  Length    Width 
4.900659 4.622909

S_11 = var(X_11)
S_11

            Length       Width
Length 0.011072004 0.008019142
Width  0.008019142 0.006416726

S_12 = var(X_12)
S_12

           Length      Width
Length 0.02640563 0.02011195
Width  0.02011195 0.01619045

S_1p = ((n_1 - 1) * S_11 + (n_2 - 1) * S_12)/(n_1 + n_2 - 2)
S_1p

           Length      Width
Length 0.01873882 0.01406555
Width  0.01406555 0.01130359

(T12 = (n_1 * n_2/(n_1 + n_2)) * t(xbar_11 - xbar_12) %*% solve(S_1p) %*% (xbar_11 - 
    xbar_12))

         [,1]
[1,] 22.73142

(U = (T2 - T12)/(n_1 + n_2 - 2 + T12))

          [,1]
[1,] 0.9067263

p = 3
p1 = 2
p2 = 1
(crit2 = p2/(n_1 + n_2 - p - 1) * qf(0.95, p2, n_1 + n_2 - p - 1))

[1] 0.09231151

############## Part III ##

####### One-way MANOVA: Iris Data--- Sepal (V2) Width and Petal (V4) Width of
####### three species of Iris
rm(list = ls())
D <- read.table("C:/Users/poster/Desktop/data/T11-5.dat", header = F)
head(D)

   V1  V2  V3  V4 V5
1 5.1 3.5 1.4 0.2  1
2 4.9 3.0 1.4 0.2  1
3 4.7 3.2 1.3 0.2  1
4 4.6 3.1 1.5 0.2  1
5 5.0 3.6 1.4 0.2  1
6 5.4 3.9 1.7 0.4  1

# data must be sorted by the grouping variable
G <- D$V5
u <- unique(G)
u  # 1,2,3

[1] 1 2 3

g <- length(u)
g  # number of species (groups)

[1] 3

N <- table(G)
N  # n1,n2,n3

G
 1  2  3 
50 50 50

n <- sum(N)  # Total sample size 
X <- as.matrix(D[, c(2, 4)])
p <- dim(X)[2]  # dimension


X_1 <- X[G == 1, ]
X_2 <- X[G == 2, ]
X_3 <- X[G == 3, ]


### Getting a feel for the data
plot(X_1, xlim = c(1, 5), ylim = c(0, 3), col = "red", ylab = "Petal", xlab = "Sepal", 
    main = "Width Measurements from Three Species of Iris Plants")
points(X_2, col = "blue")
points(X_3, col = "green")
text(4, 0.5, "Species 1", col = "red")
text(4, 1.5, "Species 2", col = "blue")
text(4, 2.25, "Species 3", col = "green")

## Middle matrices
library(Matrix)

# Matrix for B
C_1 <- c(1)
for (i in 1:g) C_1 <- bdiag(C_1, (1/N[i]) * matrix(1, N[i], N[i]))
C1 <- as.matrix(C_1[-1, -1] - (1/n) * matrix(1, n, n))

# Matrix for W
C_2 <- c(1)
for (i in 1:g) C_2 <- bdiag(C_2, diag(1, N[i]) - (1/N[i]) * matrix(1, N[i], 
    N[i]))
C2 <- as.matrix(C_2[-1, -1])


# SS
SStrt <- t(X) %*% C1 %*% X
SSres <- t(X) %*% C2 %*% X

# Test Stat
Lambda <- det(SSres)/det(SStrt + SSres)
Lambda  # Wilks Lambda

[1] 0.03831574

# Lambda has the same distributions as U(2,2,147)
m1 <- g - 1
m1

[1] 2

m2 <- n - g
m2

[1] 147

((1 - sqrt(Lambda))/sqrt(Lambda)) * (m2 - 1)/m1  # Approx F

[1] 299.936

alpha = 0.05
qf(1 - alpha, 2 * m1, 2 * (m2 - 1))  #Reject H0

[1] 2.402562

# use manova() function
test1 <- manova(cbind(D[, 2], D[, 4]) ~ factor(G))
summary(test1, "Wilks")  #c('Pillai', 'Wilks', 'Hotelling-Lawley', 'Roy')

           Df    Wilks approx F num Df den Df    Pr(>F)    
factor(G)   2 0.038316   299.94      4    292 < 2.2e-16 ***
Residuals 147                                              
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary.aov(test1)

 Response 1 :
             Df Sum Sq Mean Sq F value    Pr(>F)    
factor(G)     2 11.345  5.6725   49.16 < 2.2e-16 ***
Residuals   147 16.962  0.1154                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response 2 :
             Df Sum Sq Mean Sq F value    Pr(>F)    
factor(G)     2 80.413  40.207  960.01 < 2.2e-16 ***
Residuals   147  6.157   0.042                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# summary.aov() includes two univariate test:
anova(lm(D[, 2] ~ factor(G)))

Analysis of Variance Table

Response: D[, 2]
           Df Sum Sq Mean Sq F value    Pr(>F)    
factor(G)   2 11.345  5.6725   49.16 < 2.2e-16 ***
Residuals 147 16.962  0.1154                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(D[, 4] ~ factor(G)))

Analysis of Variance Table

Response: D[, 4]
           Df Sum Sq Mean Sq F value    Pr(>F)    
factor(G)   2 80.413  40.207  960.01 < 2.2e-16 ***
Residuals 147  6.157   0.042                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Pairwise Comparison with Hotelling's T^2 Bonferroni method Testing
## H0:mui-muj=delta0
delta0 = rep(0, p)
xbar_1 = colMeans(X_1)
xbar_2 = colMeans(X_2)
xbar_3 = colMeans(X_3)
S_p = SSres/(n - g)

alpha = 0.05
m = g * (g - 1)/2  # Number of pairs of mu you are comparing
(crit = ((n - g) * p/(n - g - p + 1)) * qf(1 - alpha/m, p, n - g - p + 1))

[1] 8.480372

# Between Species 1 and 2
(T2 = (N[1] * N[2]/(N[1] + N[2])) * t(xbar_1 - xbar_2 - delta0) %*% solve(S_p) %*% 
    (xbar_1 - xbar_2 - delta0))

         [,1]
[1,] 1323.607

# Between Species 1 and 3
(T2 = (N[1] * N[3]/(N[1] + N[3])) * t(xbar_1 - xbar_3 - delta0) %*% solve(S_p) %*% 
    (xbar_1 - xbar_3 - delta0))

         [,1]
[1,] 2837.709

# Between Species 2 and 3
(T2 = (N[2] * N[3]/(N[2] + N[3])) * t(xbar_2 - xbar_3 - delta0) %*% solve(S_p) %*% 
    (xbar_2 - xbar_3 - delta0))

         [,1]
[1,] 325.1741

########## Bonferoni Intervals
m <- p * g * (g - 1)/2
crit.b <- qt(1 - alpha/(2 * m), n - g)

# Comparing Species 1 and 2
cbind(xbar_1 - xbar_2 - crit.b * sqrt((1/N[1] + 1/N[2]) * diag(S_p)), xbar_1 - 
    xbar_2 + crit.b * sqrt((1/N[1] + 1/N[2]) * diag(S_p)))

         [,1]       [,2]
V2  0.4763056  0.8396944
V4 -1.1894645 -0.9705355

# Comparing Species 1 and 3
cbind(xbar_1 - xbar_3 - crit.b * sqrt((1/N[1] + 1/N[3]) * diag(S_p)), xbar_1 - 
    xbar_3 + crit.b * sqrt((1/N[1] + 1/N[3]) * diag(S_p)))

         [,1]       [,2]
V2  0.2723056  0.6356944
V4 -1.8894645 -1.6705355

# Comparing Species 2 and 3
cbind(xbar_2 - xbar_3 - crit.b * sqrt((1/N[2] + 1/N[3]) * diag(S_p)), xbar_2 - 
    xbar_3 + crit.b * sqrt((1/N[2] + 1/N[3]) * diag(S_p)))

         [,1]        [,2]
V2 -0.3856944 -0.02230564
V4 -0.8094645 -0.59053548

###### 2-way MANOVA Plastic File data Factor 1: Change in the rate of extrusion 0
###### for Low 10% 1 for High 10% Factor 2: Amount of additive 0 for Low 1% 1 for
###### High 1.5% X1: Tear resistance X2: Gloss X3: Opacity
rm(list = ls())
plastic = read.table("C:/Users/poster/Desktop/data/T6-4.dat")
head(plastic)

  V1 V2  V3  V4  V5
1  0  0 6.5 9.5 4.4
2  0  0 6.2 9.9 6.4
3  0  0 5.8 9.6 3.0
4  0  0 6.5 9.6 4.1
5  0  0 6.5 9.2 0.8
6  0  1 6.9 9.1 5.7

test2 = manova(cbind(V3, V4, V5) ~ V1 * V2, data = plastic)
summary(test2, "Wilks")  #c('Pillai', 'Wilks', 'Hotelling-Lawley', 'Roy')

          Df   Wilks approx F num Df den Df   Pr(>F)   
V1         1 0.38186   7.5543      3     14 0.003034 **
V2         1 0.52303   4.2556      3     14 0.024745 * 
V1:V2      1 0.77711   1.3385      3     14 0.301782   
Residuals 16                                           
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary.aov(test2)

 Response V3 :
            Df Sum Sq Mean Sq F value   Pr(>F)   
V1           1 1.7405 1.74050 15.7868 0.001092 **
V2           1 0.7605 0.76050  6.8980 0.018330 * 
V1:V2        1 0.0005 0.00050  0.0045 0.947143   
Residuals   16 1.7640 0.11025                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response V4 :
            Df Sum Sq Mean Sq F value  Pr(>F)  
V1           1 1.3005 1.30050  7.9178 0.01248 *
V2           1 0.6125 0.61250  3.7291 0.07139 .
V1:V2        1 0.5445 0.54450  3.3151 0.08740 .
Residuals   16 2.6280 0.16425                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response V5 :
            Df Sum Sq Mean Sq F value Pr(>F)
V1           1  0.421  0.4205  0.1036 0.7517
V2           1  4.901  4.9005  1.2077 0.2881
V1:V2        1  3.960  3.9605  0.9760 0.3379
Residuals   16 64.924  4.0578

###### Box's M Test for Equality of Covariance
boxM1 <- function(data, grouping) {
    if (!inherits(data, c("data.frame", "matrix"))) 
        stop("'data' must be a numeric data.frame or matrix!")
    if (length(grouping) != nrow(data)) 
        stop("incompatible dimensions!")
    dname <- deparse(substitute(data))
    data <- as.matrix(data)
    grouping <- as.factor(as.character(grouping))
    p <- ncol(data)
    nlev <- nlevels(grouping)
    lev <- levels(grouping)
    dfs <- tapply(grouping, grouping, length) - 1
    if (any(dfs < p)) 
        warning("there are one or more levels with less observations than variables!")
    mats <- aux <- list()
    for (i in 1:nlev) {
        mats[[i]] <- cov(data[grouping == lev[i], ])
        aux[[i]] <- mats[[i]] * dfs[i]
    }
    names(mats) <- lev
    pooled <- Reduce("+", aux)/sum(dfs)
    logdet <- log(unlist(lapply(mats, det)))
    minus2logM <- sum(dfs) * log(det(pooled)) - sum(logdet * dfs)
    sum1 <- sum(1/dfs)
    Co <- (((2 * p^2) + (3 * p) - 1)/(6 * (p + 1) * (nlev - 1))) * (sum1 - (1/sum(dfs)))
    X2 <- minus2logM * (1 - Co)
    dfchi <- (choose(p, 2) + p) * (nlev - 1)
    pval <- pchisq(X2, dfchi, lower.tail = FALSE)
    out <- structure(list(statistic = c(`Chi-Sq (approx.)` = X2), parameter = c(df = dfchi), 
        p.value = pval, cov = mats, pooled = pooled, logDet = logdet, data.name = dname, 
        method = " Box's M-test for Homogeneity of Covariance Matrices"), class = c("htest", 
        "boxM"))
    return(out)
}

boxM(cbind(plastic$V3, plastic$V4, plastic$V5), plastic$V1 * plastic$V2)


    Box's M-test for Homogeneity of Covariance Matrices

data:  cbind(plastic$V3, plastic$V4, plastic$V5)
Chi-Sq (approx.) = 7.1269, df = 6, p-value = 0.3093

boxM1(cbind(plastic$V3, plastic$V4, plastic$V5), plastic$V1 * plastic$V2)


    Box's M-test for Homogeneity of Covariance Matrices

data:  cbind(plastic$V3, plastic$V4, plastic$V5)
Chi-Sq (approx.) = 7.1269, df = 6, p-value = 0.3093

Lec 5: MVLR

####### The following analysis uses data from HERS project Source:
####### http://www.biostat.ucsf.edu/vgsm/data.html Data includes White-nondiabetic
####### women in the age group 44-79.

Data1 <- read.csv("C:/Users/poster/Desktop/data/hersdata.csv", header = TRUE)
Data <- Data1[(Data1$diabetes == "no" & Data1$raceth == "White"), ]
n <- dim(Data)[1]
Y = data.frame(glucose = Data$glucose, TG = Data$TG)
Z = data.frame(intercept = rep(1, n), age = Data$age, drinkany = as.numeric(Data$drinkany == 
    "yes"), exercise = as.numeric(Data$exercise == "yes"), BMI = Data$BMI)
ComCases <- complete.cases(Z) & complete.cases(Y)
Y <- Y[ComCases, ]
Z <- Z[ComCases, ]
p = dim(Y)[2]  # dimension 
r = dim(Z)[2] - 1  # number of predictors
n = dim(Y)[1]  # sample size 
Z <- as.matrix(Z)
Y <- as.matrix(Y)
B_hat = solve(t(Z) %*% Z) %*% t(Z) %*% Y
B_hat

              glucose         TG
intercept 78.62564381 98.3540631
age        0.07367995  0.1728804
drinkany   0.63656365 -9.3090412
exercise  -0.96975262 -4.1237035
BMI        0.47621049  2.1121466

Sigma_hat = (1/(n - r - 1)) * (t(Y - Z %*% B_hat) %*% (Y - Z %*% B_hat))
Sigma_hat

         glucose         TG
glucose 85.98248   72.46454
TG      72.46454 3745.16189

Cov_B_hat = Sigma_hat %x% solve(t(Z) %*% Z)

rownames(Cov_B_hat) <- c("B_01", "B_11", "B_21", "B_31", "B_41", "B_02", "B_12", 
    "B_22", "B_32", "B_42")
colnames(Cov_B_hat) <- c("B_01", "B_11", "B_21", "B_31", "B_41", "B_02", "B_12", 
    "B_22", "B_32", "B_42")
Cov_B_hat[1:5, 1:5]

            B_01          B_11          B_21          B_31          B_41
B_01  7.24632586 -0.0778435763 -0.1929244742 -0.1865533801 -0.0658209237
B_11 -0.07784358  0.0010714166  0.0013538412  0.0002707412  0.0001909548
B_21 -0.19292447  0.0013538412  0.1889307310 -0.0001902451  0.0005723298
B_31 -0.18655338  0.0002707412 -0.0001902451  0.1956711207  0.0031202742
B_41 -0.06582092  0.0001909548  0.0005723298  0.0031202742  0.0018678963

Cov_B_hat[6:10, 6:10]

           B_02        B_12         B_22         B_32        B_42
B_02 315.630166 -3.39065360 -8.403263243 -8.125755785 -2.86697969
B_12  -3.390654  0.04666798  0.058969627  0.011792747  0.00831747
B_22  -8.403263  0.05896963  8.229306699 -0.008286559  0.02492912
B_32  -8.125756  0.01179275 -0.008286559  8.522899665  0.13591063
B_42  -2.866980  0.00831747  0.024929123  0.135910625  0.08136046

Cov_B_hat[1:5, 6:10]

            B_02          B_12          B_22          B_32          B_42
B_01  6.10707735 -0.0656052116 -0.1625933900 -0.1572239428 -0.0554727293
B_11 -0.06560521  0.0009029712  0.0011409938  0.0002281760  0.0001609334
B_21 -0.16259339  0.0011409938  0.1592275328 -0.0001603353  0.0004823496
B_31 -0.15722394  0.0002281760 -0.0001603353  0.1649082159  0.0026297128
B_41 -0.05547273  0.0001609334  0.0004823496  0.0026297128  0.0015742305

#### Testing for any predictive subset of the predictors in the presence of the
#### other predictors Any of the risk factors
q = 0
C = cbind(matrix(0, r - q, q + 1), diag(r - q))
S_h = t(Y) %*% Z %*% solve(t(Z) %*% Z) %*% t(C) %*% solve(C %*% solve(t(Z) %*% 
    Z) %*% t(C)) %*% C %*% solve(t(Z) %*% Z) %*% t(Z) %*% Y
S_e = t(Y) %*% (diag(n) - Z %*% solve(t(Z) %*% Z) %*% t(Z)) %*% Y
S_h

         glucose        TG
glucose 11987.89  50252.18
TG      50252.18 277409.44

S_e

         glucose        TG
glucose 158723.7  133769.5
TG      133769.5 6913568.9

Lambda = det(S_e)/det(S_h + S_e)
Lambda

[1] 0.904277

c(p, r - q, n - r - 1)

[1]    2    4 1846

# F and df
c((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 * (n - 
    r - 2))

[1]   23.79903    8.00000 3690.00000

# p value
1 - pf((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 * 
    (n - r - 2))

[1] 0

# Use lm() function
Dat = data.frame(cbind(Y, Z))
fit_f = lm(as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI, 
    data = Dat)
fit_n = lm(as.matrix(cbind(glucose, TG)) ~ 1, data = Dat)
anova(fit_f, fit_n, test = "Wilks")

Analysis of Variance Table

Model 1: as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI
Model 2: as.matrix(cbind(glucose, TG)) ~ 1
  Res.Df Df Gen.var.   Wilks approx F num Df den Df    Pr(>F)    
1   1846      562.82                                             
2   1850  4   590.58 0.90428   23.799      8   3690 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Life style risk factors
q = 2
C = cbind(matrix(0, r - q, q), diag(r - q), rep(0, r - q))
S_h = t(Y) %*% Z %*% solve(t(Z) %*% Z) %*% t(C) %*% solve(C %*% solve(t(Z) %*% 
    Z) %*% t(C)) %*% C %*% solve(t(Z) %*% Z) %*% t(Z) %*% Y
S_e = t(Y) %*% (diag(n) - Z %*% solve(t(Z) %*% Z) %*% t(Z)) %*% Y
S_h

          glucose         TG
glucose  597.1097  -936.7593
TG      -936.7593 46944.5976

S_e

         glucose        TG
glucose 158723.7  133769.5
TG      133769.5 6913568.9

Lambda = det(S_e)/det(S_h + S_e)
Lambda

[1] 0.9891349

c(p, r - q, n - r - 1)

[1]    2    2 1846

# F and df
c((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 * (n - 
    r - 2))

[1]    5.052722    4.000000 3690.000000

# p value
1 - pf((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 * 
    (n - r - 2))

[1] 0.0004641033

# Use lm() function
fit_f = lm(as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI, 
    data = Dat)
fit_r = lm(as.matrix(cbind(glucose, TG)) ~ age + BMI, data = Dat)
anova(fit_f, fit_r, test = "Wilks")

Analysis of Variance Table

Model 1: as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI
Model 2: as.matrix(cbind(glucose, TG)) ~ age + BMI
  Res.Df Df Gen.var.   Wilks approx F num Df den Df    Pr(>F)    
1   1846      562.82                                             
2   1848  2   565.29 0.98913   5.0527      4   3690 0.0004641 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#### Plotting the Predictors
z_0 = c(1, 45, 1, 0, 25)
# z_0=c(1,70,1,0,25)
plot(Z[(Z[, 3] == 0) & (Z[, 4] == 0), 2], Z[(Z[, 3] == 0) & (Z[, 4] == 0), 5], 
    pch = 2, bty = "l", col = "blue", ylab = "BMI", xlab = "AGE", xlim = c(30, 
        90), ylim = c(0, 60))
points(Z[(Z[, 3] == 0) & (Z[, 4] == 1), 2], Z[(Z[, 3] == 0) & (Z[, 4] == 1), 
    5], pch = 2, col = "red")
points(Z[(Z[, 3] == 1) & (Z[, 4] == 0), 2], Z[(Z[, 3] == 1) & (Z[, 4] == 0), 
    5], col = "blue")
points(Z[(Z[, 3] == 1) & (Z[, 4] == 1), 2], Z[(Z[, 3] == 1) & (Z[, 4] == 1), 
    5], col = "red")
legend("topright", pch = c(1, 2), title = "Alcohol", legend = c("Yes", "No"))
legend("topleft", fill = c("red", "blue"), title = "Exercies", legend = c("Yes", 
    "No"))

# Plotting z_0
points(z_0[2], z_0[5], col = "blue", cex = 2, pch = 16)
text(z_0[2] - 8, z_0[5] - 2, "Drinks")
text(z_0[2] - 8, z_0[5] - 4, "No Exercise")
arrows(41, 22, 44, 24, code = 2, length = 0.1)

### Predicting the mean response and actual response Prediciting mean response
### For a typical nonalcholic and excercising 35 years old with BMI 26

alpha = 0.05
pred = t(z_0) %*% B_hat
pred

      glucose       TG
[1,] 94.48307 149.6283

pred_crit = sqrt((p * (n - r - 1))/(n - r - p) * qf(1 - alpha, p, n - r - p))
pred_crit

[1] 2.450399

# predict() function
predict(fit_f, newdata = list(age = 45, drinkany = 1, exercise = 0, BMI = 25))

   glucose       TG
1 94.48307 149.6283

cov(Y)  #see the total variation

         glucose         TG
glucose 92.27651   99.47119
TG      99.47119 3887.01530

#### Predition
pred_cov = as.numeric(t(z_0) %*% solve(t(Z) %*% Z) %*% z_0) * (n * Sigma_hat)/(n - 
    r - 1)
pred_cov

          glucose         TG
glucose 0.6714174  0.5658589
TG      0.5658589 29.2451097

### Confidence Region

plot(Y, type = "n", bty = "l", xlab = expression(mu[1] * " (Glucose)"), xlim = c(60, 
    120), ylim = c(-20, 320), ylab = expression(mu[2] * " (Triglyceride)"))
car::ellipse(center = as.vector(pred), shape = pred_cov, radius = pred_crit, 
    segments = 104, center.cex = 0.5)


#### Simultaneous intervals T^2 intervals
CI_lower = pred - sqrt(diag(pred_cov)) * pred_crit
CI_upper = pred + sqrt(diag(pred_cov)) * pred_crit
rbind(CI_lower, CI_upper)

      glucose       TG
[1,] 92.47521 136.3769
[2,] 96.49093 162.8798

rect(xleft = CI_lower[1], xright = CI_upper[1], ybottom = CI_lower[2], ytop = CI_upper[2])


# Bonferroni intervals
m = 2
b_pred_crit <- qt(1 - alpha/(2 * m), n - r - 1)
b_CI_lower = pred - sqrt(diag(pred_cov)) * b_pred_crit
b_CI_upper = pred + sqrt(diag(pred_cov)) * b_pred_crit
rbind(b_CI_lower, b_CI_upper)

      glucose       TG
[1,] 92.64496 137.4972
[2,] 96.32117 161.7594

rect(xleft = b_CI_lower[1], xright = b_CI_upper[1], ybottom = b_CI_lower[2], 
    ytop = b_CI_upper[2], lty = "dashed")


### Forecasting
pred_cov_F = as.numeric(1 + t(z_0) %*% solve(t(Z) %*% Z) %*% z_0) * (n * Sigma_hat)/(n - 
    r - 1)

### Confidence Region

car::ellipse(center = as.vector(pred), col = "blue", shape = pred_cov_F, radius = pred_crit, 
    segments = 104, center.cex = 0.5)


#### Simultaneous intervals T^2 intervals
CI_lower = pred - sqrt(diag(pred_cov_F)) * pred_crit
CI_upper = pred + sqrt(diag(pred_cov_F)) * pred_crit
rbind(CI_lower, CI_upper)

       glucose         TG
[1,]  71.64214  -1.117077
[2,] 117.32399 300.373689

rect(xleft = CI_lower[1], xright = CI_upper[1], ybottom = CI_lower[2], ytop = CI_upper[2])

# Bonferroni intervals
m = 2
b_pred_crit <- qt(1 - alpha/(2 * m), n - r - 1)
b_CI_lower = pred - sqrt(diag(pred_cov_F)) * b_pred_crit
b_CI_upper = pred + sqrt(diag(pred_cov_F)) * b_pred_crit
rbind(b_CI_lower, b_CI_upper)

      glucose        TG
[1,]  73.5732  11.62754
[2,] 115.3929 287.62908

rect(xleft = b_CI_lower[1], xright = b_CI_upper[1], ybottom = b_CI_lower[2], 
    ytop = b_CI_upper[2], lty = "dashed")

Adv MV

Lec 6: PCA

#### Maximia of Quadratic forms
rm(list = ls(all = TRUE))


Gamma = matrix(c(1/sqrt(2), -1/sqrt(2), 1/sqrt(2), 1/sqrt(2)), 2, 2)
Lambda = diag(c(1, 4))

B = Gamma %*% Lambda %*% t(Gamma)
B

     [,1] [,2]
[1,]  2.5  1.5
[2,]  1.5  2.5

par(mar = c(0, 0, 0, 0))
plot(NULL, NULL, col = "red", pch = 16, xlim = c(-3, 3), ylim = c(-2, 2), xaxt = "n", 
    yaxt = "n", bty = "n", xlab = "", ylab = "")
car::ellipse(center = c(0, 0), shape = B, radius = 1, col = "blue", center.pch = 16, 
    center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = B, radius = 0.5, col = "green", center.pch = 16, 
    center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = diag(1, 2), radius = 1, col = "red", 
    center.pch = 16, center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = B, radius = 1.2, lty = "dashed", col = "black", 
    center.pch = 16, center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = B, radius = 0.2, lty = "dashed", col = "black", 
    center.pch = 16, center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)

axis(1, pos = 0, at = seq(-5, 5, 2))
axis(2, pos = 0, at = seq(-5, 5, 2))

segments(0, 0, 1/sqrt(2), 1/sqrt(2), lty = "solid", lwd = 1, col = "green")
segments(0, 0, -1/sqrt(2), 1/sqrt(2), lty = "solid", lwd = 1, col = "blue")
text(2, 1, expression(bold(x)^T * B * bold(x) == c[2]^2), col = "blue")
text(0.5, -0.6, expression(bold(x)^T * B * bold(x) == c[1]^2), col = "green")
text(0.5, 1.1, expression(bold(x)^T * bold(x) == 1), col = "red")
text(0.4, 0.6, expression(frac(c[1], sqrt(lambda[min]))), col = "green")
text(-0.6, 0.4, expression(frac(c[2], sqrt(lambda[max]))), col = "blue")

########## Example of Population Principal Components
Sigma = matrix(c(1, -2, 0, -2, 5, 0, 0, 0, 2), 3, 3, byrow = T)
p = dim(Sigma)[1]
L = eigen(Sigma, symmetric = T)
L$values

[1] 5.8284271 2.0000000 0.1715729

# We probably need the first two principal components ONLY They account for
# 98% of the total variance
E = t(L$vectors)
E

           [,1]      [,2] [,3]
[1,] -0.3826834 0.9238795    0
[2,]  0.0000000 0.0000000    1
[3,]  0.9238795 0.3826834    0

rho = matrix(NA, p, p)
for (i in 1:p) for (k in 1:p) rho[i, k] = E[i, k] * sqrt(L$values[i])/sqrt(Sigma[k, 
    k])
rho

           [,1]       [,2] [,3]
[1,] -0.9238795 0.99748421    0
[2,]  0.0000000 0.00000000    1
[3,]  0.3826834 0.07088902    0

########### Need for standardization???  PCs extracted from Sigma=Covariance Matrix
Sigma = matrix(c(1, 4, 4, 100), 2, 2, byrow = T)
p = dim(Sigma)[1]
L = eigen(Sigma)
L$values

[1] 100.1613532   0.8386468

t(L$vectors)

            [,1]       [,2]
[1,]  0.04030552 0.99918740
[2,] -0.99918740 0.04030552

L$values/sum(L$values)

[1] 0.991696566 0.008303434

# PCs extracted from Rho=Correlation Matrix
V_Half_inv = diag(1/diag(Sigma)^{
    1/2
})
Rho = V_Half_inv %*% Sigma %*% V_Half_inv
L_r = eigen(Rho, symmetric = T)
L_r$values

[1] 1.4 0.6

t(L_r$vectors)

           [,1]      [,2]
[1,]  0.7071068 0.7071068
[2,] -0.7071068 0.7071068

cumsum(L_r$values)/sum(L_r$values)

[1] 0.7 1.0

################## Example 8.3 J&W, 6th Edition: Census Data
census = read.table("C:/Users/poster/Desktop/data/T8-5.DAT")
xbarvec = apply(census, 2, mean)
Smat = cov(census)
eigen(Smat)

eigen() decomposition
$values
[1] 107.0152535  39.6721358   8.3708660   2.8678740   0.1546931

$vectors
             [,1]        [,2]        [,3]        [,4]         [,5]
[1,]  0.038887287 -0.07114494 -0.18789258  0.97713524 -0.057699864
[2,] -0.105321969 -0.12975236  0.96099580  0.17135181 -0.138554092
[3,]  0.492363944 -0.86438807 -0.04579737 -0.09104368  0.004966048
[4,] -0.863069865 -0.48033178 -0.15318538 -0.02968577  0.006691800
[5,] -0.009122262 -0.01474342  0.12498114  0.08170118  0.988637470

lambdavec = eigen(Smat)$values  # eigenvalues
pc.percent = lambdavec/(sum(lambdavec))
pc.percent

[1] 0.6769654402 0.2509610921 0.0529530772 0.0181418208 0.0009785696

cumsum(pc.percent)

[1] 0.6769654 0.9279265 0.9808796 0.9990214 1.0000000

# scree plot
ivec = seq(1, 5, 1)
plot(ivec, lambdavec, type = "o")

# alternative way use prcomp()
prcomp(census)

Standard deviations (1, .., p=5):
[1] 10.3448177  6.2985820  2.8932449  1.6934798  0.3933104

Rotation (n x k) = (5 x 5):
            PC1         PC2         PC3         PC4          PC5
V1  0.038887287 -0.07114494  0.18789258  0.97713524 -0.057699864
V2 -0.105321969 -0.12975236 -0.96099580  0.17135181 -0.138554092
V3  0.492363944 -0.86438807  0.04579737 -0.09104368  0.004966048
V4 -0.863069865 -0.48033178  0.15318538 -0.02968577  0.006691800
V5 -0.009122262 -0.01474342 -0.12498114  0.08170118  0.988637470

summary(prcomp(census))

Importance of components:
                          PC1    PC2     PC3     PC4     PC5
Standard deviation     10.345 6.2986 2.89324 1.69348 0.39331
Proportion of Variance  0.677 0.2510 0.05295 0.01814 0.00098
Cumulative Proportion   0.677 0.9279 0.98088 0.99902 1.00000

prcomp(census)$sdev^2  ## eigenvalues

[1] 107.0152535  39.6721358   8.3708660   2.8678740   0.1546931

screeplot(prcomp(census), type = "lines")  #type=lines

########################## Example 8.4 J&W, 6th Edition: Turtle data
turtle = read.table("C:/Users/poster/Desktop/data/T6-9.DAT")
names(turtle) = c("Length", "Width", "Height", "Sex")
logturtle = log(turtle[25:48, 1:3])
pc.turtle = prcomp(logturtle)
summary(pc.turtle)

Importance of components:
                          PC1     PC2     PC3
Standard deviation     0.1527 0.02446 0.01897
Proportion of Variance 0.9605 0.02466 0.01483
Cumulative Proportion  0.9605 0.98517 1.00000

cumsum(pc.turtle$sdev^2)/sum(pc.turtle$sdev^2)

[1] 0.9605077 0.9851684 1.0000000

screeplot(pc.turtle, type = "lines", main = "Scree Plot")

################ Example 8.7#
pcmat = pc.turtle$rotation
meanmat = t(replicate(24, as.vector(apply(logturtle, 2, mean))))
pcscoremat = (as.matrix(logturtle) - meanmat) %*% pcmat
pc.turtle$x  #easier way

            PC1           PC2           PC3
25 -0.268473390  0.0610685037  0.0004066149
26 -0.263344980 -0.0157244390 -0.0066858875
27 -0.236045707 -0.0341210935 -0.0074276738
28 -0.119923476  0.0141251769 -0.0366922244
29 -0.120728388 -0.0149572760 -0.0286045130
30 -0.152592781 -0.0089082112  0.0169793685
31 -0.106039296  0.0165711616 -0.0083826652
32 -0.093027430  0.0135333730  0.0051929344
33 -0.106371042 -0.0012452738  0.0278503862
34 -0.006575259 -0.0167441765 -0.0071889505
35 -0.006268452 -0.0114497088  0.0061739765
36 -0.011979592  0.0008012246  0.0267516989
37  0.060886922  0.0280465362 -0.0125896593
38  0.041862973 -0.0108766303  0.0089785626
39  0.047500815 -0.0174403631  0.0021060793
40  0.070171264 -0.0330572711  0.0006647271
41  0.040553932 -0.0277470963  0.0419820919
42  0.112788083  0.0212894116 -0.0162796865
43  0.105004626 -0.0293536649 -0.0085075394
44  0.152416632  0.0324987788  0.0055238164
45  0.179458559  0.0111082110 -0.0029094996
46  0.179473590  0.0160774637  0.0091929735
47  0.206783903  0.0297129498  0.0185740872
48  0.294468494 -0.0232075863 -0.0351090182

# detecting outliers
par(mfrow = c(1, 3))
qqnorm(pcscoremat[, 2])
qqline(pcscoremat[, 2])
qqnorm(pcscoremat[, 3])
qqline(pcscoremat[, 3])
# scatter plot of first two pc (pc1 vs. pc2)
plot(pcscoremat[, 2], pcscoremat[, 1])

############ Example Bulls Data

bulls <- read.table("C:/Users/poster/Desktop/data/bull_data.txt", col.names = c("Breed", 
    "SalePr", "YrHgt", "FtFrBody", "PrctFFB", "Frame", "BkFat", "SaleHt", "SaleWt"))
breed = bulls$Breed
X = bulls[-(1:2)]
X = scale(X)
color = 1 * (breed == 1) + 2 * (breed == 5) + 3 * (breed == 8)
par(mfrow = c(1, 1))
pairs(X, col = color)

par(mfrow = c(1, 3))
pcaStBull = prcomp(X, retx = TRUE, center = TRUE, scale = TRUE)
pcaStBull

Standard deviations (1, .., p=7):
[1] 2.0299502 1.1563431 0.8610357 0.6491727 0.4310521 0.3827563 0.2169256

Rotation (n x k) = (7 x 7):
                PC1          PC2         PC3        PC4         PC5
YrHgt    -0.4499313 -0.042790217 -0.41570891  0.1133565 -0.06587066
FtFrBody -0.4123256  0.129836547  0.45029241  0.2474787  0.71934339
PrctFFB  -0.3555618 -0.315507785  0.56827313  0.3147874 -0.57936738
Frame    -0.4339569  0.007728211 -0.45234503  0.2428179 -0.14299538
BkFat     0.1867048  0.714719363 -0.03873196  0.6181171 -0.16023789
SaleHt   -0.4528538  0.101315086 -0.17665043 -0.2157694  0.10953536
SaleWt   -0.2699470  0.600514834  0.25331192 -0.5824327 -0.29054729
                 PC6         PC7
YrHgt     0.07223418 -0.77492612
FtFrBody  0.17706072 -0.01776760
PrctFFB  -0.12780009  0.00239740
Frame     0.43414400  0.58233705
BkFat    -0.20801720 -0.04244214
SaleHt   -0.79928778  0.23672329
SaleWt    0.27656055 -0.04703601

summary(pcaStBull)

Importance of components:
                          PC1    PC2    PC3    PC4     PC5     PC6     PC7
Standard deviation     2.0300 1.1563 0.8610 0.6492 0.43105 0.38276 0.21693
Proportion of Variance 0.5887 0.1910 0.1059 0.0602 0.02654 0.02093 0.00672
Cumulative Proportion  0.5887 0.7797 0.8856 0.9458 0.97235 0.99328 1.00000

par(mfrow = c(1, 2))
screeplot(pcaStBull, main = "Variation of Scree Plot", type = "lines")
# Take first two or three principal components??? Not as conclusive
Y_hat_R = as.matrix(X) %*% pcaStBull$rotation
scatterplot3d(Y_hat_R[, 1:3], color = color, angle = 45, type = "h", pch = 16)
legend("topright", legend = c("Angus", "Hereford", "Simental"), col = 1:3, pch = rep(16, 
    3))

# perform Linear regression
my.lm <- lm(SalePr ~ YrHgt + FtFrBody + PrctFFB + Frame + BkFat + SaleHt + SaleWt, 
    data = bulls)
summary(my.lm)


Call:
lm(formula = SalePr ~ YrHgt + FtFrBody + PrctFFB + Frame + BkFat + 
    SaleHt + SaleWt, data = bulls)

Residuals:
    Min      1Q  Median      3Q     Max 
-927.31 -279.95  -43.97  220.15 1612.87 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -3156.8944  4105.7192  -0.769  0.44461   
YrHgt          25.9314   111.4029   0.233  0.81664   
FtFrBody       -2.2012     1.0707  -2.056  0.04365 * 
PrctFFB       -30.0976    26.7168  -1.127  0.26390   
Frame         360.3168   172.9033   2.084  0.04093 * 
BkFat        2571.5500   790.7881   3.252  0.00179 **
SaleHt         84.3969    64.0910   1.317  0.19232   
SaleWt          0.3634     0.6085   0.597  0.55233   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 460.9 on 68 degrees of freedom
Multiple R-squared:  0.5037,    Adjusted R-squared:  0.4526 
F-statistic: 9.859 on 7 and 68 DF,  p-value: 2.008e-08

vif(my.lm)

    YrHgt  FtFrBody   PrctFFB     Frame     BkFat    SaleHt    SaleWt 
13.134475  3.478312  2.693996  9.064782  1.770954  5.826196  2.202311

X <- bulls[, 3:9]
my.pr <- prcomp(X, scale = TRUE, center = TRUE)
summary(my.pr)

Importance of components:
                          PC1    PC2    PC3    PC4     PC5     PC6     PC7
Standard deviation     2.0300 1.1563 0.8610 0.6492 0.43105 0.38276 0.21693
Proportion of Variance 0.5887 0.1910 0.1059 0.0602 0.02654 0.02093 0.00672
Cumulative Proportion  0.5887 0.7797 0.8856 0.9458 0.97235 0.99328 1.00000

pc.x <- my.pr$x

new.lm <- lm(bulls[, 2] ~ pc.x)
summary(new.lm)


Call:
lm(formula = bulls[, 2] ~ pc.x)

Residuals:
    Min      1Q  Median      3Q     Max 
-927.31 -279.95  -43.97  220.15 1612.87 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1742.43      52.87  32.955  < 2e-16 ***
pc.xPC1       -92.33      26.22  -3.521 0.000772 ***
pc.xPC2       215.30      46.03   4.678 1.43e-05 ***
pc.xPC3      -344.39      61.81  -5.571 4.69e-07 ***
pc.xPC4        83.09      81.99   1.013 0.314449    
pc.xPC5      -172.56     123.47  -1.398 0.166786    
pc.xPC6       -45.42     139.05  -0.327 0.744969    
pc.xPC7       191.11     245.36   0.779 0.438727    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 460.9 on 68 degrees of freedom
Multiple R-squared:  0.5037,    Adjusted R-squared:  0.4526 
F-statistic: 9.859 on 7 and 68 DF,  p-value: 2.008e-08

newer.lm <- lm(bulls[, 2] ~ pc.x[, 1:3])
summary(newer.lm)


Call:
lm(formula = bulls[, 2] ~ pc.x[, 1:3])

Residuals:
    Min      1Q  Median      3Q     Max 
-953.33 -313.71  -21.59  162.47 1676.33 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     1742.43      52.76  33.026  < 2e-16 ***
pc.x[, 1:3]PC1   -92.33      26.16  -3.529 0.000732 ***
pc.x[, 1:3]PC2   215.30      45.93   4.688 1.27e-05 ***
pc.x[, 1:3]PC3  -344.39      61.68  -5.583 3.93e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 460 on 72 degrees of freedom
Multiple R-squared:  0.4767,    Adjusted R-squared:  0.4549 
F-statistic: 21.87 on 3 and 72 DF,  p-value: 3.583e-10

Lec 7: FA

stock = data.matrix(read.table("C:/Users/poster/Desktop/data/T8-4.DAT"))  # stock data
nrow(stock)  #n=103

[1] 103

# X1: J P Morgan X2: Citibank X3: Wells Fargo X4: Royal Dutch Shell X5:
# ExxonMobil

### Example 9.4### The Principal Component (Principal Factor) Method Usually
### use centralized or standardedized
pc.stock = prcomp(stock, scale = TRUE, center = TRUE)  #standardized
pc.stock

Standard deviations (1, .., p=5):
[1] 1.5611768 1.1861756 0.7074693 0.6324805 0.5051434

Rotation (n x k) = (5 x 5):
          PC1        PC2         PC3        PC4         PC5
V1 -0.4690832  0.3680070 -0.60431522  0.3630228  0.38412160
V2 -0.5324055  0.2364624 -0.13610618 -0.6292079 -0.49618794
V3 -0.4651633  0.3151795  0.77182810  0.2889658  0.07116948
V4 -0.3873459 -0.5850373  0.09336192 -0.3812515  0.59466408
V5 -0.3606821 -0.6058463 -0.10882629  0.4934145 -0.49755167

loading1 = pc.stock$sdev[1] * pc.stock$rotation[, 1]  #factor loading1
loading1

        V1         V2         V3         V4         V5 
-0.7323218 -0.8311791 -0.7262022 -0.6047155 -0.5630885

loading2 = pc.stock$sdev[2] * pc.stock$rotation[, 2]  #factor loading1
loading2

        V1         V2         V3         V4         V5 
 0.4365209  0.2804859  0.3738582 -0.6939569 -0.7186401

## Specific variance epsilon- the diagonal matrix and The residual matrix##
## One factor model
1 - (loading1)^2

       V1        V2        V3        V4        V5 
0.4637047 0.3091413 0.4726303 0.6343192 0.6829314

# The residual matrix
cor(stock) - loading1 %*% t(loading1) - diag(1 - (loading1)^2)

            V1          V2          V3         V4         V5
V1  0.00000000  0.02359722 -0.02131640 -0.3282445 -0.2578992
V2  0.02359722  0.00000000 -0.02946169 -0.1803348 -0.2553527
V3 -0.02131640 -0.02946169  0.00000000 -0.2566466 -0.2627094
V4 -0.32824446 -0.18033479 -0.25664656  0.0000000  0.3428693
V5 -0.25789922 -0.25535267 -0.26270940  0.3428693  0.0000000

# Two factor model
1 - apply(cbind(loading1^2 + loading2^2), 1, sum)

       V1        V2        V3        V4        V5 
0.2731542 0.2304689 0.3328604 0.1527429 0.1664878

# The residual matrix
cor(stock) - cbind(loading1, loading2) %*% t(cbind(loading1, loading2)) - diag(1 - 
    apply(cbind(loading1^2 + loading2^2), 1, sum))

            V1          V2           V3           V4           V5
V1  0.00000000 -0.09884073 -0.184513342 -0.025317756  0.055802201
V2 -0.09884073  0.00000000 -0.134323655  0.014310342 -0.053784258
V3 -0.18451334 -0.13432365  0.000000000  0.002794963  0.005960127
V4 -0.02531776  0.01431034  0.002794963  0.000000000 -0.155835955
V5  0.05580220 -0.05378426  0.005960127 -0.155835955  0.000000000

## Rotation##
Lpc = cbind(loading1, loading2)
varimax(Lpc)

$loadings

Loadings:
   loading1 loading2
V1 -0.852           
V2 -0.849   -0.220  
V3 -0.812           
V4 -0.126   -0.912  
V5          -0.910  

               loading1 loading2
SS loadings       2.129    1.716
Proportion Var    0.426    0.343
Cumulative Var    0.426    0.769

$rotmat
           [,1]      [,2]
[1,]  0.8368530 0.5474277
[2,] -0.5474277 0.8368530

# variance epsilon
1 - apply(varimax(Lpc)$loadings^2, 1, sum)

       V1        V2        V3        V4        V5 
0.2731542 0.2304689 0.3328604 0.1527429 0.1664878

### Example 9.5### The Maximum Likelihood Method Using Covariance Matrix S No
### rotation##
mlik.fa = factanal(stock, factors = 2, rotation = "none")
mlik.fa


Call:
factanal(x = stock, factors = 2, rotation = "none")

Uniquenesses:
   V1    V2    V3    V4    V5 
0.417 0.275 0.542 0.005 0.530 

Loadings:
   Factor1 Factor2
V1  0.121   0.754 
V2  0.328   0.786 
V3  0.188   0.650 
V4  0.997         
V5  0.685         

               Factor1 Factor2
SS loadings      1.622   1.610
Proportion Var   0.324   0.322
Cumulative Var   0.324   0.646

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16

# specific variance
1 - apply(mlik.fa$loadings^2, 1, sum)

         V1          V2          V3          V4          V5 
0.416537511 0.274690258 0.542023377 0.004998441 0.529843148

## Rotation (varimax)##
mlik.farot = factanal(stock, factors = 2, rotation = "varimax")  #rotmat
mlik.farot


Call:
factanal(x = stock, factors = 2, rotation = "varimax")

Uniquenesses:
   V1    V2    V3    V4    V5 
0.417 0.275 0.542 0.005 0.530 

Loadings:
   Factor1 Factor2
V1 0.763          
V2 0.819   0.232  
V3 0.668   0.108  
V4 0.113   0.991  
V5 0.108   0.677  

               Factor1 Factor2
SS loadings      1.725   1.507
Proportion Var   0.345   0.301
Cumulative Var   0.345   0.646

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16

# specific variance
1 - apply(mlik.farot$loadings^2, 1, sum)  #same as no rotation.

         V1          V2          V3          V4          V5 
0.416537511 0.274690258 0.542023377 0.004998441 0.529843148

## Or use the correlation mattrix R
cor.stock = cor(stock)
factanal(covmat = cor.stock, factors = 2, rotation = "none", n.obs = 103, method = "mle")


Call:
factanal(factors = 2, covmat = cor.stock, n.obs = 103, rotation = "none",     method = "mle")

Uniquenesses:
   V1    V2    V3    V4    V5 
0.417 0.275 0.542 0.005 0.530 

Loadings:
   Factor1 Factor2
V1  0.121   0.754 
V2  0.328   0.786 
V3  0.188   0.650 
V4  0.997         
V5  0.685         

               Factor1 Factor2
SS loadings      1.622   1.610
Proportion Var   0.324   0.322
Cumulative Var   0.324   0.646

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16

factanal(covmat = cor.stock, factors = 2, rotation = "varimax", n.obs = 103, 
    method = "mle")


Call:
factanal(factors = 2, covmat = cor.stock, n.obs = 103, rotation = "varimax",     method = "mle")

Uniquenesses:
   V1    V2    V3    V4    V5 
0.417 0.275 0.542 0.005 0.530 

Loadings:
   Factor1 Factor2
V1 0.763          
V2 0.819   0.232  
V3 0.668   0.108  
V4 0.113   0.991  
V5 0.108   0.677  

               Factor1 Factor2
SS loadings      1.725   1.507
Proportion Var   0.345   0.301
Cumulative Var   0.345   0.646

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16

######################## Use psych package###

# The Principal Component (Principal Factor) Method
principal(stock, nfactors = 2, rotate = "none")

Principal Components Analysis
Call: principal(r = stock, nfactors = 2, rotate = "none")
Standardized loadings (pattern matrix) based upon correlation matrix
    PC1   PC2   h2   u2 com
V1 0.73 -0.44 0.73 0.27 1.6
V2 0.83 -0.28 0.77 0.23 1.2
V3 0.73 -0.37 0.67 0.33 1.5
V4 0.60  0.69 0.85 0.15 2.0
V5 0.56  0.72 0.83 0.17 1.9

                       PC1  PC2
SS loadings           2.44 1.41
Proportion Var        0.49 0.28
Cumulative Var        0.49 0.77
Proportion Explained  0.63 0.37
Cumulative Proportion 0.63 1.00

Mean item complexity =  1.6
Test of the hypothesis that 2 components are sufficient.

The root mean square of the residuals (RMSR) is  0.1 
 with the empirical chi square  19.17  with prob <  1.2e-05 

Fit based upon off diagonal values = 0.95

principal(stock, nfactors = 2, rotate = "varimax")

Principal Components Analysis
Call: principal(r = stock, nfactors = 2, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
    RC1  RC2   h2   u2 com
V1 0.85 0.04 0.73 0.27 1.0
V2 0.85 0.22 0.77 0.23 1.1
V3 0.81 0.08 0.67 0.33 1.0
V4 0.13 0.91 0.85 0.15 1.0
V5 0.08 0.91 0.83 0.17 1.0

                       RC1  RC2
SS loadings           2.13 1.72
Proportion Var        0.43 0.34
Cumulative Var        0.43 0.77
Proportion Explained  0.55 0.45
Cumulative Proportion 0.55 1.00

Mean item complexity =  1
Test of the hypothesis that 2 components are sufficient.

The root mean square of the residuals (RMSR) is  0.1 
 with the empirical chi square  19.17  with prob <  1.2e-05 

Fit based upon off diagonal values = 0.95

# The Maximum Likelihood Method
fa(stock, nfactors = 2, rotate = "none", fm = "mle")

Factor Analysis using method =  ml
Call: fa(r = stock, nfactors = 2, rotate = "none", fm = "mle")
Standardized loadings (pattern matrix) based upon correlation matrix
    ML1   ML2   h2    u2 com
V1 0.12  0.75 0.58 0.417 1.1
V2 0.33  0.79 0.73 0.275 1.3
V3 0.19  0.65 0.46 0.542 1.2
V4 1.00 -0.01 1.00 0.005 1.0
V5 0.69  0.03 0.47 0.530 1.0

                       ML1  ML2
SS loadings           1.62 1.61
Proportion Var        0.32 0.32
Cumulative Var        0.32 0.65
Proportion Explained  0.50 0.50
Cumulative Proportion 0.50 1.00

Mean item complexity =  1.1
Test of the hypothesis that 2 factors are sufficient.

The degrees of freedom for the null model are  10  and the objective function was  1.74 with Chi Square of  173.31
The degrees of freedom for the model are 1  and the objective function was  0.02 

The root mean square of the residuals (RMSR) is  0.02 
The df corrected root mean square of the residuals is  0.06 

The harmonic number of observations is  103 with the empirical chi square  0.78  with prob <  0.38 
The total number of observations was  103  with Likelihood Chi Square =  1.97  with prob <  0.16 

Tucker Lewis Index of factoring reliability =  0.939
RMSEA index =  0.102  and the 90 % confidence intervals are  0 0.302
BIC =  -2.66
Fit based upon off diagonal values = 1
Measures of factor score adequacy             
                                                   ML1  ML2
Correlation of (regression) scores with factors   1.00 0.90
Multiple R square of scores with factors          1.00 0.81
Minimum correlation of possible factor scores     0.99 0.63

fa(stock, nfactors = 2, rotate = "varimax", fm = "mle")

Factor Analysis using method =  ml
Call: fa(r = stock, nfactors = 2, rotate = "varimax", fm = "mle")
Standardized loadings (pattern matrix) based upon correlation matrix
    ML2  ML1   h2    u2 com
V1 0.76 0.03 0.58 0.417 1.0
V2 0.82 0.23 0.73 0.275 1.2
V3 0.67 0.11 0.46 0.542 1.1
V4 0.11 0.99 1.00 0.005 1.0
V5 0.11 0.68 0.47 0.530 1.1

                       ML2  ML1
SS loadings           1.72 1.51
Proportion Var        0.34 0.30
Cumulative Var        0.34 0.65
Proportion Explained  0.53 0.47
Cumulative Proportion 0.53 1.00

Mean item complexity =  1.1
Test of the hypothesis that 2 factors are sufficient.

The degrees of freedom for the null model are  10  and the objective function was  1.74 with Chi Square of  173.31
The degrees of freedom for the model are 1  and the objective function was  0.02 

The root mean square of the residuals (RMSR) is  0.02 
The df corrected root mean square of the residuals is  0.06 

The harmonic number of observations is  103 with the empirical chi square  0.78  with prob <  0.38 
The total number of observations was  103  with Likelihood Chi Square =  1.97  with prob <  0.16 

Tucker Lewis Index of factoring reliability =  0.939
RMSEA index =  0.102  and the 90 % confidence intervals are  0 0.302
BIC =  -2.66
Fit based upon off diagonal values = 1
Measures of factor score adequacy             
                                                   ML2  ML1
Correlation of (regression) scores with factors   0.90 1.00
Multiple R square of scores with factors          0.82 0.99
Minimum correlation of possible factor scores     0.64 0.98

Lec 8: DA

######### Discriminant Analysis Looking for lower dimensional structure

Data <- read.table("C:/Users/poster/Desktop/data/T1-3.dat", header = F)
Lizards <- Data[, c(2, 3, 1)]
Gender = c("f", "m", "f", "f", "m", "f", "m", "f", "m", "f", "m", "f", "m", 
    "m", "m", "m", "f", "m", "m", "m", "f", "f", "m", "f", "f")
sp <- scatterplot3d(Lizards[Gender == "f", ], box = FALSE, xlab = "Snout-Vent Length", 
    xlim = c(40, 90), ylab = "Hind Limb Span", ylim = c(90, 170), zlab = "Weight", 
    mar = c(5, 3, 0, 3), zlim = c(0, 16), type = "h", sub = "Red=Male &  Black=Female")
sp$points3d(Lizards[Gender == "m", ], col = "red", type = "h")

###### Classification Example for Motivation

###### Hemophiliac Example 11.3 in Johnson and Wichern, Sixth Edition Detection
###### of Hemophilia A carriers. Two measurement from blood samples X1 =
###### log10(AHF activity) X2 = log10(AHF-like antigen) where AHF =
###### Autohemophilic factor. Group 1 (Normal) : women who did not carry
###### hemophilia (n1 = 30) Group 2 (Obligatory Carriers) : women who are known
###### to be hemophilia carriers (daughters of hemophiliacs, mothers with more
###### than one hemophiliac sons, mothers with one hemophiliac son and other
###### hemophiliac relatives.) (n2 = 45)
rm(list = ls(all = TRUE))
Data = read.table("C:/Users/poster/Desktop/data/T11-8.dat", header = F)
names(Data) = c("Group", "AHF.Activity", "AHF.Antigen")
X_1 = Data[Data$Group == 1, 2:3]
X_2 = Data[Data$Group == 2, 2:3]

mar = c(5, 6, 0, 0)
dataEllipse(x = X_1$AHF.Activity, y = X_1$AHF.Antigen, center.cex = 0.75, xlim = c(-1, 
    1), ylim = c(-1, 1), xlab = expression(log(Activity)), ylab = expression(log(Antigen)), 
    log = "", levels = c(0.5, 0.95), center.pch = 3, col = "black")
dataEllipse(x = X_2$AHF.Activity, y = X_2$AHF.Antigen, center.cex = 0.75, log = "", 
    levels = c(0.5, 0.95), center.pch = 3, col = "blue", add = TRUE)
legend(0, -0.4, fill = c("black", "blue"), legend = c("Obligatory Carrier", 
    "Normal"))
x_0 = c(-0.21, -0.044)  # new point
points(x = x_0[1], y = x_0[2], col = "red", pch = 16, cex = 2)


####### Linear Discriminant Function
n_1 = dim(X_1)[1]
n_1

[1] 30

n_2 = dim(X_2)[1]
n_2

[1] 45

xbar_1 = colMeans(X_1)  #Normal (population 1)
xbar_2 = colMeans(X_2)  #Obligatory Carriers (population 2)
S_1 = var(X_1)
S_2 = var(X_2)
S_p = ((n_1 - 1) * S_1 + (n_2 - 1) * S_2)/(n_1 + n_2 - 2)
a_hat = solve(S_p) %*% (xbar_1 - xbar_2)
a_hat

                  [,1]
AHF.Activity  19.31900
AHF.Antigen  -17.12424

ybar_1 = t(a_hat) %*% xbar_1
ybar_2 = t(a_hat) %*% xbar_2
mhat = (1/2) * (ybar_1 + ybar_2)
mhat

          [,1]
[1,] -3.559472

#### The direction of vector a
arrows(x0 = (0 - 1.2)/2, y0 = (0 - 1)/2, x1 = (1 - 1.2)/2, y1 = (a_hat[2]/a_hat[1] - 
    1)/2, code = 2)  # Translated and scaled version of vector a_hat
abline(b = -a_hat[1]/a_hat[2], a = mhat/a_hat[2], lty = "dashed", lwd = 2, col = "orange")  #Separting (LDF) line
text(0.55, 0.5, expression(x[2] == frac(19.32, 17.12) * x[1] + frac(3.56, 17.12)), 
    cex = 0.7)
text(-0.1, -1, expression("Direction of " * hat(a)))

########## Classifying a new observation
x_0 = c(-0.21, -0.044)
y_0 = t(a_hat) %*% x_0
y_0

          [,1]
[1,] -3.303523

mhat

          [,1]
[1,] -3.559472

1/(1/exp(y_0 - mhat) + 1)  #posterior of pi_1|x_0

          [,1]
[1,] 0.5636403

1 - 1/(1/exp(y_0 - mhat) + 1)  #posterior of pi_2|x_0

          [,1]
[1,] 0.4363597

##### y_0 > m_hat ==> assign x_0 to poulation 1

##### Quadratic Discriminant Function
khat = 1/2 * (log(det(S_1)) - log(det(S_2))) + 1/2 * (t(xbar_1) %*% solve(S_1) %*% 
    xbar_1 - t(xbar_2) %*% solve(S_2) %*% xbar_2)
yy_0 = -1/2 * t(x_0) %*% (solve(S_1) - solve(S_2)) %*% x_0 + (t(xbar_1) %*% 
    solve(S_1) - t(xbar_2) %*% solve(S_2)) %*% x_0
yy_0

          [,1]
[1,] -3.039055

khat

          [,1]
[1,] -3.301692

1/(1/exp(yy_0 - khat) + 1)  #posterior of pi_1|x_0

          [,1]
[1,] 0.5652844

1 - 1/(1/exp(yy_0 - khat) + 1)  #posterior of pi_2|x_0

          [,1]
[1,] 0.4347156

##### yy_0 > khat ==> assign x_0 to poulation 1

(solve(S_1) - solve(S_2))  # Quadratic

             AHF.Activity AHF.Antigen
AHF.Activity     62.26556   -70.11627
AHF.Antigen     -70.11627    85.27190

(t(xbar_1) %*% solve(S_1) - t(xbar_2) %*% solve(S_2))  # Linear

     AHF.Activity AHF.Antigen
[1,]     12.76723   -10.22016

-khat  # Constant

         [,1]
[1,] 3.301692

x = seq(-1, 1, 0.01)
y = seq(-1, 1, 0.01)
z = outer(x, y, function(x, y) (-(1/2) * (62.266 * x^2 - 140.232 * x * y + 85.272 * 
    y^2) + 12.767 * x - 10.22 * y + 3.302))
contour(x, y, z, levels = 0, labels = NULL, add = T)
text(0.08, -0.8, expression(z == -31.133 * x[1]^2 + 70.166 * x[1] * x[2] - 42.636 * 
    x[2]^2 + 12.767 * x[1] - 10.22 * x[2] + 3.302), cex = 0.7)

###### Use functions
Data = read.table("C:/Users/poster/Desktop/data/T11-8.dat", header = F)
z = lda(V1 ~ ., as.data.frame(Data), prior = c(1, 1)/2)
predict(z, as.data.frame(t(c(1, -0.21, -0.044))))

$class
[1] 1
Levels: 1 2

$posterior
          1         2
1 0.5636403 0.4363597

$x
         LD1
1 -0.1196716

z2 = qda(V1 ~ ., as.data.frame(Data), prior = c(1, 1)/2)
predict(z2, as.data.frame(t(c(1, -0.21, -0.044))))

$class
[1] 1
Levels: 1 2

$posterior
          1         2
1 0.5652844 0.4347156

###### Apparent Error Rates Example 11.8 Classification Alaskan and Canadian
###### Salmon X1: Class X2: Gender (delete) X3: Diameter of rings for the
###### first-year Freshwater growth X4: Diameter of rings for the first-year
###### Marine growth
rm(list = ls())
Data = read.table("C:/Users/poster/Desktop/data/T11-2.dat", header = F)[, c(1, 
    3, 4)]
names(Data) = c("Class", "Freshwater", "Marine")
plot(Data[1:50, c(2, 3)], col = "blue", xlim = c(40, 200), ylim = c(250, 550))
points(Data[51:100, c(2, 3)], col = "red")

table(Data$Class)  # n1=n2=50


 1  2 
50 50

z = lda(Class ~ ., Data)
newclass = predict(z, Data)$class
table(Data$Class, newclass)

(1 + 6)/(50 + 50)

[1] 0.07

z = qda(Class ~ ., Data)
newclass = predict(z, Data)$class
table(Data$Class, newclass)

(2 + 5)/(50 + 50)

[1] 0.07

######### Training and Validation
N = table(Data$Class)
N


 1  2 
50 50

train <- sample(1:100, 50)  #Choose 50 obs for training
table(Data[train, 1])


 1  2 
23 27

z <- lda(Class ~ ., data = Data, prior = c(1, 1)/2, subset = train)
table(Data[-train, 1], predict(z, Data[-train, ])$class)

########## Holdout (Lachenbruch's) Method
holdout.class = NULL
vec = 1:100
for (i in 1:100) {
    train = vec[-i]
    z <- lda(Class ~ ., data = Data, prior = c(1, 1)/2, subset = train)
    holdout.class[i] = predict(z, Data[i, ])$class
}

table(Data$Class, holdout.class)

   holdout.class
     1  2
  1 44  6
  2  1 49

## Cross Validation and Bootstrap could be used here too!

##### Logistic Regression

fit = glm((Class - 1) ~ ., family = binomial(link = "logit"), data = Data)
fitted.results <- fitted(fit)
fitted.class <- ifelse(fitted.results > 0.5, 1, 0)
table(Data$Class, fitted.class + 1)

7/100

[1] 0.07

########## Classifying when there are more than 2 populations
z <- lda(Species ~ ., iris, prior = c(1, 1, 1)/3)
table(iris$Species, predict(z, iris)$class)

            
             setosa versicolor virginica
  setosa         50          0         0
  versicolor      0         48         2
  virginica       0          1        49

z <- qda(Species ~ ., iris, prior = c(1, 1, 1)/3)
table(iris$Species, predict(z, iris)$class)

            
             setosa versicolor virginica
  setosa         50          0         0
  versicolor      0         48         2
  virginica       0          1        49

Lec 9: Clustering

################ Hierarchical Clustering
mydata = cbind(1:60, iris[c(1:20, 51:70, 101:120), 1:4])  # First 20 from each Species
mydata

    1:60 Sepal.Length Sepal.Width Petal.Length Petal.Width
1      1          5.1         3.5          1.4         0.2
2      2          4.9         3.0          1.4         0.2
3      3          4.7         3.2          1.3         0.2
4      4          4.6         3.1          1.5         0.2
5      5          5.0         3.6          1.4         0.2
6      6          5.4         3.9          1.7         0.4
7      7          4.6         3.4          1.4         0.3
8      8          5.0         3.4          1.5         0.2
9      9          4.4         2.9          1.4         0.2
10    10          4.9         3.1          1.5         0.1
11    11          5.4         3.7          1.5         0.2
12    12          4.8         3.4          1.6         0.2
13    13          4.8         3.0          1.4         0.1
14    14          4.3         3.0          1.1         0.1
15    15          5.8         4.0          1.2         0.2
16    16          5.7         4.4          1.5         0.4
17    17          5.4         3.9          1.3         0.4
18    18          5.1         3.5          1.4         0.3
19    19          5.7         3.8          1.7         0.3
20    20          5.1         3.8          1.5         0.3
51    21          7.0         3.2          4.7         1.4
52    22          6.4         3.2          4.5         1.5
53    23          6.9         3.1          4.9         1.5
54    24          5.5         2.3          4.0         1.3
55    25          6.5         2.8          4.6         1.5
56    26          5.7         2.8          4.5         1.3
57    27          6.3         3.3          4.7         1.6
58    28          4.9         2.4          3.3         1.0
59    29          6.6         2.9          4.6         1.3
60    30          5.2         2.7          3.9         1.4
61    31          5.0         2.0          3.5         1.0
62    32          5.9         3.0          4.2         1.5
63    33          6.0         2.2          4.0         1.0
64    34          6.1         2.9          4.7         1.4
65    35          5.6         2.9          3.6         1.3
66    36          6.7         3.1          4.4         1.4
67    37          5.6         3.0          4.5         1.5
68    38          5.8         2.7          4.1         1.0
69    39          6.2         2.2          4.5         1.5
70    40          5.6         2.5          3.9         1.1
101   41          6.3         3.3          6.0         2.5
102   42          5.8         2.7          5.1         1.9
103   43          7.1         3.0          5.9         2.1
104   44          6.3         2.9          5.6         1.8
105   45          6.5         3.0          5.8         2.2
106   46          7.6         3.0          6.6         2.1
107   47          4.9         2.5          4.5         1.7
108   48          7.3         2.9          6.3         1.8
109   49          6.7         2.5          5.8         1.8
110   50          7.2         3.6          6.1         2.5
111   51          6.5         3.2          5.1         2.0
112   52          6.4         2.7          5.3         1.9
113   53          6.8         3.0          5.5         2.1
114   54          5.7         2.5          5.0         2.0
115   55          5.8         2.8          5.1         2.4
116   56          6.4         3.2          5.3         2.3
117   57          6.5         3.0          5.5         1.8
118   58          7.7         3.8          6.7         2.2
119   59          7.7         2.6          6.9         2.3
120   60          6.0         2.2          5.0         1.5

d = dist(mydata, method = "euclidean")
d  # distance matrix

            1         2         3         4         5         6         7
2    1.135782                                                            
3    2.063977  1.044031                                                  
4    3.069202  2.027313  1.029563                                        
5    4.002499  3.061046  2.063977  1.191638                              
6    5.037857  4.146082  3.190611  2.315167  1.174734                    
7    6.022458  5.025933  4.008740  3.018278  2.051828  1.410674          
8    7.002143  6.014981  5.016971  4.031129  3.008322  2.118962  1.086278
9    8.052950  7.018547  6.015812  5.008992  4.104875  3.336165  2.073644
10   9.012214  8.001875  7.007139  6.008328  5.027922  4.125530  3.038092
11  10.006998  9.041571  8.048602  7.071068  6.014981  5.011986  4.092676
12  11.006362 10.010494  9.007774  8.008745  7.008566  6.054750  5.008992
13  12.014574 11.000909 10.003499  9.003888  8.025584  7.095773  6.019967
14  13.038021 12.019151 11.011358 10.013491  9.052624  8.152914  7.027090
15  14.027829 13.070960 12.077251 11.105854 10.041912  9.025519  8.114801
16  15.040612 14.094325 13.096564 12.121881 11.053506 10.018982  9.123048
17  16.009372 15.036954 14.036381 13.052203 12.012493 11.007270 10.045397
18  17.000294 16.009372 15.008997 14.015349 13.001154 12.014574 11.011812
19  18.015271 17.040540 16.047741 15.058220 14.022482 13.004230 12.060680
20  19.002895 18.019434 17.016756 16.023420 15.002333 14.005356 13.016144
51  20.396813 19.436563 18.501081 17.505713 16.507271 15.428869 14.625321
52  21.309153 20.336912 19.386077 18.383144 17.390227 16.326053 15.470294
53  22.390623 21.423118 20.481699 19.481786 18.487834 17.416659 16.585234
54  23.207111 22.199550 21.235583 20.221523 19.259024 18.239243 17.285254
55  24.297737 23.313730 22.360233 21.354157 20.367130 19.314243 18.429596
56  25.232321 24.238399 23.272516 22.260054 21.282622 20.247222 19.317867
57  26.274132 25.296245 24.332900 23.326594 22.330025 21.274633 20.383327
58  27.101660 26.088503 25.106175 24.092737 23.123581 22.122839 21.123210
59  28.249956 27.264446 26.302091 25.295454 24.300206 23.252097 22.349273
60  29.143438 28.140185 27.160633 26.148040 25.170220 24.152019 23.179948
61  30.121587 29.104295 28.124900 27.111068 26.145937 25.146968 24.145807
62  31.167611 30.174990 29.198973 28.190069 27.197426 26.163333 25.221618
63  32.154315 31.148836 30.176481 29.166076 28.184393 27.164315 26.204007
64  33.206776 32.214593 31.241799 30.231937 29.240896 28.204432 27.269030
65  34.097801 33.099094 32.115417 31.107234 30.114780 29.093986 28.126322
66  35.187498 34.200731 33.227398 32.222042 31.218264 30.176481 29.252521
67  36.163518 35.168025 34.187425 33.176799 32.187265 31.159268 30.202814
68  37.122231 36.122431 35.141713 34.132096 33.141816 32.120399 31.156219
69  38.186385 37.183733 36.210220 35.200284 34.215494 33.190059 32.234299
70  39.106393 38.102493 37.119671 36.109971 35.123069 34.107624 33.131556
101 40.347615 39.363689 38.392056 37.382750 36.389971 35.342184 34.422376
102 41.215410 40.217906 39.240158 38.228916 37.242852 36.213395 35.258758
103 42.333320 41.348519 40.380317 39.372960 38.374862 37.325996 36.412910
104 43.255058 42.263104 41.288013 40.277785 39.285875 38.248922 37.309918
105 44.289615 43.300346 42.326469 41.317188 40.323318 39.281930 38.350228
106 45.410902 44.429045 43.464353 42.457390 41.458051 40.404826 39.502405
107 46.140004 45.134355 44.147707 43.135832 42.155427 41.142922 40.155572
108 47.336772 46.350189 45.380723 44.372627 43.374532 42.328832 41.410144
109 48.264687 47.269546 46.295788 45.285980 44.294695 43.259912 42.318199
110 49.323422 48.342838 47.368133 46.362269 45.356587 44.306884 43.390437
111 50.189441 49.198882 48.217424 47.210380 46.209739 45.175436 44.229289
112 51.199902 50.203984 49.224283 48.215454 47.221499 46.191449 45.238258
113 52.226047 51.235047 50.256343 49.248959 48.249560 47.213346 46.271481
114 53.165402 52.164068 51.180270 50.170310 49.182314 48.161188 47.189723
115 54.180347 53.182516 52.198180 51.189159 50.197709 49.172452 48.206846
116 55.194203 54.202491 53.219357 52.212068 51.212694 50.180275 49.228955
117 56.192348 55.199004 54.216972 53.208834 52.210918 51.180270 50.227781
118 57.340562 56.361068 55.385197 54.379592 53.370591 52.321506 51.404961
119 58.362916 57.372990 56.401507 55.393411 54.396231 53.353819 52.426139
120 59.145160 58.142067 57.158289 56.148998 55.159859 54.141943 53.167659
            8         9        10        11        12        13        14
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9    1.272792                                                            
10   2.027313  1.144552                                                  
11   3.041381  2.376973  1.272792                                        
12   4.006245  3.074085  2.029778  1.208305                              
13   5.021952  4.022437  3.004996  2.206808  1.100000                    
14   6.067949  5.011986  4.065710  3.296968  2.161018  1.157584          
15   7.077429  6.261789  5.169139  4.042277  3.243455  2.459675  2.066398
16   8.095060  7.279423  6.198387  5.061620  4.226109  3.445287  2.858321
17   9.027181  8.127115  7.072482  6.009992  5.073460  4.155719  3.339162
18  10.002000  9.047652  8.015610  7.010706  6.012487  5.037857  4.125530
19  11.031772 10.129166  9.066973  8.009370  7.070361  6.130253  5.291503
20  12.007498 11.059837 10.028460  9.006109  8.016857  7.055494  6.122091
51  13.591174 12.774193 11.719642 10.700000  9.845303  9.025519  8.424963
52  14.446107 13.579028 12.538740 11.529961 10.616026  9.755511  9.053729
53  15.554742 14.704761 13.657233 12.643575 11.750745 10.891740 10.206371
54  16.276363 15.314699 14.306991 13.357769 12.355970 11.409645 10.572606
55  17.404310 16.503030 15.466739 14.467895 13.525531 12.614674 11.835962
56  18.304644 17.364331 16.345336 15.365871 14.380195 13.449907 12.609520
57  19.362335 18.455893 17.420964 16.406401 15.454126 14.542352 13.722245
58  20.121879 19.124591 18.125672 17.170323 16.141252 15.158826 14.225681
59  21.322054 20.403186 19.364400 18.354836 17.398276 16.459951 15.620179
60  22.174986 21.198349 20.191582 19.215619 18.203846 17.239200 16.322684
61  23.143034 22.140912 21.143084 20.191335 19.163768 18.173057 17.235429
62  24.206404 23.254892 22.231959 21.230167 20.244011 19.287561 18.388312
63  25.185909 24.216730 23.196551 22.214860 21.219802 20.239812 19.332615
64  26.251476 25.302569 24.277768 23.276812 22.293273 21.337291 20.442603
65  27.115125 26.143642 25.127276 24.130893 23.132229 22.156940 21.222394
66  28.228177 27.290475 26.255285 25.236878 24.268910 23.309011 22.413166
67  29.192636 28.226760 27.211395 26.214881 25.217058 24.253041 23.328309
68  30.141831 29.170704 28.152087 27.158056 26.160657 25.183129 24.251804
69  31.218104 30.249463 29.231148 28.241459 27.248853 26.271087 25.352909
70  32.120710 31.139364 30.126566 29.138463 28.134498 27.150322 26.206488
101 33.410178 32.468754 31.448529 30.438627 29.460312 28.517714 27.619920
102 34.248796 33.280325 32.267166 31.273151 30.275898 29.309043 28.382741
103 35.391242 34.455188 33.424691 32.409104 31.441215 30.488358 29.595270
104 36.294765 35.338506 34.317634 33.313961 32.327388 31.365427 30.449138
105 37.334970 36.383788 35.361985 34.353894 33.371994 32.413886 31.501429
106 38.477786 37.548635 36.514518 35.495774 34.534186 33.583925 32.697706
107 39.154438 38.161106 37.160732 36.179276 35.163618 34.182305 33.222432
108 40.387374 39.445912 38.415231 37.401203 36.430070 35.470551 34.567904
109 41.300726 40.340674 39.318952 38.316837 37.331220 36.360831 35.439949
110 42.371335 41.433199 40.403713 39.378928 38.410155 37.456241 36.544904
111 43.214465 42.254349 41.232754 40.220144 39.235443 38.265258 37.329479
112 44.224202 43.256676 42.238490 41.235058 40.244627 39.269581 38.332493
113 45.254944 44.296614 43.273780 42.261803 41.279293 40.308932 39.377786
114 46.182139 45.200111 44.191176 43.197454 42.195023 41.214439 40.262017
115 47.199576 46.222289 45.212277 44.212781 43.214002 42.237661 41.286560
116 48.216698 47.251561 46.233538 45.222782 44.234602 43.262570 42.319381
117 49.213514 48.247176 47.227746 46.219693 45.230742 44.255960 43.313855
118 50.383430 49.444818 48.412602 47.385019 46.417454 45.459322 44.541778
119 51.405253 50.454336 49.427624 48.415597 47.439962 46.472142 45.554363
120 52.157262 51.173235 50.162037 49.168588 48.167728 47.180504 46.224452
           15        16        17        18        19        20        51
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9                                                                        
10                                                                       
11                                                                       
12                                                                       
13                                                                       
14                                                                       
15                                                                       
16   1.140175                                                            
17   2.054264  1.174734                                                  
18   3.128898  2.278157  1.126943                                        
19   4.038564  3.067572  2.066398  1.240967                              
20   5.062608  4.090232  3.024897  2.024846  1.183216                    
51   7.195137  6.274552  5.622277  4.979960  4.032369  4.052160          
52   7.910752  6.938300  6.159545  5.369358  4.373786  4.060788  1.187434
53   9.022195  8.056054  7.284230  6.487681  5.441507  5.072475  2.017424
54   9.645206  8.689649  7.724636  6.734983  5.794825  5.065570  3.544009
55  10.732195  9.747820  8.861151  7.945439  6.891299  6.245799  4.053394
56  11.599569 10.600472  9.661780  8.686772  7.670724  6.881860  5.186521
57  12.607537 11.586630 10.685036  9.749872  8.677557  7.913280  6.044833
58  13.319910 12.338557 11.307520 10.264015  9.308598  8.350449  7.494665
59  14.512753 13.508516 12.575373 11.612493 10.537077  9.729851  8.016857
60  15.355129 14.349216 13.350655 12.333288 11.336225 10.401442  9.226592
61  16.326972 15.349593 14.316773 13.272528 12.306909 11.346365 10.346014
62  17.340704 16.324828 15.351873 14.358621 13.318033 12.410077 11.068424
63  18.323755 17.336090 16.337074 15.321553 14.297902 13.383198 12.109913
64  19.390462 18.375255 17.408331 16.415237 15.368149 14.465822 13.034569
65  20.204455 19.195833 18.197253 17.188659 16.168797 15.214138 14.116303
66  21.314314 20.300246 19.337528 18.355653 17.291327 16.391156 15.006332
67  22.307398 21.288025 20.305172 19.301554 18.273752 17.330032 16.063935
68  23.232305 22.226561 21.232051 20.221523 19.195572 18.246918 17.064876
69  24.330639 23.330238 22.337860 21.329557 20.300000 19.370338 18.046883
70  25.206943 24.204752 23.200216 22.184229 21.170262 20.207424 19.083501
101 26.553154 25.519208 24.569697 23.589828 22.537524 21.628222 20.084820
102 27.364027 26.345967 25.363162 24.359392 23.331095 22.387943 21.049941
103 28.502456 27.480357 26.528287 25.548777 24.484281 23.584741 22.044954
104 29.400340 28.379218 27.409123 26.413822 25.369864 24.439926 23.033671
105 30.440926 29.416492 28.451889 27.462338 26.414011 25.489213 24.044542
106 31.591296 30.569102 29.624821 28.649258 27.577164 26.685951 25.089839
107 32.252132 31.240038 30.234748 29.216605 28.215776 27.233986 26.096551
108 33.481786 32.462440 31.505079 30.519830 29.458106 28.547154 27.053650
109 34.391569 33.377537 32.401543 31.403344 30.360336 29.427708 28.034800
110 35.446015 34.415985 33.463114 32.486151 31.424354 30.503279 29.058045
111 36.270925 35.250532 34.273897 33.281076 32.240502 31.291692 30.012831
112 37.292761 36.277679 35.294334 34.294314 33.259886 32.309441 31.019671
113 38.315793 37.296917 36.322170 35.329733 34.286003 33.343215 32.018901
114 39.254809 38.241600 37.246476 36.238239 35.220307 34.251861 33.039824
115 40.267729 39.249586 38.259770 37.256946 36.235066 35.268399 34.040564
116 41.270086 40.248975 39.269581 38.275188 37.239898 36.283467 35.021850
117 42.267482 41.249970 40.269343 39.271746 38.235716 37.280692 36.015136
118 43.438462 42.410376 41.457207 40.479007 39.415860 38.489089 37.074115
119 44.479996 43.462743 42.496470 41.506867 40.453430 39.526700 38.085430
120 45.215152 44.208483 43.210531 42.200711 41.182278 40.212685 39.026914
           52        53        54        55        56        57        58
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9                                                                        
10                                                                       
11                                                                       
12                                                                       
13                                                                       
14                                                                       
15                                                                       
16                                                                       
17                                                                       
18                                                                       
19                                                                       
20                                                                       
51                                                                       
52                                                                       
53   1.191638                                                            
54   2.431049  2.109502                                                  
55   3.029851  2.083267  1.627882                                        
56   4.085340  3.275668  2.130728  1.300000                              
57   5.006995  4.055860  3.349627  2.076054  1.319091                    
58   6.370243  5.683309  4.117038  3.695944  2.515949  2.467793          
59   7.012845  6.021628  5.189412  4.007493  3.135283  2.085665  2.433105
60   8.127730  7.284230  6.022458  5.215362  4.077990  3.354102  2.167948
61   9.254729  8.427930  7.048404  6.352165  5.217279  4.602173  3.034798
62  10.018982  9.082951  8.045496  7.039886  6.017475  5.050743  4.291853
63  11.075198 10.133114  9.019423  8.075890  7.056203  6.176569  5.171073
64  12.009580 11.033132 10.060815  9.010549  8.013738  7.017122  6.309517
65  13.060628 12.143311 11.024065 10.092572  9.045994  8.120961  7.065409
66  14.004285 13.011533 12.093387 11.008179 10.055347  9.018315  8.312641
67  15.022650 14.066272 13.030349 12.035780 11.004090 10.031451  9.140022
68  16.031843 15.075145 14.012494 13.038405 12.011245 11.060289 10.076706
69  17.030561 16.045560 15.026310 14.016419 13.024976 12.052801 11.154371
70  18.045775 17.094151 16.003125 15.051578 14.017846 13.077462 12.036195
101 19.085859 18.072355 17.206975 16.101242 15.142655 14.089003 13.465140
102 20.028230 19.041271 18.050485 17.026744 16.023108 15.028639 14.175683
103 21.067748 20.035219 19.191144 18.067927 17.134760 16.075447 15.432757
104 22.031795 21.023320 20.095024 19.029976 18.050762 17.029680 16.252384
105 23.048427 22.033384 21.131493 20.049190 19.083501 18.047160 17.309246
106 24.129857 23.081378 22.277792 21.133149 20.216330 19.147846 18.540496
107 25.055538 24.094813 23.017602 22.061278 21.021180 20.066141 19.050984
108 26.081219 25.044960 24.189667 23.078778 22.137299 21.089334 20.387496
109 27.043668 26.024988 25.099203 24.034558 23.065776 22.046542 21.240056
110 28.077749 27.051432 26.199809 25.087447 24.143115 23.079645 22.379008
111 29.010688 28.008213 27.064922 26.012689 25.032978 24.007707 23.161174
112 30.017495 29.012583 28.053877 27.012405 26.028830 25.016395 24.148499
113 31.025151 30.012331 29.087282 28.023205 27.053466 26.023643 25.199603
114 32.023117 31.033208 30.026155 29.019649 28.014818 27.023138 26.087162
115 33.025596 32.033576 31.044484 30.025822 29.027229 28.023205 27.114019
116 34.018818 33.016057 32.067273 31.020961 30.038142 29.014996 28.152797
117 35.016282 34.009116 33.060399 32.014684 31.031113 30.013497 29.144468
118 36.102354 35.069360 34.222653 33.111025 32.165976 31.105787 30.377623
119 37.114014 36.076724 35.204261 34.108797 33.163233 32.121332 31.361282
120 38.018548 37.022020 36.018051 35.010998 34.010881 33.021205 32.068520
           59        60        61        62        63        64        65
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9                                                                        
10                                                                       
11                                                                       
12                                                                       
13                                                                       
14                                                                       
15                                                                       
16                                                                       
17                                                                       
18                                                                       
19                                                                       
20                                                                       
51                                                                       
52                                                                       
53                                                                       
54                                                                       
55                                                                       
56                                                                       
57                                                                       
58                                                                       
59                                                                       
60   1.870829                                                            
61   2.944486  1.360147                                                  
62   3.114482  2.163331  1.884144                                        
63   4.159327  3.171750  2.300000  1.392839                              
64   5.026927  4.182105  3.552464  2.076054  1.466288                    
65   6.164414  5.029911  4.155719  3.082207  2.213594  1.571623          
66   7.007139  6.217717  5.483612  4.086563  3.258834  2.118962  1.702939
67   8.065978  7.044147  6.213695  5.017968  4.159327  3.051229  2.204541
68   9.056489  8.034924  7.105632  6.029925  5.029911  4.080441  3.069202
69  10.034939  9.089554  8.168843  7.058328  6.044833  5.054701  4.207137
70  11.076552 10.014490  9.043230  8.036790  7.019259  6.094260  5.028916
101 12.151132 11.322102 10.577334  9.246080  8.458723  7.218033  6.621933
102 13.049521 12.085115 11.202678 10.053358  9.127431  8.033679  7.189576
103 14.092196 13.311273 12.505199 11.211155 10.328117  9.161878  8.496470
104 15.044600 14.152738 13.287212 12.092146 11.170497 10.050373  9.259590
105 16.070781 15.199671 14.352003 13.130499 12.229881 11.091438 10.318430
106 17.165372 16.420414 15.607050 14.318170 13.422742 12.262137 11.603879
107 18.089223 17.017050 16.054594 15.045930 14.072669 13.066369 12.067311
108 19.095287 18.285787 17.423547 16.200926 15.267940 14.147791 13.395148
109 20.046446 19.158810 18.250205 17.103801 16.138773 15.062868 14.228844
110 21.107818 20.269682 19.427043 18.184059 17.293351 16.151471 15.353827
111 22.019083 21.088860 20.180436 19.038382 18.095856 17.022632 16.113349
112 23.020209 22.082799 21.154196 20.042704 19.076425 18.020544 17.115198
113 24.031230 23.123581 22.213734 21.067985 20.118151 19.042846 18.157643
114 25.032379 24.038719 23.086576 22.026802 21.051841 20.019241 19.068823
115 26.040545 25.056137 24.120531 23.036276 22.080987 21.029979 20.087558
116 27.029983 26.085628 25.166049 24.044542 23.098485 22.030660 21.109713
117 28.019279 27.083205 26.151291 25.042763 24.078621 23.021077 22.106108
118 29.124560 28.283211 27.408210 26.203435 25.282009 24.166299 23.337523
119 30.126234 29.275758 28.370760 27.209006 26.251667 25.165453 24.339063
120 31.016931 30.035146 29.060970 28.023026 27.023138 26.011536 25.052944
           66        67        68        69        70       101       102
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9                                                                        
10                                                                       
11                                                                       
12                                                                       
13                                                                       
14                                                                       
15                                                                       
16                                                                       
17                                                                       
18                                                                       
19                                                                       
20                                                                       
51                                                                       
52                                                                       
53                                                                       
54                                                                       
55                                                                       
56                                                                       
57                                                                       
58                                                                       
59                                                                       
60                                                                       
61                                                                       
62                                                                       
63                                                                       
64                                                                       
65                                                                       
66                                                                       
67   1.496663                                                            
68   2.284732  1.240967                                                  
69   3.174902  2.236068  1.349074                                        
70   4.232021  3.125700  2.032240  1.403567                              
101  5.382379  4.453089  3.933192  2.910326  2.915476                    
102  6.140847  5.064583  4.220190  3.151190  2.481935  1.667333          
103  7.204859  6.369458  5.588381  4.446347  4.062019  2.213594  1.860108
104  8.111720  7.127412  6.259393  5.176872  4.475489  3.132092  2.133073
105  9.146037  8.184742  7.342343  6.237788  5.556978  4.032369  3.187475
106 10.302912  9.474703  8.648121  7.507996  6.967065  5.224940  4.649731
107 11.166915 10.038924  9.082951  8.112953  7.085901  6.441273  5.123475
108 12.172510 11.279628 10.381233  9.275236  8.561542  7.123202  6.303967
109 13.095037 12.134249 11.197321 10.105444  9.290318  8.082698  7.095773
110 14.163333 13.246886 12.370125 11.292475 10.515227  9.050414  8.254090
111 15.029970 14.052046 13.104961 12.070626 11.160197 10.055347  9.041571
112 16.040885 15.050914 14.092906 13.041856 12.135897 11.055315 10.019980
113 17.050513 16.087262 15.141334 14.084034 13.200379 12.031209 11.058481
114 18.057685 17.022338 16.058020 15.027974 14.072313 13.086252 12.002916
115 19.062791 18.034689 17.087130 16.052726 15.108276 14.047064 13.009996
116 20.042954 19.051509 18.103591 17.068099 16.140942 15.018322 14.028899
117 21.033782 20.047444 19.083501 18.050485 17.120456 16.027164 15.024979
118 22.167995 21.245705 20.322893 19.264994 18.416026 17.081862 16.231759
119 23.179948 22.247921 21.310795 20.219050 19.387109 18.091434 17.205232
120 24.034766 23.022815 22.030660 21.006904 20.040459 19.086645 18.012773
          103       104       105       106       107       108       109
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9                                                                        
10                                                                       
11                                                                       
12                                                                       
13                                                                       
14                                                                       
15                                                                       
16                                                                       
17                                                                       
18                                                                       
19                                                                       
20                                                                       
51                                                                       
52                                                                       
53                                                                       
54                                                                       
55                                                                       
56                                                                       
57                                                                       
58                                                                       
59                                                                       
60                                                                       
61                                                                       
62                                                                       
63                                                                       
64                                                                       
65                                                                       
66                                                                       
67                                                                       
68                                                                       
69                                                                       
70                                                                       
101                                                                      
102                                                                      
103                                                                      
104  1.352775                                                            
105  2.092845  1.118034                                                  
106  3.120897  2.605763  1.691153                                        
107  4.817676  3.512834  2.958040  3.620773                              
108  5.029911  4.182105  3.171750  2.068816  3.189044                    
109  6.042351  5.035871  4.055860  3.284814  2.989983  1.330413          
110  7.040597  6.167658  5.101960  4.114608  4.324350  2.242766  1.743560
111  8.065358  7.029936  6.047313  5.339476  4.415880  3.348134  2.249444
112  9.054281  8.009370  7.031358  6.265780  5.288667  4.226109  3.064311
113 10.012492  9.019978  8.011866  7.130919  6.404686  5.098039  4.054627
114 11.136876 10.045895  9.086804  8.392258  7.069653  6.360031  5.165269
115 12.102066 11.039475 10.052860  9.306987  8.108637  7.284230  6.144103
116 13.035720 12.018319 11.014082 10.159232  9.205433  8.133265  7.076722
117 14.021769 13.002307 12.010412 11.113505 10.189210  9.071384  8.023715
118 15.054900 14.147085 13.110683 12.027884 11.645600 10.064293  9.201087
119 16.048676 15.132416 14.100355 13.011533 12.568612 11.039022 10.122747
120 17.088593 16.032155 15.067183 14.216891 13.061011 12.163881 11.059385
          110       111       112       113       114       115       116
2                                                                        
3                                                                        
4                                                                        
5                                                                        
6                                                                        
7                                                                        
8                                                                        
9                                                                        
10                                                                       
11                                                                       
12                                                                       
13                                                                       
14                                                                       
15                                                                       
16                                                                       
17                                                                       
18                                                                       
19                                                                       
20                                                                       
51                                                                       
52                                                                       
53                                                                       
54                                                                       
55                                                                       
56                                                                       
57                                                                       
58                                                                       
59                                                                       
60                                                                       
61                                                                       
62                                                                       
63                                                                       
64                                                                       
65                                                                       
66                                                                       
67                                                                       
68                                                                       
69                                                                       
70                                                                       
101                                                                      
102                                                                      
103                                                                      
104                                                                      
105                                                                      
106                                                                      
107                                                                      
108                                                                      
109                                                                      
110                                                                      
111  1.702939                                                            
112  2.539685  1.144552                                                  
113  3.168596  2.073644  1.153256                                        
114  4.573839  3.184337  2.151743  1.649242                              
115  5.348832  4.100000  3.108054  2.300000  1.126943                    
116  6.122091  5.013980  4.050926  3.046309  2.271563  1.252996          
117  7.120393  6.019967  5.014978  4.022437  3.190611  2.247221  1.157584
118  8.046117  7.307530  6.399219  5.282045  4.961854  4.026164  2.831960
119  9.106591  8.314445  7.308899  6.242596  5.719266  4.785394  3.689173
120 10.276673  9.083502  8.041144  7.133723  6.035727  5.120547  4.229657
          117       118       119
2                                
3                                
4                                
5                                
6                                
7                                
8                                
9                                
10                               
11                               
12                               
13                               
14                               
15                               
16                               
17                               
18                               
19                               
20                               
51                               
52                               
53                               
54                               
55                               
56                               
57                               
58                               
59                               
60                               
61                               
62                               
63                               
64                               
65                               
66                               
67                               
68                               
69                               
70                               
101                              
102                              
103                              
104                              
105                              
106                              
107                              
108                              
109                              
110                              
111                              
112                              
113                              
114                              
115                              
116                              
117                              
118  2.163331                    
119  2.794638  1.577973          
120  3.198437  3.581899  2.880972

fit = hclust(d, method = "ave")
fit


Call:
hclust(d = d, method = "ave")

Cluster method   : average 
Distance         : euclidean 
Number of objects: 60

plot(fit, cex = 0.5)  # display dendogram
groups = cutree(fit, k = 3)
groups  # cut tree into 3 clusters

  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
 19  20  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66 
  1   1   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   3 
 67  68  69  70 101 102 103 104 105 106 107 108 109 110 111 112 113 114 
  3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
115 116 117 118 119 120 
  3   3   3   3   3   3

# draw dendogram with red borders around the 3 clusters
rect.hclust(fit, k = 3, border = "red")

############# K-Means
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()

ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) + geom_point()

set.seed(20)
irisCluster <- kmeans(iris[, 3:4], 3, nstart = 20)
irisCluster

K-means clustering with 3 clusters of sizes 50, 52, 48

Cluster means:
  Petal.Length Petal.Width
1     1.462000    0.246000
2     4.269231    1.342308
3     5.595833    2.037500

Clustering vector:
  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [71] 2 2 2 2 2 2 2 3 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
[106] 3 2 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 2 3
[141] 3 3 3 3 3 3 3 3 3 3

Within cluster sum of squares by cluster:
[1]  2.02200 13.05769 16.29167
 (between_SS / total_SS =  94.3 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"

ggplot(iris, aes(Petal.Length, Petal.Width, color = as.factor(irisCluster$cluster))) + 
    geom_point()

ggplot(iris, aes(Sepal.Length, Sepal.Width, color = as.factor(irisCluster$cluster))) + 
    geom_point()

table(irisCluster$cluster, iris$Species)

   
    setosa versicolor virginica
  1     50          0         0
  2      0         48         4
  3      0          2        46

set.seed(20)
irisCluster1 <- kmeans(iris[, 1:4], 3, nstart = 20)
irisCluster1

K-means clustering with 3 clusters of sizes 38, 62, 50

Cluster means:
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1     6.850000    3.073684     5.742105    2.071053
2     5.901613    2.748387     4.393548    1.433871
3     5.006000    3.428000     1.462000    0.246000

Clustering vector:
  [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [71] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1
[106] 1 2 1 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1
[141] 1 1 2 1 1 1 2 1 1 2

Within cluster sum of squares by cluster:
[1] 23.87947 39.82097 15.15100
 (between_SS / total_SS =  88.4 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"

ggplot(iris, aes(Petal.Length, Petal.Width, color = as.factor(irisCluster1$cluster))) + 
    geom_point()

ggplot(iris, aes(Sepal.Length, Sepal.Width, color = as.factor(irisCluster1$cluster))) + 
    geom_point()

table(irisCluster1$cluster, iris$Species)

   
    setosa versicolor virginica
  1      0          2        36
  2      0         48        14
  3     50          0         0

pltree(agnes(mydata))

pltree(diana(mydata))

# https://www.statmethods.net/advstats/cluster.html
# https://www.r-bloggers.com/k-means-clustering-in-r/

Lec 10: CCA

Rho <- matrix(c(1, 0.4, 0.5, 0.6, 0.4, 1, 0.3, 0.4, 0.5, 0.3, 1, 0.2, 0.6, 0.4, 
    0.2, 1), 4, 4, byrow = T)
Rho

     [,1] [,2] [,3] [,4]
[1,]  1.0  0.4  0.5  0.6
[2,]  0.4  1.0  0.3  0.4
[3,]  0.5  0.3  1.0  0.2
[4,]  0.6  0.4  0.2  1.0

Rho11 <- Rho[1:2, 1:2]
Rho12 <- Rho[1:2, 3:4]
Rho21 <- Rho[3:4, 1:2]
Rho22 <- Rho[3:4, 3:4]
G = solve(Rho11) %*% Rho12 %*% solve(Rho22) %*% Rho21
G

          [,1]       [,2]
[1,] 0.4518849 0.28918651
[2,] 0.1463294 0.09474206

H = solve(Rho22) %*% Rho21 %*% solve(Rho11) %*% Rho12
H

          [,1]      [,2]
[1,] 0.2063492 0.2509921
[2,] 0.2777778 0.3402778

eigen(G)

eigen() decomposition
$values
[1] 0.5457180317 0.0009089525

$vectors
          [,1]       [,2]
[1,] 0.9511814 -0.5397975
[2,] 0.3086323  0.8417949

eigen(H)

eigen() decomposition
$values
[1] 0.5457180317 0.0009089525

$vectors
           [,1]       [,2]
[1,] -0.5946273 -0.7738317
[2,] -0.8040014  0.6333913

l1 = sqrt(eigen(G)$vectors[, 1] %*% Rho11 %*% eigen(G)$vectors[, 1])
l1

         [,1]
[1,] 1.111239

a1 = eigen(G)$vectors[, 1]/c(l1)
a1

[1] 0.8559647 0.2777371

l2 = sqrt(eigen(H)$vectors[, 1] %*% Rho22 %*% eigen(H)$vectors[, 1])
l2

         [,1]
[1,] 1.091436

b1 = -eigen(H)$vectors[, 1]/c(l2)
b1

[1] 0.5448119 0.7366455

## transfor from a1
a1_2 = solve(Rho22) %*% Rho21 %*% eigen(G)$vectors[, 1]
a1_2

          [,1]
[1,] 0.4472376
[2,] 0.6047143

l2_2 = sqrt(t(a1_2) %*% Rho22 %*% a1_2)
l2_2

          [,1]
[1,] 0.8209026

b1_2 = a1_2/c(l2_2)
b1_2

          [,1]
[1,] 0.5448119
[2,] 0.7366455

# Correlation of U1 and V1
rho_UV = a1 %*% Rho12 %*% b1
rho_UV

          [,1]
[1,] 0.7387273

sqrt(eigen(G)$values[1])

[1] 0.7387273

# Correlation of U and Z1
a1 %*% Rho22

          [,1]      [,2]
[1,] 0.9115121 0.4489301

### Andrews and Herzberg 1985 Data36. Chemical and Overt Diabetes

# The three primary variables are: glucose area(a measure of glucose
# intolerance, V7), insulin area(a measurement of insulin response to oral
# glucose V8), steady-state plasma glucose(a measure of insulin resistance
# V9).

# Two secondary variable the relative weight(V5) fasting plasma glucose (V6)

# V10 is the classification 3: 'normal',2: 'chemical diabetic', 1: 'overt
# diabetic'
rm(list = ls())
X = read.table("C:/Users/poster/Desktop/data/T36.1", header = F)
head(X)

  V1 V2 V3 V4   V5  V6  V7  V8  V9 V10
1 36  1  1  1 0.81  80 356 124  55   3
2 36  1  2  2 0.95  97 289 117  76   3
3 36  1  3  3 0.94 105 319 143 105   3
4 36  1  4  4 1.04  90 356 199 108   3
5 36  1  5  5 1.00  90 323 240 143   3
6 36  1  6  6 0.76  86 381 157 165   3

Y = X[X[, 10] == 3, c(7:9, 5, 6)]  #non-diabetic
colnames(Y) = c("GA", "IA", "SSPG", "RW", "FPG")
dim(Y)

[1] 76  5

pairs(Y)

R = cor(Y)
r11 = R[1:3, 1:3]
r22 = R[4:5, 4:5]
r12 = R[1:3, 4:5]
eig.g = eigen(solve(r11) %*% r12 %*% solve(r22) %*% t(r12))
eig.g

eigen() decomposition
$values
[1] 2.544060e-01 6.129327e-02 1.559964e-17

$vectors
           [,1]        [,2]        [,3]
[1,] -0.2729779  0.88912304 -0.08602997
[2,]  0.3954839  0.07123149  0.99217368
[3,] -0.8769695 -0.45209103 -0.09049987

eig.h = eigen(solve(r22) %*% t(r12) %*% solve(r11) %*% r12)
eig.h

eigen() decomposition
$values
[1] 0.25440604 0.06129327

$vectors
           [,1]       [,2]
[1,] 0.99787687 -0.3355584
[2,] 0.06512875  0.9420194

cca_cor = sqrt(eig.h$values)
cca_cor

[1] 0.5043868 0.2475748

# coefficient
l1 = sqrt(diag(t(eig.g$vectors[, 1:2]) %*% r11 %*% eig.g$vectors[, 1:2]))
a = eig.g$vectors[, 1:2] %*% diag(1/l1)
a

           [,1]        [,2]
[1,] -0.3241239  0.97721500
[2,]  0.4695831  0.07828892
[3,] -1.0412816 -0.49688301

l2 = sqrt(diag(t(eig.h$vectors) %*% r22 %*% eig.h$vectors))
b = eig.h$vectors[, 1:2] %*% diag(1/l2)
b

           [,1]       [,2]
[1,] 0.97910272 -0.3724554
[2,] 0.06390341  1.0456010

# use 76 nondiabetic patients to get the cononical variates
U = t(t(a) %*% t(scale(Y[, 1:3])))
V = t(t(b) %*% t(scale(Y[, 4:5])))

# Cor of U and V
rho_UV = t(a) %*% r12 %*% b
rho_UV

              [,1]         [,2]
[1,] -5.043868e-01 1.665335e-16
[2,]  8.153200e-17 2.475748e-01

# Adjust using corr U and V
adj = sign(diag(rho_UV))
adj

[1] -1  1

b.adj = b %*% diag(adj)
b

           [,1]       [,2]
[1,] 0.97910272 -0.3724554
[2,] 0.06390341  1.0456010

V.adj = t(t(b.adj) %*% t(scale(Y[, 4:5])))
rho_UV_adj = t(a) %*% r12 %*% b.adj
rho_UV_adj

              [,1]         [,2]
[1,]  5.043868e-01 1.665335e-16
[2,] -8.153200e-17 2.475748e-01

rho_UU = t(a) %*% r11 %*% a
rho_UU

             [,1]         [,2]
[1,] 1.000000e+00 2.775558e-16
[2,] 2.775558e-16 1.000000e+00

rho_VV = t(b.adj) %*% r22 %*% b.adj
rho_VV

             [,1] [,2]
[1,] 1.000000e+00    0
[2,] 3.469447e-17    1

# Correlation bewteen X/Y and X/Y canonical variates
rho_XU = cor(scale(Y[, 1:3]), U)
rho_XU

           [,1]        [,2]
GA   -0.4367729  0.89045987
IA   -0.1179190  0.05415582
SSPG -0.8775764 -0.25275458

rho_XV = cor(scale(Y[, 1:3]), V.adj)
rho_XV

            [,1]        [,2]
GA   -0.22030250  0.22045540
IA   -0.05947678  0.01340761
SSPG -0.44263796 -0.06257566

rho_YU = cor(scale(Y[, 4:5]), U)
rho_YU

          [,1]        [,2]
RW  -0.5034474 -0.01510271
FPG -0.1793339  0.23139771

rho_YV = cor(scale(Y[, 4:5]), V.adj)
rho_YV

          [,1]        [,2]
RW  -0.9981376 -0.06100262
FPG -0.3555484  0.93465787

# Test if the r12=0
t1 = -(76 - 1 - (2 + 3 + 1)/2) * sum(log(1 - (cca_cor)^2))
t1

[1] 25.69149

p_val1 = pchisq(t1, 3 * 2, lower.tail = F)
p_val1

[1] 0.0002541122

# Reject H0

# Test if the second canonical varviates
t2 = -(76 - 1 - (2 + 3 + 1)/2) * (log(1 - (cca_cor[2])^2))
t2

[1] 4.554156

p_val2 = pchisq(t2, (3 - 1) * (2 - 1), lower.tail = F)
p_val2

[1] 0.1025835

# Fail to reject H0

cc1 = cc(scale(Y[, 1:3]), scale(Y[, 4:5]))
cc1$cor  # cca_cor

[1] 0.5043868 0.2475748

# cancor
cc1$xcoef  # a

           [,1]        [,2]
GA   -0.3241239  0.97721500
IA    0.4695831  0.07828892
SSPG -1.0412816 -0.49688301

cc1$ycoef  # b

           [,1]       [,2]
RW  -0.97910272 -0.3724554
FPG -0.06390341  1.0456010

# comput
cc1$scores$xscores  # U

          [,1]         [,2]
1   0.68310204  0.613964679
2   0.84427226 -1.351124366
3   0.23299821 -0.776902625
4   0.23535998  0.241506862
5   0.17161648 -0.888782362
6  -1.30248955  0.364068499
7   0.23905717  0.012496354
8   0.68449379 -1.205080956
9  -0.17458081  0.872651512
10  0.55243590 -1.305135669
11  1.40721278  0.662017954
12  1.29183703 -0.744833589
13 -0.22946671 -1.873026668
14  0.38180081  0.769750475
15  1.40749418 -0.568972118
16  0.03915177  1.277382305
17  0.42365814  0.676466203
18  1.39853010  0.918292587
19  1.06549885 -1.028549714
20  0.86347602  0.364467020
21  0.71633453  1.279787533
22  1.29892118 -0.709686617
23  0.51985914  0.132736768
24  0.15297221 -0.673250420
25  1.13259757  1.421228574
26 -1.94453942 -0.583820975
27 -1.11698351 -0.013183110
28 -0.49278186 -0.171577085
29 -0.29818327 -0.922406440
30  0.86279025 -0.582250230
31  0.08267673  0.429072761
32  0.08355365 -0.326335877
33  1.44144059  0.757755165
34 -0.33342519 -0.257516127
35  0.45838268  0.922743202
36  0.12301215  0.939500707
37  1.00006173  0.980092193
38 -0.50621897 -0.845966613
39 -0.09372501  1.462757046
40 -0.32501738 -2.499799571
41 -0.35280919  0.708205567
42 -0.17488656 -0.007202487
43  0.76032502 -1.281085670
44  1.89801826 -1.470738683
45  0.90462026 -0.338898454
46  0.78838705 -0.157248334
47 -0.37704623 -0.554590222
48  0.27548158  0.444863080
49  0.26694075 -1.649914837
50  0.47247865  0.280536223
51 -1.64217715 -1.976588995
52  0.38374901 -1.267088098
53 -0.35744587 -1.060770504
54  0.13556845  0.169559377
55  0.10088146 -0.457801382
56  0.46302504 -0.576102839
57  0.67967686 -0.292743946
58  0.66908202  0.544827366
60 -1.88972227  0.679837517
61 -1.12022939  1.917180020
64 -0.53298009 -0.687223916
67 -1.59470441  1.417652080
68 -3.02900787 -0.110463395
69 -1.84952896 -0.082266698
70 -0.14678208  2.013302135
72 -1.37043653  0.380332611
73 -0.24650704  1.325281299
74  0.06846066  0.784310289
75 -2.35649116  0.303578707
76  0.08564256  1.776969514
78 -1.05127153 -0.035816205
79  0.16821464  1.237341533
80  1.12220666  0.664695972
81 -0.97271644  1.214816926
82 -0.83821635  0.248914847
84 -2.31698603 -1.876197669

cc1$scores$yscores  # V

          [,1]        [,2]
1   1.05598937 -1.05262230
2  -0.14238236  0.70208647
3  -0.12834840  1.74769575
4  -0.77351945 -0.44823800
5  -0.46885120 -0.33234072
6   1.39022485 -0.14527448
7   0.13898598  1.19922187
8  -1.19168863 -1.25748077
9  -0.44705061  0.58618919
10  1.15245770  1.19464993
11  0.28505278  0.08448186
12  1.61095940  0.06872785
13 -0.07098329 -1.74139586
14  0.74982180  0.13124842
15  1.54255897 -0.08732584
16 -0.23885069 -1.54518576
17 -1.23052182 -0.62208393
18  0.60375499  1.24598842
19  0.86482205 -0.47517410
20  0.06431824 -0.12952047
21 -0.08801589 -0.18746911
22  1.52702569  0.16683290
23 -0.12684908  0.44792774
24 -0.24035001 -0.24541775
25  1.64829327  0.73309902
26 -1.18242267 -2.68432815
27 -2.05432555  0.10480781
28 -1.53668939  0.56178680
29 -0.43778465 -0.84065819
30  1.14469106  1.32172930
31 -0.31351843 -2.87392810
32 -0.53875095  0.81137361
33  1.85649318 -1.39827594
34  1.35915829  0.36304299
35  0.40781968 -0.64902003
36  0.37525380  1.15906546
37 -1.84135764  0.44588951
38  0.76685439 -1.42267833
39 -0.36461623  1.78785215
40 -0.53098431  0.68429424
41 -0.56205086  1.19261171
42  0.36898648 -0.01362318
43 -0.34758363  0.23392540
44  1.28449055 -0.96569934
45  1.48819250  0.80222974
46  0.24771891 -0.57988930
47 -0.04918270 -0.82286596
48 -0.13461572  0.57500710
49 -0.68181911 -0.67342242
50  0.54462052 -0.33691265
51  0.49025406  0.55264294
52 -1.78699117 -0.44366607
53  0.85705541 -0.34809473
54  0.93322247 -0.31912041
55  0.55088784  0.83577600
56 -0.51395171 -0.86963251
57  0.45292018 -0.11172823
58  1.48969182 -0.49753827
60 -1.46052233  0.59076112
61 -1.30968753  1.94847778
64 -0.84818719 -1.77698033
67 -1.88645815 -0.09140228
68 -0.58385145  0.27408181
69  0.27878547 -1.08820678
70  1.02492281 -0.54430483
72 -0.77501877  0.85153001
73 -0.20928346 -0.75373523
74 -0.18748287  0.16479468
75 -1.36255468  1.53826535
76 -1.17288668  2.26058516
78 -0.11131580  0.19376900
79  1.48819250  0.80222974
80  0.74982180  0.13124842
81  0.29908674  1.13009114
82 -1.32998880 -0.26982014
84 -1.83209168 -0.98095787

cc1$scores$corr.X.xscores  #rho_XU

           [,1]        [,2]
GA   -0.4367729  0.89045987
IA   -0.1179190  0.05415582
SSPG -0.8775764 -0.25275458

cc1$scores$corr.X.yscores  #rho_XV

            [,1]        [,2]
GA   -0.22030250  0.22045540
IA   -0.05947678  0.01340761
SSPG -0.44263796 -0.06257566

cc1$scores$corr.Y.xscores  #rho_YU

          [,1]        [,2]
RW  -0.5034474 -0.01510271
FPG -0.1793339  0.23139771

cc1$scores$corr.Y.yscores  #rho_YV

          [,1]        [,2]
RW  -0.9981376 -0.06100262
FPG -0.3555484  0.93465787

# Canonical dimensions
p.asym(cc1$cor, 76, 2, 3, tstat = "Wilks")  # Eq to chisq test (large sample size)

Wilks' Lambda, using F-approximation (Rao's F):
              stat   approx df1 df2    p.value
1 to 2:  0.6998941 4.622551   6 142 0.00025487
2 to 2:  0.9387067 2.350636   2  72 0.10258351

p_val1

[1] 0.0002541122

p_val2

[1] 0.1025835

p.asym(cc1$cor, 76, 2, 3, tstat = "Hotelling")

 Hotelling-Lawley Trace, using F-approximation:
               stat   approx df1 df2      p.value
1 to 2:  0.40650802 4.742594   6 140 0.0001985532
2 to 2:  0.06529544 2.350636   2 144 0.0989560343

p.asym(cc1$cor, 76, 2, 3, tstat = "Pillai")

 Pillai-Bartlett Trace, using F-approximation:
               stat   approx df1 df2     p.value
1 to 2:  0.31569931 4.498474   6 144 0.000330589
2 to 2:  0.06129327 2.339550   2 148 0.099925668

p.asym(cc1$cor, 76, 2, 3, tstat = "Roy")

 Roy's Largest Root, using F-approximation:
             stat   approx df1 df2      p.value
1 to 1:  0.254406 8.189102   3  72 9.209354e-05

 F statistic for Roy's Greatest Root is an upper bound.

Multivariate Statistical Analysis

Graduate Course Notes

Patrick Oster

Friday April 26, 2019; 09:23:14 AM

Introduction

Intro

Lec 1

Lec 3

MV Stats

Lec 4

Lec 5: MVLR

Adv MV

Lec 6: PCA

Lec 7: FA

Lec 8: DA

Lec 9: Clustering

Lec 10: CCA