################################################### The following is the code for Introdution to R ##
2 + 5[1] 7
3/5[1] 0.6
35[1] 35
1 - 3 * 4[1] -11
(1 - 3) * 4[1] -8
# Function-name(arguments)
sqrt(40) # the square root function[1] 6.324555
# squareroot(40) # R does not recognize a function named ``squareroot'
exp(1) # the exp function: exp(x)= e^x [1] 2.718282
log(14) # the log base-e function (natural log, or ln)[1] 2.639057
# Log(14) # R is case-sensitive
abs(-0.43) # absolute value[1] 0.43
floor(3.4) # round down[1] 3
ceiling(3.4) # round up[1] 4
x <- 10 # the value 10 is assigned to an object named x.
x # call the value of x.[1] 10
y = 2 * x # assign a new object name y with twice value of x.
y[1] 20
x = x + 5 # replace x with the value x+5.
x[1] 15
x = c(5, 3, 6, 4, 6, 7, 1)
x[1] 5 3 6 4 6 7 1
x[2] # Extract the 2nd element[1] 3
2:4 # Create an integer sequence from 2 to 4[1] 2 3 4
x[2:4] # Extract the 2nd through 4th elements[1] 3 6 4
x[c(2, 4, 6)] # Extract the 2nd, 4th, 6th elements[1] 3 4 7
x[-2] # Extract all but the 2nd elements[1] 5 6 4 6 7 1
-2:-4[1] -2 -3 -4
x[-2:-4] # Extract all but the 2nd through 4th elements[1] 5 6 7 1
x[2] = 10 # Assign 10 to the 2nd element
x[1] 5 10 6 4 6 7 1
x[c(1, 3, 6)] = c(10, 30, 60) # How many elements does x have?
x[1] 10 10 30 4 6 60 1
x = 3:6
y = 2 * x # Functions apply to each element of a vector at the same time.
y[1] 6 8 10 12
seq(from = 0, to = 6, by = 2) # a function has multiple arguments.[1] 0 2 4 6
# What are the first three argument in the seq() function?
seq(0, 6, 2) # R expects a certain order for function arguments[1] 0 2 4 6
# seq(6,0,2) # R could not read our mind.
seq(to = 6, from = 0, by = 2)[1] 0 2 4 6
z = seq(from = 0, to = 6, by = 2)
x * z # Entry-wise multiply[1] 0 8 20 36
x^z # Entry-wise exponetial[1] 1 16 625 46656
x %*% y # Dot (inner) product of two vectors. [,1]
[1,] 172
1:100 # gives an answer that is several lines long [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
[18] 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
[35] 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
[52] 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
[69] 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
[86] 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
gpa.a = c(3.13, 3.55, 2.92, 2.73, 3, 3.18, 2.66, 3.76)
gpa.b = c(3.14, 3.13, 3.25, 2.93, 3.73, 3.5, 2.7)
all.gpa = c(gpa.a, gpa.b) # Combine (stack) data vectors
all.gpa [1] 3.13 3.55 2.92 2.73 3.00 3.18 2.66 3.76 3.14 3.13 3.25 2.93 3.73 3.50
[15] 2.70
sum(all.gpa) # Returns the sum of vector elements[1] 47.31
length(all.gpa) # Returns the vector length (i.e., n)[1] 15
mean(all.gpa) # Returns the vector mean[1] 3.154
var(all.gpa) # Returns the sample variance[1] 0.1246686
sd(all.gpa) # Returns the sample standard deviation[1] 0.3530844
min(all.gpa) # Returns the minimum value[1] 2.66
max(all.gpa) # Returns the maximum value[1] 3.76
sort(all.gpa) # Returns the sorted vector elements [1] 2.66 2.70 2.73 2.92 2.93 3.00 3.13 3.13 3.14 3.18 3.25 3.50 3.55 3.73
[15] 3.76
summary(all.gpa) # Returns the five number summary and mean Min. 1st Qu. Median Mean 3rd Qu. Max.
2.660 2.925 3.130 3.154 3.375 3.760
Grade = c("freshman", "sophomore", "junior", "senior")
Grade[1] "freshman" "sophomore" "junior" "senior"
grade = c("1", "2", "3", "4") # R is case-sensitive.
grade[1] "1" "2" "3" "4"
Grade1 = c("freshman", "sophomore", "junior", 4)
Grade1[1] "freshman" "sophomore" "junior" "4"
video = c(47, 63, 58, 53, 53, 63, 53, 39, 58, 50)
video > 55 [1] FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
video == 55 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sum(video > 55)[1] 4
which(video > 55)[1] 2 3 6 9
video[video > 55][1] 63 58 63 58
mean(video[video > 55])[1] 60.5
iris Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
11 5.4 3.7 1.5 0.2 setosa
12 4.8 3.4 1.6 0.2 setosa
13 4.8 3.0 1.4 0.1 setosa
14 4.3 3.0 1.1 0.1 setosa
15 5.8 4.0 1.2 0.2 setosa
16 5.7 4.4 1.5 0.4 setosa
17 5.4 3.9 1.3 0.4 setosa
18 5.1 3.5 1.4 0.3 setosa
19 5.7 3.8 1.7 0.3 setosa
20 5.1 3.8 1.5 0.3 setosa
21 5.4 3.4 1.7 0.2 setosa
22 5.1 3.7 1.5 0.4 setosa
23 4.6 3.6 1.0 0.2 setosa
24 5.1 3.3 1.7 0.5 setosa
25 4.8 3.4 1.9 0.2 setosa
26 5.0 3.0 1.6 0.2 setosa
27 5.0 3.4 1.6 0.4 setosa
28 5.2 3.5 1.5 0.2 setosa
29 5.2 3.4 1.4 0.2 setosa
30 4.7 3.2 1.6 0.2 setosa
31 4.8 3.1 1.6 0.2 setosa
32 5.4 3.4 1.5 0.4 setosa
33 5.2 4.1 1.5 0.1 setosa
34 5.5 4.2 1.4 0.2 setosa
35 4.9 3.1 1.5 0.2 setosa
36 5.0 3.2 1.2 0.2 setosa
37 5.5 3.5 1.3 0.2 setosa
38 4.9 3.6 1.4 0.1 setosa
39 4.4 3.0 1.3 0.2 setosa
40 5.1 3.4 1.5 0.2 setosa
41 5.0 3.5 1.3 0.3 setosa
42 4.5 2.3 1.3 0.3 setosa
43 4.4 3.2 1.3 0.2 setosa
44 5.0 3.5 1.6 0.6 setosa
45 5.1 3.8 1.9 0.4 setosa
46 4.8 3.0 1.4 0.3 setosa
47 5.1 3.8 1.6 0.2 setosa
48 4.6 3.2 1.4 0.2 setosa
49 5.3 3.7 1.5 0.2 setosa
50 5.0 3.3 1.4 0.2 setosa
51 7.0 3.2 4.7 1.4 versicolor
52 6.4 3.2 4.5 1.5 versicolor
53 6.9 3.1 4.9 1.5 versicolor
54 5.5 2.3 4.0 1.3 versicolor
55 6.5 2.8 4.6 1.5 versicolor
56 5.7 2.8 4.5 1.3 versicolor
57 6.3 3.3 4.7 1.6 versicolor
58 4.9 2.4 3.3 1.0 versicolor
59 6.6 2.9 4.6 1.3 versicolor
60 5.2 2.7 3.9 1.4 versicolor
61 5.0 2.0 3.5 1.0 versicolor
62 5.9 3.0 4.2 1.5 versicolor
63 6.0 2.2 4.0 1.0 versicolor
64 6.1 2.9 4.7 1.4 versicolor
65 5.6 2.9 3.6 1.3 versicolor
66 6.7 3.1 4.4 1.4 versicolor
67 5.6 3.0 4.5 1.5 versicolor
68 5.8 2.7 4.1 1.0 versicolor
69 6.2 2.2 4.5 1.5 versicolor
70 5.6 2.5 3.9 1.1 versicolor
71 5.9 3.2 4.8 1.8 versicolor
72 6.1 2.8 4.0 1.3 versicolor
73 6.3 2.5 4.9 1.5 versicolor
74 6.1 2.8 4.7 1.2 versicolor
75 6.4 2.9 4.3 1.3 versicolor
76 6.6 3.0 4.4 1.4 versicolor
77 6.8 2.8 4.8 1.4 versicolor
78 6.7 3.0 5.0 1.7 versicolor
79 6.0 2.9 4.5 1.5 versicolor
80 5.7 2.6 3.5 1.0 versicolor
81 5.5 2.4 3.8 1.1 versicolor
82 5.5 2.4 3.7 1.0 versicolor
83 5.8 2.7 3.9 1.2 versicolor
84 6.0 2.7 5.1 1.6 versicolor
85 5.4 3.0 4.5 1.5 versicolor
86 6.0 3.4 4.5 1.6 versicolor
87 6.7 3.1 4.7 1.5 versicolor
88 6.3 2.3 4.4 1.3 versicolor
89 5.6 3.0 4.1 1.3 versicolor
90 5.5 2.5 4.0 1.3 versicolor
91 5.5 2.6 4.4 1.2 versicolor
92 6.1 3.0 4.6 1.4 versicolor
93 5.8 2.6 4.0 1.2 versicolor
94 5.0 2.3 3.3 1.0 versicolor
95 5.6 2.7 4.2 1.3 versicolor
96 5.7 3.0 4.2 1.2 versicolor
97 5.7 2.9 4.2 1.3 versicolor
98 6.2 2.9 4.3 1.3 versicolor
99 5.1 2.5 3.0 1.1 versicolor
100 5.7 2.8 4.1 1.3 versicolor
101 6.3 3.3 6.0 2.5 virginica
102 5.8 2.7 5.1 1.9 virginica
103 7.1 3.0 5.9 2.1 virginica
104 6.3 2.9 5.6 1.8 virginica
105 6.5 3.0 5.8 2.2 virginica
106 7.6 3.0 6.6 2.1 virginica
107 4.9 2.5 4.5 1.7 virginica
108 7.3 2.9 6.3 1.8 virginica
109 6.7 2.5 5.8 1.8 virginica
110 7.2 3.6 6.1 2.5 virginica
111 6.5 3.2 5.1 2.0 virginica
112 6.4 2.7 5.3 1.9 virginica
113 6.8 3.0 5.5 2.1 virginica
114 5.7 2.5 5.0 2.0 virginica
115 5.8 2.8 5.1 2.4 virginica
116 6.4 3.2 5.3 2.3 virginica
117 6.5 3.0 5.5 1.8 virginica
118 7.7 3.8 6.7 2.2 virginica
119 7.7 2.6 6.9 2.3 virginica
120 6.0 2.2 5.0 1.5 virginica
121 6.9 3.2 5.7 2.3 virginica
122 5.6 2.8 4.9 2.0 virginica
123 7.7 2.8 6.7 2.0 virginica
124 6.3 2.7 4.9 1.8 virginica
125 6.7 3.3 5.7 2.1 virginica
126 7.2 3.2 6.0 1.8 virginica
127 6.2 2.8 4.8 1.8 virginica
128 6.1 3.0 4.9 1.8 virginica
129 6.4 2.8 5.6 2.1 virginica
130 7.2 3.0 5.8 1.6 virginica
131 7.4 2.8 6.1 1.9 virginica
132 7.9 3.8 6.4 2.0 virginica
133 6.4 2.8 5.6 2.2 virginica
134 6.3 2.8 5.1 1.5 virginica
135 6.1 2.6 5.6 1.4 virginica
136 7.7 3.0 6.1 2.3 virginica
137 6.3 3.4 5.6 2.4 virginica
138 6.4 3.1 5.5 1.8 virginica
139 6.0 3.0 4.8 1.8 virginica
140 6.9 3.1 5.4 2.1 virginica
141 6.7 3.1 5.6 2.4 virginica
142 6.9 3.1 5.1 2.3 virginica
143 5.8 2.7 5.1 1.9 virginica
144 6.8 3.2 5.9 2.3 virginica
145 6.7 3.3 5.7 2.5 virginica
146 6.7 3.0 5.2 2.3 virginica
147 6.3 2.5 5.0 1.9 virginica
148 6.5 3.0 5.2 2.0 virginica
149 6.2 3.4 5.4 2.3 virginica
150 5.9 3.0 5.1 1.8 virginica
ncol(iris) # number of variables (column) [1] 5
nrow(iris) # number of objects (row) [1] 150
head(iris) # first 6 objects. Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
# data()
# home laptop loading ice =
# read.table('C:/Users/poster/Desktop/data/icecream.txt', header=TRUE,
# sep='') work pc loading
ice = read.table("C:/Users/poster/Desktop/data/icecream.txt")
head(ice) id female ice_cream video puzzle
1 70 0 2 47 57
2 121 1 1 63 61
3 86 0 3 58 31
4 141 0 3 53 56
5 172 0 1 53 61
6 113 0 1 63 61
colnames(ice)[1] "id" "female" "ice_cream" "video" "puzzle"
write.csv(ice, "icecream.csv", row.names = FALSE)
ice1 = read.csv("icecream.csv")
head(ice1) id female ice_cream video puzzle
1 70 0 2 47 57
2 121 1 1 63 61
3 86 0 3 58 31
4 141 0 3 53 56
5 172 0 1 53 61
6 113 0 1 63 61
dim(ice) # Check dimensions of the data frame ice[1] 200 5
names(ice) # List the name of the variables in the data frame[1] "id" "female" "ice_cream" "video" "puzzle"
ice[2, ] # select the second row only id female ice_cream video puzzle
2 121 1 1 63 61
ice[, 4] # select the fourth column only [1] 47 63 58 53 53 63 53 39 58 50 53 63 61 55 31 50 50 58 55 53 66 72 55
[24] 61 39 39 61 58 39 55 47 64 66 72 61 61 66 66 36 39 42 58 55 50 63 69
[47] 49 63 53 47 57 47 50 55 69 26 33 56 58 44 58 69 34 36 36 50 55 42 65
[70] 44 39 58 63 74 58 45 49 63 39 42 55 61 66 63 44 63 53 42 34 61 47 66
[93] 69 44 47 63 66 69 39 61 69 66 33 50 61 42 50 51 50 58 61 39 46 59 55
[116] 42 55 58 58 39 50 50 39 48 34 58 44 50 47 29 50 54 50 47 44 67 58 44
[139] 42 44 44 50 39 44 53 48 55 44 40 34 42 58 50 53 58 55 54 47 42 61 53
[162] 51 63 61 55 40 61 47 55 53 50 47 31 61 35 54 55 53 58 56 50 39 63 50
[185] 66 58 53 42 55 53 42 50 55 34 50 42 36 55 58 53
ice[63, 4] # selects the entry in the 63rd row, 4th column[1] 34
ice[c(1, 7), ] # selects rows 1 and 7 id female ice_cream video puzzle
1 70 0 2 47 57
7 50 0 1 53 61
ice[80:85, ] # selects rows 80 through 85 id female ice_cream video puzzle
80 14 0 3 42 56
81 127 0 3 55 56
82 165 0 2 61 36
83 174 0 1 66 56
84 3 0 2 63 56
85 58 0 1 44 41
ice[, -c(1, 2)] # select all data except columns 1 and 2 ice_cream video puzzle
1 2 47 57
2 1 63 61
3 3 58 31
4 3 53 56
5 1 53 61
6 1 63 61
7 1 53 61
8 1 39 36
9 1 58 51
10 1 50 51
11 1 53 61
12 1 63 61
13 3 61 71
14 3 55 46
15 2 31 56
16 2 50 56
17 3 50 56
18 1 58 56
19 3 55 61
20 1 53 46
21 1 66 41
22 1 72 66
23 1 55 56
24 3 61 61
25 1 39 46
26 1 39 31
27 3 61 66
28 1 58 46
29 3 39 46
30 2 55 41
31 1 47 51
32 3 64 61
33 3 66 71
34 1 72 31
35 3 61 61
36 3 61 66
37 1 66 66
38 3 66 66
39 2 36 36
40 1 39 51
41 1 42 51
42 1 58 51
43 1 55 51
44 2 50 41
45 3 63 66
46 2 69 46
47 3 49 47
48 1 63 51
49 1 53 46
50 1 47 51
51 1 57 56
52 3 47 41
53 1 50 46
54 1 55 71
55 1 69 66
56 3 26 42
57 2 33 32
58 1 56 46
59 1 58 41
60 1 44 51
61 1 58 61
62 3 69 66
63 2 34 46
64 1 36 36
65 3 36 61
66 1 50 26
67 1 55 66
68 2 42 26
69 2 65 44
70 1 44 36
71 1 39 51
72 3 58 61
73 1 63 66
74 1 74 66
75 1 58 51
76 2 45 31
77 3 49 61
78 3 63 66
79 1 39 46
80 3 42 56
81 3 55 56
82 2 61 36
83 1 66 56
84 2 63 56
85 1 44 41
86 3 63 66
87 3 53 56
88 3 42 56
89 1 34 31
90 3 61 56
91 1 47 46
92 2 66 46
93 3 69 61
94 2 44 48
95 2 47 51
96 2 63 51
97 1 66 56
98 3 69 71
99 2 39 41
100 3 61 61
101 3 69 66
102 3 66 61
103 2 33 41
104 3 50 51
105 1 61 51
106 1 42 56
107 3 50 56
108 2 51 33
109 2 50 56
110 3 58 71
111 1 61 56
112 2 39 51
113 3 46 66
114 2 59 56
115 3 55 66
116 1 42 41
117 3 55 46
118 3 58 66
119 2 58 56
120 2 39 51
121 2 50 51
122 1 50 56
123 1 39 56
124 1 48 46
125 2 34 46
126 3 58 61
127 1 44 56
128 1 50 41
129 3 47 46
130 2 29 26
131 1 50 56
132 2 54 56
133 1 50 51
134 1 47 46
135 2 44 66
136 3 67 66
137 1 58 46
138 1 44 56
139 1 42 41
140 1 44 61
141 2 44 51
142 3 50 52
143 1 39 51
144 1 44 41
145 1 53 66
146 3 48 61
147 1 55 31
148 1 44 51
149 2 40 41
150 2 34 41
151 2 42 46
152 3 58 56
153 1 50 51
154 1 53 61
155 1 58 66
156 1 55 71
157 1 54 61
158 1 47 61
159 1 42 41
160 3 61 66
161 2 53 61
162 1 51 58
163 3 63 31
164 2 61 61
165 1 55 61
166 2 40 31
167 1 61 61
168 2 47 36
169 1 55 41
170 2 53 37
171 2 50 43
172 1 47 61
173 3 31 39
174 3 61 51
175 2 35 51
176 2 54 66
177 1 55 71
178 1 53 41
179 3 58 36
180 2 56 51
181 1 50 51
182 1 39 51
183 3 63 61
184 1 50 61
185 3 66 56
186 2 58 71
187 1 53 51
188 1 42 36
189 3 55 61
190 2 53 66
191 2 42 41
192 3 50 41
193 1 55 56
194 3 34 51
195 1 50 56
196 1 42 56
197 1 36 46
198 1 55 52
199 1 58 61
200 3 53 61
puzzlevec = ice$puzzle
ice[puzzlevec > 61, ] id female ice_cream video puzzle
13 95 0 3 61 71
22 143 0 1 72 66
27 154 0 3 61 66
33 192 0 3 66 71
36 144 0 3 61 66
37 200 0 1 66 66
38 80 0 3 66 66
45 62 0 3 63 66
54 183 0 1 55 71
55 132 0 1 69 66
62 170 0 3 69 66
67 171 0 1 55 66
73 68 0 1 63 66
74 157 0 1 74 66
78 123 0 3 63 66
86 146 0 3 63 66
98 100 1 3 69 71
101 88 1 3 69 66
110 180 1 3 58 71
113 131 1 3 46 66
115 34 1 3 55 66
118 93 1 3 58 66
135 77 1 2 44 66
136 61 1 3 67 66
145 122 1 1 53 66
155 71 1 1 58 66
156 139 1 1 55 71
160 39 1 3 61 66
176 135 1 2 54 66
177 59 1 1 55 71
186 23 1 2 58 71
190 52 1 2 53 66
Good = ice[puzzlevec > 61, ]
Good id female ice_cream video puzzle
13 95 0 3 61 71
22 143 0 1 72 66
27 154 0 3 61 66
33 192 0 3 66 71
36 144 0 3 61 66
37 200 0 1 66 66
38 80 0 3 66 66
45 62 0 3 63 66
54 183 0 1 55 71
55 132 0 1 69 66
62 170 0 3 69 66
67 171 0 1 55 66
73 68 0 1 63 66
74 157 0 1 74 66
78 123 0 3 63 66
86 146 0 3 63 66
98 100 1 3 69 71
101 88 1 3 69 66
110 180 1 3 58 71
113 131 1 3 46 66
115 34 1 3 55 66
118 93 1 3 58 66
135 77 1 2 44 66
136 61 1 3 67 66
145 122 1 1 53 66
155 71 1 1 58 66
156 139 1 1 55 71
160 39 1 3 61 66
176 135 1 2 54 66
177 59 1 1 55 71
186 23 1 2 58 71
190 52 1 2 53 66
# Multivariate data Two-dimensional scatter plot matrix
head(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
hist(iris$Sepal.Length)
rug(iris$Sepal.Length)colors = brewer.pal(nlevels(iris$Species), "Dark2")
scatterplotMatrix(iris[1:4], groups = iris[, 5], smoother = FALSE, reg.line = FALSE,
legend.plot = FALSE, oma = c(2, 2, 7, 2), col = colors)Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "reg.line" is
not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "legend.plot"
is not a graphical parameter
Warning in plot.window(...): "smoother" is not a graphical parameter
Warning in plot.window(...): "reg.line" is not a graphical parameter
Warning in plot.window(...): "legend.plot" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "reg.line" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "legend.plot" is not a graphical
parameter
Warning in title(...): "smoother" is not a graphical parameter
Warning in title(...): "reg.line" is not a graphical parameter
Warning in title(...): "legend.plot" is not a graphical parameter
legend("top", legend = levels(iris[, 5]), pch = 1:3, col = colors, horiz = TRUE,
bty = "n", cex = 1.5, xpd = TRUE)# pairs(iris[1:4], main = 'Anderson's Iris Data -- 3 species', pch = 21, bg
# = c('red', 'green3', 'blue')[unclass(iris$Species)])
# 3-D plot
with(iris, scatterplot3d(Sepal.Width, Sepal.Length, Petal.Length, color = colors[Species],
type = "h", pch = 20))# Star plot
stars(iris[-5], col.lines = colors[iris$Species], locations = 0:1, key.loc = 0:1)irs2 = iris[c(1:10, 51:60, 101:110), ]
# stars(irs2[-5],col.lines=colors[irs2$Species],locations=0:1,key.loc=0:1)
# Face plot
data = irs2
faces(data[1:30, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")],
face.type = 1, scale = TRUE, labels = data$Species, plot.faces = TRUE, nrow.plot = 5,
ncol.plot = 5)effect of variables:
modified item Var
"height of face " "Sepal.Length"
"width of face " "Sepal.Width"
"structure of face" "Petal.Length"
"height of mouth " "Petal.Width"
"width of mouth " "Sepal.Length"
"smiling " "Sepal.Width"
"height of eyes " "Petal.Length"
"width of eyes " "Petal.Width"
"height of hair " "Sepal.Length"
"width of hair " "Sepal.Width"
"style of hair " "Petal.Length"
"height of nose " "Petal.Width"
"width of nose " "Sepal.Length"
"width of ear " "Sepal.Width"
"height of ear " "Petal.Length"
# classification tree ctree method
# required package by partykit
tptree = ctree(iris$Species ~ ., data = iris)
table(predict(tptree), iris$Species)
setosa versicolor virginica
setosa 50 0 0
versicolor 0 49 5
virginica 0 1 45
plot(tptree)# rpart method(pretty plots)library(grid)
gtree_rpart = rpart(iris$Species ~ ., data = iris)
gtree_rpart_cp = rpart(iris$Species ~ ., data = iris, control = rpart.control(cp = 0.005))
print(gtree_rpart)n= 150
node), split, n, loss, yval, (yprob)
* denotes terminal node
1) root 150 100 setosa (0.33333333 0.33333333 0.33333333)
2) Petal.Length< 2.45 50 0 setosa (1.00000000 0.00000000 0.00000000) *
3) Petal.Length>=2.45 100 50 versicolor (0.00000000 0.50000000 0.50000000)
6) Petal.Width< 1.75 54 5 versicolor (0.00000000 0.90740741 0.09259259) *
7) Petal.Width>=1.75 46 1 virginica (0.00000000 0.02173913 0.97826087) *
rpart.plot(gtree_rpart, compress = TRUE)########### Density function and Its Contours
X <- seq(-1, 5, length.out = 100)
Y <- seq(2, 8, length.out = 100)
dens <- matrix(dmvnorm(expand.grid(X, Y), mean = c(2, 5), sigma = rbind(c(1,
0.5), c(0.5, 1))), ncol = 100)
persp(X, Y, dens, main = "Density Surface", box = FALSE)s3d <- scatterplot3d(X, Y, seq(min(dens), max(dens), length = length(X)), type = "n",
grid = FALSE, angle = 70, zlab = "expression(f(x, y))", xlab = expression(X),
ylab = expression(Y), main = "Density Surface")
for (i in length(X):1) s3d$points3d(rep(X[i], length(Y)), Y, dens[i, ], type = "l")
for (i in length(Y):1) s3d$points3d(X, rep(Y[i], length(X)), dens[, i], type = "l")contour(X, Y, dens)########### Simulated Data
Xbvn <- rmvnorm(100, c(2, 5), matrix(c(1, 0.5, 0.5, 1), nrow = 2))
dataEllipse(Xbvn, xlim = c(-2, 6), ylim = c(2, 10), levels = c(0.5, 0.75, 0.95),
ellipse.label = c(0.5, 0.75, 0.95), pch = 20)########### Example use sataset stiff
stiff <- read.table("C:/Users/poster/Desktop/data/t4-3.dat", header = F)[, 1:4]
head(stiff) V1 V2 V3 V4
1 1889 1651 1561 1778
2 2403 2048 2087 2197
3 2119 1700 1815 2222
4 1645 1627 1110 1533
5 1976 1916 1614 1883
6 1712 1712 1439 1546
colnames(stiff) <- c("x1", "x2", "x3", "x4")
attach(stiff)
########### Univariate normality
par(mfrow = c(2, 2))
hist(x1)
hist(x4)
qqnorm(x1)
qqline(x1) # Q-Q plot of x1
qqnorm(x4)
qqline(x4) # Q-Q plot of x4shapiro.test(x1)
Shapiro-Wilk normality test
data: x1
W = 0.93068, p-value = 0.05118
shapiro.test(x2)
Shapiro-Wilk normality test
data: x2
W = 0.91274, p-value = 0.01746
shapiro.test(x3)
Shapiro-Wilk normality test
data: x3
W = 0.93258, p-value = 0.05751
shapiro.test(x4)
Shapiro-Wilk normality test
data: x4
W = 0.96127, p-value = 0.3337
########### Bivariate normality
pairs(stiff) #Scatter Plot Matrix# Bivariate Percentage
par(mfrow = c(1, 1))
plot(x1, x4, pch = 20)
quan_chisq = ellipse(cor(stiff[, c(1, 4)]), scale = c(sd(x1), sd(x4)), center = c(0,
0), level = 0.5, which = c(1, 2), npoints = 100)
lines(quan_chisq[, 1] + mean(x1), quan_chisq[, 2] + mean(x4), col = "blue",
lwd = 2)m_dist14 <- mahalanobis(stiff[, c(1, 4)], colMeans(stiff[, c(1, 4)]), cov(stiff[,
c(1, 4)])) # Mahalanobis distance
sum(m_dist14 <= qchisq(0.5, 2))/30 # 60% of Mahalanobis distance <= 50th quantile of chi-square [1] 0.6
########### Chi-square plot
m_dist <- mahalanobis(stiff, colMeans(stiff), cov(stiff)) # Mahalanobis distance
sum(m_dist <= qchisq(0.05, 4))[1] 3
par(mar = c(5, 5.5, 4, 2) + 0.1) # by hand calculation
plot(sort(m_dist), qchisq(((1:30) - 0.5)/30, 4), xlab = expression(d[j]^2),
ylab = expression(paste(chi[4]^2, "( ", frac(j - 0.5, 30), " )")), pch = 20)
abline(a = 0, b = 1, lwd = 2)# Directly by function mvn()
mvn(stiff, mvnTest = "royston", multivariatePlot = "qq")$multivariateNormality
Test H p value MVN
1 Royston 9.823858 0.009534665 NO
$univariateNormality
Test Variable Statistic p value Normality
1 Shapiro-Wilk x1 0.9307 0.0512 YES
2 Shapiro-Wilk x2 0.9127 0.0175 NO
3 Shapiro-Wilk x3 0.9326 0.0575 YES
4 Shapiro-Wilk x4 0.9613 0.3337 YES
$Descriptives
n Mean Std.Dev Median Min Max 25th 75th Skew
x1 30 1906.100 324.9866 1863.0 1325 2983 1715.25 2057.25 1.0380842
x2 30 1749.533 318.6065 1680.0 1170 2794 1595.50 1888.75 1.1435912
x3 30 1509.133 303.1783 1466.0 1002 2412 1295.75 1623.75 0.9800274
x4 30 1724.967 322.8436 1674.5 1176 2581 1520.25 1880.75 0.5978431
Kurtosis
x1 2.03586397
x2 1.94986381
x3 0.99683699
x4 -0.04626509
########### Outliers
# Standardized Value
print(data.frame(scale(x1), scale(x2), scale(x3), scale(x4))) scale.x1. scale.x2. scale.x3. scale.x4.
1 -0.05261755 -0.3092634 0.17107644 0.16426945
2 1.52898605 0.9367877 1.90602908 1.46211166
3 0.65510390 -0.1554687 1.00886726 1.53954854
4 -0.80341770 -0.3845914 -1.31649701 -0.59461204
5 0.21508578 0.5224835 0.34589106 0.48950437
6 -0.59725537 -0.1178047 -0.23132702 -0.55434486
7 0.11354314 -0.2025487 -0.78545638 -0.16716042
8 0.60894816 0.2211714 0.68562513 0.46162709
9 3.31367493 3.2782337 2.97800552 2.65154223
10 -0.49571272 -0.4693354 -0.41273841 -0.67204892
11 -0.60340947 -0.4975834 0.02924572 -0.17955033
12 0.43047927 0.4942355 0.38877012 0.53596650
13 -0.20339299 0.2870835 0.28322167 0.04966286
14 -0.12031265 -0.2025487 -0.05321401 -0.14547810
15 -0.14492905 -0.3155407 -0.39624647 -0.03396898
16 0.14739069 1.2537931 -1.08560978 -1.37517585
17 -1.78807364 -1.8189625 -1.67272303 -1.70041077
18 -1.49883096 -1.1880903 -0.84812577 -1.29154401
19 -0.24031759 -0.3626207 0.30631040 0.09302751
20 -0.55725372 -0.4881674 -0.64692404 -0.24459731
21 1.13820072 1.3793398 0.12489900 1.19572877
22 -0.02184705 -0.4253941 -0.28739963 -0.76807066
23 -0.84034230 -0.7423995 -0.72278698 -0.64726912
24 0.47663501 0.3686888 0.45143951 0.96651559
25 -0.15416020 -0.8051729 -0.50509331 -0.59461204
26 -0.55109962 -1.0594050 -0.89430321 -0.79285046
27 0.80587934 0.4597102 0.63285091 0.33772807
28 -0.77264721 -0.2339354 -0.31378674 -0.39637361
29 1.29205321 1.7308706 1.83346452 1.57671825
30 -1.28036042 -1.1535650 -0.97346455 -1.36588342
# Generalized Squared Distance--Mahalanobis distance
rank(m_dist) [1] 3 25 27 24 8 12 22 10 29 4 11 2 16 1 6 30 19 20 7 9 28 23 5
[24] 14 21 18 13 17 26 15
cbind(1:30, m_dist)[order(m_dist), ] m_dist
[1,] 14 0.1295714
[2,] 12 0.4635159
[3,] 1 0.6000129
[4,] 10 0.7665400
[5,] 23 0.7962096
[6,] 15 1.0792484
[7,] 19 1.3632124
[8,] 5 1.3980776
[9,] 20 1.4649908
[10,] 8 1.4876570
[11,] 11 1.9307771
[12,] 6 2.2191409
[13,] 27 2.3816050
[14,] 24 2.5385575
[15,] 30 2.5838186
[16,] 13 2.6959024
[17,] 28 2.9951752
[18,] 26 3.3979804
[19,] 17 3.5018290
[20,] 18 3.9900603
[21,] 25 4.5767867
[22,] 7 4.9883498
[23,] 22 5.0557446
[24,] 4 5.2076098
[25,] 2 5.4770196
[26,] 29 6.2837628
[27,] 3 7.6166439
[28,] 21 9.8980384
[29,] 9 12.2647550
[30,] 16 16.8474070
mvn(stiff[-c(16, 9), ], mvnTest = "royston", multivariatePlot = "qq") # by function mvn()$multivariateNormality
Test H p value MVN
1 Royston 1.098338 0.6271166 YES
$univariateNormality
Test Variable Statistic p value Normality
1 Shapiro-Wilk x1 0.9847 0.9439 YES
2 Shapiro-Wilk x2 0.9664 0.4871 YES
3 Shapiro-Wilk x3 0.9672 0.5070 YES
4 Shapiro-Wilk x4 0.9660 0.4779 YES
$Descriptives
n Mean Std.Dev Median Min Max 25th 75th Skew
x1 28 1865.929 262.1619 1857.5 1325 2403 1711.50 2049.75 0.08994538
x2 28 1697.964 244.8618 1663.0 1170 2301 1593.25 1847.50 0.39091767
x3 28 1488.643 253.1536 1466.0 1002 2087 1307.25 1617.25 0.49661284
x4 28 1710.250 277.9986 1674.5 1176 2234 1528.75 1876.25 0.25921958
Kurtosis
x1 -0.5084972
x2 0.1961808
x3 0.0516768
x4 -0.6484407
########### Transformation
trans = powerTransform(stiff, family = "yjPower") # yjPower' family (Yeo and Johnson(2000)),
# another modifiation of the Box-Cox family that allows a few negative
# values.
summary(trans)yjPower Transformations to Multinormality
Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
x1 0.0939 1 -0.9911 1.1790
x2 -0.2802 0 -1.5129 0.9525
x3 0.1478 1 -0.9585 1.2542
x4 0.7546 1 -0.5292 2.0385
Likelihood ratio test that all transformation parameters are equal to 0
LRT df pval
LR test, lambda = (0 0 0 0) 1.980807 4 0.73929
logstiff = log(stiff)
hist(log(x1))plot(log(x1), log(x4), pch = 20)
quan_chisq1 = ellipse(cor(logstiff[, c(1, 4)]), scale = c(sd(log(x1)), sd(log(x4))),
center = c(0, 0), t = sqrt(qchisq(0.5, 2)), which = c(1, 2), npoints = 100)
lines(quan_chisq1[, 1] + mean(log(x1)), quan_chisq1[, 2] + mean(log(x4)), col = "blue",
lwd = 2)mvn(logstiff, mvnTest = "royston", multivariatePlot = "qq")$multivariateNormality
Test H p value MVN
1 Royston 1.652091 0.5200565 YES
$univariateNormality
Test Variable Statistic p value Normality
1 Shapiro-Wilk x1 0.9742 0.6577 YES
2 Shapiro-Wilk x2 0.9626 0.3612 YES
3 Shapiro-Wilk x3 0.9795 0.8118 YES
4 Shapiro-Wilk x4 0.9831 0.9005 YES
$Descriptives
n Mean Std.Dev Median Min Max 25th 75th
x1 30 7.539650 0.1631929 7.529941 7.189168 8.000685 7.447309 7.629120
x2 30 7.452334 0.1721702 7.426545 7.064759 7.935230 7.374941 7.543648
x3 30 7.301165 0.1910199 7.290123 6.909753 7.788212 7.166816 7.392488
x4 30 7.436553 0.1834145 7.423268 7.069874 7.855932 7.326618 7.539424
Skew Kurtosis
x1 0.3660786 0.69728520
x2 0.4814651 0.73170914
x3 0.4107699 0.07436358
x4 0.1581748 -0.44895116
############ Part I ## We want to see if perspiration in Latin American healthy women
############ in the age group 50--65 is the same as their counterparts in the North
############ America. Perspiration is quantified in terms of the following variables
############ X1: Sweat Rate X2: Sodium in Sweat X3: Potassium in Sweat
sweat = read.table("C:/Users/poster/Desktop/data/t5-1.dat")
head(sweat) V1 V2 V3
1 3.7 48.5 9.3
2 5.7 65.1 8.0
3 3.8 47.2 10.9
4 3.2 53.2 12.0
5 3.1 55.5 9.7
6 4.6 36.1 7.9
n = nrow(sweat)
n[1] 20
p = ncol(sweat)
p[1] 3
mu0 = matrix(c(4, 50, 10), ncol = 1)
xbar = as.matrix(colMeans(sweat), ncol = 1)
xbar [,1]
V1 4.640
V2 45.400
V3 9.965
S = cov(sweat)
S V1 V2 V3
V1 2.879368 10.0100 -1.809053
V2 10.010000 199.7884 -5.640000
V3 -1.809053 -5.6400 3.627658
Tsq <- n * t(xbar - mu0) %*% solve(S) %*% (xbar - mu0)
Tsq [,1]
[1,] 9.738773
## Use Mahalanobis distance
Tsq = n * mahalanobis(colMeans(sweat), mu0, cov(sweat))
Tsq # Hotelling T^2[1] 9.738773
# Critical point at alpha=0.05
cri = (n - 1) * p/(n - p) * qf(0.95, p, n - p)
cri[1] 10.7186
# Tsq < cri Fail to reject H0
## Compute P value
Tsq.mod <- (n - p)/p/(n - 1) * Tsq
p.val <- 1 - pf(Tsq.mod, p, (n - p))
p.val[1] 0.06492834
HotellingsT2(sweat, mu = mu0)
Hotelling's one sample T2-test
data: sweat
T.2 = 2.9045, df1 = 3, df2 = 17, p-value = 0.06493
alternative hypothesis: true location is not equal to c(4,50,10)
## One at a time intervals of mu
One = matrix(NA, 3, 2)
for (i in 1:3) {
One[i, 1] = xbar[i] - qt(0.975, n - 1) * sqrt(S[i, i]/n) #left
One[i, 2] = xbar[i] + qt(0.975, n - 1) * sqrt(S[i, i]/n) #right
}
## T^2 interval mu
Tsq = matrix(NA, 3, 2)
c = sqrt(p * (n - 1)/(n - p) * qf(0.95, p, n - p))
for (i in 1:3) {
Tsq[i, 1] = xbar[i] - c * sqrt(S[i, i]/n) #left
Tsq[i, 2] = xbar[i] + c * sqrt(S[i, i]/n) #right
}
## Bonferroni interval mu when m=3 (m is the number of CIs).
m = 3
Bonf = matrix(NA, 3, 2)
for (i in 1:m) {
Bonf[i, 1] = xbar[i] - qt(1 - 0.05/(2 * m), n - 1) * sqrt(S[i, i]/n) #left
Bonf[i, 2] = xbar[i] + qt(1 - 0.05/(2 * m), n - 1) * sqrt(S[i, i]/n) #right
}
colnames(One) = colnames(Tsq) = colnames(Bonf) = c("lower", "upper")
row.names(One) = row.names(Tsq) = row.names(Bonf) = c("SweatRate", "Sodium",
"Potassium")
list(One, Tsq, Bonf)[[1]]
lower upper
SweatRate 3.845840 5.43416
Sodium 38.784779 52.01522
Potassium 9.073601 10.85640
[[2]]
lower upper
SweatRate 3.397768 5.882232
Sodium 35.052408 55.747592
Potassium 8.570664 11.359336
[[3]]
lower upper
SweatRate 3.643952 5.636048
Sodium 37.103078 53.696922
Potassium 8.846992 11.083008
# For large sample inference, only need to change the critical point or
# multiplier using Z (replace to t) and chi-square (replace to F).
############# Part II ##
############## Paired test Municipal wastewater treatment plants are required by law to
############## monitor their discharges into rivers and streams on a regular basis.
############## Concern about the reliability of data from one of these self-monitoring
############## program led to a study in which samples of effluent were divided and sent
############## to two laboratories. Compare two lab's measurement by two variables: X1
############## and X3: Biochemical oxygen demand (BOD) from two labs X2 and X4: Suspended
############## solids (SS) from two labs
rm(list = ls())
ww = as.matrix(read.table("C:/Users/poster/Desktop/data/t6-1.dat"))
head(ww) V1 V2 V3 V4
[1,] 6 27 25 15
[2,] 6 23 28 13
[3,] 18 64 36 22
[4,] 8 44 35 29
[5,] 11 30 15 31
[6,] 34 75 44 64
n1 = nrow(ww)
n1[1] 11
p1 = 2
D = ww[, 1:2] - ww[, 3:4]
dbar = as.matrix(colMeans(D), ncol = 1)
dbar #est mu1-mu2 [,1]
V1 -9.363636
V2 13.272727
Sd = cov(D)
Sd V1 V2
V1 199.25455 88.30909
V2 88.30909 418.61818
## Using matrix
C = cbind(diag(p1), -diag(p1))
t(C %*% t(ww)) #D [,1] [,2]
[1,] -19 12
[2,] -22 10
[3,] -18 42
[4,] -27 15
[5,] -4 -1
[6,] -10 11
[7,] -14 -4
[8,] 17 60
[9,] 9 -2
[10,] 4 10
[11,] -19 -7
n1 * t(C %*% colMeans(ww)) %*% solve(C %*% cov(ww) %*% t(C)) %*% (C %*% colMeans(ww)) #Tsq1 [,1]
[1,] 13.63931
## Using D
Tsq1 <- n1 * t(dbar) %*% solve(Sd) %*% (dbar)
Tsq1 [,1]
[1,] 13.63931
cri1 = (n1 - 1) * p1/(n1 - p1) * qf(0.95, p1, n1 - p1)
cri1[1] 9.458877
pval1 = 1 - pf(Tsq1 * (n1 - p1)/p1/(n1 - 1), p1, n1 - p1)
pval1 [,1]
[1,] 0.02082779
## Confidence region for delta
plot(D)
CR = ellipse(x = cor(D), scale = sqrt(diag(Sd)/n1), centre = as.vector(dbar),
t = sqrt(cri1))
# n1*mahalanobis(as.vector(dbar),as.vector(CR[3,]),cov(D))
lines(CR, type = "l", xlab = expression({
delta
}[1]), ylab = expression({
delta
}[2]), main = expression("95% confidence ellipse"))
points(0, 0, pch = 20)## One at a time intervals of for delta1 and delta2
One1 = matrix(NA, 2, 2)
for (i in 1:2) {
One1[i, 1] = dbar[i] - qt(0.975, n1 - 1) * sqrt(Sd[i, i]/n1) #left
One1[i, 2] = dbar[i] + qt(0.975, n1 - 1) * sqrt(Sd[i, i]/n1) #right
}
## T^2 interval for delta1 and delta2
Tsq1 = matrix(NA, 2, 2)
c1 = sqrt(p1 * (n1 - 1)/(n1 - p1) * qf(0.95, p1, n1 - p1))
for (i in 1:2) {
Tsq1[i, 1] = dbar[i] - c1 * sqrt(Sd[i, i]/n1) #left
Tsq1[i, 2] = dbar[i] + c1 * sqrt(Sd[i, i]/n1) #right
}
## Bonferroni interval for delta1 and delta2 when m=2 (m is the number of
## CIs).
m = 2
Bonf1 = matrix(NA, 2, 2)
for (i in 1:m) {
Bonf1[i, 1] = dbar[i] - qt(1 - 0.05/(2 * m), n1 - 1) * sqrt(Sd[i, i]/n1) #left
Bonf1[i, 2] = dbar[i] + qt(1 - 0.05/(2 * m), n1 - 1) * sqrt(Sd[i, i]/n1) #right
}
colnames(One1) = colnames(Tsq1) = colnames(Bonf1) = c("lower", "upper")
row.names(One1) = row.names(Tsq1) = row.names(Bonf1) = c("BOD", "SS")
list(One1, Tsq1, Bonf1)[[1]]
lower upper
BOD -18.8467298 0.119457
SS -0.4725958 27.018050
[[2]]
lower upper
BOD -22.453272 3.72600
SS -5.700119 32.24557
[[3]]
lower upper
BOD -20.573107 1.845835
SS -2.974903 29.520358
####################### Two Sample Independent Measurements on the carapaces of 24 female and 24
####################### male painted turtles X1: Length X2: Width X3 Height X4 Sex (female and
####################### male)
rm(list = ls())
X <- read.table("C:/Users/poster/Desktop/data/T6-9.dat", header = F, col.names = c("Length",
"Width", "Height", "Sex"))
# log transformation improves Normality.
X[, 1:3] <- log(X[, 1:3])
X_1 = X[X$Sex == "male", 1:3]
X_2 = X[X$Sex == "female", 1:3]
n_1 <- dim(X_1)[1]
n_2 <- dim(X_2)[1]
p <- dim(X_1)[2]
# Testing H0:mu1-mu2=delta0
delta0 = rep(0, p)
xbar_1 = colMeans(X_1)
xbar_1 Length Width Height
4.725444 4.477574 3.703186
xbar_2 = colMeans(X_2)
xbar_2 Length Width Height
4.900659 4.622909 3.940286
S_1 = var(X_1)
S_1 Length Width Height
Length 0.011072004 0.008019142 0.008159648
Width 0.008019142 0.006416726 0.006005271
Height 0.008159648 0.006005271 0.006772758
S_2 = var(X_2)
S_2 Length Width Height
Length 0.02640563 0.02011195 0.02491758
Width 0.02011195 0.01619045 0.01942430
Height 0.02491758 0.01942430 0.02493980
S_p = ((n_1 - 1) * S_1 + (n_2 - 1) * S_2)/(n_1 + n_2 - 2)
S_p Length Width Height
Length 0.01873882 0.01406555 0.01653862
Width 0.01406555 0.01130359 0.01271478
Height 0.01653862 0.01271478 0.01585628
(T2 = (n_1 * n_2/(n_1 + n_2)) * t(xbar_1 - xbar_2 - delta0) %*% solve(S_p) %*%
(xbar_1 - xbar_2 - delta0)) [,1]
[1,] 85.052
(crit = (n_1 + n_2 - 2) * p/(n_1 + n_2 - p - 1) * qf(0.95, p, n_1 + n_2 - p -
1))[1] 8.833461
# Linear combination of the mean components most responsiblefor rejection of
# H_0
(a <- solve(S_p) %*% (xbar_1 - xbar_2)) [,1]
Length 43.726770
Width 8.710687
Height -67.546415
A = diag(p)
# T^2-intervals
Clower = t(A) %*% (xbar_1 - xbar_2) - sqrt(crit) * sqrt(diag((t(A) %*% S_p %*%
A) * (1/n_1 + 1/n_2)))
Cupper = t(A) %*% (xbar_1 - xbar_2) + sqrt(crit) * sqrt(diag((t(A) %*% S_p %*%
A) * (1/n_1 + 1/n_2)))
cbind(Clower, Cupper) [,1] [,2]
[1,] -0.2926638 -0.05776762
[2,] -0.2365537 -0.05411666
[3,] -0.3451377 -0.12906223
alpha <- 0.05
# Bonferroni intervlas
crit.b = abs(qt(alpha/(2 * p), (n_1 + n_2 - 2)))
Clower.b = t(A) %*% (xbar_1 - xbar_2) - crit.b * sqrt(diag((t(A) %*% S_p %*%
A) * (1/n_1 + 1/n_2)))
Cupper.b = t(A) %*% (xbar_1 - xbar_2) + crit.b * sqrt(diag((t(A) %*% S_p %*%
A) * (1/n_1 + 1/n_2)))
cbind(Clower.b, Cupper.b) [,1] [,2]
[1,] -0.2734025 -0.07702893
[2,] -0.2215940 -0.06907636
[3,] -0.3274197 -0.14678026
# Assessing the normality and equal covariance assumptions
par(mfrow = c(1, 3))
# 3-d Scatterplot: Ellipsoidal? are the covariances the same?
sp <- scatterplot3d(X_1, xlim = c(4.4, 5.3), ylim = c(4.2, 5), zlim = c(3.4,
4.4), main = "Red=FEMALE")
sp$points3d(X_2, col = "red")
# Visually compare the sample variances and covariances between the two
# groups
cov(X_1) Length Width Height
Length 0.011072004 0.008019142 0.008159648
Width 0.008019142 0.006416726 0.006005271
Height 0.008159648 0.006005271 0.006772758
cov(X_2) Length Width Height
Length 0.02640563 0.02011195 0.02491758
Width 0.02011195 0.01619045 0.01942430
Height 0.02491758 0.01942430 0.02493980
# Chi-square plot for males and females separately.
distances <- function(D) {
dsq = c()
M <- colMeans(D)
S <- cov(D)
n <- dim(D)[1]
p <- dim(D)[2]
for (i in 1:n) dsq <- c(dsq, as.matrix(D[i, ] - M) %*% solve(S) %*% as.matrix(t(D[i,
] - M)))
as.numeric(dsq)
}
# Chi-square plot for males
qc <- qchisq(((1:n_1) - 1/2)/n_1, df = p)
par(mar = c(4, 4, 1, 1))
plot(sort(distances(X_1)), qc, xlab = expression(d[j]^2), main = "Male")
abline(a = 0, b = 1)
# Chi-square plot for females
qc <- qchisq(((1:n_2) - 1/2)/n_2, df = p)
par(mar = c(4, 4, 1, 1))
plot(sort(distances(X_2)), qc, xlab = expression(d[j]^2), main = "Female")
abline(a = 0, b = 1)########## Test for additional information
X_11 = X[X$Sex == "male", 1:2]
X_12 = X[X$Sex == "female", 1:2]
# Testing H0:mu1-mu2=delta0
xbar_11 = colMeans(X_11)
xbar_11 Length Width
4.725444 4.477574
xbar_12 = colMeans(X_12)
xbar_12 Length Width
4.900659 4.622909
S_11 = var(X_11)
S_11 Length Width
Length 0.011072004 0.008019142
Width 0.008019142 0.006416726
S_12 = var(X_12)
S_12 Length Width
Length 0.02640563 0.02011195
Width 0.02011195 0.01619045
S_1p = ((n_1 - 1) * S_11 + (n_2 - 1) * S_12)/(n_1 + n_2 - 2)
S_1p Length Width
Length 0.01873882 0.01406555
Width 0.01406555 0.01130359
(T12 = (n_1 * n_2/(n_1 + n_2)) * t(xbar_11 - xbar_12) %*% solve(S_1p) %*% (xbar_11 -
xbar_12)) [,1]
[1,] 22.73142
(U = (T2 - T12)/(n_1 + n_2 - 2 + T12)) [,1]
[1,] 0.9067263
p = 3
p1 = 2
p2 = 1
(crit2 = p2/(n_1 + n_2 - p - 1) * qf(0.95, p2, n_1 + n_2 - p - 1))[1] 0.09231151
############## Part III ##
####### One-way MANOVA: Iris Data--- Sepal (V2) Width and Petal (V4) Width of
####### three species of Iris
rm(list = ls())
D <- read.table("C:/Users/poster/Desktop/data/T11-5.dat", header = F)
head(D) V1 V2 V3 V4 V5
1 5.1 3.5 1.4 0.2 1
2 4.9 3.0 1.4 0.2 1
3 4.7 3.2 1.3 0.2 1
4 4.6 3.1 1.5 0.2 1
5 5.0 3.6 1.4 0.2 1
6 5.4 3.9 1.7 0.4 1
# data must be sorted by the grouping variable
G <- D$V5
u <- unique(G)
u # 1,2,3[1] 1 2 3
g <- length(u)
g # number of species (groups)[1] 3
N <- table(G)
N # n1,n2,n3G
1 2 3
50 50 50
n <- sum(N) # Total sample size
X <- as.matrix(D[, c(2, 4)])
p <- dim(X)[2] # dimension
X_1 <- X[G == 1, ]
X_2 <- X[G == 2, ]
X_3 <- X[G == 3, ]
### Getting a feel for the data
plot(X_1, xlim = c(1, 5), ylim = c(0, 3), col = "red", ylab = "Petal", xlab = "Sepal",
main = "Width Measurements from Three Species of Iris Plants")
points(X_2, col = "blue")
points(X_3, col = "green")
text(4, 0.5, "Species 1", col = "red")
text(4, 1.5, "Species 2", col = "blue")
text(4, 2.25, "Species 3", col = "green")
## Middle matrices
library(Matrix)
# Matrix for B
C_1 <- c(1)
for (i in 1:g) C_1 <- bdiag(C_1, (1/N[i]) * matrix(1, N[i], N[i]))
C1 <- as.matrix(C_1[-1, -1] - (1/n) * matrix(1, n, n))
# Matrix for W
C_2 <- c(1)
for (i in 1:g) C_2 <- bdiag(C_2, diag(1, N[i]) - (1/N[i]) * matrix(1, N[i],
N[i]))
C2 <- as.matrix(C_2[-1, -1])
# SS
SStrt <- t(X) %*% C1 %*% X
SSres <- t(X) %*% C2 %*% X
# Test Stat
Lambda <- det(SSres)/det(SStrt + SSres)
Lambda # Wilks Lambda[1] 0.03831574
# Lambda has the same distributions as U(2,2,147)
m1 <- g - 1
m1[1] 2
m2 <- n - g
m2[1] 147
((1 - sqrt(Lambda))/sqrt(Lambda)) * (m2 - 1)/m1 # Approx F[1] 299.936
alpha = 0.05
qf(1 - alpha, 2 * m1, 2 * (m2 - 1)) #Reject H0[1] 2.402562
# use manova() function
test1 <- manova(cbind(D[, 2], D[, 4]) ~ factor(G))
summary(test1, "Wilks") #c('Pillai', 'Wilks', 'Hotelling-Lawley', 'Roy') Df Wilks approx F num Df den Df Pr(>F)
factor(G) 2 0.038316 299.94 4 292 < 2.2e-16 ***
Residuals 147
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(test1) Response 1 :
Df Sum Sq Mean Sq F value Pr(>F)
factor(G) 2 11.345 5.6725 49.16 < 2.2e-16 ***
Residuals 147 16.962 0.1154
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Response 2 :
Df Sum Sq Mean Sq F value Pr(>F)
factor(G) 2 80.413 40.207 960.01 < 2.2e-16 ***
Residuals 147 6.157 0.042
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# summary.aov() includes two univariate test:
anova(lm(D[, 2] ~ factor(G)))Analysis of Variance Table
Response: D[, 2]
Df Sum Sq Mean Sq F value Pr(>F)
factor(G) 2 11.345 5.6725 49.16 < 2.2e-16 ***
Residuals 147 16.962 0.1154
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(D[, 4] ~ factor(G)))Analysis of Variance Table
Response: D[, 4]
Df Sum Sq Mean Sq F value Pr(>F)
factor(G) 2 80.413 40.207 960.01 < 2.2e-16 ***
Residuals 147 6.157 0.042
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Pairwise Comparison with Hotelling's T^2 Bonferroni method Testing
## H0:mui-muj=delta0
delta0 = rep(0, p)
xbar_1 = colMeans(X_1)
xbar_2 = colMeans(X_2)
xbar_3 = colMeans(X_3)
S_p = SSres/(n - g)
alpha = 0.05
m = g * (g - 1)/2 # Number of pairs of mu you are comparing
(crit = ((n - g) * p/(n - g - p + 1)) * qf(1 - alpha/m, p, n - g - p + 1))[1] 8.480372
# Between Species 1 and 2
(T2 = (N[1] * N[2]/(N[1] + N[2])) * t(xbar_1 - xbar_2 - delta0) %*% solve(S_p) %*%
(xbar_1 - xbar_2 - delta0)) [,1]
[1,] 1323.607
# Between Species 1 and 3
(T2 = (N[1] * N[3]/(N[1] + N[3])) * t(xbar_1 - xbar_3 - delta0) %*% solve(S_p) %*%
(xbar_1 - xbar_3 - delta0)) [,1]
[1,] 2837.709
# Between Species 2 and 3
(T2 = (N[2] * N[3]/(N[2] + N[3])) * t(xbar_2 - xbar_3 - delta0) %*% solve(S_p) %*%
(xbar_2 - xbar_3 - delta0)) [,1]
[1,] 325.1741
########## Bonferoni Intervals
m <- p * g * (g - 1)/2
crit.b <- qt(1 - alpha/(2 * m), n - g)
# Comparing Species 1 and 2
cbind(xbar_1 - xbar_2 - crit.b * sqrt((1/N[1] + 1/N[2]) * diag(S_p)), xbar_1 -
xbar_2 + crit.b * sqrt((1/N[1] + 1/N[2]) * diag(S_p))) [,1] [,2]
V2 0.4763056 0.8396944
V4 -1.1894645 -0.9705355
# Comparing Species 1 and 3
cbind(xbar_1 - xbar_3 - crit.b * sqrt((1/N[1] + 1/N[3]) * diag(S_p)), xbar_1 -
xbar_3 + crit.b * sqrt((1/N[1] + 1/N[3]) * diag(S_p))) [,1] [,2]
V2 0.2723056 0.6356944
V4 -1.8894645 -1.6705355
# Comparing Species 2 and 3
cbind(xbar_2 - xbar_3 - crit.b * sqrt((1/N[2] + 1/N[3]) * diag(S_p)), xbar_2 -
xbar_3 + crit.b * sqrt((1/N[2] + 1/N[3]) * diag(S_p))) [,1] [,2]
V2 -0.3856944 -0.02230564
V4 -0.8094645 -0.59053548
###### 2-way MANOVA Plastic File data Factor 1: Change in the rate of extrusion 0
###### for Low 10% 1 for High 10% Factor 2: Amount of additive 0 for Low 1% 1 for
###### High 1.5% X1: Tear resistance X2: Gloss X3: Opacity
rm(list = ls())
plastic = read.table("C:/Users/poster/Desktop/data/T6-4.dat")
head(plastic) V1 V2 V3 V4 V5
1 0 0 6.5 9.5 4.4
2 0 0 6.2 9.9 6.4
3 0 0 5.8 9.6 3.0
4 0 0 6.5 9.6 4.1
5 0 0 6.5 9.2 0.8
6 0 1 6.9 9.1 5.7
test2 = manova(cbind(V3, V4, V5) ~ V1 * V2, data = plastic)
summary(test2, "Wilks") #c('Pillai', 'Wilks', 'Hotelling-Lawley', 'Roy') Df Wilks approx F num Df den Df Pr(>F)
V1 1 0.38186 7.5543 3 14 0.003034 **
V2 1 0.52303 4.2556 3 14 0.024745 *
V1:V2 1 0.77711 1.3385 3 14 0.301782
Residuals 16
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(test2) Response V3 :
Df Sum Sq Mean Sq F value Pr(>F)
V1 1 1.7405 1.74050 15.7868 0.001092 **
V2 1 0.7605 0.76050 6.8980 0.018330 *
V1:V2 1 0.0005 0.00050 0.0045 0.947143
Residuals 16 1.7640 0.11025
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Response V4 :
Df Sum Sq Mean Sq F value Pr(>F)
V1 1 1.3005 1.30050 7.9178 0.01248 *
V2 1 0.6125 0.61250 3.7291 0.07139 .
V1:V2 1 0.5445 0.54450 3.3151 0.08740 .
Residuals 16 2.6280 0.16425
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Response V5 :
Df Sum Sq Mean Sq F value Pr(>F)
V1 1 0.421 0.4205 0.1036 0.7517
V2 1 4.901 4.9005 1.2077 0.2881
V1:V2 1 3.960 3.9605 0.9760 0.3379
Residuals 16 64.924 4.0578
###### Box's M Test for Equality of Covariance
boxM1 <- function(data, grouping) {
if (!inherits(data, c("data.frame", "matrix")))
stop("'data' must be a numeric data.frame or matrix!")
if (length(grouping) != nrow(data))
stop("incompatible dimensions!")
dname <- deparse(substitute(data))
data <- as.matrix(data)
grouping <- as.factor(as.character(grouping))
p <- ncol(data)
nlev <- nlevels(grouping)
lev <- levels(grouping)
dfs <- tapply(grouping, grouping, length) - 1
if (any(dfs < p))
warning("there are one or more levels with less observations than variables!")
mats <- aux <- list()
for (i in 1:nlev) {
mats[[i]] <- cov(data[grouping == lev[i], ])
aux[[i]] <- mats[[i]] * dfs[i]
}
names(mats) <- lev
pooled <- Reduce("+", aux)/sum(dfs)
logdet <- log(unlist(lapply(mats, det)))
minus2logM <- sum(dfs) * log(det(pooled)) - sum(logdet * dfs)
sum1 <- sum(1/dfs)
Co <- (((2 * p^2) + (3 * p) - 1)/(6 * (p + 1) * (nlev - 1))) * (sum1 - (1/sum(dfs)))
X2 <- minus2logM * (1 - Co)
dfchi <- (choose(p, 2) + p) * (nlev - 1)
pval <- pchisq(X2, dfchi, lower.tail = FALSE)
out <- structure(list(statistic = c(`Chi-Sq (approx.)` = X2), parameter = c(df = dfchi),
p.value = pval, cov = mats, pooled = pooled, logDet = logdet, data.name = dname,
method = " Box's M-test for Homogeneity of Covariance Matrices"), class = c("htest",
"boxM"))
return(out)
}
boxM(cbind(plastic$V3, plastic$V4, plastic$V5), plastic$V1 * plastic$V2)
Box's M-test for Homogeneity of Covariance Matrices
data: cbind(plastic$V3, plastic$V4, plastic$V5)
Chi-Sq (approx.) = 7.1269, df = 6, p-value = 0.3093
boxM1(cbind(plastic$V3, plastic$V4, plastic$V5), plastic$V1 * plastic$V2)
Box's M-test for Homogeneity of Covariance Matrices
data: cbind(plastic$V3, plastic$V4, plastic$V5)
Chi-Sq (approx.) = 7.1269, df = 6, p-value = 0.3093
####### The following analysis uses data from HERS project Source:
####### http://www.biostat.ucsf.edu/vgsm/data.html Data includes White-nondiabetic
####### women in the age group 44-79.
Data1 <- read.csv("C:/Users/poster/Desktop/data/hersdata.csv", header = TRUE)
Data <- Data1[(Data1$diabetes == "no" & Data1$raceth == "White"), ]
n <- dim(Data)[1]
Y = data.frame(glucose = Data$glucose, TG = Data$TG)
Z = data.frame(intercept = rep(1, n), age = Data$age, drinkany = as.numeric(Data$drinkany ==
"yes"), exercise = as.numeric(Data$exercise == "yes"), BMI = Data$BMI)
ComCases <- complete.cases(Z) & complete.cases(Y)
Y <- Y[ComCases, ]
Z <- Z[ComCases, ]
p = dim(Y)[2] # dimension
r = dim(Z)[2] - 1 # number of predictors
n = dim(Y)[1] # sample size
Z <- as.matrix(Z)
Y <- as.matrix(Y)
B_hat = solve(t(Z) %*% Z) %*% t(Z) %*% Y
B_hat glucose TG
intercept 78.62564381 98.3540631
age 0.07367995 0.1728804
drinkany 0.63656365 -9.3090412
exercise -0.96975262 -4.1237035
BMI 0.47621049 2.1121466
Sigma_hat = (1/(n - r - 1)) * (t(Y - Z %*% B_hat) %*% (Y - Z %*% B_hat))
Sigma_hat glucose TG
glucose 85.98248 72.46454
TG 72.46454 3745.16189
Cov_B_hat = Sigma_hat %x% solve(t(Z) %*% Z)
rownames(Cov_B_hat) <- c("B_01", "B_11", "B_21", "B_31", "B_41", "B_02", "B_12",
"B_22", "B_32", "B_42")
colnames(Cov_B_hat) <- c("B_01", "B_11", "B_21", "B_31", "B_41", "B_02", "B_12",
"B_22", "B_32", "B_42")
Cov_B_hat[1:5, 1:5] B_01 B_11 B_21 B_31 B_41
B_01 7.24632586 -0.0778435763 -0.1929244742 -0.1865533801 -0.0658209237
B_11 -0.07784358 0.0010714166 0.0013538412 0.0002707412 0.0001909548
B_21 -0.19292447 0.0013538412 0.1889307310 -0.0001902451 0.0005723298
B_31 -0.18655338 0.0002707412 -0.0001902451 0.1956711207 0.0031202742
B_41 -0.06582092 0.0001909548 0.0005723298 0.0031202742 0.0018678963
Cov_B_hat[6:10, 6:10] B_02 B_12 B_22 B_32 B_42
B_02 315.630166 -3.39065360 -8.403263243 -8.125755785 -2.86697969
B_12 -3.390654 0.04666798 0.058969627 0.011792747 0.00831747
B_22 -8.403263 0.05896963 8.229306699 -0.008286559 0.02492912
B_32 -8.125756 0.01179275 -0.008286559 8.522899665 0.13591063
B_42 -2.866980 0.00831747 0.024929123 0.135910625 0.08136046
Cov_B_hat[1:5, 6:10] B_02 B_12 B_22 B_32 B_42
B_01 6.10707735 -0.0656052116 -0.1625933900 -0.1572239428 -0.0554727293
B_11 -0.06560521 0.0009029712 0.0011409938 0.0002281760 0.0001609334
B_21 -0.16259339 0.0011409938 0.1592275328 -0.0001603353 0.0004823496
B_31 -0.15722394 0.0002281760 -0.0001603353 0.1649082159 0.0026297128
B_41 -0.05547273 0.0001609334 0.0004823496 0.0026297128 0.0015742305
#### Testing for any predictive subset of the predictors in the presence of the
#### other predictors Any of the risk factors
q = 0
C = cbind(matrix(0, r - q, q + 1), diag(r - q))
S_h = t(Y) %*% Z %*% solve(t(Z) %*% Z) %*% t(C) %*% solve(C %*% solve(t(Z) %*%
Z) %*% t(C)) %*% C %*% solve(t(Z) %*% Z) %*% t(Z) %*% Y
S_e = t(Y) %*% (diag(n) - Z %*% solve(t(Z) %*% Z) %*% t(Z)) %*% Y
S_h glucose TG
glucose 11987.89 50252.18
TG 50252.18 277409.44
S_e glucose TG
glucose 158723.7 133769.5
TG 133769.5 6913568.9
Lambda = det(S_e)/det(S_h + S_e)
Lambda[1] 0.904277
c(p, r - q, n - r - 1)[1] 2 4 1846
# F and df
c((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 * (n -
r - 2))[1] 23.79903 8.00000 3690.00000
# p value
1 - pf((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 *
(n - r - 2))[1] 0
# Use lm() function
Dat = data.frame(cbind(Y, Z))
fit_f = lm(as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI,
data = Dat)
fit_n = lm(as.matrix(cbind(glucose, TG)) ~ 1, data = Dat)
anova(fit_f, fit_n, test = "Wilks")Analysis of Variance Table
Model 1: as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI
Model 2: as.matrix(cbind(glucose, TG)) ~ 1
Res.Df Df Gen.var. Wilks approx F num Df den Df Pr(>F)
1 1846 562.82
2 1850 4 590.58 0.90428 23.799 8 3690 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Life style risk factors
q = 2
C = cbind(matrix(0, r - q, q), diag(r - q), rep(0, r - q))
S_h = t(Y) %*% Z %*% solve(t(Z) %*% Z) %*% t(C) %*% solve(C %*% solve(t(Z) %*%
Z) %*% t(C)) %*% C %*% solve(t(Z) %*% Z) %*% t(Z) %*% Y
S_e = t(Y) %*% (diag(n) - Z %*% solve(t(Z) %*% Z) %*% t(Z)) %*% Y
S_h glucose TG
glucose 597.1097 -936.7593
TG -936.7593 46944.5976
S_e glucose TG
glucose 158723.7 133769.5
TG 133769.5 6913568.9
Lambda = det(S_e)/det(S_h + S_e)
Lambda[1] 0.9891349
c(p, r - q, n - r - 1)[1] 2 2 1846
# F and df
c((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 * (n -
r - 2))[1] 5.052722 4.000000 3690.000000
# p value
1 - pf((1 - sqrt(Lambda))/sqrt(Lambda) * (n - r - 2)/(r - q), 2 * (r - q), 2 *
(n - r - 2))[1] 0.0004641033
# Use lm() function
fit_f = lm(as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI,
data = Dat)
fit_r = lm(as.matrix(cbind(glucose, TG)) ~ age + BMI, data = Dat)
anova(fit_f, fit_r, test = "Wilks")Analysis of Variance Table
Model 1: as.matrix(cbind(glucose, TG)) ~ age + drinkany + exercise + BMI
Model 2: as.matrix(cbind(glucose, TG)) ~ age + BMI
Res.Df Df Gen.var. Wilks approx F num Df den Df Pr(>F)
1 1846 562.82
2 1848 2 565.29 0.98913 5.0527 4 3690 0.0004641 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#### Plotting the Predictors
z_0 = c(1, 45, 1, 0, 25)
# z_0=c(1,70,1,0,25)
plot(Z[(Z[, 3] == 0) & (Z[, 4] == 0), 2], Z[(Z[, 3] == 0) & (Z[, 4] == 0), 5],
pch = 2, bty = "l", col = "blue", ylab = "BMI", xlab = "AGE", xlim = c(30,
90), ylim = c(0, 60))
points(Z[(Z[, 3] == 0) & (Z[, 4] == 1), 2], Z[(Z[, 3] == 0) & (Z[, 4] == 1),
5], pch = 2, col = "red")
points(Z[(Z[, 3] == 1) & (Z[, 4] == 0), 2], Z[(Z[, 3] == 1) & (Z[, 4] == 0),
5], col = "blue")
points(Z[(Z[, 3] == 1) & (Z[, 4] == 1), 2], Z[(Z[, 3] == 1) & (Z[, 4] == 1),
5], col = "red")
legend("topright", pch = c(1, 2), title = "Alcohol", legend = c("Yes", "No"))
legend("topleft", fill = c("red", "blue"), title = "Exercies", legend = c("Yes",
"No"))
# Plotting z_0
points(z_0[2], z_0[5], col = "blue", cex = 2, pch = 16)
text(z_0[2] - 8, z_0[5] - 2, "Drinks")
text(z_0[2] - 8, z_0[5] - 4, "No Exercise")
arrows(41, 22, 44, 24, code = 2, length = 0.1)### Predicting the mean response and actual response Prediciting mean response
### For a typical nonalcholic and excercising 35 years old with BMI 26
alpha = 0.05
pred = t(z_0) %*% B_hat
pred glucose TG
[1,] 94.48307 149.6283
pred_crit = sqrt((p * (n - r - 1))/(n - r - p) * qf(1 - alpha, p, n - r - p))
pred_crit[1] 2.450399
# predict() function
predict(fit_f, newdata = list(age = 45, drinkany = 1, exercise = 0, BMI = 25)) glucose TG
1 94.48307 149.6283
cov(Y) #see the total variation glucose TG
glucose 92.27651 99.47119
TG 99.47119 3887.01530
#### Predition
pred_cov = as.numeric(t(z_0) %*% solve(t(Z) %*% Z) %*% z_0) * (n * Sigma_hat)/(n -
r - 1)
pred_cov glucose TG
glucose 0.6714174 0.5658589
TG 0.5658589 29.2451097
### Confidence Region
plot(Y, type = "n", bty = "l", xlab = expression(mu[1] * " (Glucose)"), xlim = c(60,
120), ylim = c(-20, 320), ylab = expression(mu[2] * " (Triglyceride)"))
car::ellipse(center = as.vector(pred), shape = pred_cov, radius = pred_crit,
segments = 104, center.cex = 0.5)
#### Simultaneous intervals T^2 intervals
CI_lower = pred - sqrt(diag(pred_cov)) * pred_crit
CI_upper = pred + sqrt(diag(pred_cov)) * pred_crit
rbind(CI_lower, CI_upper) glucose TG
[1,] 92.47521 136.3769
[2,] 96.49093 162.8798
rect(xleft = CI_lower[1], xright = CI_upper[1], ybottom = CI_lower[2], ytop = CI_upper[2])
# Bonferroni intervals
m = 2
b_pred_crit <- qt(1 - alpha/(2 * m), n - r - 1)
b_CI_lower = pred - sqrt(diag(pred_cov)) * b_pred_crit
b_CI_upper = pred + sqrt(diag(pred_cov)) * b_pred_crit
rbind(b_CI_lower, b_CI_upper) glucose TG
[1,] 92.64496 137.4972
[2,] 96.32117 161.7594
rect(xleft = b_CI_lower[1], xright = b_CI_upper[1], ybottom = b_CI_lower[2],
ytop = b_CI_upper[2], lty = "dashed")
### Forecasting
pred_cov_F = as.numeric(1 + t(z_0) %*% solve(t(Z) %*% Z) %*% z_0) * (n * Sigma_hat)/(n -
r - 1)
### Confidence Region
car::ellipse(center = as.vector(pred), col = "blue", shape = pred_cov_F, radius = pred_crit,
segments = 104, center.cex = 0.5)
#### Simultaneous intervals T^2 intervals
CI_lower = pred - sqrt(diag(pred_cov_F)) * pred_crit
CI_upper = pred + sqrt(diag(pred_cov_F)) * pred_crit
rbind(CI_lower, CI_upper) glucose TG
[1,] 71.64214 -1.117077
[2,] 117.32399 300.373689
rect(xleft = CI_lower[1], xright = CI_upper[1], ybottom = CI_lower[2], ytop = CI_upper[2])
# Bonferroni intervals
m = 2
b_pred_crit <- qt(1 - alpha/(2 * m), n - r - 1)
b_CI_lower = pred - sqrt(diag(pred_cov_F)) * b_pred_crit
b_CI_upper = pred + sqrt(diag(pred_cov_F)) * b_pred_crit
rbind(b_CI_lower, b_CI_upper) glucose TG
[1,] 73.5732 11.62754
[2,] 115.3929 287.62908
rect(xleft = b_CI_lower[1], xright = b_CI_upper[1], ybottom = b_CI_lower[2],
ytop = b_CI_upper[2], lty = "dashed")#### Maximia of Quadratic forms
rm(list = ls(all = TRUE))
Gamma = matrix(c(1/sqrt(2), -1/sqrt(2), 1/sqrt(2), 1/sqrt(2)), 2, 2)
Lambda = diag(c(1, 4))
B = Gamma %*% Lambda %*% t(Gamma)
B [,1] [,2]
[1,] 2.5 1.5
[2,] 1.5 2.5
par(mar = c(0, 0, 0, 0))
plot(NULL, NULL, col = "red", pch = 16, xlim = c(-3, 3), ylim = c(-2, 2), xaxt = "n",
yaxt = "n", bty = "n", xlab = "", ylab = "")
car::ellipse(center = c(0, 0), shape = B, radius = 1, col = "blue", center.pch = 16,
center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = B, radius = 0.5, col = "green", center.pch = 16,
center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = diag(1, 2), radius = 1, col = "red",
center.pch = 16, center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = B, radius = 1.2, lty = "dashed", col = "black",
center.pch = 16, center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
car::ellipse(center = c(0, 0), shape = B, radius = 0.2, lty = "dashed", col = "black",
center.pch = 16, center.cex = 1.5, segments = 51, draw = TRUE, add = TRUE)
axis(1, pos = 0, at = seq(-5, 5, 2))
axis(2, pos = 0, at = seq(-5, 5, 2))
segments(0, 0, 1/sqrt(2), 1/sqrt(2), lty = "solid", lwd = 1, col = "green")
segments(0, 0, -1/sqrt(2), 1/sqrt(2), lty = "solid", lwd = 1, col = "blue")
text(2, 1, expression(bold(x)^T * B * bold(x) == c[2]^2), col = "blue")
text(0.5, -0.6, expression(bold(x)^T * B * bold(x) == c[1]^2), col = "green")
text(0.5, 1.1, expression(bold(x)^T * bold(x) == 1), col = "red")
text(0.4, 0.6, expression(frac(c[1], sqrt(lambda[min]))), col = "green")
text(-0.6, 0.4, expression(frac(c[2], sqrt(lambda[max]))), col = "blue")########## Example of Population Principal Components
Sigma = matrix(c(1, -2, 0, -2, 5, 0, 0, 0, 2), 3, 3, byrow = T)
p = dim(Sigma)[1]
L = eigen(Sigma, symmetric = T)
L$values[1] 5.8284271 2.0000000 0.1715729
# We probably need the first two principal components ONLY They account for
# 98% of the total variance
E = t(L$vectors)
E [,1] [,2] [,3]
[1,] -0.3826834 0.9238795 0
[2,] 0.0000000 0.0000000 1
[3,] 0.9238795 0.3826834 0
rho = matrix(NA, p, p)
for (i in 1:p) for (k in 1:p) rho[i, k] = E[i, k] * sqrt(L$values[i])/sqrt(Sigma[k,
k])
rho [,1] [,2] [,3]
[1,] -0.9238795 0.99748421 0
[2,] 0.0000000 0.00000000 1
[3,] 0.3826834 0.07088902 0
########### Need for standardization??? PCs extracted from Sigma=Covariance Matrix
Sigma = matrix(c(1, 4, 4, 100), 2, 2, byrow = T)
p = dim(Sigma)[1]
L = eigen(Sigma)
L$values[1] 100.1613532 0.8386468
t(L$vectors) [,1] [,2]
[1,] 0.04030552 0.99918740
[2,] -0.99918740 0.04030552
L$values/sum(L$values)[1] 0.991696566 0.008303434
# PCs extracted from Rho=Correlation Matrix
V_Half_inv = diag(1/diag(Sigma)^{
1/2
})
Rho = V_Half_inv %*% Sigma %*% V_Half_inv
L_r = eigen(Rho, symmetric = T)
L_r$values[1] 1.4 0.6
t(L_r$vectors) [,1] [,2]
[1,] 0.7071068 0.7071068
[2,] -0.7071068 0.7071068
cumsum(L_r$values)/sum(L_r$values)[1] 0.7 1.0
################## Example 8.3 J&W, 6th Edition: Census Data
census = read.table("C:/Users/poster/Desktop/data/T8-5.DAT")
xbarvec = apply(census, 2, mean)
Smat = cov(census)
eigen(Smat)eigen() decomposition
$values
[1] 107.0152535 39.6721358 8.3708660 2.8678740 0.1546931
$vectors
[,1] [,2] [,3] [,4] [,5]
[1,] 0.038887287 -0.07114494 -0.18789258 0.97713524 -0.057699864
[2,] -0.105321969 -0.12975236 0.96099580 0.17135181 -0.138554092
[3,] 0.492363944 -0.86438807 -0.04579737 -0.09104368 0.004966048
[4,] -0.863069865 -0.48033178 -0.15318538 -0.02968577 0.006691800
[5,] -0.009122262 -0.01474342 0.12498114 0.08170118 0.988637470
lambdavec = eigen(Smat)$values # eigenvalues
pc.percent = lambdavec/(sum(lambdavec))
pc.percent[1] 0.6769654402 0.2509610921 0.0529530772 0.0181418208 0.0009785696
cumsum(pc.percent)[1] 0.6769654 0.9279265 0.9808796 0.9990214 1.0000000
# scree plot
ivec = seq(1, 5, 1)
plot(ivec, lambdavec, type = "o")# alternative way use prcomp()
prcomp(census)Standard deviations (1, .., p=5):
[1] 10.3448177 6.2985820 2.8932449 1.6934798 0.3933104
Rotation (n x k) = (5 x 5):
PC1 PC2 PC3 PC4 PC5
V1 0.038887287 -0.07114494 0.18789258 0.97713524 -0.057699864
V2 -0.105321969 -0.12975236 -0.96099580 0.17135181 -0.138554092
V3 0.492363944 -0.86438807 0.04579737 -0.09104368 0.004966048
V4 -0.863069865 -0.48033178 0.15318538 -0.02968577 0.006691800
V5 -0.009122262 -0.01474342 -0.12498114 0.08170118 0.988637470
summary(prcomp(census))Importance of components:
PC1 PC2 PC3 PC4 PC5
Standard deviation 10.345 6.2986 2.89324 1.69348 0.39331
Proportion of Variance 0.677 0.2510 0.05295 0.01814 0.00098
Cumulative Proportion 0.677 0.9279 0.98088 0.99902 1.00000
prcomp(census)$sdev^2 ## eigenvalues[1] 107.0152535 39.6721358 8.3708660 2.8678740 0.1546931
screeplot(prcomp(census), type = "lines") #type=lines########################## Example 8.4 J&W, 6th Edition: Turtle data
turtle = read.table("C:/Users/poster/Desktop/data/T6-9.DAT")
names(turtle) = c("Length", "Width", "Height", "Sex")
logturtle = log(turtle[25:48, 1:3])
pc.turtle = prcomp(logturtle)
summary(pc.turtle)Importance of components:
PC1 PC2 PC3
Standard deviation 0.1527 0.02446 0.01897
Proportion of Variance 0.9605 0.02466 0.01483
Cumulative Proportion 0.9605 0.98517 1.00000
cumsum(pc.turtle$sdev^2)/sum(pc.turtle$sdev^2)[1] 0.9605077 0.9851684 1.0000000
screeplot(pc.turtle, type = "lines", main = "Scree Plot")################ Example 8.7#
pcmat = pc.turtle$rotation
meanmat = t(replicate(24, as.vector(apply(logturtle, 2, mean))))
pcscoremat = (as.matrix(logturtle) - meanmat) %*% pcmat
pc.turtle$x #easier way PC1 PC2 PC3
25 -0.268473390 0.0610685037 0.0004066149
26 -0.263344980 -0.0157244390 -0.0066858875
27 -0.236045707 -0.0341210935 -0.0074276738
28 -0.119923476 0.0141251769 -0.0366922244
29 -0.120728388 -0.0149572760 -0.0286045130
30 -0.152592781 -0.0089082112 0.0169793685
31 -0.106039296 0.0165711616 -0.0083826652
32 -0.093027430 0.0135333730 0.0051929344
33 -0.106371042 -0.0012452738 0.0278503862
34 -0.006575259 -0.0167441765 -0.0071889505
35 -0.006268452 -0.0114497088 0.0061739765
36 -0.011979592 0.0008012246 0.0267516989
37 0.060886922 0.0280465362 -0.0125896593
38 0.041862973 -0.0108766303 0.0089785626
39 0.047500815 -0.0174403631 0.0021060793
40 0.070171264 -0.0330572711 0.0006647271
41 0.040553932 -0.0277470963 0.0419820919
42 0.112788083 0.0212894116 -0.0162796865
43 0.105004626 -0.0293536649 -0.0085075394
44 0.152416632 0.0324987788 0.0055238164
45 0.179458559 0.0111082110 -0.0029094996
46 0.179473590 0.0160774637 0.0091929735
47 0.206783903 0.0297129498 0.0185740872
48 0.294468494 -0.0232075863 -0.0351090182
# detecting outliers
par(mfrow = c(1, 3))
qqnorm(pcscoremat[, 2])
qqline(pcscoremat[, 2])
qqnorm(pcscoremat[, 3])
qqline(pcscoremat[, 3])
# scatter plot of first two pc (pc1 vs. pc2)
plot(pcscoremat[, 2], pcscoremat[, 1])############ Example Bulls Data
bulls <- read.table("C:/Users/poster/Desktop/data/bull_data.txt", col.names = c("Breed",
"SalePr", "YrHgt", "FtFrBody", "PrctFFB", "Frame", "BkFat", "SaleHt", "SaleWt"))
breed = bulls$Breed
X = bulls[-(1:2)]
X = scale(X)
color = 1 * (breed == 1) + 2 * (breed == 5) + 3 * (breed == 8)
par(mfrow = c(1, 1))
pairs(X, col = color)par(mfrow = c(1, 3))
pcaStBull = prcomp(X, retx = TRUE, center = TRUE, scale = TRUE)
pcaStBullStandard deviations (1, .., p=7):
[1] 2.0299502 1.1563431 0.8610357 0.6491727 0.4310521 0.3827563 0.2169256
Rotation (n x k) = (7 x 7):
PC1 PC2 PC3 PC4 PC5
YrHgt -0.4499313 -0.042790217 -0.41570891 0.1133565 -0.06587066
FtFrBody -0.4123256 0.129836547 0.45029241 0.2474787 0.71934339
PrctFFB -0.3555618 -0.315507785 0.56827313 0.3147874 -0.57936738
Frame -0.4339569 0.007728211 -0.45234503 0.2428179 -0.14299538
BkFat 0.1867048 0.714719363 -0.03873196 0.6181171 -0.16023789
SaleHt -0.4528538 0.101315086 -0.17665043 -0.2157694 0.10953536
SaleWt -0.2699470 0.600514834 0.25331192 -0.5824327 -0.29054729
PC6 PC7
YrHgt 0.07223418 -0.77492612
FtFrBody 0.17706072 -0.01776760
PrctFFB -0.12780009 0.00239740
Frame 0.43414400 0.58233705
BkFat -0.20801720 -0.04244214
SaleHt -0.79928778 0.23672329
SaleWt 0.27656055 -0.04703601
summary(pcaStBull)Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 2.0300 1.1563 0.8610 0.6492 0.43105 0.38276 0.21693
Proportion of Variance 0.5887 0.1910 0.1059 0.0602 0.02654 0.02093 0.00672
Cumulative Proportion 0.5887 0.7797 0.8856 0.9458 0.97235 0.99328 1.00000
par(mfrow = c(1, 2))
screeplot(pcaStBull, main = "Variation of Scree Plot", type = "lines")
# Take first two or three principal components??? Not as conclusive
Y_hat_R = as.matrix(X) %*% pcaStBull$rotation
scatterplot3d(Y_hat_R[, 1:3], color = color, angle = 45, type = "h", pch = 16)
legend("topright", legend = c("Angus", "Hereford", "Simental"), col = 1:3, pch = rep(16,
3))# perform Linear regression
my.lm <- lm(SalePr ~ YrHgt + FtFrBody + PrctFFB + Frame + BkFat + SaleHt + SaleWt,
data = bulls)
summary(my.lm)
Call:
lm(formula = SalePr ~ YrHgt + FtFrBody + PrctFFB + Frame + BkFat +
SaleHt + SaleWt, data = bulls)
Residuals:
Min 1Q Median 3Q Max
-927.31 -279.95 -43.97 220.15 1612.87
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3156.8944 4105.7192 -0.769 0.44461
YrHgt 25.9314 111.4029 0.233 0.81664
FtFrBody -2.2012 1.0707 -2.056 0.04365 *
PrctFFB -30.0976 26.7168 -1.127 0.26390
Frame 360.3168 172.9033 2.084 0.04093 *
BkFat 2571.5500 790.7881 3.252 0.00179 **
SaleHt 84.3969 64.0910 1.317 0.19232
SaleWt 0.3634 0.6085 0.597 0.55233
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 460.9 on 68 degrees of freedom
Multiple R-squared: 0.5037, Adjusted R-squared: 0.4526
F-statistic: 9.859 on 7 and 68 DF, p-value: 2.008e-08
vif(my.lm) YrHgt FtFrBody PrctFFB Frame BkFat SaleHt SaleWt
13.134475 3.478312 2.693996 9.064782 1.770954 5.826196 2.202311
X <- bulls[, 3:9]
my.pr <- prcomp(X, scale = TRUE, center = TRUE)
summary(my.pr)Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 2.0300 1.1563 0.8610 0.6492 0.43105 0.38276 0.21693
Proportion of Variance 0.5887 0.1910 0.1059 0.0602 0.02654 0.02093 0.00672
Cumulative Proportion 0.5887 0.7797 0.8856 0.9458 0.97235 0.99328 1.00000
pc.x <- my.pr$x
new.lm <- lm(bulls[, 2] ~ pc.x)
summary(new.lm)
Call:
lm(formula = bulls[, 2] ~ pc.x)
Residuals:
Min 1Q Median 3Q Max
-927.31 -279.95 -43.97 220.15 1612.87
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1742.43 52.87 32.955 < 2e-16 ***
pc.xPC1 -92.33 26.22 -3.521 0.000772 ***
pc.xPC2 215.30 46.03 4.678 1.43e-05 ***
pc.xPC3 -344.39 61.81 -5.571 4.69e-07 ***
pc.xPC4 83.09 81.99 1.013 0.314449
pc.xPC5 -172.56 123.47 -1.398 0.166786
pc.xPC6 -45.42 139.05 -0.327 0.744969
pc.xPC7 191.11 245.36 0.779 0.438727
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 460.9 on 68 degrees of freedom
Multiple R-squared: 0.5037, Adjusted R-squared: 0.4526
F-statistic: 9.859 on 7 and 68 DF, p-value: 2.008e-08
newer.lm <- lm(bulls[, 2] ~ pc.x[, 1:3])
summary(newer.lm)
Call:
lm(formula = bulls[, 2] ~ pc.x[, 1:3])
Residuals:
Min 1Q Median 3Q Max
-953.33 -313.71 -21.59 162.47 1676.33
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1742.43 52.76 33.026 < 2e-16 ***
pc.x[, 1:3]PC1 -92.33 26.16 -3.529 0.000732 ***
pc.x[, 1:3]PC2 215.30 45.93 4.688 1.27e-05 ***
pc.x[, 1:3]PC3 -344.39 61.68 -5.583 3.93e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 460 on 72 degrees of freedom
Multiple R-squared: 0.4767, Adjusted R-squared: 0.4549
F-statistic: 21.87 on 3 and 72 DF, p-value: 3.583e-10
stock = data.matrix(read.table("C:/Users/poster/Desktop/data/T8-4.DAT")) # stock data
nrow(stock) #n=103[1] 103
# X1: J P Morgan X2: Citibank X3: Wells Fargo X4: Royal Dutch Shell X5:
# ExxonMobil
### Example 9.4### The Principal Component (Principal Factor) Method Usually
### use centralized or standardedized
pc.stock = prcomp(stock, scale = TRUE, center = TRUE) #standardized
pc.stockStandard deviations (1, .., p=5):
[1] 1.5611768 1.1861756 0.7074693 0.6324805 0.5051434
Rotation (n x k) = (5 x 5):
PC1 PC2 PC3 PC4 PC5
V1 -0.4690832 0.3680070 -0.60431522 0.3630228 0.38412160
V2 -0.5324055 0.2364624 -0.13610618 -0.6292079 -0.49618794
V3 -0.4651633 0.3151795 0.77182810 0.2889658 0.07116948
V4 -0.3873459 -0.5850373 0.09336192 -0.3812515 0.59466408
V5 -0.3606821 -0.6058463 -0.10882629 0.4934145 -0.49755167
loading1 = pc.stock$sdev[1] * pc.stock$rotation[, 1] #factor loading1
loading1 V1 V2 V3 V4 V5
-0.7323218 -0.8311791 -0.7262022 -0.6047155 -0.5630885
loading2 = pc.stock$sdev[2] * pc.stock$rotation[, 2] #factor loading1
loading2 V1 V2 V3 V4 V5
0.4365209 0.2804859 0.3738582 -0.6939569 -0.7186401
## Specific variance epsilon- the diagonal matrix and The residual matrix##
## One factor model
1 - (loading1)^2 V1 V2 V3 V4 V5
0.4637047 0.3091413 0.4726303 0.6343192 0.6829314
# The residual matrix
cor(stock) - loading1 %*% t(loading1) - diag(1 - (loading1)^2) V1 V2 V3 V4 V5
V1 0.00000000 0.02359722 -0.02131640 -0.3282445 -0.2578992
V2 0.02359722 0.00000000 -0.02946169 -0.1803348 -0.2553527
V3 -0.02131640 -0.02946169 0.00000000 -0.2566466 -0.2627094
V4 -0.32824446 -0.18033479 -0.25664656 0.0000000 0.3428693
V5 -0.25789922 -0.25535267 -0.26270940 0.3428693 0.0000000
# Two factor model
1 - apply(cbind(loading1^2 + loading2^2), 1, sum) V1 V2 V3 V4 V5
0.2731542 0.2304689 0.3328604 0.1527429 0.1664878
# The residual matrix
cor(stock) - cbind(loading1, loading2) %*% t(cbind(loading1, loading2)) - diag(1 -
apply(cbind(loading1^2 + loading2^2), 1, sum)) V1 V2 V3 V4 V5
V1 0.00000000 -0.09884073 -0.184513342 -0.025317756 0.055802201
V2 -0.09884073 0.00000000 -0.134323655 0.014310342 -0.053784258
V3 -0.18451334 -0.13432365 0.000000000 0.002794963 0.005960127
V4 -0.02531776 0.01431034 0.002794963 0.000000000 -0.155835955
V5 0.05580220 -0.05378426 0.005960127 -0.155835955 0.000000000
## Rotation##
Lpc = cbind(loading1, loading2)
varimax(Lpc)$loadings
Loadings:
loading1 loading2
V1 -0.852
V2 -0.849 -0.220
V3 -0.812
V4 -0.126 -0.912
V5 -0.910
loading1 loading2
SS loadings 2.129 1.716
Proportion Var 0.426 0.343
Cumulative Var 0.426 0.769
$rotmat
[,1] [,2]
[1,] 0.8368530 0.5474277
[2,] -0.5474277 0.8368530
# variance epsilon
1 - apply(varimax(Lpc)$loadings^2, 1, sum) V1 V2 V3 V4 V5
0.2731542 0.2304689 0.3328604 0.1527429 0.1664878
### Example 9.5### The Maximum Likelihood Method Using Covariance Matrix S No
### rotation##
mlik.fa = factanal(stock, factors = 2, rotation = "none")
mlik.fa
Call:
factanal(x = stock, factors = 2, rotation = "none")
Uniquenesses:
V1 V2 V3 V4 V5
0.417 0.275 0.542 0.005 0.530
Loadings:
Factor1 Factor2
V1 0.121 0.754
V2 0.328 0.786
V3 0.188 0.650
V4 0.997
V5 0.685
Factor1 Factor2
SS loadings 1.622 1.610
Proportion Var 0.324 0.322
Cumulative Var 0.324 0.646
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16
# specific variance
1 - apply(mlik.fa$loadings^2, 1, sum) V1 V2 V3 V4 V5
0.416537511 0.274690258 0.542023377 0.004998441 0.529843148
## Rotation (varimax)##
mlik.farot = factanal(stock, factors = 2, rotation = "varimax") #rotmat
mlik.farot
Call:
factanal(x = stock, factors = 2, rotation = "varimax")
Uniquenesses:
V1 V2 V3 V4 V5
0.417 0.275 0.542 0.005 0.530
Loadings:
Factor1 Factor2
V1 0.763
V2 0.819 0.232
V3 0.668 0.108
V4 0.113 0.991
V5 0.108 0.677
Factor1 Factor2
SS loadings 1.725 1.507
Proportion Var 0.345 0.301
Cumulative Var 0.345 0.646
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16
# specific variance
1 - apply(mlik.farot$loadings^2, 1, sum) #same as no rotation. V1 V2 V3 V4 V5
0.416537511 0.274690258 0.542023377 0.004998441 0.529843148
## Or use the correlation mattrix R
cor.stock = cor(stock)
factanal(covmat = cor.stock, factors = 2, rotation = "none", n.obs = 103, method = "mle")
Call:
factanal(factors = 2, covmat = cor.stock, n.obs = 103, rotation = "none", method = "mle")
Uniquenesses:
V1 V2 V3 V4 V5
0.417 0.275 0.542 0.005 0.530
Loadings:
Factor1 Factor2
V1 0.121 0.754
V2 0.328 0.786
V3 0.188 0.650
V4 0.997
V5 0.685
Factor1 Factor2
SS loadings 1.622 1.610
Proportion Var 0.324 0.322
Cumulative Var 0.324 0.646
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16
factanal(covmat = cor.stock, factors = 2, rotation = "varimax", n.obs = 103,
method = "mle")
Call:
factanal(factors = 2, covmat = cor.stock, n.obs = 103, rotation = "varimax", method = "mle")
Uniquenesses:
V1 V2 V3 V4 V5
0.417 0.275 0.542 0.005 0.530
Loadings:
Factor1 Factor2
V1 0.763
V2 0.819 0.232
V3 0.668 0.108
V4 0.113 0.991
V5 0.108 0.677
Factor1 Factor2
SS loadings 1.725 1.507
Proportion Var 0.345 0.301
Cumulative Var 0.345 0.646
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1.97 on 1 degree of freedom.
The p-value is 0.16
######################## Use psych package###
# The Principal Component (Principal Factor) Method
principal(stock, nfactors = 2, rotate = "none")Principal Components Analysis
Call: principal(r = stock, nfactors = 2, rotate = "none")
Standardized loadings (pattern matrix) based upon correlation matrix
PC1 PC2 h2 u2 com
V1 0.73 -0.44 0.73 0.27 1.6
V2 0.83 -0.28 0.77 0.23 1.2
V3 0.73 -0.37 0.67 0.33 1.5
V4 0.60 0.69 0.85 0.15 2.0
V5 0.56 0.72 0.83 0.17 1.9
PC1 PC2
SS loadings 2.44 1.41
Proportion Var 0.49 0.28
Cumulative Var 0.49 0.77
Proportion Explained 0.63 0.37
Cumulative Proportion 0.63 1.00
Mean item complexity = 1.6
Test of the hypothesis that 2 components are sufficient.
The root mean square of the residuals (RMSR) is 0.1
with the empirical chi square 19.17 with prob < 1.2e-05
Fit based upon off diagonal values = 0.95
principal(stock, nfactors = 2, rotate = "varimax")Principal Components Analysis
Call: principal(r = stock, nfactors = 2, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
RC1 RC2 h2 u2 com
V1 0.85 0.04 0.73 0.27 1.0
V2 0.85 0.22 0.77 0.23 1.1
V3 0.81 0.08 0.67 0.33 1.0
V4 0.13 0.91 0.85 0.15 1.0
V5 0.08 0.91 0.83 0.17 1.0
RC1 RC2
SS loadings 2.13 1.72
Proportion Var 0.43 0.34
Cumulative Var 0.43 0.77
Proportion Explained 0.55 0.45
Cumulative Proportion 0.55 1.00
Mean item complexity = 1
Test of the hypothesis that 2 components are sufficient.
The root mean square of the residuals (RMSR) is 0.1
with the empirical chi square 19.17 with prob < 1.2e-05
Fit based upon off diagonal values = 0.95
# The Maximum Likelihood Method
fa(stock, nfactors = 2, rotate = "none", fm = "mle")Factor Analysis using method = ml
Call: fa(r = stock, nfactors = 2, rotate = "none", fm = "mle")
Standardized loadings (pattern matrix) based upon correlation matrix
ML1 ML2 h2 u2 com
V1 0.12 0.75 0.58 0.417 1.1
V2 0.33 0.79 0.73 0.275 1.3
V3 0.19 0.65 0.46 0.542 1.2
V4 1.00 -0.01 1.00 0.005 1.0
V5 0.69 0.03 0.47 0.530 1.0
ML1 ML2
SS loadings 1.62 1.61
Proportion Var 0.32 0.32
Cumulative Var 0.32 0.65
Proportion Explained 0.50 0.50
Cumulative Proportion 0.50 1.00
Mean item complexity = 1.1
Test of the hypothesis that 2 factors are sufficient.
The degrees of freedom for the null model are 10 and the objective function was 1.74 with Chi Square of 173.31
The degrees of freedom for the model are 1 and the objective function was 0.02
The root mean square of the residuals (RMSR) is 0.02
The df corrected root mean square of the residuals is 0.06
The harmonic number of observations is 103 with the empirical chi square 0.78 with prob < 0.38
The total number of observations was 103 with Likelihood Chi Square = 1.97 with prob < 0.16
Tucker Lewis Index of factoring reliability = 0.939
RMSEA index = 0.102 and the 90 % confidence intervals are 0 0.302
BIC = -2.66
Fit based upon off diagonal values = 1
Measures of factor score adequacy
ML1 ML2
Correlation of (regression) scores with factors 1.00 0.90
Multiple R square of scores with factors 1.00 0.81
Minimum correlation of possible factor scores 0.99 0.63
fa(stock, nfactors = 2, rotate = "varimax", fm = "mle")Factor Analysis using method = ml
Call: fa(r = stock, nfactors = 2, rotate = "varimax", fm = "mle")
Standardized loadings (pattern matrix) based upon correlation matrix
ML2 ML1 h2 u2 com
V1 0.76 0.03 0.58 0.417 1.0
V2 0.82 0.23 0.73 0.275 1.2
V3 0.67 0.11 0.46 0.542 1.1
V4 0.11 0.99 1.00 0.005 1.0
V5 0.11 0.68 0.47 0.530 1.1
ML2 ML1
SS loadings 1.72 1.51
Proportion Var 0.34 0.30
Cumulative Var 0.34 0.65
Proportion Explained 0.53 0.47
Cumulative Proportion 0.53 1.00
Mean item complexity = 1.1
Test of the hypothesis that 2 factors are sufficient.
The degrees of freedom for the null model are 10 and the objective function was 1.74 with Chi Square of 173.31
The degrees of freedom for the model are 1 and the objective function was 0.02
The root mean square of the residuals (RMSR) is 0.02
The df corrected root mean square of the residuals is 0.06
The harmonic number of observations is 103 with the empirical chi square 0.78 with prob < 0.38
The total number of observations was 103 with Likelihood Chi Square = 1.97 with prob < 0.16
Tucker Lewis Index of factoring reliability = 0.939
RMSEA index = 0.102 and the 90 % confidence intervals are 0 0.302
BIC = -2.66
Fit based upon off diagonal values = 1
Measures of factor score adequacy
ML2 ML1
Correlation of (regression) scores with factors 0.90 1.00
Multiple R square of scores with factors 0.82 0.99
Minimum correlation of possible factor scores 0.64 0.98
######### Discriminant Analysis Looking for lower dimensional structure
Data <- read.table("C:/Users/poster/Desktop/data/T1-3.dat", header = F)
Lizards <- Data[, c(2, 3, 1)]
Gender = c("f", "m", "f", "f", "m", "f", "m", "f", "m", "f", "m", "f", "m",
"m", "m", "m", "f", "m", "m", "m", "f", "f", "m", "f", "f")
sp <- scatterplot3d(Lizards[Gender == "f", ], box = FALSE, xlab = "Snout-Vent Length",
xlim = c(40, 90), ylab = "Hind Limb Span", ylim = c(90, 170), zlab = "Weight",
mar = c(5, 3, 0, 3), zlim = c(0, 16), type = "h", sub = "Red=Male & Black=Female")
sp$points3d(Lizards[Gender == "m", ], col = "red", type = "h")###### Classification Example for Motivation
###### Hemophiliac Example 11.3 in Johnson and Wichern, Sixth Edition Detection
###### of Hemophilia A carriers. Two measurement from blood samples X1 =
###### log10(AHF activity) X2 = log10(AHF-like antigen) where AHF =
###### Autohemophilic factor. Group 1 (Normal) : women who did not carry
###### hemophilia (n1 = 30) Group 2 (Obligatory Carriers) : women who are known
###### to be hemophilia carriers (daughters of hemophiliacs, mothers with more
###### than one hemophiliac sons, mothers with one hemophiliac son and other
###### hemophiliac relatives.) (n2 = 45)
rm(list = ls(all = TRUE))
Data = read.table("C:/Users/poster/Desktop/data/T11-8.dat", header = F)
names(Data) = c("Group", "AHF.Activity", "AHF.Antigen")
X_1 = Data[Data$Group == 1, 2:3]
X_2 = Data[Data$Group == 2, 2:3]
mar = c(5, 6, 0, 0)
dataEllipse(x = X_1$AHF.Activity, y = X_1$AHF.Antigen, center.cex = 0.75, xlim = c(-1,
1), ylim = c(-1, 1), xlab = expression(log(Activity)), ylab = expression(log(Antigen)),
log = "", levels = c(0.5, 0.95), center.pch = 3, col = "black")
dataEllipse(x = X_2$AHF.Activity, y = X_2$AHF.Antigen, center.cex = 0.75, log = "",
levels = c(0.5, 0.95), center.pch = 3, col = "blue", add = TRUE)
legend(0, -0.4, fill = c("black", "blue"), legend = c("Obligatory Carrier",
"Normal"))
x_0 = c(-0.21, -0.044) # new point
points(x = x_0[1], y = x_0[2], col = "red", pch = 16, cex = 2)
####### Linear Discriminant Function
n_1 = dim(X_1)[1]
n_1[1] 30
n_2 = dim(X_2)[1]
n_2[1] 45
xbar_1 = colMeans(X_1) #Normal (population 1)
xbar_2 = colMeans(X_2) #Obligatory Carriers (population 2)
S_1 = var(X_1)
S_2 = var(X_2)
S_p = ((n_1 - 1) * S_1 + (n_2 - 1) * S_2)/(n_1 + n_2 - 2)
a_hat = solve(S_p) %*% (xbar_1 - xbar_2)
a_hat [,1]
AHF.Activity 19.31900
AHF.Antigen -17.12424
ybar_1 = t(a_hat) %*% xbar_1
ybar_2 = t(a_hat) %*% xbar_2
mhat = (1/2) * (ybar_1 + ybar_2)
mhat [,1]
[1,] -3.559472
#### The direction of vector a
arrows(x0 = (0 - 1.2)/2, y0 = (0 - 1)/2, x1 = (1 - 1.2)/2, y1 = (a_hat[2]/a_hat[1] -
1)/2, code = 2) # Translated and scaled version of vector a_hat
abline(b = -a_hat[1]/a_hat[2], a = mhat/a_hat[2], lty = "dashed", lwd = 2, col = "orange") #Separting (LDF) line
text(0.55, 0.5, expression(x[2] == frac(19.32, 17.12) * x[1] + frac(3.56, 17.12)),
cex = 0.7)
text(-0.1, -1, expression("Direction of " * hat(a)))
########## Classifying a new observation
x_0 = c(-0.21, -0.044)
y_0 = t(a_hat) %*% x_0
y_0 [,1]
[1,] -3.303523
mhat [,1]
[1,] -3.559472
1/(1/exp(y_0 - mhat) + 1) #posterior of pi_1|x_0 [,1]
[1,] 0.5636403
1 - 1/(1/exp(y_0 - mhat) + 1) #posterior of pi_2|x_0 [,1]
[1,] 0.4363597
##### y_0 > m_hat ==> assign x_0 to poulation 1
##### Quadratic Discriminant Function
khat = 1/2 * (log(det(S_1)) - log(det(S_2))) + 1/2 * (t(xbar_1) %*% solve(S_1) %*%
xbar_1 - t(xbar_2) %*% solve(S_2) %*% xbar_2)
yy_0 = -1/2 * t(x_0) %*% (solve(S_1) - solve(S_2)) %*% x_0 + (t(xbar_1) %*%
solve(S_1) - t(xbar_2) %*% solve(S_2)) %*% x_0
yy_0 [,1]
[1,] -3.039055
khat [,1]
[1,] -3.301692
1/(1/exp(yy_0 - khat) + 1) #posterior of pi_1|x_0 [,1]
[1,] 0.5652844
1 - 1/(1/exp(yy_0 - khat) + 1) #posterior of pi_2|x_0 [,1]
[1,] 0.4347156
##### yy_0 > khat ==> assign x_0 to poulation 1
(solve(S_1) - solve(S_2)) # Quadratic AHF.Activity AHF.Antigen
AHF.Activity 62.26556 -70.11627
AHF.Antigen -70.11627 85.27190
(t(xbar_1) %*% solve(S_1) - t(xbar_2) %*% solve(S_2)) # Linear AHF.Activity AHF.Antigen
[1,] 12.76723 -10.22016
-khat # Constant [,1]
[1,] 3.301692
x = seq(-1, 1, 0.01)
y = seq(-1, 1, 0.01)
z = outer(x, y, function(x, y) (-(1/2) * (62.266 * x^2 - 140.232 * x * y + 85.272 *
y^2) + 12.767 * x - 10.22 * y + 3.302))
contour(x, y, z, levels = 0, labels = NULL, add = T)
text(0.08, -0.8, expression(z == -31.133 * x[1]^2 + 70.166 * x[1] * x[2] - 42.636 *
x[2]^2 + 12.767 * x[1] - 10.22 * x[2] + 3.302), cex = 0.7)###### Use functions
Data = read.table("C:/Users/poster/Desktop/data/T11-8.dat", header = F)
z = lda(V1 ~ ., as.data.frame(Data), prior = c(1, 1)/2)
predict(z, as.data.frame(t(c(1, -0.21, -0.044))))$class
[1] 1
Levels: 1 2
$posterior
1 2
1 0.5636403 0.4363597
$x
LD1
1 -0.1196716
z2 = qda(V1 ~ ., as.data.frame(Data), prior = c(1, 1)/2)
predict(z2, as.data.frame(t(c(1, -0.21, -0.044))))$class
[1] 1
Levels: 1 2
$posterior
1 2
1 0.5652844 0.4347156
###### Apparent Error Rates Example 11.8 Classification Alaskan and Canadian
###### Salmon X1: Class X2: Gender (delete) X3: Diameter of rings for the
###### first-year Freshwater growth X4: Diameter of rings for the first-year
###### Marine growth
rm(list = ls())
Data = read.table("C:/Users/poster/Desktop/data/T11-2.dat", header = F)[, c(1,
3, 4)]
names(Data) = c("Class", "Freshwater", "Marine")
plot(Data[1:50, c(2, 3)], col = "blue", xlim = c(40, 200), ylim = c(250, 550))
points(Data[51:100, c(2, 3)], col = "red")table(Data$Class) # n1=n2=50
1 2
50 50
z = lda(Class ~ ., Data)
newclass = predict(z, Data)$class
table(Data$Class, newclass) newclass
1 2
1 44 6
2 1 49
(1 + 6)/(50 + 50)[1] 0.07
z = qda(Class ~ ., Data)
newclass = predict(z, Data)$class
table(Data$Class, newclass) newclass
1 2
1 45 5
2 2 48
(2 + 5)/(50 + 50)[1] 0.07
######### Training and Validation
N = table(Data$Class)
N
1 2
50 50
train <- sample(1:100, 50) #Choose 50 obs for training
table(Data[train, 1])
1 2
23 27
z <- lda(Class ~ ., data = Data, prior = c(1, 1)/2, subset = train)
table(Data[-train, 1], predict(z, Data[-train, ])$class)
1 2
1 21 6
2 0 23
########## Holdout (Lachenbruch's) Method
holdout.class = NULL
vec = 1:100
for (i in 1:100) {
train = vec[-i]
z <- lda(Class ~ ., data = Data, prior = c(1, 1)/2, subset = train)
holdout.class[i] = predict(z, Data[i, ])$class
}
table(Data$Class, holdout.class) holdout.class
1 2
1 44 6
2 1 49
## Cross Validation and Bootstrap could be used here too!
##### Logistic Regression
fit = glm((Class - 1) ~ ., family = binomial(link = "logit"), data = Data)
fitted.results <- fitted(fit)
fitted.class <- ifelse(fitted.results > 0.5, 1, 0)
table(Data$Class, fitted.class + 1)
1 2
1 46 4
2 3 47
7/100[1] 0.07
########## Classifying when there are more than 2 populations
z <- lda(Species ~ ., iris, prior = c(1, 1, 1)/3)
table(iris$Species, predict(z, iris)$class)
setosa versicolor virginica
setosa 50 0 0
versicolor 0 48 2
virginica 0 1 49
z <- qda(Species ~ ., iris, prior = c(1, 1, 1)/3)
table(iris$Species, predict(z, iris)$class)
setosa versicolor virginica
setosa 50 0 0
versicolor 0 48 2
virginica 0 1 49
################ Hierarchical Clustering
mydata = cbind(1:60, iris[c(1:20, 51:70, 101:120), 1:4]) # First 20 from each Species
mydata 1:60 Sepal.Length Sepal.Width Petal.Length Petal.Width
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
5 5 5.0 3.6 1.4 0.2
6 6 5.4 3.9 1.7 0.4
7 7 4.6 3.4 1.4 0.3
8 8 5.0 3.4 1.5 0.2
9 9 4.4 2.9 1.4 0.2
10 10 4.9 3.1 1.5 0.1
11 11 5.4 3.7 1.5 0.2
12 12 4.8 3.4 1.6 0.2
13 13 4.8 3.0 1.4 0.1
14 14 4.3 3.0 1.1 0.1
15 15 5.8 4.0 1.2 0.2
16 16 5.7 4.4 1.5 0.4
17 17 5.4 3.9 1.3 0.4
18 18 5.1 3.5 1.4 0.3
19 19 5.7 3.8 1.7 0.3
20 20 5.1 3.8 1.5 0.3
51 21 7.0 3.2 4.7 1.4
52 22 6.4 3.2 4.5 1.5
53 23 6.9 3.1 4.9 1.5
54 24 5.5 2.3 4.0 1.3
55 25 6.5 2.8 4.6 1.5
56 26 5.7 2.8 4.5 1.3
57 27 6.3 3.3 4.7 1.6
58 28 4.9 2.4 3.3 1.0
59 29 6.6 2.9 4.6 1.3
60 30 5.2 2.7 3.9 1.4
61 31 5.0 2.0 3.5 1.0
62 32 5.9 3.0 4.2 1.5
63 33 6.0 2.2 4.0 1.0
64 34 6.1 2.9 4.7 1.4
65 35 5.6 2.9 3.6 1.3
66 36 6.7 3.1 4.4 1.4
67 37 5.6 3.0 4.5 1.5
68 38 5.8 2.7 4.1 1.0
69 39 6.2 2.2 4.5 1.5
70 40 5.6 2.5 3.9 1.1
101 41 6.3 3.3 6.0 2.5
102 42 5.8 2.7 5.1 1.9
103 43 7.1 3.0 5.9 2.1
104 44 6.3 2.9 5.6 1.8
105 45 6.5 3.0 5.8 2.2
106 46 7.6 3.0 6.6 2.1
107 47 4.9 2.5 4.5 1.7
108 48 7.3 2.9 6.3 1.8
109 49 6.7 2.5 5.8 1.8
110 50 7.2 3.6 6.1 2.5
111 51 6.5 3.2 5.1 2.0
112 52 6.4 2.7 5.3 1.9
113 53 6.8 3.0 5.5 2.1
114 54 5.7 2.5 5.0 2.0
115 55 5.8 2.8 5.1 2.4
116 56 6.4 3.2 5.3 2.3
117 57 6.5 3.0 5.5 1.8
118 58 7.7 3.8 6.7 2.2
119 59 7.7 2.6 6.9 2.3
120 60 6.0 2.2 5.0 1.5
d = dist(mydata, method = "euclidean")
d # distance matrix 1 2 3 4 5 6 7
2 1.135782
3 2.063977 1.044031
4 3.069202 2.027313 1.029563
5 4.002499 3.061046 2.063977 1.191638
6 5.037857 4.146082 3.190611 2.315167 1.174734
7 6.022458 5.025933 4.008740 3.018278 2.051828 1.410674
8 7.002143 6.014981 5.016971 4.031129 3.008322 2.118962 1.086278
9 8.052950 7.018547 6.015812 5.008992 4.104875 3.336165 2.073644
10 9.012214 8.001875 7.007139 6.008328 5.027922 4.125530 3.038092
11 10.006998 9.041571 8.048602 7.071068 6.014981 5.011986 4.092676
12 11.006362 10.010494 9.007774 8.008745 7.008566 6.054750 5.008992
13 12.014574 11.000909 10.003499 9.003888 8.025584 7.095773 6.019967
14 13.038021 12.019151 11.011358 10.013491 9.052624 8.152914 7.027090
15 14.027829 13.070960 12.077251 11.105854 10.041912 9.025519 8.114801
16 15.040612 14.094325 13.096564 12.121881 11.053506 10.018982 9.123048
17 16.009372 15.036954 14.036381 13.052203 12.012493 11.007270 10.045397
18 17.000294 16.009372 15.008997 14.015349 13.001154 12.014574 11.011812
19 18.015271 17.040540 16.047741 15.058220 14.022482 13.004230 12.060680
20 19.002895 18.019434 17.016756 16.023420 15.002333 14.005356 13.016144
51 20.396813 19.436563 18.501081 17.505713 16.507271 15.428869 14.625321
52 21.309153 20.336912 19.386077 18.383144 17.390227 16.326053 15.470294
53 22.390623 21.423118 20.481699 19.481786 18.487834 17.416659 16.585234
54 23.207111 22.199550 21.235583 20.221523 19.259024 18.239243 17.285254
55 24.297737 23.313730 22.360233 21.354157 20.367130 19.314243 18.429596
56 25.232321 24.238399 23.272516 22.260054 21.282622 20.247222 19.317867
57 26.274132 25.296245 24.332900 23.326594 22.330025 21.274633 20.383327
58 27.101660 26.088503 25.106175 24.092737 23.123581 22.122839 21.123210
59 28.249956 27.264446 26.302091 25.295454 24.300206 23.252097 22.349273
60 29.143438 28.140185 27.160633 26.148040 25.170220 24.152019 23.179948
61 30.121587 29.104295 28.124900 27.111068 26.145937 25.146968 24.145807
62 31.167611 30.174990 29.198973 28.190069 27.197426 26.163333 25.221618
63 32.154315 31.148836 30.176481 29.166076 28.184393 27.164315 26.204007
64 33.206776 32.214593 31.241799 30.231937 29.240896 28.204432 27.269030
65 34.097801 33.099094 32.115417 31.107234 30.114780 29.093986 28.126322
66 35.187498 34.200731 33.227398 32.222042 31.218264 30.176481 29.252521
67 36.163518 35.168025 34.187425 33.176799 32.187265 31.159268 30.202814
68 37.122231 36.122431 35.141713 34.132096 33.141816 32.120399 31.156219
69 38.186385 37.183733 36.210220 35.200284 34.215494 33.190059 32.234299
70 39.106393 38.102493 37.119671 36.109971 35.123069 34.107624 33.131556
101 40.347615 39.363689 38.392056 37.382750 36.389971 35.342184 34.422376
102 41.215410 40.217906 39.240158 38.228916 37.242852 36.213395 35.258758
103 42.333320 41.348519 40.380317 39.372960 38.374862 37.325996 36.412910
104 43.255058 42.263104 41.288013 40.277785 39.285875 38.248922 37.309918
105 44.289615 43.300346 42.326469 41.317188 40.323318 39.281930 38.350228
106 45.410902 44.429045 43.464353 42.457390 41.458051 40.404826 39.502405
107 46.140004 45.134355 44.147707 43.135832 42.155427 41.142922 40.155572
108 47.336772 46.350189 45.380723 44.372627 43.374532 42.328832 41.410144
109 48.264687 47.269546 46.295788 45.285980 44.294695 43.259912 42.318199
110 49.323422 48.342838 47.368133 46.362269 45.356587 44.306884 43.390437
111 50.189441 49.198882 48.217424 47.210380 46.209739 45.175436 44.229289
112 51.199902 50.203984 49.224283 48.215454 47.221499 46.191449 45.238258
113 52.226047 51.235047 50.256343 49.248959 48.249560 47.213346 46.271481
114 53.165402 52.164068 51.180270 50.170310 49.182314 48.161188 47.189723
115 54.180347 53.182516 52.198180 51.189159 50.197709 49.172452 48.206846
116 55.194203 54.202491 53.219357 52.212068 51.212694 50.180275 49.228955
117 56.192348 55.199004 54.216972 53.208834 52.210918 51.180270 50.227781
118 57.340562 56.361068 55.385197 54.379592 53.370591 52.321506 51.404961
119 58.362916 57.372990 56.401507 55.393411 54.396231 53.353819 52.426139
120 59.145160 58.142067 57.158289 56.148998 55.159859 54.141943 53.167659
8 9 10 11 12 13 14
2
3
4
5
6
7
8
9 1.272792
10 2.027313 1.144552
11 3.041381 2.376973 1.272792
12 4.006245 3.074085 2.029778 1.208305
13 5.021952 4.022437 3.004996 2.206808 1.100000
14 6.067949 5.011986 4.065710 3.296968 2.161018 1.157584
15 7.077429 6.261789 5.169139 4.042277 3.243455 2.459675 2.066398
16 8.095060 7.279423 6.198387 5.061620 4.226109 3.445287 2.858321
17 9.027181 8.127115 7.072482 6.009992 5.073460 4.155719 3.339162
18 10.002000 9.047652 8.015610 7.010706 6.012487 5.037857 4.125530
19 11.031772 10.129166 9.066973 8.009370 7.070361 6.130253 5.291503
20 12.007498 11.059837 10.028460 9.006109 8.016857 7.055494 6.122091
51 13.591174 12.774193 11.719642 10.700000 9.845303 9.025519 8.424963
52 14.446107 13.579028 12.538740 11.529961 10.616026 9.755511 9.053729
53 15.554742 14.704761 13.657233 12.643575 11.750745 10.891740 10.206371
54 16.276363 15.314699 14.306991 13.357769 12.355970 11.409645 10.572606
55 17.404310 16.503030 15.466739 14.467895 13.525531 12.614674 11.835962
56 18.304644 17.364331 16.345336 15.365871 14.380195 13.449907 12.609520
57 19.362335 18.455893 17.420964 16.406401 15.454126 14.542352 13.722245
58 20.121879 19.124591 18.125672 17.170323 16.141252 15.158826 14.225681
59 21.322054 20.403186 19.364400 18.354836 17.398276 16.459951 15.620179
60 22.174986 21.198349 20.191582 19.215619 18.203846 17.239200 16.322684
61 23.143034 22.140912 21.143084 20.191335 19.163768 18.173057 17.235429
62 24.206404 23.254892 22.231959 21.230167 20.244011 19.287561 18.388312
63 25.185909 24.216730 23.196551 22.214860 21.219802 20.239812 19.332615
64 26.251476 25.302569 24.277768 23.276812 22.293273 21.337291 20.442603
65 27.115125 26.143642 25.127276 24.130893 23.132229 22.156940 21.222394
66 28.228177 27.290475 26.255285 25.236878 24.268910 23.309011 22.413166
67 29.192636 28.226760 27.211395 26.214881 25.217058 24.253041 23.328309
68 30.141831 29.170704 28.152087 27.158056 26.160657 25.183129 24.251804
69 31.218104 30.249463 29.231148 28.241459 27.248853 26.271087 25.352909
70 32.120710 31.139364 30.126566 29.138463 28.134498 27.150322 26.206488
101 33.410178 32.468754 31.448529 30.438627 29.460312 28.517714 27.619920
102 34.248796 33.280325 32.267166 31.273151 30.275898 29.309043 28.382741
103 35.391242 34.455188 33.424691 32.409104 31.441215 30.488358 29.595270
104 36.294765 35.338506 34.317634 33.313961 32.327388 31.365427 30.449138
105 37.334970 36.383788 35.361985 34.353894 33.371994 32.413886 31.501429
106 38.477786 37.548635 36.514518 35.495774 34.534186 33.583925 32.697706
107 39.154438 38.161106 37.160732 36.179276 35.163618 34.182305 33.222432
108 40.387374 39.445912 38.415231 37.401203 36.430070 35.470551 34.567904
109 41.300726 40.340674 39.318952 38.316837 37.331220 36.360831 35.439949
110 42.371335 41.433199 40.403713 39.378928 38.410155 37.456241 36.544904
111 43.214465 42.254349 41.232754 40.220144 39.235443 38.265258 37.329479
112 44.224202 43.256676 42.238490 41.235058 40.244627 39.269581 38.332493
113 45.254944 44.296614 43.273780 42.261803 41.279293 40.308932 39.377786
114 46.182139 45.200111 44.191176 43.197454 42.195023 41.214439 40.262017
115 47.199576 46.222289 45.212277 44.212781 43.214002 42.237661 41.286560
116 48.216698 47.251561 46.233538 45.222782 44.234602 43.262570 42.319381
117 49.213514 48.247176 47.227746 46.219693 45.230742 44.255960 43.313855
118 50.383430 49.444818 48.412602 47.385019 46.417454 45.459322 44.541778
119 51.405253 50.454336 49.427624 48.415597 47.439962 46.472142 45.554363
120 52.157262 51.173235 50.162037 49.168588 48.167728 47.180504 46.224452
15 16 17 18 19 20 51
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 1.140175
17 2.054264 1.174734
18 3.128898 2.278157 1.126943
19 4.038564 3.067572 2.066398 1.240967
20 5.062608 4.090232 3.024897 2.024846 1.183216
51 7.195137 6.274552 5.622277 4.979960 4.032369 4.052160
52 7.910752 6.938300 6.159545 5.369358 4.373786 4.060788 1.187434
53 9.022195 8.056054 7.284230 6.487681 5.441507 5.072475 2.017424
54 9.645206 8.689649 7.724636 6.734983 5.794825 5.065570 3.544009
55 10.732195 9.747820 8.861151 7.945439 6.891299 6.245799 4.053394
56 11.599569 10.600472 9.661780 8.686772 7.670724 6.881860 5.186521
57 12.607537 11.586630 10.685036 9.749872 8.677557 7.913280 6.044833
58 13.319910 12.338557 11.307520 10.264015 9.308598 8.350449 7.494665
59 14.512753 13.508516 12.575373 11.612493 10.537077 9.729851 8.016857
60 15.355129 14.349216 13.350655 12.333288 11.336225 10.401442 9.226592
61 16.326972 15.349593 14.316773 13.272528 12.306909 11.346365 10.346014
62 17.340704 16.324828 15.351873 14.358621 13.318033 12.410077 11.068424
63 18.323755 17.336090 16.337074 15.321553 14.297902 13.383198 12.109913
64 19.390462 18.375255 17.408331 16.415237 15.368149 14.465822 13.034569
65 20.204455 19.195833 18.197253 17.188659 16.168797 15.214138 14.116303
66 21.314314 20.300246 19.337528 18.355653 17.291327 16.391156 15.006332
67 22.307398 21.288025 20.305172 19.301554 18.273752 17.330032 16.063935
68 23.232305 22.226561 21.232051 20.221523 19.195572 18.246918 17.064876
69 24.330639 23.330238 22.337860 21.329557 20.300000 19.370338 18.046883
70 25.206943 24.204752 23.200216 22.184229 21.170262 20.207424 19.083501
101 26.553154 25.519208 24.569697 23.589828 22.537524 21.628222 20.084820
102 27.364027 26.345967 25.363162 24.359392 23.331095 22.387943 21.049941
103 28.502456 27.480357 26.528287 25.548777 24.484281 23.584741 22.044954
104 29.400340 28.379218 27.409123 26.413822 25.369864 24.439926 23.033671
105 30.440926 29.416492 28.451889 27.462338 26.414011 25.489213 24.044542
106 31.591296 30.569102 29.624821 28.649258 27.577164 26.685951 25.089839
107 32.252132 31.240038 30.234748 29.216605 28.215776 27.233986 26.096551
108 33.481786 32.462440 31.505079 30.519830 29.458106 28.547154 27.053650
109 34.391569 33.377537 32.401543 31.403344 30.360336 29.427708 28.034800
110 35.446015 34.415985 33.463114 32.486151 31.424354 30.503279 29.058045
111 36.270925 35.250532 34.273897 33.281076 32.240502 31.291692 30.012831
112 37.292761 36.277679 35.294334 34.294314 33.259886 32.309441 31.019671
113 38.315793 37.296917 36.322170 35.329733 34.286003 33.343215 32.018901
114 39.254809 38.241600 37.246476 36.238239 35.220307 34.251861 33.039824
115 40.267729 39.249586 38.259770 37.256946 36.235066 35.268399 34.040564
116 41.270086 40.248975 39.269581 38.275188 37.239898 36.283467 35.021850
117 42.267482 41.249970 40.269343 39.271746 38.235716 37.280692 36.015136
118 43.438462 42.410376 41.457207 40.479007 39.415860 38.489089 37.074115
119 44.479996 43.462743 42.496470 41.506867 40.453430 39.526700 38.085430
120 45.215152 44.208483 43.210531 42.200711 41.182278 40.212685 39.026914
52 53 54 55 56 57 58
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
51
52
53 1.191638
54 2.431049 2.109502
55 3.029851 2.083267 1.627882
56 4.085340 3.275668 2.130728 1.300000
57 5.006995 4.055860 3.349627 2.076054 1.319091
58 6.370243 5.683309 4.117038 3.695944 2.515949 2.467793
59 7.012845 6.021628 5.189412 4.007493 3.135283 2.085665 2.433105
60 8.127730 7.284230 6.022458 5.215362 4.077990 3.354102 2.167948
61 9.254729 8.427930 7.048404 6.352165 5.217279 4.602173 3.034798
62 10.018982 9.082951 8.045496 7.039886 6.017475 5.050743 4.291853
63 11.075198 10.133114 9.019423 8.075890 7.056203 6.176569 5.171073
64 12.009580 11.033132 10.060815 9.010549 8.013738 7.017122 6.309517
65 13.060628 12.143311 11.024065 10.092572 9.045994 8.120961 7.065409
66 14.004285 13.011533 12.093387 11.008179 10.055347 9.018315 8.312641
67 15.022650 14.066272 13.030349 12.035780 11.004090 10.031451 9.140022
68 16.031843 15.075145 14.012494 13.038405 12.011245 11.060289 10.076706
69 17.030561 16.045560 15.026310 14.016419 13.024976 12.052801 11.154371
70 18.045775 17.094151 16.003125 15.051578 14.017846 13.077462 12.036195
101 19.085859 18.072355 17.206975 16.101242 15.142655 14.089003 13.465140
102 20.028230 19.041271 18.050485 17.026744 16.023108 15.028639 14.175683
103 21.067748 20.035219 19.191144 18.067927 17.134760 16.075447 15.432757
104 22.031795 21.023320 20.095024 19.029976 18.050762 17.029680 16.252384
105 23.048427 22.033384 21.131493 20.049190 19.083501 18.047160 17.309246
106 24.129857 23.081378 22.277792 21.133149 20.216330 19.147846 18.540496
107 25.055538 24.094813 23.017602 22.061278 21.021180 20.066141 19.050984
108 26.081219 25.044960 24.189667 23.078778 22.137299 21.089334 20.387496
109 27.043668 26.024988 25.099203 24.034558 23.065776 22.046542 21.240056
110 28.077749 27.051432 26.199809 25.087447 24.143115 23.079645 22.379008
111 29.010688 28.008213 27.064922 26.012689 25.032978 24.007707 23.161174
112 30.017495 29.012583 28.053877 27.012405 26.028830 25.016395 24.148499
113 31.025151 30.012331 29.087282 28.023205 27.053466 26.023643 25.199603
114 32.023117 31.033208 30.026155 29.019649 28.014818 27.023138 26.087162
115 33.025596 32.033576 31.044484 30.025822 29.027229 28.023205 27.114019
116 34.018818 33.016057 32.067273 31.020961 30.038142 29.014996 28.152797
117 35.016282 34.009116 33.060399 32.014684 31.031113 30.013497 29.144468
118 36.102354 35.069360 34.222653 33.111025 32.165976 31.105787 30.377623
119 37.114014 36.076724 35.204261 34.108797 33.163233 32.121332 31.361282
120 38.018548 37.022020 36.018051 35.010998 34.010881 33.021205 32.068520
59 60 61 62 63 64 65
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
51
52
53
54
55
56
57
58
59
60 1.870829
61 2.944486 1.360147
62 3.114482 2.163331 1.884144
63 4.159327 3.171750 2.300000 1.392839
64 5.026927 4.182105 3.552464 2.076054 1.466288
65 6.164414 5.029911 4.155719 3.082207 2.213594 1.571623
66 7.007139 6.217717 5.483612 4.086563 3.258834 2.118962 1.702939
67 8.065978 7.044147 6.213695 5.017968 4.159327 3.051229 2.204541
68 9.056489 8.034924 7.105632 6.029925 5.029911 4.080441 3.069202
69 10.034939 9.089554 8.168843 7.058328 6.044833 5.054701 4.207137
70 11.076552 10.014490 9.043230 8.036790 7.019259 6.094260 5.028916
101 12.151132 11.322102 10.577334 9.246080 8.458723 7.218033 6.621933
102 13.049521 12.085115 11.202678 10.053358 9.127431 8.033679 7.189576
103 14.092196 13.311273 12.505199 11.211155 10.328117 9.161878 8.496470
104 15.044600 14.152738 13.287212 12.092146 11.170497 10.050373 9.259590
105 16.070781 15.199671 14.352003 13.130499 12.229881 11.091438 10.318430
106 17.165372 16.420414 15.607050 14.318170 13.422742 12.262137 11.603879
107 18.089223 17.017050 16.054594 15.045930 14.072669 13.066369 12.067311
108 19.095287 18.285787 17.423547 16.200926 15.267940 14.147791 13.395148
109 20.046446 19.158810 18.250205 17.103801 16.138773 15.062868 14.228844
110 21.107818 20.269682 19.427043 18.184059 17.293351 16.151471 15.353827
111 22.019083 21.088860 20.180436 19.038382 18.095856 17.022632 16.113349
112 23.020209 22.082799 21.154196 20.042704 19.076425 18.020544 17.115198
113 24.031230 23.123581 22.213734 21.067985 20.118151 19.042846 18.157643
114 25.032379 24.038719 23.086576 22.026802 21.051841 20.019241 19.068823
115 26.040545 25.056137 24.120531 23.036276 22.080987 21.029979 20.087558
116 27.029983 26.085628 25.166049 24.044542 23.098485 22.030660 21.109713
117 28.019279 27.083205 26.151291 25.042763 24.078621 23.021077 22.106108
118 29.124560 28.283211 27.408210 26.203435 25.282009 24.166299 23.337523
119 30.126234 29.275758 28.370760 27.209006 26.251667 25.165453 24.339063
120 31.016931 30.035146 29.060970 28.023026 27.023138 26.011536 25.052944
66 67 68 69 70 101 102
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 1.496663
68 2.284732 1.240967
69 3.174902 2.236068 1.349074
70 4.232021 3.125700 2.032240 1.403567
101 5.382379 4.453089 3.933192 2.910326 2.915476
102 6.140847 5.064583 4.220190 3.151190 2.481935 1.667333
103 7.204859 6.369458 5.588381 4.446347 4.062019 2.213594 1.860108
104 8.111720 7.127412 6.259393 5.176872 4.475489 3.132092 2.133073
105 9.146037 8.184742 7.342343 6.237788 5.556978 4.032369 3.187475
106 10.302912 9.474703 8.648121 7.507996 6.967065 5.224940 4.649731
107 11.166915 10.038924 9.082951 8.112953 7.085901 6.441273 5.123475
108 12.172510 11.279628 10.381233 9.275236 8.561542 7.123202 6.303967
109 13.095037 12.134249 11.197321 10.105444 9.290318 8.082698 7.095773
110 14.163333 13.246886 12.370125 11.292475 10.515227 9.050414 8.254090
111 15.029970 14.052046 13.104961 12.070626 11.160197 10.055347 9.041571
112 16.040885 15.050914 14.092906 13.041856 12.135897 11.055315 10.019980
113 17.050513 16.087262 15.141334 14.084034 13.200379 12.031209 11.058481
114 18.057685 17.022338 16.058020 15.027974 14.072313 13.086252 12.002916
115 19.062791 18.034689 17.087130 16.052726 15.108276 14.047064 13.009996
116 20.042954 19.051509 18.103591 17.068099 16.140942 15.018322 14.028899
117 21.033782 20.047444 19.083501 18.050485 17.120456 16.027164 15.024979
118 22.167995 21.245705 20.322893 19.264994 18.416026 17.081862 16.231759
119 23.179948 22.247921 21.310795 20.219050 19.387109 18.091434 17.205232
120 24.034766 23.022815 22.030660 21.006904 20.040459 19.086645 18.012773
103 104 105 106 107 108 109
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
101
102
103
104 1.352775
105 2.092845 1.118034
106 3.120897 2.605763 1.691153
107 4.817676 3.512834 2.958040 3.620773
108 5.029911 4.182105 3.171750 2.068816 3.189044
109 6.042351 5.035871 4.055860 3.284814 2.989983 1.330413
110 7.040597 6.167658 5.101960 4.114608 4.324350 2.242766 1.743560
111 8.065358 7.029936 6.047313 5.339476 4.415880 3.348134 2.249444
112 9.054281 8.009370 7.031358 6.265780 5.288667 4.226109 3.064311
113 10.012492 9.019978 8.011866 7.130919 6.404686 5.098039 4.054627
114 11.136876 10.045895 9.086804 8.392258 7.069653 6.360031 5.165269
115 12.102066 11.039475 10.052860 9.306987 8.108637 7.284230 6.144103
116 13.035720 12.018319 11.014082 10.159232 9.205433 8.133265 7.076722
117 14.021769 13.002307 12.010412 11.113505 10.189210 9.071384 8.023715
118 15.054900 14.147085 13.110683 12.027884 11.645600 10.064293 9.201087
119 16.048676 15.132416 14.100355 13.011533 12.568612 11.039022 10.122747
120 17.088593 16.032155 15.067183 14.216891 13.061011 12.163881 11.059385
110 111 112 113 114 115 116
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
101
102
103
104
105
106
107
108
109
110
111 1.702939
112 2.539685 1.144552
113 3.168596 2.073644 1.153256
114 4.573839 3.184337 2.151743 1.649242
115 5.348832 4.100000 3.108054 2.300000 1.126943
116 6.122091 5.013980 4.050926 3.046309 2.271563 1.252996
117 7.120393 6.019967 5.014978 4.022437 3.190611 2.247221 1.157584
118 8.046117 7.307530 6.399219 5.282045 4.961854 4.026164 2.831960
119 9.106591 8.314445 7.308899 6.242596 5.719266 4.785394 3.689173
120 10.276673 9.083502 8.041144 7.133723 6.035727 5.120547 4.229657
117 118 119
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118 2.163331
119 2.794638 1.577973
120 3.198437 3.581899 2.880972
fit = hclust(d, method = "ave")
fit
Call:
hclust(d = d, method = "ave")
Cluster method : average
Distance : euclidean
Number of objects: 60
plot(fit, cex = 0.5) # display dendogram
groups = cutree(fit, k = 3)
groups # cut tree into 3 clusters 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
19 20 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3
67 68 69 70 101 102 103 104 105 106 107 108 109 110 111 112 113 114
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
115 116 117 118 119 120
3 3 3 3 3 3
# draw dendogram with red borders around the 3 clusters
rect.hclust(fit, k = 3, border = "red")############# K-Means
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) + geom_point()set.seed(20)
irisCluster <- kmeans(iris[, 3:4], 3, nstart = 20)
irisClusterK-means clustering with 3 clusters of sizes 50, 52, 48
Cluster means:
Petal.Length Petal.Width
1 1.462000 0.246000
2 4.269231 1.342308
3 5.595833 2.037500
Clustering vector:
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[71] 2 2 2 2 2 2 2 3 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
[106] 3 2 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 2 3
[141] 3 3 3 3 3 3 3 3 3 3
Within cluster sum of squares by cluster:
[1] 2.02200 13.05769 16.29167
(between_SS / total_SS = 94.3 %)
Available components:
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
ggplot(iris, aes(Petal.Length, Petal.Width, color = as.factor(irisCluster$cluster))) +
geom_point()ggplot(iris, aes(Sepal.Length, Sepal.Width, color = as.factor(irisCluster$cluster))) +
geom_point()table(irisCluster$cluster, iris$Species)
setosa versicolor virginica
1 50 0 0
2 0 48 4
3 0 2 46
set.seed(20)
irisCluster1 <- kmeans(iris[, 1:4], 3, nstart = 20)
irisCluster1K-means clustering with 3 clusters of sizes 38, 62, 50
Cluster means:
Sepal.Length Sepal.Width Petal.Length Petal.Width
1 6.850000 3.073684 5.742105 2.071053
2 5.901613 2.748387 4.393548 1.433871
3 5.006000 3.428000 1.462000 0.246000
Clustering vector:
[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[71] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1
[106] 1 2 1 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1
[141] 1 1 2 1 1 1 2 1 1 2
Within cluster sum of squares by cluster:
[1] 23.87947 39.82097 15.15100
(between_SS / total_SS = 88.4 %)
Available components:
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
ggplot(iris, aes(Petal.Length, Petal.Width, color = as.factor(irisCluster1$cluster))) +
geom_point()ggplot(iris, aes(Sepal.Length, Sepal.Width, color = as.factor(irisCluster1$cluster))) +
geom_point()table(irisCluster1$cluster, iris$Species)
setosa versicolor virginica
1 0 2 36
2 0 48 14
3 50 0 0
pltree(agnes(mydata))pltree(diana(mydata))# https://www.statmethods.net/advstats/cluster.html
# https://www.r-bloggers.com/k-means-clustering-in-r/Rho <- matrix(c(1, 0.4, 0.5, 0.6, 0.4, 1, 0.3, 0.4, 0.5, 0.3, 1, 0.2, 0.6, 0.4,
0.2, 1), 4, 4, byrow = T)
Rho [,1] [,2] [,3] [,4]
[1,] 1.0 0.4 0.5 0.6
[2,] 0.4 1.0 0.3 0.4
[3,] 0.5 0.3 1.0 0.2
[4,] 0.6 0.4 0.2 1.0
Rho11 <- Rho[1:2, 1:2]
Rho12 <- Rho[1:2, 3:4]
Rho21 <- Rho[3:4, 1:2]
Rho22 <- Rho[3:4, 3:4]
G = solve(Rho11) %*% Rho12 %*% solve(Rho22) %*% Rho21
G [,1] [,2]
[1,] 0.4518849 0.28918651
[2,] 0.1463294 0.09474206
H = solve(Rho22) %*% Rho21 %*% solve(Rho11) %*% Rho12
H [,1] [,2]
[1,] 0.2063492 0.2509921
[2,] 0.2777778 0.3402778
eigen(G)eigen() decomposition
$values
[1] 0.5457180317 0.0009089525
$vectors
[,1] [,2]
[1,] 0.9511814 -0.5397975
[2,] 0.3086323 0.8417949
eigen(H)eigen() decomposition
$values
[1] 0.5457180317 0.0009089525
$vectors
[,1] [,2]
[1,] -0.5946273 -0.7738317
[2,] -0.8040014 0.6333913
l1 = sqrt(eigen(G)$vectors[, 1] %*% Rho11 %*% eigen(G)$vectors[, 1])
l1 [,1]
[1,] 1.111239
a1 = eigen(G)$vectors[, 1]/c(l1)
a1[1] 0.8559647 0.2777371
l2 = sqrt(eigen(H)$vectors[, 1] %*% Rho22 %*% eigen(H)$vectors[, 1])
l2 [,1]
[1,] 1.091436
b1 = -eigen(H)$vectors[, 1]/c(l2)
b1[1] 0.5448119 0.7366455
## transfor from a1
a1_2 = solve(Rho22) %*% Rho21 %*% eigen(G)$vectors[, 1]
a1_2 [,1]
[1,] 0.4472376
[2,] 0.6047143
l2_2 = sqrt(t(a1_2) %*% Rho22 %*% a1_2)
l2_2 [,1]
[1,] 0.8209026
b1_2 = a1_2/c(l2_2)
b1_2 [,1]
[1,] 0.5448119
[2,] 0.7366455
# Correlation of U1 and V1
rho_UV = a1 %*% Rho12 %*% b1
rho_UV [,1]
[1,] 0.7387273
sqrt(eigen(G)$values[1])[1] 0.7387273
# Correlation of U and Z1
a1 %*% Rho22 [,1] [,2]
[1,] 0.9115121 0.4489301
### Andrews and Herzberg 1985 Data36. Chemical and Overt Diabetes
# The three primary variables are: glucose area(a measure of glucose
# intolerance, V7), insulin area(a measurement of insulin response to oral
# glucose V8), steady-state plasma glucose(a measure of insulin resistance
# V9).
# Two secondary variable the relative weight(V5) fasting plasma glucose (V6)
# V10 is the classification 3: 'normal',2: 'chemical diabetic', 1: 'overt
# diabetic'
rm(list = ls())
X = read.table("C:/Users/poster/Desktop/data/T36.1", header = F)
head(X) V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 36 1 1 1 0.81 80 356 124 55 3
2 36 1 2 2 0.95 97 289 117 76 3
3 36 1 3 3 0.94 105 319 143 105 3
4 36 1 4 4 1.04 90 356 199 108 3
5 36 1 5 5 1.00 90 323 240 143 3
6 36 1 6 6 0.76 86 381 157 165 3
Y = X[X[, 10] == 3, c(7:9, 5, 6)] #non-diabetic
colnames(Y) = c("GA", "IA", "SSPG", "RW", "FPG")
dim(Y)[1] 76 5
pairs(Y)R = cor(Y)
r11 = R[1:3, 1:3]
r22 = R[4:5, 4:5]
r12 = R[1:3, 4:5]
eig.g = eigen(solve(r11) %*% r12 %*% solve(r22) %*% t(r12))
eig.geigen() decomposition
$values
[1] 2.544060e-01 6.129327e-02 1.559964e-17
$vectors
[,1] [,2] [,3]
[1,] -0.2729779 0.88912304 -0.08602997
[2,] 0.3954839 0.07123149 0.99217368
[3,] -0.8769695 -0.45209103 -0.09049987
eig.h = eigen(solve(r22) %*% t(r12) %*% solve(r11) %*% r12)
eig.heigen() decomposition
$values
[1] 0.25440604 0.06129327
$vectors
[,1] [,2]
[1,] 0.99787687 -0.3355584
[2,] 0.06512875 0.9420194
cca_cor = sqrt(eig.h$values)
cca_cor[1] 0.5043868 0.2475748
# coefficient
l1 = sqrt(diag(t(eig.g$vectors[, 1:2]) %*% r11 %*% eig.g$vectors[, 1:2]))
a = eig.g$vectors[, 1:2] %*% diag(1/l1)
a [,1] [,2]
[1,] -0.3241239 0.97721500
[2,] 0.4695831 0.07828892
[3,] -1.0412816 -0.49688301
l2 = sqrt(diag(t(eig.h$vectors) %*% r22 %*% eig.h$vectors))
b = eig.h$vectors[, 1:2] %*% diag(1/l2)
b [,1] [,2]
[1,] 0.97910272 -0.3724554
[2,] 0.06390341 1.0456010
# use 76 nondiabetic patients to get the cononical variates
U = t(t(a) %*% t(scale(Y[, 1:3])))
V = t(t(b) %*% t(scale(Y[, 4:5])))
# Cor of U and V
rho_UV = t(a) %*% r12 %*% b
rho_UV [,1] [,2]
[1,] -5.043868e-01 1.665335e-16
[2,] 8.153200e-17 2.475748e-01
# Adjust using corr U and V
adj = sign(diag(rho_UV))
adj[1] -1 1
b.adj = b %*% diag(adj)
b [,1] [,2]
[1,] 0.97910272 -0.3724554
[2,] 0.06390341 1.0456010
V.adj = t(t(b.adj) %*% t(scale(Y[, 4:5])))
rho_UV_adj = t(a) %*% r12 %*% b.adj
rho_UV_adj [,1] [,2]
[1,] 5.043868e-01 1.665335e-16
[2,] -8.153200e-17 2.475748e-01
rho_UU = t(a) %*% r11 %*% a
rho_UU [,1] [,2]
[1,] 1.000000e+00 2.775558e-16
[2,] 2.775558e-16 1.000000e+00
rho_VV = t(b.adj) %*% r22 %*% b.adj
rho_VV [,1] [,2]
[1,] 1.000000e+00 0
[2,] 3.469447e-17 1
# Correlation bewteen X/Y and X/Y canonical variates
rho_XU = cor(scale(Y[, 1:3]), U)
rho_XU [,1] [,2]
GA -0.4367729 0.89045987
IA -0.1179190 0.05415582
SSPG -0.8775764 -0.25275458
rho_XV = cor(scale(Y[, 1:3]), V.adj)
rho_XV [,1] [,2]
GA -0.22030250 0.22045540
IA -0.05947678 0.01340761
SSPG -0.44263796 -0.06257566
rho_YU = cor(scale(Y[, 4:5]), U)
rho_YU [,1] [,2]
RW -0.5034474 -0.01510271
FPG -0.1793339 0.23139771
rho_YV = cor(scale(Y[, 4:5]), V.adj)
rho_YV [,1] [,2]
RW -0.9981376 -0.06100262
FPG -0.3555484 0.93465787
# Test if the r12=0
t1 = -(76 - 1 - (2 + 3 + 1)/2) * sum(log(1 - (cca_cor)^2))
t1[1] 25.69149
p_val1 = pchisq(t1, 3 * 2, lower.tail = F)
p_val1[1] 0.0002541122
# Reject H0
# Test if the second canonical varviates
t2 = -(76 - 1 - (2 + 3 + 1)/2) * (log(1 - (cca_cor[2])^2))
t2[1] 4.554156
p_val2 = pchisq(t2, (3 - 1) * (2 - 1), lower.tail = F)
p_val2[1] 0.1025835
# Fail to reject H0
cc1 = cc(scale(Y[, 1:3]), scale(Y[, 4:5]))
cc1$cor # cca_cor[1] 0.5043868 0.2475748
# cancor
cc1$xcoef # a [,1] [,2]
GA -0.3241239 0.97721500
IA 0.4695831 0.07828892
SSPG -1.0412816 -0.49688301
cc1$ycoef # b [,1] [,2]
RW -0.97910272 -0.3724554
FPG -0.06390341 1.0456010
# comput
cc1$scores$xscores # U [,1] [,2]
1 0.68310204 0.613964679
2 0.84427226 -1.351124366
3 0.23299821 -0.776902625
4 0.23535998 0.241506862
5 0.17161648 -0.888782362
6 -1.30248955 0.364068499
7 0.23905717 0.012496354
8 0.68449379 -1.205080956
9 -0.17458081 0.872651512
10 0.55243590 -1.305135669
11 1.40721278 0.662017954
12 1.29183703 -0.744833589
13 -0.22946671 -1.873026668
14 0.38180081 0.769750475
15 1.40749418 -0.568972118
16 0.03915177 1.277382305
17 0.42365814 0.676466203
18 1.39853010 0.918292587
19 1.06549885 -1.028549714
20 0.86347602 0.364467020
21 0.71633453 1.279787533
22 1.29892118 -0.709686617
23 0.51985914 0.132736768
24 0.15297221 -0.673250420
25 1.13259757 1.421228574
26 -1.94453942 -0.583820975
27 -1.11698351 -0.013183110
28 -0.49278186 -0.171577085
29 -0.29818327 -0.922406440
30 0.86279025 -0.582250230
31 0.08267673 0.429072761
32 0.08355365 -0.326335877
33 1.44144059 0.757755165
34 -0.33342519 -0.257516127
35 0.45838268 0.922743202
36 0.12301215 0.939500707
37 1.00006173 0.980092193
38 -0.50621897 -0.845966613
39 -0.09372501 1.462757046
40 -0.32501738 -2.499799571
41 -0.35280919 0.708205567
42 -0.17488656 -0.007202487
43 0.76032502 -1.281085670
44 1.89801826 -1.470738683
45 0.90462026 -0.338898454
46 0.78838705 -0.157248334
47 -0.37704623 -0.554590222
48 0.27548158 0.444863080
49 0.26694075 -1.649914837
50 0.47247865 0.280536223
51 -1.64217715 -1.976588995
52 0.38374901 -1.267088098
53 -0.35744587 -1.060770504
54 0.13556845 0.169559377
55 0.10088146 -0.457801382
56 0.46302504 -0.576102839
57 0.67967686 -0.292743946
58 0.66908202 0.544827366
60 -1.88972227 0.679837517
61 -1.12022939 1.917180020
64 -0.53298009 -0.687223916
67 -1.59470441 1.417652080
68 -3.02900787 -0.110463395
69 -1.84952896 -0.082266698
70 -0.14678208 2.013302135
72 -1.37043653 0.380332611
73 -0.24650704 1.325281299
74 0.06846066 0.784310289
75 -2.35649116 0.303578707
76 0.08564256 1.776969514
78 -1.05127153 -0.035816205
79 0.16821464 1.237341533
80 1.12220666 0.664695972
81 -0.97271644 1.214816926
82 -0.83821635 0.248914847
84 -2.31698603 -1.876197669
cc1$scores$yscores # V [,1] [,2]
1 1.05598937 -1.05262230
2 -0.14238236 0.70208647
3 -0.12834840 1.74769575
4 -0.77351945 -0.44823800
5 -0.46885120 -0.33234072
6 1.39022485 -0.14527448
7 0.13898598 1.19922187
8 -1.19168863 -1.25748077
9 -0.44705061 0.58618919
10 1.15245770 1.19464993
11 0.28505278 0.08448186
12 1.61095940 0.06872785
13 -0.07098329 -1.74139586
14 0.74982180 0.13124842
15 1.54255897 -0.08732584
16 -0.23885069 -1.54518576
17 -1.23052182 -0.62208393
18 0.60375499 1.24598842
19 0.86482205 -0.47517410
20 0.06431824 -0.12952047
21 -0.08801589 -0.18746911
22 1.52702569 0.16683290
23 -0.12684908 0.44792774
24 -0.24035001 -0.24541775
25 1.64829327 0.73309902
26 -1.18242267 -2.68432815
27 -2.05432555 0.10480781
28 -1.53668939 0.56178680
29 -0.43778465 -0.84065819
30 1.14469106 1.32172930
31 -0.31351843 -2.87392810
32 -0.53875095 0.81137361
33 1.85649318 -1.39827594
34 1.35915829 0.36304299
35 0.40781968 -0.64902003
36 0.37525380 1.15906546
37 -1.84135764 0.44588951
38 0.76685439 -1.42267833
39 -0.36461623 1.78785215
40 -0.53098431 0.68429424
41 -0.56205086 1.19261171
42 0.36898648 -0.01362318
43 -0.34758363 0.23392540
44 1.28449055 -0.96569934
45 1.48819250 0.80222974
46 0.24771891 -0.57988930
47 -0.04918270 -0.82286596
48 -0.13461572 0.57500710
49 -0.68181911 -0.67342242
50 0.54462052 -0.33691265
51 0.49025406 0.55264294
52 -1.78699117 -0.44366607
53 0.85705541 -0.34809473
54 0.93322247 -0.31912041
55 0.55088784 0.83577600
56 -0.51395171 -0.86963251
57 0.45292018 -0.11172823
58 1.48969182 -0.49753827
60 -1.46052233 0.59076112
61 -1.30968753 1.94847778
64 -0.84818719 -1.77698033
67 -1.88645815 -0.09140228
68 -0.58385145 0.27408181
69 0.27878547 -1.08820678
70 1.02492281 -0.54430483
72 -0.77501877 0.85153001
73 -0.20928346 -0.75373523
74 -0.18748287 0.16479468
75 -1.36255468 1.53826535
76 -1.17288668 2.26058516
78 -0.11131580 0.19376900
79 1.48819250 0.80222974
80 0.74982180 0.13124842
81 0.29908674 1.13009114
82 -1.32998880 -0.26982014
84 -1.83209168 -0.98095787
cc1$scores$corr.X.xscores #rho_XU [,1] [,2]
GA -0.4367729 0.89045987
IA -0.1179190 0.05415582
SSPG -0.8775764 -0.25275458
cc1$scores$corr.X.yscores #rho_XV [,1] [,2]
GA -0.22030250 0.22045540
IA -0.05947678 0.01340761
SSPG -0.44263796 -0.06257566
cc1$scores$corr.Y.xscores #rho_YU [,1] [,2]
RW -0.5034474 -0.01510271
FPG -0.1793339 0.23139771
cc1$scores$corr.Y.yscores #rho_YV [,1] [,2]
RW -0.9981376 -0.06100262
FPG -0.3555484 0.93465787
# Canonical dimensions
p.asym(cc1$cor, 76, 2, 3, tstat = "Wilks") # Eq to chisq test (large sample size)Wilks' Lambda, using F-approximation (Rao's F):
stat approx df1 df2 p.value
1 to 2: 0.6998941 4.622551 6 142 0.00025487
2 to 2: 0.9387067 2.350636 2 72 0.10258351
p_val1[1] 0.0002541122
p_val2[1] 0.1025835
p.asym(cc1$cor, 76, 2, 3, tstat = "Hotelling") Hotelling-Lawley Trace, using F-approximation:
stat approx df1 df2 p.value
1 to 2: 0.40650802 4.742594 6 140 0.0001985532
2 to 2: 0.06529544 2.350636 2 144 0.0989560343
p.asym(cc1$cor, 76, 2, 3, tstat = "Pillai") Pillai-Bartlett Trace, using F-approximation:
stat approx df1 df2 p.value
1 to 2: 0.31569931 4.498474 6 144 0.000330589
2 to 2: 0.06129327 2.339550 2 148 0.099925668
p.asym(cc1$cor, 76, 2, 3, tstat = "Roy") Roy's Largest Root, using F-approximation:
stat approx df1 df2 p.value
1 to 1: 0.254406 8.189102 3 72 9.209354e-05
F statistic for Roy's Greatest Root is an upper bound.