Проведемразведочный анализ наших даннных, а именно Boston методом главных компонент.

names(Boston)
##  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"    
##  [8] "dis"     "rad"     "tax"     "ptratio" "black"   "lstat"   "medv"
apply(Boston, 2, mean)
##         crim           zn        indus         chas          nox 
##   3.61352356  11.36363636  11.13677866   0.06916996   0.55469506 
##           rm          age          dis          rad          tax 
##   6.28463439  68.57490119   3.79504269   9.54940711 408.23715415 
##      ptratio        black        lstat         medv 
##  18.45553360 356.67403162  12.65306324  22.53280632
apply(Boston, 2, var)
##         crim           zn        indus         chas          nox 
## 7.398658e+01 5.439368e+02 4.706444e+01 6.451297e-02 1.342764e-02 
##           rm          age          dis          rad          tax 
## 4.936709e-01 7.923584e+02 4.434015e+00 7.581637e+01 2.840476e+04 
##      ptratio        black        lstat         medv 
## 4.686989e+00 8.334752e+03 5.099476e+01 8.458672e+01
pr.out=prcomp(Boston, scale=TRUE)
names(pr.out)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
pr.out$center
##         crim           zn        indus         chas          nox 
##   3.61352356  11.36363636  11.13677866   0.06916996   0.55469506 
##           rm          age          dis          rad          tax 
##   6.28463439  68.57490119   3.79504269   9.54940711 408.23715415 
##      ptratio        black        lstat         medv 
##  18.45553360 356.67403162  12.65306324  22.53280632
pr.out$scale
##        crim          zn       indus        chas         nox          rm 
##   8.6015451  23.3224530   6.8603529   0.2539940   0.1158777   0.7026171 
##         age         dis         rad         tax     ptratio       black 
##  28.1488614   2.1057101   8.7072594 168.5371161   2.1649455  91.2948644 
##       lstat        medv 
##   7.1410615   9.1971041
pr.out$rotation
##                  PC1          PC2          PC3          PC4          PC5
## crim     0.242284451 -0.065873108  0.395077419 -0.100366211  0.004957659
## zn      -0.245435005 -0.148002653  0.394545713 -0.342958421  0.114495002
## indus    0.331859746  0.127075668 -0.066081913  0.009626936 -0.022583692
## chas    -0.005027133  0.410668763 -0.125305293 -0.700406497 -0.535197817
## nox      0.325193880  0.254276363 -0.046475549 -0.053707583  0.194605570
## rm      -0.202816554  0.434005810  0.353406095  0.293357309 -0.008320481
## age      0.296976574  0.260303205 -0.200823078  0.078426326  0.149750092
## dis     -0.298169809 -0.359149977  0.157068710 -0.184747787 -0.106219480
## rad      0.303412754  0.031149596  0.418510334  0.051374381 -0.230352185
## tax      0.324033052  0.008851406  0.343232194  0.026810695 -0.163425820
## ptratio  0.207679535 -0.314623061  0.000399092  0.342036328 -0.615707380
## black   -0.196638358  0.026481032 -0.361375914  0.201741185 -0.367460674
## lstat    0.311397955 -0.201245177 -0.161060336 -0.242621217  0.178358870
## medv    -0.266636396  0.444924411  0.163188735  0.180297553 -0.050659893
##                 PC6          PC7         PC8          PC9         PC10
## crim    -0.22462703  0.777083366 -0.15740140  0.254211798 -0.071384615
## zn      -0.33574694 -0.274178365  0.38031404  0.382899480  0.245579673
## indus   -0.08082495 -0.340273839 -0.17174578  0.627048264 -0.254827026
## chas     0.16264906  0.074075775  0.03292700 -0.018642967 -0.041706916
## nox     -0.14899191 -0.198092965 -0.04745838 -0.043024391 -0.211620349
## rm       0.13108056  0.074084938  0.43761566 -0.003666947 -0.526133916
## age     -0.06086960  0.118580363  0.58810569 -0.043265822  0.245647942
## dis      0.01162335 -0.104397844  0.12823060 -0.175802196 -0.299412026
## rad     -0.13493732 -0.137080107 -0.07464872 -0.463439397  0.115793486
## tax     -0.18847146 -0.313984433 -0.07099212 -0.179446555 -0.008366413
## ptratio  0.27901731  0.001485608  0.28346960  0.274525949  0.160474164
## black   -0.78590728  0.074842780  0.04444175 -0.060975651 -0.146292237
## lstat   -0.09197211  0.083213083  0.35748247 -0.171810921  0.066647267
## medv    -0.05402804 -0.009964973 -0.15230879  0.070751083  0.575547284
##                 PC11        PC12         PC13         PC14
## crim    -0.071068781  0.06327612  0.097032312  0.059114176
## zn      -0.127709065 -0.22112210 -0.132375830 -0.096296807
## indus    0.273797614  0.34840828  0.083716854 -0.235472877
## chas    -0.009968402 -0.01903975 -0.049917454  0.023488966
## nox     -0.437475550 -0.44909357  0.524974687  0.087649148
## rm       0.223951923 -0.12560554 -0.049893596  0.007190515
## age     -0.329630928  0.48633905 -0.051462562 -0.038227027
## dis     -0.114600078  0.49356822  0.552292172  0.047124029
## rad      0.042213348  0.01863641 -0.006278474 -0.634975332
## tax      0.042794054  0.17042179 -0.242987756  0.698822190
## ptratio -0.099991841 -0.23214842  0.188347079  0.055738160
## black    0.039194858 -0.04152885 -0.021078199 -0.016165280
## lstat    0.683032690 -0.18189209  0.249489863  0.083143795
## medv     0.242001064  0.09828580  0.469629324  0.134127182
dim(pr.out$x)
## [1] 506  14
biplot(pr.out, scale=0)

pr.out$rotation=-pr.out$rotation
pr.out$x=-pr.out$x
biplot(pr.out, scale=0)

pr.out$sdev
##  [1] 2.5585132 1.2843410 1.1614241 0.9415625 0.9224421 0.8124105 0.7317177
##  [8] 0.6348831 0.5265582 0.5022524 0.4612919 0.4277704 0.3660733 0.2456149
pr.var=pr.out$sdev^2
pr.var
##  [1] 6.54598958 1.64953191 1.34890592 0.88653987 0.85089944 0.66001077
##  [7] 0.53541080 0.40307658 0.27726358 0.25225744 0.21279025 0.18298750
## [13] 0.13400970 0.06032666
pve=pr.var/sum(pr.var)
pve
##  [1] 0.467570684 0.117823708 0.096350423 0.063324276 0.060778531
##  [6] 0.047143627 0.038243629 0.028791184 0.019804542 0.018018389
## [11] 0.015199304 0.013070536 0.009572121 0.004309047
plot(pve, xlab="Principal Component", ylab="Proportion of Variance Explained", ylim=c(0,1),type='b')

plot(cumsum(pve), xlab="Principal Component", ylab="Cumulative Proportion of Variance Explained", ylim=c(0,1),type='b')

Из графиков можно сделать вывод, что у нас останется 2, максимум 4 главные компоненты. Суммарно они объясняют от 60 до 70% суммарной дисперсии факторов.

Выполним кластеризацию методом k-средних.

set.seed(2)
km.out=kmeans(Boston,2,nstart=20)
km.out$cluster
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   2   2   2   2   2   2   2   1   1   1   1   1   1   1   1   1   1   1 
## 505 506 
##   1   1
plot(Boston, col=(km.out$cluster+1), main="K-Means Clustering Results with K=2",  pch=20, cex=2)

set.seed(4)
km.out=kmeans(Boston,3,nstart=20)
km.out
## K-means clustering with 3 clusters of sizes 102, 38, 366
## 
## Cluster means:
##         crim       zn     indus       chas       nox       rm      age
## 1 10.9105113  0.00000 18.572549 0.07843137 0.6712255 5.982265 89.91373
## 2 15.2190382  0.00000 17.926842 0.02631579 0.6737105 6.065500 89.90526
## 3  0.3749927 15.71038  8.359536 0.07103825 0.5098626 6.391653 60.41339
##        dis      rad      tax  ptratio     black    lstat     medv
## 1 2.077164 23.01961 668.2059 20.19510 371.80304 17.87402 17.42941
## 2 1.994429 22.50000 644.7368 19.92895  57.78632 20.44868 13.12632
## 3 4.460745  4.45082 311.2322 17.81776 383.48981 10.38866 24.93169
## 
## Clustering vector:
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   3   3   3   3   3   3   3   3   3   3   3   3   2   3   3   3   3   3 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   3   3   3   3   3   3   3   3   3   3   3   2   2   3   3   3   3   3 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   3   3   3   3   3   3   3   3   3   3   3   3   3   3   1   1   1   1 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1   1   1   1 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2   2 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   2   2   2   2   2   2   1   1   1   2   2   2   2   2   2   2   2   2 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   2   2   2   2   2   2   2   1   1   1   1   1   1   2   1   1   1   1 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   2   1   1   1   2   2   2   2   1   1   1   1   1   1   1   1   2   1 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   1   1   1   1   1   1   1   3   3   3   3   3   3   3   3   3   3   3 
## 505 506 
##   3   3 
## 
## Within cluster sum of squares by cluster:
## [1]  181891.7  313208.7 2573399.1
##  (between_SS / total_SS =  84.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
plot(Boston, col=(km.out$cluster+1), main="K-Means Clustering Results with K=3",  pch=20, cex=2)

set.seed(3)
km.out=kmeans(Boston,2,nstart=1)
km.out$tot.withinss
## [1] 5764994
km.out=kmeans(Boston,3,nstart=1)
km.out$tot.withinss
## [1] 4460944
km.out=kmeans(Boston,3,nstart=20)
km.out$tot.withinss
## [1] 3068499

По результатам кластерного анализа методом K-средних можно сделать вывод, что наиболее оптимальным является разбиение на 3 кластера при выборе 20 случайных наблюдений в качестве ядер. Такой вывод мы делаем при подсчете внутриклассовой дисперсии. Чем она меньше, тем лучше качество разбиения.

Теперь выполним иерархическую кластеризацию.

hc.complete=hclust(dist(Boston), method="complete")
hc.average=hclust(dist(Boston), method="average")
hc.single=hclust(dist(Boston), method="single")
par(mfrow=c(1,3))
plot(hc.complete,main="Complete Linkage", xlab="", sub="", cex=.9)
plot(hc.average, main="Average Linkage", xlab="", sub="", cex=.9)
plot(hc.single, main="Single Linkage", xlab="", sub="", cex=.9)

par(mfrow=c(1,1))
cutree(hc.complete, 2)
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   1   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   1   2   2   1   1   1   1   1   1   1   1   2   2   1   1   1   1   1 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   2   2   2   2   2   2   2   1   1   1   1   1   1   1   1   1   1   1 
## 505 506 
##   1   1
cutree(hc.average, 2)
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   1   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   1   2   2   1   1   1   1   1   1   1   1   2   2   1   1   1   1   1 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   2   2   2   2   2   2   2   1   1   1   1   1   1   1   1   1   1   1 
## 505 506 
##   1   1
cutree(hc.single, 2)
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   2   2   2   2   2   2   2   1   1   1   1   1   1   1   1   1   1   1 
## 505 506 
##   1   1
cutree(hc.single, 4)
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   2   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   1   1   1   1   1   1   1   1   1   1   1   1   3   1   1   1   1   1 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   1   1   1   1   1   1   1   1   1   1   1   3   3   1   1   1   1   1 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   4   4   4   4 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   4   4   4   4   4   4   4   1   1   1   1   1   1   1   1   1   1   1 
## 505 506 
##   1   1
xsc=scale(Boston)
plot(hclust(dist(xsc), method="complete"), main="Hierarchical Clustering with Scaled Features")

Построим модели. Сначала, с помощью метода главных компонент.

set.seed(2)  
pcr.fit <- pcr(crim ~ ., data = Boston, scale = T, validation = 'CV')
summary(pcr.fit)
## Data:    X dimension: 506 13 
##  Y dimension: 506 1
## Fit method: svdpc
## Number of components considered: 13
## 
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV            8.61    7.207    7.211    6.779    6.772    6.786    6.815
## adjCV         8.61    7.204    7.208    6.771    6.765    6.780    6.807
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV       6.816    6.676    6.707     6.705     6.716     6.677     6.598
## adjCV    6.807    6.667    6.696     6.694     6.704     6.664     6.585
## 
## TRAINING: % variance explained
##       1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X       47.70    60.36    69.67    76.45    82.99    88.00    91.14
## crim    30.69    30.87    39.27    39.61    39.61    39.86    40.14
##       8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## X       93.45    95.40     97.04     98.46     99.52     100.0
## crim    42.47    42.55     42.78     43.04     44.13      45.4
y11 <- predict(pcr.fit)
mse <- round(mean((Boston$crim-y11)^2))
mse
## [1] 44
validationplot(pcr.fit, val.type = 'MSEP')

Выведем отдельно каждый из кластеров.

km.out=kmeans(Boston,3,nstart=100)
data1 <- data.table(Boston, km.out$cluster)
str(data1)
## Classes 'data.table' and 'data.frame':   506 obs. of  15 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
##  $ V2     : int  3 3 3 3 3 3 3 3 3 3 ...
##  - attr(*, ".internal.selfref")=<externalptr>
clust1 <- data1[V2==1, -c("rad","tax", "ptratio","zn")]
clust2 <- data1[V2==2, -c("rad","tax", "ptratio","zn")]
clust3 <- data1[V2==3, -c("rad","tax", "ptratio","zn")]

Построим регрессию на каждом кластере.

model1 <- glm(crim ~ age+dis+medv , data = clust1)
summary(model1)
## 
## Call:
## glm(formula = crim ~ age + dis + medv, data = clust1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -16.392   -4.118   -0.721    2.224   68.671  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  65.2607    16.0522   4.066 9.68e-05 ***
## age          -0.2800     0.1258  -2.226 0.028328 *  
## dis         -10.2536     2.4416  -4.200 5.90e-05 ***
## medv         -0.4520     0.1173  -3.854 0.000207 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 111.0538)
## 
##     Null deviance: 14838  on 101  degrees of freedom
## Residual deviance: 10883  on  98  degrees of freedom
## AIC: 775.8
## 
## Number of Fisher Scoring iterations: 2
model2 <- lm(crim ~ nox+age+dis+medv, data = clust2)
summary(model2)
## 
## Call:
## lm(formula = crim ~ nox + age + dis + medv, data = clust2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.170  -6.827  -2.674   2.362  45.698 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  54.6101    31.4054   1.739   0.0914 .
## nox         -53.8456    28.6092  -1.882   0.0687 .
## age           0.4631     0.2291   2.021   0.0514 .
## dis         -14.0070     5.8605  -2.390   0.0227 *
## medv         -1.2807     0.4950  -2.587   0.0143 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.04 on 33 degrees of freedom
## Multiple R-squared:  0.4028, Adjusted R-squared:  0.3305 
## F-statistic: 5.565 on 4 and 33 DF,  p-value: 0.001545
model3 <- lm(crim ~ nox+rm+age+dis+black+medv, data = clust3)
summary(model3)
## 
## Call:
## lm(formula = crim ~ nox + rm + age + dis + black + medv, data = clust3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.02506 -0.20534 -0.02612  0.11637  2.32837 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.9389484  0.5027688   1.868 0.062639 .  
## nox          4.2205135  0.3288843  12.833  < 2e-16 ***
## rm          -0.2064991  0.0663917  -3.110 0.002018 ** 
## age          0.0020544  0.0010620   1.935 0.053830 .  
## dis          0.0471744  0.0148378   3.179 0.001604 ** 
## black       -0.0057732  0.0007237  -7.977 2.03e-14 ***
## medv         0.0193915  0.0054771   3.540 0.000452 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3667 on 359 degrees of freedom
## Multiple R-squared:  0.6322, Adjusted R-squared:  0.626 
## F-statistic: 102.8 on 6 and 359 DF,  p-value: < 2.2e-16
y1 <- predict(model1)
y1_1 <- clust1$crim
mse1 <- round(mean((y1-y1_1)^2))
mse1
## [1] 107
y2 <- predict(model2)
y2_1 <- clust2$crim
mse2 <- round(mean((y2-y2_1)^2))
mse2
## [1] 126
y3 <- predict(model3)
y3_1 <- clust3$crim
mse3 <- round(mean((y3-y3_1)^2))
mse3
## [1] 0

Наименьшая регрессия наблюдается в регрессии по главным компонентам. Она равна 44.