# Libraries
library(class)
library(cluster)

Ex. 1

Use a data set such as the PlantGrowth in R to calculate three different distance metrics and discuss the results.

Solution

We will use the PlantGrowth dataset in R together with the dist() function, which returns a distance matrix by computing the distances between the rows of a data matrix using the specified distance measure. The methods considered here are Manhattan, Euclidean and Canberra.

# Manhattan (L1) distance between every pair of rows of PlantGrowth.
# NOTE(review): the warning recorded below shows that dist() coerced a
# non-numeric column to NA -- presumably the `group` factor; confirm with
# str(PlantGrowth). Per ?dist, columns containing NA are left out of the sum
# and the result is scaled up in proportion to the number of columns used, so
# the printed values are driven by the numeric column(s) only. Passing just
# the numeric column(s) would silence the warning but would also change the
# numbers shown in the output that follows.
plants_man <- dist(PlantGrowth, method = "manhattan")
## Warning in dist(PlantGrowth, method = "manhattan"): NAs introduced by coercion
# Expand the compact "dist" object into a full symmetric matrix for display.
as.matrix(plants_man)
##       1    2    3    4    5    6    7    8    9   10   11   12   13   14   15
## 1  0.00 2.82 2.02 3.88 0.66 0.88 2.00 0.72 2.32 1.94 1.28 0.00 0.48 1.16 3.40
## 2  2.82 0.00 0.80 1.06 2.16 1.94 0.82 2.10 0.50 0.88 1.54 2.82 2.34 3.98 0.58
## 3  2.02 0.80 0.00 1.86 1.36 1.14 0.02 1.30 0.30 0.08 0.74 2.02 1.54 3.18 1.38
## 4  3.88 1.06 1.86 0.00 3.22 3.00 1.88 3.16 1.56 1.94 2.60 3.88 3.40 5.04 0.48
## 5  0.66 2.16 1.36 3.22 0.00 0.22 1.34 0.06 1.66 1.28 0.62 0.66 0.18 1.82 2.74
## 6  0.88 1.94 1.14 3.00 0.22 0.00 1.12 0.16 1.44 1.06 0.40 0.88 0.40 2.04 2.52
## 7  2.00 0.82 0.02 1.88 1.34 1.12 0.00 1.28 0.32 0.06 0.72 2.00 1.52 3.16 1.40
## 8  0.72 2.10 1.30 3.16 0.06 0.16 1.28 0.00 1.60 1.22 0.56 0.72 0.24 1.88 2.68
## 9  2.32 0.50 0.30 1.56 1.66 1.44 0.32 1.60 0.00 0.38 1.04 2.32 1.84 3.48 1.08
## 10 1.94 0.88 0.08 1.94 1.28 1.06 0.06 1.22 0.38 0.00 0.66 1.94 1.46 3.10 1.46
## 11 1.28 1.54 0.74 2.60 0.62 0.40 0.72 0.56 1.04 0.66 0.00 1.28 0.80 2.44 2.12
## 12 0.00 2.82 2.02 3.88 0.66 0.88 2.00 0.72 2.32 1.94 1.28 0.00 0.48 1.16 3.40
## 13 0.48 2.34 1.54 3.40 0.18 0.40 1.52 0.24 1.84 1.46 0.80 0.48 0.00 1.64 2.92
## 14 1.16 3.98 3.18 5.04 1.82 2.04 3.16 1.88 3.48 3.10 2.44 1.16 1.64 0.00 4.56
## 15 3.40 0.58 1.38 0.48 2.74 2.52 1.40 2.68 1.08 1.46 2.12 3.40 2.92 4.56 0.00
## 16 0.68 3.50 2.70 4.56 1.34 1.56 2.68 1.40 3.00 2.62 1.96 0.68 1.16 0.48 4.08
## 17 3.72 0.90 1.70 0.16 3.06 2.84 1.72 3.00 1.40 1.78 2.44 3.72 3.24 4.88 0.32
## 18 1.44 1.38 0.58 2.44 0.78 0.56 0.56 0.72 0.88 0.50 0.16 1.44 0.96 2.60 1.96
## 19 0.30 2.52 1.72 3.58 0.36 0.58 1.70 0.42 2.02 1.64 0.98 0.30 0.18 1.46 3.10
## 20 1.04 1.78 0.98 2.84 0.38 0.16 0.96 0.32 1.28 0.90 0.24 1.04 0.56 2.20 2.36
## 21 4.28 1.46 2.26 0.40 3.62 3.40 2.28 3.56 1.96 2.34 3.00 4.28 3.80 5.44 0.88
## 22 1.90 0.92 0.12 1.98 1.24 1.02 0.10 1.18 0.42 0.04 0.62 1.90 1.42 3.06 1.50
## 23 2.74 0.08 0.72 1.14 2.08 1.86 0.74 2.02 0.42 0.80 1.46 2.74 2.26 3.90 0.66
## 24 2.66 0.16 0.64 1.22 2.00 1.78 0.66 1.94 0.34 0.72 1.38 2.66 2.18 3.82 0.74
## 25 2.40 0.42 0.38 1.48 1.74 1.52 0.40 1.68 0.08 0.46 1.12 2.40 1.92 3.56 1.00
## 26 2.24 0.58 0.22 1.64 1.58 1.36 0.24 1.52 0.08 0.30 0.96 2.24 1.76 3.40 1.16
## 27 1.50 1.32 0.52 2.38 0.84 0.62 0.50 0.78 0.82 0.44 0.22 1.50 1.02 2.66 1.90
## 28 3.96 1.14 1.94 0.08 3.30 3.08 1.96 3.24 1.64 2.02 2.68 3.96 3.48 5.12 0.56
## 29 3.26 0.44 1.24 0.62 2.60 2.38 1.26 2.54 0.94 1.32 1.98 3.26 2.78 4.42 0.14
## 30 2.18 0.64 0.16 1.70 1.52 1.30 0.18 1.46 0.14 0.24 0.90 2.18 1.70 3.34 1.22
##      16   17   18   19   20   21   22   23   24   25   26   27   28   29   30
## 1  0.68 3.72 1.44 0.30 1.04 4.28 1.90 2.74 2.66 2.40 2.24 1.50 3.96 3.26 2.18
## 2  3.50 0.90 1.38 2.52 1.78 1.46 0.92 0.08 0.16 0.42 0.58 1.32 1.14 0.44 0.64
## 3  2.70 1.70 0.58 1.72 0.98 2.26 0.12 0.72 0.64 0.38 0.22 0.52 1.94 1.24 0.16
## 4  4.56 0.16 2.44 3.58 2.84 0.40 1.98 1.14 1.22 1.48 1.64 2.38 0.08 0.62 1.70
## 5  1.34 3.06 0.78 0.36 0.38 3.62 1.24 2.08 2.00 1.74 1.58 0.84 3.30 2.60 1.52
## 6  1.56 2.84 0.56 0.58 0.16 3.40 1.02 1.86 1.78 1.52 1.36 0.62 3.08 2.38 1.30
## 7  2.68 1.72 0.56 1.70 0.96 2.28 0.10 0.74 0.66 0.40 0.24 0.50 1.96 1.26 0.18
## 8  1.40 3.00 0.72 0.42 0.32 3.56 1.18 2.02 1.94 1.68 1.52 0.78 3.24 2.54 1.46
## 9  3.00 1.40 0.88 2.02 1.28 1.96 0.42 0.42 0.34 0.08 0.08 0.82 1.64 0.94 0.14
## 10 2.62 1.78 0.50 1.64 0.90 2.34 0.04 0.80 0.72 0.46 0.30 0.44 2.02 1.32 0.24
## 11 1.96 2.44 0.16 0.98 0.24 3.00 0.62 1.46 1.38 1.12 0.96 0.22 2.68 1.98 0.90
## 12 0.68 3.72 1.44 0.30 1.04 4.28 1.90 2.74 2.66 2.40 2.24 1.50 3.96 3.26 2.18
## 13 1.16 3.24 0.96 0.18 0.56 3.80 1.42 2.26 2.18 1.92 1.76 1.02 3.48 2.78 1.70
## 14 0.48 4.88 2.60 1.46 2.20 5.44 3.06 3.90 3.82 3.56 3.40 2.66 5.12 4.42 3.34
## 15 4.08 0.32 1.96 3.10 2.36 0.88 1.50 0.66 0.74 1.00 1.16 1.90 0.56 0.14 1.22
## 16 0.00 4.40 2.12 0.98 1.72 4.96 2.58 3.42 3.34 3.08 2.92 2.18 4.64 3.94 2.86
## 17 4.40 0.00 2.28 3.42 2.68 0.56 1.82 0.98 1.06 1.32 1.48 2.22 0.24 0.46 1.54
## 18 2.12 2.28 0.00 1.14 0.40 2.84 0.46 1.30 1.22 0.96 0.80 0.06 2.52 1.82 0.74
## 19 0.98 3.42 1.14 0.00 0.74 3.98 1.60 2.44 2.36 2.10 1.94 1.20 3.66 2.96 1.88
## 20 1.72 2.68 0.40 0.74 0.00 3.24 0.86 1.70 1.62 1.36 1.20 0.46 2.92 2.22 1.14
## 21 4.96 0.56 2.84 3.98 3.24 0.00 2.38 1.54 1.62 1.88 2.04 2.78 0.32 1.02 2.10
## 22 2.58 1.82 0.46 1.60 0.86 2.38 0.00 0.84 0.76 0.50 0.34 0.40 2.06 1.36 0.28
## 23 3.42 0.98 1.30 2.44 1.70 1.54 0.84 0.00 0.08 0.34 0.50 1.24 1.22 0.52 0.56
## 24 3.34 1.06 1.22 2.36 1.62 1.62 0.76 0.08 0.00 0.26 0.42 1.16 1.30 0.60 0.48
## 25 3.08 1.32 0.96 2.10 1.36 1.88 0.50 0.34 0.26 0.00 0.16 0.90 1.56 0.86 0.22
## 26 2.92 1.48 0.80 1.94 1.20 2.04 0.34 0.50 0.42 0.16 0.00 0.74 1.72 1.02 0.06
## 27 2.18 2.22 0.06 1.20 0.46 2.78 0.40 1.24 1.16 0.90 0.74 0.00 2.46 1.76 0.68
## 28 4.64 0.24 2.52 3.66 2.92 0.32 2.06 1.22 1.30 1.56 1.72 2.46 0.00 0.70 1.78
## 29 3.94 0.46 1.82 2.96 2.22 1.02 1.36 0.52 0.60 0.86 1.02 1.76 0.70 0.00 1.08
## 30 2.86 1.54 0.74 1.88 1.14 2.10 0.28 0.56 0.48 0.22 0.06 0.68 1.78 1.08 0.00
# Euclidean (L2) distance between every pair of rows of PlantGrowth.
# NOTE(review): the warning recorded below shows that dist() coerced a
# non-numeric column to NA -- presumably the `group` factor; confirm with
# str(PlantGrowth). Per ?dist, columns containing NA are left out and the sum
# is scaled up in proportion to the number of columns used, so the printed
# values are driven by the numeric column(s) only. Restricting the input to
# the numeric column(s) would silence the warning but change the numbers
# shown in the output that follows.
plants_euc <- dist(PlantGrowth, method = "euclidean")
## Warning in dist(PlantGrowth, method = "euclidean"): NAs introduced by coercion
# Expand the compact "dist" object into a full symmetric matrix for display.
as.matrix(plants_euc)
##            1          2          3          4          5         6          7
## 1  0.0000000 1.99404112 1.42835570 2.74357431 0.46669048 0.6222540 1.41421356
## 2  1.9940411 0.00000000 0.56568542 0.74953319 1.52735065 1.3717872 0.57982756
## 3  1.4283557 0.56568542 0.00000000 1.31521861 0.96166522 0.8061017 0.01414214
## 4  2.7435743 0.74953319 1.31521861 0.00000000 2.27688384 2.1213203 1.32936075
## 5  0.4666905 1.52735065 0.96166522 2.27688384 0.00000000 0.1555635 0.94752309
## 6  0.6222540 1.37178716 0.80610173 2.12132034 0.15556349 0.0000000 0.79195959
## 7  1.4142136 0.57982756 0.01414214 1.32936075 0.94752309 0.7919596 0.00000000
## 8  0.5091169 1.48492424 0.91923882 2.23445743 0.04242641 0.1131371 0.90509668
## 9  1.6404877 0.35355339 0.21213203 1.10308658 1.17379726 1.0182338 0.22627417
## 10 1.3717872 0.62225397 0.05656854 1.37178716 0.90509668 0.7495332 0.04242641
## 11 0.9050967 1.08894444 0.52325902 1.83847763 0.43840620 0.2828427 0.50911688
## 12 0.0000000 1.99404112 1.42835570 2.74357431 0.46669048 0.6222540 1.41421356
## 13 0.3394113 1.65462987 1.08894444 2.40416306 0.12727922 0.2828427 1.07480231
## 14 0.8202439 2.81428499 2.24859956 3.56381818 1.28693434 1.4424978 2.23445743
## 15 2.4041631 0.41012193 0.97580736 0.33941125 1.93747258 1.7819091 0.98994949
## 16 0.4808326 2.47487373 1.90918831 3.22440692 0.94752309 1.1030866 1.89504617
## 17 2.6304372 0.63639610 1.20208153 0.11313708 2.16374675 2.0081833 1.21622366
## 18 1.0182338 0.97580736 0.41012193 1.72534055 0.55154329 0.3959798 0.39597980
## 19 0.2121320 1.78190909 1.21622366 2.53144228 0.25455844 0.4101219 1.20208153
## 20 0.7353911 1.25865007 0.69296465 2.00818326 0.26870058 0.1131371 0.67882251
## 21 3.0264170 1.03237590 1.59806133 0.28284271 2.55972655 2.4041631 1.61220346
## 22 1.3435029 0.65053824 0.08485281 1.40007143 0.87681241 0.7212489 0.07071068
## 23 1.9374726 0.05656854 0.50911688 0.80610173 1.47078210 1.3152186 0.52325902
## 24 1.8809040 0.11313708 0.45254834 0.86267027 1.41421356 1.2586501 0.46669048
## 25 1.6970563 0.29698485 0.26870058 1.04651804 1.23036580 1.0748023 0.28284271
## 26 1.5839192 0.41012193 0.15556349 1.15965512 1.11722871 0.9616652 0.16970563
## 27 1.0606602 0.93338095 0.36769553 1.68291414 0.59396970 0.4384062 0.35355339
## 28 2.8001429 0.80610173 1.37178716 0.05656854 2.33345238 2.1778889 1.38592929
## 29 2.3051681 0.31112698 0.87681241 0.43840620 1.83847763 1.6829141 0.89095454
## 30 1.5414928 0.45254834 0.11313708 1.20208153 1.07480231 0.9192388 0.12727922
##             8          9         10        11        12        13        14
## 1  0.50911688 1.64048773 1.37178716 0.9050967 0.0000000 0.3394113 0.8202439
## 2  1.48492424 0.35355339 0.62225397 1.0889444 1.9940411 1.6546299 2.8142850
## 3  0.91923882 0.21213203 0.05656854 0.5232590 1.4283557 1.0889444 2.2485996
## 4  2.23445743 1.10308658 1.37178716 1.8384776 2.7435743 2.4041631 3.5638182
## 5  0.04242641 1.17379726 0.90509668 0.4384062 0.4666905 0.1272792 1.2869343
## 6  0.11313708 1.01823376 0.74953319 0.2828427 0.6222540 0.2828427 1.4424978
## 7  0.90509668 0.22627417 0.04242641 0.5091169 1.4142136 1.0748023 2.2344574
## 8  0.00000000 1.13137085 0.86267027 0.3959798 0.5091169 0.1697056 1.3293607
## 9  1.13137085 0.00000000 0.26870058 0.7353911 1.6404877 1.3010765 2.4607316
## 10 0.86267027 0.26870058 0.00000000 0.4666905 1.3717872 1.0323759 2.1920310
## 11 0.39597980 0.73539105 0.46669048 0.0000000 0.9050967 0.5656854 1.7253405
## 12 0.50911688 1.64048773 1.37178716 0.9050967 0.0000000 0.3394113 0.8202439
## 13 0.16970563 1.30107648 1.03237590 0.5656854 0.3394113 0.0000000 1.1596551
## 14 1.32936075 2.46073160 2.19203102 1.7253405 0.8202439 1.1596551 0.0000000
## 15 1.89504617 0.76367532 1.03237590 1.4990664 2.4041631 2.0647518 3.2244069
## 16 0.98994949 2.12132034 1.85261977 1.3859293 0.4808326 0.8202439 0.3394113
## 17 2.12132034 0.98994949 1.25865007 1.7253405 2.6304372 2.2910260 3.4506811
## 18 0.50911688 0.62225397 0.35355339 0.1131371 1.0182338 0.6788225 1.8384776
## 19 0.29698485 1.42835570 1.15965512 0.6929646 0.2121320 0.1272792 1.0323759
## 20 0.22627417 0.90509668 0.63639610 0.1697056 0.7353911 0.3959798 1.5556349
## 21 2.51730014 1.38592929 1.65462987 2.1213203 3.0264170 2.6870058 3.8466609
## 22 0.83438600 0.29698485 0.02828427 0.4384062 1.3435029 1.0040916 2.1637468
## 23 1.42835570 0.29698485 0.56568542 1.0323759 1.9374726 1.5980613 2.7577164
## 24 1.37178716 0.24041631 0.50911688 0.9758074 1.8809040 1.5414928 2.7011479
## 25 1.18793939 0.05656854 0.32526912 0.7919596 1.6970563 1.3576450 2.5173001
## 26 1.07480231 0.05656854 0.21213203 0.6788225 1.5839192 1.2445079 2.4041631
## 27 0.55154329 0.57982756 0.31112698 0.1555635 1.0606602 0.7212489 1.8809040
## 28 2.29102597 1.15965512 1.42835570 1.8950462 2.8001429 2.4607316 3.6203867
## 29 1.79605122 0.66468037 0.93338095 1.4000714 2.3051681 1.9657569 3.1254120
## 30 1.03237590 0.09899495 0.16970563 0.6363961 1.5414928 1.2020815 2.3617366
##            15        16        17         18        19        20        21
## 1  2.40416306 0.4808326 2.6304372 1.01823376 0.2121320 0.7353911 3.0264170
## 2  0.41012193 2.4748737 0.6363961 0.97580736 1.7819091 1.2586501 1.0323759
## 3  0.97580736 1.9091883 1.2020815 0.41012193 1.2162237 0.6929646 1.5980613
## 4  0.33941125 3.2244069 0.1131371 1.72534055 2.5314423 2.0081833 0.2828427
## 5  1.93747258 0.9475231 2.1637468 0.55154329 0.2545584 0.2687006 2.5597265
## 6  1.78190909 1.1030866 2.0081833 0.39597980 0.4101219 0.1131371 2.4041631
## 7  0.98994949 1.8950462 1.2162237 0.39597980 1.2020815 0.6788225 1.6122035
## 8  1.89504617 0.9899495 2.1213203 0.50911688 0.2969848 0.2262742 2.5173001
## 9  0.76367532 2.1213203 0.9899495 0.62225397 1.4283557 0.9050967 1.3859293
## 10 1.03237590 1.8526198 1.2586501 0.35355339 1.1596551 0.6363961 1.6546299
## 11 1.49906638 1.3859293 1.7253405 0.11313708 0.6929646 0.1697056 2.1213203
## 12 2.40416306 0.4808326 2.6304372 1.01823376 0.2121320 0.7353911 3.0264170
## 13 2.06475180 0.8202439 2.2910260 0.67882251 0.1272792 0.3959798 2.6870058
## 14 3.22440692 0.3394113 3.4506811 1.83847763 1.0323759 1.5556349 3.8466609
## 15 0.00000000 2.8849957 0.2262742 1.38592929 2.1920310 1.6687720 0.6222540
## 16 2.88499567 0.0000000 3.1112698 1.49906638 0.6929646 1.2162237 3.5072496
## 17 0.22627417 3.1112698 0.0000000 1.61220346 2.4183052 1.8950462 0.3959798
## 18 1.38592929 1.4990664 1.6122035 0.00000000 0.8061017 0.2828427 2.0081833
## 19 2.19203102 0.6929646 2.4183052 0.80610173 0.0000000 0.5232590 2.8142850
## 20 1.66877200 1.2162237 1.8950462 0.28284271 0.5232590 0.0000000 2.2910260
## 21 0.62225397 3.5072496 0.3959798 2.00818326 2.8142850 2.2910260 0.0000000
## 22 1.06066017 1.8243355 1.2869343 0.32526912 1.1313708 0.6081118 1.6829141
## 23 0.46669048 2.4183052 0.6929646 0.91923882 1.7253405 1.2020815 1.0889444
## 24 0.52325902 2.3617366 0.7495332 0.86267027 1.6687720 1.1455130 1.1455130
## 25 0.70710678 2.1778889 0.9333810 0.67882251 1.4849242 0.9616652 1.3293607
## 26 0.82024387 2.0647518 1.0465180 0.56568542 1.3717872 0.8485281 1.4424978
## 27 1.34350288 1.5414928 1.5697771 0.04242641 0.8485281 0.3252691 1.9657569
## 28 0.39597980 3.2809755 0.1697056 1.78190909 2.5880108 2.0647518 0.2262742
## 29 0.09899495 2.7860007 0.3252691 1.28693434 2.0930361 1.5697771 0.7212489
## 30 0.86267027 2.0223254 1.0889444 0.52325902 1.3293607 0.8061017 1.4849242
##            22         23         24         25         26         27         28
## 1  1.34350288 1.93747258 1.88090404 1.69705627 1.58391919 1.06066017 2.80014285
## 2  0.65053824 0.05656854 0.11313708 0.29698485 0.41012193 0.93338095 0.80610173
## 3  0.08485281 0.50911688 0.45254834 0.26870058 0.15556349 0.36769553 1.37178716
## 4  1.40007143 0.80610173 0.86267027 1.04651804 1.15965512 1.68291414 0.05656854
## 5  0.87681241 1.47078210 1.41421356 1.23036580 1.11722871 0.59396970 2.33345238
## 6  0.72124892 1.31521861 1.25865007 1.07480231 0.96166522 0.43840620 2.17788889
## 7  0.07071068 0.52325902 0.46669048 0.28284271 0.16970563 0.35355339 1.38592929
## 8  0.83438600 1.42835570 1.37178716 1.18793939 1.07480231 0.55154329 2.29102597
## 9  0.29698485 0.29698485 0.24041631 0.05656854 0.05656854 0.57982756 1.15965512
## 10 0.02828427 0.56568542 0.50911688 0.32526912 0.21213203 0.31112698 1.42835570
## 11 0.43840620 1.03237590 0.97580736 0.79195959 0.67882251 0.15556349 1.89504617
## 12 1.34350288 1.93747258 1.88090404 1.69705627 1.58391919 1.06066017 2.80014285
## 13 1.00409163 1.59806133 1.54149278 1.35764502 1.24450793 0.72124892 2.46073160
## 14 2.16374675 2.75771645 2.70114790 2.51730014 2.40416306 1.88090404 3.62038672
## 15 1.06066017 0.46669048 0.52325902 0.70710678 0.82024387 1.34350288 0.39597980
## 16 1.82433550 2.41830519 2.36173665 2.17788889 2.06475180 1.54149278 3.28097546
## 17 1.28693434 0.69296465 0.74953319 0.93338095 1.04651804 1.56977705 0.16970563
## 18 0.32526912 0.91923882 0.86267027 0.67882251 0.56568542 0.04242641 1.78190909
## 19 1.13137085 1.72534055 1.66877200 1.48492424 1.37178716 0.84852814 2.58801082
## 20 0.60811183 1.20208153 1.14551299 0.96166522 0.84852814 0.32526912 2.06475180
## 21 1.68291414 1.08894444 1.14551299 1.32936075 1.44249783 1.96575685 0.22627417
## 22 0.00000000 0.59396970 0.53740115 0.35355339 0.24041631 0.28284271 1.45663997
## 23 0.59396970 0.00000000 0.05656854 0.24041631 0.35355339 0.87681241 0.86267027
## 24 0.53740115 0.05656854 0.00000000 0.18384776 0.29698485 0.82024387 0.91923882
## 25 0.35355339 0.24041631 0.18384776 0.00000000 0.11313708 0.63639610 1.10308658
## 26 0.24041631 0.35355339 0.29698485 0.11313708 0.00000000 0.52325902 1.21622366
## 27 0.28284271 0.87681241 0.82024387 0.63639610 0.52325902 0.00000000 1.73948268
## 28 1.45663997 0.86267027 0.91923882 1.10308658 1.21622366 1.73948268 0.00000000
## 29 0.96166522 0.36769553 0.42426407 0.60811183 0.72124892 1.24450793 0.49497475
## 30 0.19798990 0.39597980 0.33941125 0.15556349 0.04242641 0.48083261 1.25865007
##            29         30
## 1  2.30516811 1.54149278
## 2  0.31112698 0.45254834
## 3  0.87681241 0.11313708
## 4  0.43840620 1.20208153
## 5  1.83847763 1.07480231
## 6  1.68291414 0.91923882
## 7  0.89095454 0.12727922
## 8  1.79605122 1.03237590
## 9  0.66468037 0.09899495
## 10 0.93338095 0.16970563
## 11 1.40007143 0.63639610
## 12 2.30516811 1.54149278
## 13 1.96575685 1.20208153
## 14 3.12541197 2.36173665
## 15 0.09899495 0.86267027
## 16 2.78600072 2.02232539
## 17 0.32526912 1.08894444
## 18 1.28693434 0.52325902
## 19 2.09303607 1.32936075
## 20 1.56977705 0.80610173
## 21 0.72124892 1.48492424
## 22 0.96166522 0.19798990
## 23 0.36769553 0.39597980
## 24 0.42426407 0.33941125
## 25 0.60811183 0.15556349
## 26 0.72124892 0.04242641
## 27 1.24450793 0.48083261
## 28 0.49497475 1.25865007
## 29 0.00000000 0.76367532
## 30 0.76367532 0.00000000
# Canberra distance, sum(|x - y| / |x + y|), between every pair of rows of
# PlantGrowth.
# NOTE(review): the warning recorded below shows that dist() coerced a
# non-numeric column to NA -- presumably the `group` factor; confirm with
# str(PlantGrowth). Per ?dist, columns containing NA are left out and the sum
# is scaled up in proportion to the number of columns used, so the printed
# values are driven by the numeric column(s) only. Restricting the input to
# the numeric column(s) would silence the warning but change the numbers
# shown in the output that follows.
plants_can <- dist(PlantGrowth, method = "canberra")
## Warning in dist(PlantGrowth, method = "canberra"): NAs introduced by coercion
# Expand the compact "dist" object into a full symmetric matrix for display.
as.matrix(plants_can)
##             1           2           3           4           5          6
## 1  0.00000000 0.289230769 0.216042781 0.377431907 0.076124567 0.10022779
## 2  0.28923077 0.000000000 0.074349442 0.090675791 0.214285714 0.19038273
## 3  0.21604278 0.074349442 0.000000000 0.164747564 0.140495868 0.11644535
## 4  0.37743191 0.090675791 0.164747564 0.000000000 0.303487276 0.27985075
## 5  0.07612457 0.214285714 0.140495868 0.303487276 0.000000000 0.02414929
## 6  0.10022779 0.190382728 0.116445352 0.279850746 0.024149286 0.00000000
## 7  0.21413276 0.076279070 0.001932367 0.166666667 0.138572906 0.11451943
## 8  0.08275862 0.207715134 0.133882595 0.296992481 0.006644518 0.01750547
## 9  0.24421053 0.045829514 0.028544244 0.136363636 0.168870804 0.14486922
## 10 0.20837809 0.082089552 0.007751938 0.172444444 0.132780083 0.10871795
## 11 0.14253898 0.148219442 0.074074074 0.238095238 0.066595059 0.04246285
## 12 0.00000000 0.289230769 0.216042781 0.377431907 0.076124567 0.10022779
## 13 0.05594406 0.234234234 0.160583942 0.323193916 0.020202020 0.04434590
## 14 0.14948454 0.434023991 0.362599772 0.519587629 0.224969098 0.24878049
## 15 0.33864542 0.050655022 0.124886878 0.040066778 0.264223722 0.24045802
## 16 0.08500000 0.371944740 0.299667037 0.458752515 0.160864346 0.18483412
## 17 0.36470588 0.077519380 0.151650312 0.013179572 0.290598291 0.26691729
## 18 0.15894040 0.131805158 0.057596822 0.221818182 0.083067093 0.05894737
## 19 0.03533569 0.254545455 0.181052632 0.343240652 0.040816327 0.06494961
## 20 0.11738149 0.173320351 0.099290780 0.262962963 0.041349293 0.01720430
## 21 0.40839695 0.122792262 0.196692776 0.032206119 0.334875116 0.31135531
## 22 0.20452099 0.085981308 0.011650485 0.176313446 0.128898129 0.10483042
## 23 0.28218332 0.007194245 0.067164179 0.097854077 0.207171315 0.18325123
## 24 0.27507756 0.014440433 0.059925094 0.105081826 0.200000000 0.17606330
## 25 0.25157233 0.038356164 0.036018957 0.128919861 0.176291793 0.15230461
## 26 0.23678647 0.053357866 0.021012416 0.143859649 0.161389173 0.13737374
## 27 0.16501650 0.125714286 0.051485149 0.215775159 0.089171975 0.06505771
## 28 0.38372093 0.097186701 0.171226831 0.006525285 0.309859155 0.28624535
## 29 0.32698094 0.038664323 0.112932605 0.052057095 0.252427184 0.22862632
## 30 0.23117709 0.059040590 0.015325670 0.149516271 0.155737705 0.13171226
##              7           8           9          10         11         12
## 1  0.214132762 0.082758621 0.244210526 0.208378088 0.14253898 0.00000000
## 2  0.076279070 0.207715134 0.045829514 0.082089552 0.14821944 0.28923077
## 3  0.001932367 0.133882595 0.028544244 0.007751938 0.07407407 0.21604278
## 4  0.166666667 0.296992481 0.136363636 0.172444444 0.23809524 0.37743191
## 5  0.138572906 0.006644518 0.168870804 0.132780083 0.06659506 0.07612457
## 6  0.114519427 0.017505470 0.144869215 0.108717949 0.04246285 0.10022779
## 7  0.000000000 0.131958763 0.030476190 0.005819593 0.07214429 0.21413276
## 8  0.131958763 0.000000000 0.162271805 0.126163392 0.05995717 0.08275862
## 9  0.030476190 0.162271805 0.000000000 0.036294174 0.10256410 0.24421053
## 10 0.005819593 0.126163392 0.036294174 0.000000000 0.06633166 0.20837809
## 11 0.072144289 0.059957173 0.102564103 0.066331658 0.00000000 0.14253898
## 12 0.214132762 0.082758621 0.244210526 0.208378088 0.14253898 0.00000000
## 13 0.158663883 0.026845638 0.188911704 0.152879581 0.08676790 0.05594406
## 14 0.360730594 0.231527094 0.390134529 0.355097365 0.29047619 0.14948454
## 15 0.126811594 0.257692308 0.096428571 0.132606721 0.19850187 0.33864542
## 16 0.297777778 0.167464115 0.327510917 0.292084727 0.22685185 0.08500000
## 17 0.153571429 0.284090909 0.123239437 0.159355416 0.22509225 0.36470588
## 18 0.055666004 0.076433121 0.086105675 0.049850449 0.01649485 0.15894040
## 19 0.179135933 0.047457627 0.209326425 0.173361522 0.10733844 0.03533569
## 20 0.097363083 0.034707158 0.127744511 0.091556460 0.02526316 0.11738149
## 21 0.198606272 0.328413284 0.168384880 0.204366812 0.26978417 0.40839695
## 22 0.009718173 0.122279793 0.040191388 0.003898635 0.06243706 0.20452099
## 23 0.069094304 0.200595829 0.038638454 0.074906367 0.14106280 0.28218332
## 24 0.061855670 0.193419741 0.031394275 0.067669173 0.13385063 0.27507756
## 25 0.037950664 0.169696970 0.007476636 0.043767840 0.11001965 0.25157233
## 26 0.022944551 0.154786151 0.007532957 0.028763183 0.09504950 0.23678647
## 27 0.049554014 0.082539683 0.080000000 0.043737575 0.02261048 0.16501650
## 28 0.173144876 0.303370787 0.142857143 0.178919398 0.24452555 0.38372093
## 29 0.114858706 0.245885770 0.084456424 0.120658135 0.18661640 0.32698094
## 30 0.017257910 0.149131767 0.013220019 0.023076923 0.08937438 0.23117709
##            13         14         15         16         17          18
## 1  0.05594406 0.14948454 0.33864542 0.08500000 0.36470588 0.158940397
## 2  0.23423423 0.43402399 0.05065502 0.37194474 0.07751938 0.131805158
## 3  0.16058394 0.36259977 0.12488688 0.29966704 0.15165031 0.057596822
## 4  0.32319392 0.51958763 0.04006678 0.45875252 0.01317957 0.221818182
## 5  0.02020202 0.22496910 0.26422372 0.16086435 0.29059829 0.083067093
## 6  0.04434590 0.24878049 0.24045802 0.18483412 0.26691729 0.058947368
## 7  0.15866388 0.36073059 0.12681159 0.29777778 0.15357143 0.055666004
## 8  0.02684564 0.23152709 0.25769231 0.16746411 0.28409091 0.076433121
## 9  0.18891170 0.39013453 0.09642857 0.32751092 0.12323944 0.086105675
## 10 0.15287958 0.35509737 0.13260672 0.29208473 0.15935542 0.049850449
## 11 0.08676790 0.29047619 0.19850187 0.22685185 0.22509225 0.016494845
## 12 0.05594406 0.14948454 0.33864542 0.08500000 0.36470588 0.158940397
## 13 0.00000000 0.20500000 0.28404669 0.14077670 0.31034483 0.103225806
## 14 0.20500000 0.00000000 0.48202960 0.06469003 0.50727651 0.306603774
## 15 0.28404669 0.48202960 0.00000000 0.42061856 0.02689076 0.182156134
## 16 0.14077670 0.06469003 0.42061856 0.00000000 0.44624746 0.243119266
## 17 0.31034483 0.50727651 0.02689076 0.44624746 0.00000000 0.208791209
## 18 0.10322581 0.30660377 0.18215613 0.24311927 0.20879121 0.000000000
## 19 0.02061856 0.18457649 0.30421982 0.12024540 0.33043478 0.123778502
## 20 0.06153846 0.26570048 0.22348485 0.20187793 0.25000000 0.041753653
## 21 0.35447761 0.54949495 0.07224959 0.48915187 0.04538088 0.253571429
## 22 0.14900315 0.35132032 0.13648772 0.28826816 0.16322870 0.045954046
## 23 0.22713568 0.42716320 0.05784400 0.36499466 0.08470182 0.124640460
## 24 0.21997982 0.42024202 0.06508355 0.35798499 0.09193408 0.117420597
## 25 0.19631902 0.39732143 0.08896797 0.33478261 0.11578947 0.093567251
## 26 0.18144330 0.38288288 0.10394265 0.32017544 0.13074205 0.078585462
## 27 0.10932476 0.31257344 0.17608897 0.24914286 0.20273973 0.006116208
## 28 0.32954545 0.52566735 0.04658902 0.46492986 0.01970443 0.228260870
## 29 0.27228208 0.47071353 0.01199657 0.40913811 0.03888419 0.170252572
## 30 0.17580145 0.37740113 0.10961366 0.31463146 0.13640390 0.072906404
##            19         20         21          22          23          24
## 1  0.03533569 0.11738149 0.40839695 0.204520990 0.282183316 0.275077559
## 2  0.25454545 0.17332035 0.12279226 0.085981308 0.007194245 0.014440433
## 3  0.18105263 0.09929078 0.19669278 0.011650485 0.067164179 0.059925094
## 4  0.34324065 0.26296296 0.03220612 0.176313446 0.097854077 0.105081826
## 5  0.04081633 0.04134929 0.33487512 0.128898129 0.207171315 0.200000000
## 6  0.06494961 0.01720430 0.31135531 0.104830421 0.183251232 0.176063304
## 7  0.17913593 0.09736308 0.19860627 0.009718173 0.069094304 0.061855670
## 8  0.04745763 0.03470716 0.32841328 0.122279793 0.200595829 0.193419741
## 9  0.20932642 0.12774451 0.16838488 0.040191388 0.038638454 0.031394275
## 10 0.17336152 0.09155646 0.20436681 0.003898635 0.074906367 0.067669173
## 11 0.10733844 0.02526316 0.26978417 0.062437059 0.141062802 0.133850630
## 12 0.03533569 0.11738149 0.40839695 0.204520990 0.282183316 0.275077559
## 13 0.02061856 0.06153846 0.35447761 0.149003148 0.227135678 0.219979818
## 14 0.18457649 0.26570048 0.54949495 0.351320321 0.427163198 0.420242024
## 15 0.30421982 0.22348485 0.07224959 0.136487716 0.057843996 0.065083553
## 16 0.12024540 0.20187793 0.48915187 0.288268156 0.364994664 0.357984995
## 17 0.33043478 0.25000000 0.04538088 0.163228700 0.084701815 0.091934085
## 18 0.12377850 0.04175365 0.25357143 0.045954046 0.124640460 0.117420597
## 19 0.00000000 0.08213097 0.37441204 0.169491525 0.247464503 0.240325866
## 20 0.08213097 0.00000000 0.29454545 0.087665647 0.166177908 0.158979392
## 21 0.37441204 0.29454545 0.00000000 0.208223972 0.129957806 0.137171888
## 22 0.16949153 0.08766565 0.20822397 0.000000000 0.078799250 0.071563089
## 23 0.24746450 0.16617791 0.12995781 0.078799250 0.000000000 0.007246377
## 24 0.24032587 0.15897939 0.13717189 0.071563089 0.007246377 0.000000000
## 25 0.21671827 0.13518887 0.16095890 0.047664442 0.031164070 0.023919043
## 26 0.20187305 0.12024048 0.17586207 0.032660903 0.046168052 0.038924930
## 27 0.12987013 0.04786681 0.24755120 0.039840637 0.118546845 0.111324376
## 28 0.34957020 0.26937269 0.02568218 0.182786158 0.104362703 0.111587983
## 29 0.29249012 0.21163012 0.08422791 0.124542125 0.045855379 0.053097345
## 30 0.19624217 0.11457286 0.18150389 0.026974952 0.051851852 0.044609665
##             25          26          27          28         29          30
## 1  0.251572327 0.236786469 0.165016502 0.383720930 0.32698094 0.231177094
## 2  0.038356164 0.053357866 0.125714286 0.097186701 0.03866432 0.059040590
## 3  0.036018957 0.021012416 0.051485149 0.171226831 0.11293260 0.015325670
## 4  0.128919861 0.143859649 0.215775159 0.006525285 0.05205709 0.149516271
## 5  0.176291793 0.161389173 0.089171975 0.309859155 0.25242718 0.155737705
## 6  0.152304609 0.137373737 0.065057712 0.286245353 0.22862632 0.131712259
## 7  0.037950664 0.022944551 0.049554014 0.173144876 0.11485871 0.017257910
## 8  0.169696970 0.154786151 0.082539683 0.303370787 0.24588577 0.149131767
## 9  0.007476636 0.007532957 0.080000000 0.142857143 0.08445642 0.013220019
## 10 0.043767840 0.028763183 0.043737575 0.178919398 0.12065814 0.023076923
## 11 0.110019646 0.095049505 0.022610483 0.244525547 0.18661640 0.089374379
## 12 0.251572327 0.236786469 0.165016502 0.383720930 0.32698094 0.231177094
## 13 0.196319018 0.181443299 0.109324759 0.329545455 0.27228208 0.175801448
## 14 0.397321429 0.382882883 0.312573443 0.525667351 0.47071353 0.377401130
## 15 0.088967972 0.103942652 0.176088971 0.046589018 0.01199657 0.109613657
## 16 0.334782609 0.320175439 0.249142857 0.464929860 0.40913811 0.314631463
## 17 0.115789474 0.130742049 0.202739726 0.019704433 0.03888419 0.136403897
## 18 0.093567251 0.078585462 0.006116208 0.228260870 0.17025257 0.072906404
## 19 0.216718266 0.201873049 0.129870130 0.349570201 0.29249012 0.196242171
## 20 0.135188867 0.120240481 0.047866805 0.269372694 0.21163012 0.114572864
## 21 0.160958904 0.175862069 0.247551202 0.025682183 0.08422791 0.181503889
## 22 0.047664442 0.032660903 0.039840637 0.182786158 0.12454212 0.026974952
## 23 0.031164070 0.046168052 0.118546845 0.104362703 0.04585538 0.051851852
## 24 0.023919043 0.038924930 0.111324376 0.111587983 0.05309735 0.044609665
## 25 0.000000000 0.015009381 0.087463557 0.135416667 0.07699194 0.020696143
## 26 0.015009381 0.000000000 0.072477963 0.150349650 0.09197475 0.005687204
## 27 0.087463557 0.072477963 0.000000000 0.222222222 0.16417910 0.066797642
## 28 0.135416667 0.150349650 0.222222222 0.000000000 0.05857741 0.156003506
## 29 0.076991943 0.091974752 0.164179104 0.058577406 0.00000000 0.097649186
## 30 0.020696143 0.005687204 0.066797642 0.156003506 0.09764919 0.000000000

Ex. 2

Now use a higher-dimensional data set, mtcars: try the same three distance metrics as in the previous question and discuss the results.

Solution

Now we will use the mtcars dataset.

# Preview the first six rows (cars) of mtcars with all of their variables.
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

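We will take the transpose of the mtcars dataset and apply the same three distance methods. Because dist() compares rows, transposing means the distances are computed between the variables rather than between the individual cars.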

# Preview the transposed table: one row per variable, one column per car.
# Only the first six variables are shown.
head(t(mtcars), n = 6L)
##      Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout
## mpg      21.00        21.000      22.80         21.400             18.70
## cyl       6.00         6.000       4.00          6.000              8.00
## disp    160.00       160.000     108.00        258.000            360.00
## hp      110.00       110.000      93.00        110.000            175.00
## drat      3.90         3.900       3.85          3.080              3.15
## wt        2.62         2.875       2.32          3.215              3.44
##      Valiant Duster 360 Merc 240D Merc 230 Merc 280 Merc 280C Merc 450SE
## mpg    18.10      14.30     24.40    22.80    19.20     17.80      16.40
## cyl     6.00       8.00      4.00     4.00     6.00      6.00       8.00
## disp  225.00     360.00    146.70   140.80   167.60    167.60     275.80
## hp    105.00     245.00     62.00    95.00   123.00    123.00     180.00
## drat    2.76       3.21      3.69     3.92     3.92      3.92       3.07
## wt      3.46       3.57      3.19     3.15     3.44      3.44       4.07
##      Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## mpg       17.30       15.20              10.40              10.400
## cyl        8.00        8.00               8.00               8.000
## disp     275.80      275.80             472.00             460.000
## hp       180.00      180.00             205.00             215.000
## drat       3.07        3.07               2.93               3.000
## wt         3.73        3.78               5.25               5.424
##      Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla Toyota Corona
## mpg             14.700    32.40      30.400         33.900        21.500
## cyl              8.000     4.00       4.000          4.000         4.000
## disp           440.000    78.70      75.700         71.100       120.100
## hp             230.000    66.00      52.000         65.000        97.000
## drat             3.230     4.08       4.930          4.220         3.700
## wt               5.345     2.20       1.615          1.835         2.465
##      Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird Fiat X1-9
## mpg             15.50      15.200      13.30           19.200    27.300
## cyl              8.00       8.000       8.00            8.000     4.000
## disp           318.00     304.000     350.00          400.000    79.000
## hp             150.00     150.000     245.00          175.000    66.000
## drat             2.76       3.150       3.73            3.080     4.080
## wt               3.52       3.435       3.84            3.845     1.935
##      Porsche 914-2 Lotus Europa Ford Pantera L Ferrari Dino Maserati Bora
## mpg          26.00       30.400          15.80        19.70         15.00
## cyl           4.00        4.000           8.00         6.00          8.00
## disp        120.30       95.100         351.00       145.00        301.00
## hp           91.00      113.000         264.00       175.00        335.00
## drat          4.43        3.770           4.22         3.62          3.54
## wt            2.14        1.513           3.17         2.77          3.57
##      Volvo 142E
## mpg       21.40
## cyl        4.00
## disp     121.00
## hp       109.00
## drat       4.11
## wt         2.78
# Manhattan (L1) distance between every pair of mtcars variables.
# dist() compares rows, so the data frame is transposed first.
mtcars_man <- dist(x = t(mtcars), method = "manhattan")
as.matrix(x = mtcars_man)
##           mpg      cyl     disp       hp     drat       wt     qsec       vs
## mpg     0.000  444.900 6740.200 4051.100  527.810  539.948  136.260  628.900
## cyl   444.900    0.000 7185.100 4496.000   86.610   95.048  373.160  184.000
## disp 6740.200 7185.100    0.000 2852.900 7268.010 7280.148 6811.940 7369.100
## hp   4051.100 4496.000 2852.900    0.000 4578.910 4591.048 4122.840 4680.000
## drat  527.810   86.610 7268.010 4578.910    0.000   37.466  456.070  101.090
## wt    539.948   95.048 7280.148 4591.048   37.466    0.000  468.208   88.952
## qsec  136.260  373.160 6811.940 4122.840  456.070  468.208    0.000  557.160
## vs    628.900  184.000 7369.100 4680.000  101.090   88.952  557.160    0.000
## am    629.900  185.000 7370.100 4681.000  102.090   89.952  558.160   13.000
## gear  524.900   84.000 7265.100 4576.000   10.830   42.896  453.160  104.000
## carb  552.900  108.000 7293.100 4604.000   47.290   40.106  481.160   76.000
##            am     gear     carb
## mpg   629.900  524.900  552.900
## cyl   185.000   84.000  108.000
## disp 7370.100 7265.100 7293.100
## hp   4681.000 4576.000 4604.000
## drat  102.090   10.830   47.290
## wt     89.952   42.896   40.106
## qsec  558.160  453.160  481.160
## vs     13.000  104.000   76.000
## am      0.000  105.000   77.000
## gear  105.000    0.000   46.000
## carb   77.000   46.000    0.000
# Euclidean (L2) distance between every pair of mtcars variables.
# dist() compares rows, so the data frame is transposed first.
# The method is spelled "euclidean", as documented for dist(), instead of
# relying on dist() tolerating the "euclidian" misspelling.
mtcars_euc <- dist(t(mtcars), method = "euclidean")
as.matrix(mtcars_euc)
##             mpg        cyl      disp       hp        drat          wt
## mpg     0.00000   89.32586 1391.4955 824.3755   98.511658  102.877138
## cyl    89.32586    0.00000 1441.2518 878.1765   19.078540   18.058047
## disp 1391.49546 1441.25177    0.0000 656.6404 1459.404217 1458.014195
## hp    824.37547  878.17652  656.6404   0.0000  895.520090  895.374454
## drat   98.51166   19.07854 1459.4042 895.5201    0.000000    8.139647
## wt    102.87714   18.05805 1458.0142 895.3745    8.139647    0.000000
## qsec   33.26109   68.31076 1390.0784 826.0673   81.255418   83.655198
## vs    115.62314   34.78505 1475.1043 911.9945   18.130932   17.371962
## am    115.84951   34.71311 1475.0962 911.5882   18.179403   17.641289
## gear   98.08420   18.86796 1459.0335 894.7100    2.981728    8.929562
## carb  105.32099   21.21320 1460.6606 896.1362   10.689747    8.596341
##            qsec          vs          am        gear        carb
## mpg    33.26109  115.623138  115.849514   98.084199  105.320986
## cyl    68.31076   34.785054   34.713110   18.867962   21.213203
## disp 1390.07839 1475.104291 1475.096156 1459.033540 1460.660559
## hp    826.06729  911.994518  911.588175  894.710009  896.136150
## drat   81.25542   18.130932   18.179403    2.981728   10.689747
## wt     83.65520   17.371962   17.641289    8.929562    8.596341
## qsec    0.00000   98.823784   99.272958   80.935531   86.787904
## vs     98.82378    0.000000    3.605551   18.920888   17.262677
## am     99.27296    3.605551    0.000000   18.734994   16.462078
## gear   80.93553   18.920888   18.734994    0.000000   10.099505
## carb   86.78790   17.262677   16.462078   10.099505    0.000000
# Canberra distance between every pair of mtcars variables.
# dist() compares rows, so the data frame is transposed first.
mtcars_can <- dist(x = t(mtcars), method = "canberra")
as.matrix(x = mtcars_can)
##            mpg       cyl      disp        hp      drat        wt      qsec
## mpg   0.000000 15.985928 24.728626 22.460192 21.870636 22.286293  3.379737
## cyl  15.985928  0.000000 30.047021 29.175986  8.103136 10.061722 15.597100
## disp 24.728626 30.047021  0.000000  6.700758 30.578813 30.968494 25.892404
## hp   22.460192 29.175986  6.700758  0.000000 30.051098 30.475757 23.669036
## drat 21.870636  8.103136 30.578813 30.051098  0.000000  5.591582 21.233229
## wt   22.286293 10.061722 30.968494 30.475757  5.591582  0.000000 22.241270
## qsec  3.379737 15.597100 25.892404 23.669036 21.233229 22.241270  0.000000
## vs   30.860381 26.857143 31.756080 31.673126 26.175546 23.935629 30.617539
## am   30.914876 27.498413 31.773202 31.732619 26.827919 24.142597 30.570900
## gear 21.684967  7.838739 30.553348 30.036581  1.384173  6.216516 21.006830
## carb 23.437221 12.992208 31.105555 30.761211  8.151473  7.171846 23.351412
##            vs       am      gear      carb
## mpg  30.86038 30.91488 21.684967 23.437221
## cyl  26.85714 27.49841  7.838739 12.992208
## disp 31.75608 31.77320 30.553348 31.105555
## hp   31.67313 31.73262 30.036581 30.761211
## drat 26.17555 26.82792  1.384173  8.151473
## wt   23.93563 24.14260  6.216516  7.171846
## qsec 30.61754 30.57090 21.006830 23.351412
## vs    0.00000 20.80000 26.166667 20.866667
## am   20.80000  0.00000 27.133333 23.625397
## gear 26.16667 27.13333  0.000000  8.037551
## carb 20.86667 23.62540  8.037551  0.000000

Ex. 3

Use the built-in data set mtcars to carry out hierarchical clustering using two different distance metrics and compare whether they give the same results. Discuss the results.

Solution

# Minimum, quartiles, mean and maximum of every mtcars variable
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Hierarchical clustering of the cars on Euclidean distances.
# The method is spelled "euclidean", as documented for dist(); the misspelled
# "euclidian" only worked through dist()'s spelling tolerance and also ended
# up in the dendrogram's axis label.
euc_clus <- hclust(dist(mtcars, method = "euclidean"))
plot(euc_clus)

# Cut the dendrogram into three groups and count the cars in each
ct1 <- cutree(euc_clus, k = 3)
table(ct1)
## ct1
##  1  2  3 
## 16  7  9
# Hierarchical clustering of the cars on Manhattan distances
man_clus <- hclust(d = dist(mtcars, method = "manhattan"))
plot(x = man_clus)

# Cut the dendrogram into three groups and count the cars in each
ct2 <- cutree(tree = man_clus, k = 3)
table(ct2)
## ct2
##  1  2  3 
## 18 10  4

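Both methods place the largest number of cars in the first cluster (16 with Euclidean distance, 18 with Manhattan distance), but the remaining two clusters are more evenly sized with Euclidean distance (7 and 9) than with Manhattan distance (10 and 4). The two distance metrics therefore do not produce identical clusterings.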

Ex. 4

Load the well-known Fisher’s iris flower data set that consists of 150 samples for 3 species (50 samples each species). The four measures or features are the lengths and widths of sepals and petals. Use the kNN clustering to analyze this iris data set by selecting 120 samples for training and 30 samples for testing.

Solution

# Preview the first six observations of the iris data set
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Fix the RNG state so the train/test partition is reproducible
set.seed(609)

# Row indices of the 80% training sample
split <- sample(nrow(iris), size = 0.80 * nrow(iris))

# Predictors: the four measurements (Species is column 5)
train <- iris[split, 1:4]   # 120 rows
test <- iris[-split, 1:4]   # 30 rows

# Class labels matching the two predictor sets
train_trgt <- iris$Species[split]    # 120 labels
test_trgt <- iris$Species[-split]    # 30 labels

# Classify every test flower from its 5 nearest training neighbours
knn <- knn(train = train, test = test, cl = train_trgt, k = 5)

# Predicted species (rows) against actual species (columns)
knn_tbl <- table(knn, test_trgt)

# Accuracy: share of test flowers on the diagonal, i.e. classified correctly
acc_knn <- sum(diag(knn_tbl)) / sum(knn_tbl)
acc_knn
## [1] 0.9666667

Ex. 5

Use the iris data set to carry out k-means clustering. Compare the results to the actual classes and estimate the clustering accuracy.

Solution

# Fix the RNG state so the k-means result is reproducible
set.seed(609)

# Partition the four measurements (Species, column 5, is left out) into
# three clusters.
# NOTE(review): only one random start is used here; the printed
# between_SS / total_SS for this run is 79.0 %. Raising nstart may give a
# better partition - confirm before changing, since the output would differ.
iris_kmeans <- kmeans(x = iris[, 1:4], centers = 3)
iris_kmeans
## K-means clustering with 3 clusters of sizes 33, 21, 96
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.175758    3.624242     1.472727   0.2727273
## 2     4.738095    2.904762     1.790476   0.3523810
## 3     6.314583    2.895833     4.973958   1.7031250
## 
## Clustering vector:
##   [1] 1 2 2 2 1 1 1 1 2 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 2 2 1 1 1 2 1 1
##  [38] 1 2 1 1 2 2 1 1 2 1 2 1 1 3 3 3 3 3 3 3 2 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3
## [112] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [149] 3 3
## 
## Within cluster sum of squares by cluster:
## [1]   6.432121  17.669524 118.651875
##  (between_SS / total_SS =  79.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Cluster sizes: extract the per-observation cluster labels and count them
iris_clus <- iris_kmeans[["cluster"]]
table(iris_clus)
## iris_clus
##  1  2  3 
## 33 21 96
# Cluster plot of the four iris measurements, grouped by the k-means labels
# in iris_clus (clusplot presumably comes from the cluster package loaded at
# the top of the file).
clusplot(iris[,-5], iris_clus)

# kmeans accuracy
# kmeans() numbers its clusters arbitrarily, so cluster i does not have to
# correspond to the i-th species level, and the raw diagonal of the
# contingency table is only meaningful by coincidence. Score each of the six
# one-to-one assignments of clusters to species and keep the best one.
tbl_kmeans <- table(iris$Species, iris_clus)
stopifnot(identical(dim(tbl_kmeans), c(3L, 3L)))
label_perms <- rbind(
  c(1, 2, 3), c(1, 3, 2), c(2, 1, 3),
  c(2, 3, 1), c(3, 1, 2), c(3, 2, 1)
)
# For a permutation p, species i is paired with cluster p[i]
matched <- apply(label_perms, 1, function(p) sum(tbl_kmeans[cbind(1:3, p)]))
acc_kmeans <- max(matched) / sum(tbl_kmeans)
acc_kmeans
## [1] 0.58