Reading Multivariate Analysis Data into R

wine <- read.csv("C:/4th year 2ND sem/Multivariate Data Analysis/wine.data", header=FALSE, stringsAsFactors=TRUE)
View(wine)

Plotting Multivariate Data

wine <- read.table("C:/4th year 2ND sem/Multivariate Data Analysis/wine.data", sep = ",")
wine
##     V1    V2   V3   V4   V5  V6   V7   V8   V9  V10       V11   V12  V13  V14
## 1    1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29  5.640000 1.040 3.92 1065
## 2    1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28  4.380000 1.050 3.40 1050
## 3    1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81  5.680000 1.030 3.17 1185
## 4    1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18  7.800000 0.860 3.45 1480
## 5    1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82  4.320000 1.040 2.93  735
## 6    1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97  6.750000 1.050 2.85 1450
## 7    1 14.39 1.87 2.45 14.6  96 2.50 2.52 0.30 1.98  5.250000 1.020 3.58 1290
## 8    1 14.06 2.15 2.61 17.6 121 2.60 2.51 0.31 1.25  5.050000 1.060 3.58 1295
## 9    1 14.83 1.64 2.17 14.0  97 2.80 2.98 0.29 1.98  5.200000 1.080 2.85 1045
## 10   1 13.86 1.35 2.27 16.0  98 2.98 3.15 0.22 1.85  7.220000 1.010 3.55 1045
## 11   1 14.10 2.16 2.30 18.0 105 2.95 3.32 0.22 2.38  5.750000 1.250 3.17 1510
## 12   1 14.12 1.48 2.32 16.8  95 2.20 2.43 0.26 1.57  5.000000 1.170 2.82 1280
## 13   1 13.75 1.73 2.41 16.0  89 2.60 2.76 0.29 1.81  5.600000 1.150 2.90 1320
## 14   1 14.75 1.73 2.39 11.4  91 3.10 3.69 0.43 2.81  5.400000 1.250 2.73 1150
## 15   1 14.38 1.87 2.38 12.0 102 3.30 3.64 0.29 2.96  7.500000 1.200 3.00 1547
## 16   1 13.63 1.81 2.70 17.2 112 2.85 2.91 0.30 1.46  7.300000 1.280 2.88 1310
## 17   1 14.30 1.92 2.72 20.0 120 2.80 3.14 0.33 1.97  6.200000 1.070 2.65 1280
## 18   1 13.83 1.57 2.62 20.0 115 2.95 3.40 0.40 1.72  6.600000 1.130 2.57 1130
## 19   1 14.19 1.59 2.48 16.5 108 3.30 3.93 0.32 1.86  8.700000 1.230 2.82 1680
## 20   1 13.64 3.10 2.56 15.2 116 2.70 3.03 0.17 1.66  5.100000 0.960 3.36  845
## 21   1 14.06 1.63 2.28 16.0 126 3.00 3.17 0.24 2.10  5.650000 1.090 3.71  780
## 22   1 12.93 3.80 2.65 18.6 102 2.41 2.41 0.25 1.98  4.500000 1.030 3.52  770
## 23   1 13.71 1.86 2.36 16.6 101 2.61 2.88 0.27 1.69  3.800000 1.110 4.00 1035
## 24   1 12.85 1.60 2.52 17.8  95 2.48 2.37 0.26 1.46  3.930000 1.090 3.63 1015
## 25   1 13.50 1.81 2.61 20.0  96 2.53 2.61 0.28 1.66  3.520000 1.120 3.82  845
## 26   1 13.05 2.05 3.22 25.0 124 2.63 2.68 0.47 1.92  3.580000 1.130 3.20  830
## 27   1 13.39 1.77 2.62 16.1  93 2.85 2.94 0.34 1.45  4.800000 0.920 3.22 1195
## 28   1 13.30 1.72 2.14 17.0  94 2.40 2.19 0.27 1.35  3.950000 1.020 2.77 1285
## 29   1 13.87 1.90 2.80 19.4 107 2.95 2.97 0.37 1.76  4.500000 1.250 3.40  915
## 30   1 14.02 1.68 2.21 16.0  96 2.65 2.33 0.26 1.98  4.700000 1.040 3.59 1035
## 31   1 13.73 1.50 2.70 22.5 101 3.00 3.25 0.29 2.38  5.700000 1.190 2.71 1285
## 32   1 13.58 1.66 2.36 19.1 106 2.86 3.19 0.22 1.95  6.900000 1.090 2.88 1515
## 33   1 13.68 1.83 2.36 17.2 104 2.42 2.69 0.42 1.97  3.840000 1.230 2.87  990
## 34   1 13.76 1.53 2.70 19.5 132 2.95 2.74 0.50 1.35  5.400000 1.250 3.00 1235
## 35   1 13.51 1.80 2.65 19.0 110 2.35 2.53 0.29 1.54  4.200000 1.100 2.87 1095
## 36   1 13.48 1.81 2.41 20.5 100 2.70 2.98 0.26 1.86  5.100000 1.040 3.47  920
## 37   1 13.28 1.64 2.84 15.5 110 2.60 2.68 0.34 1.36  4.600000 1.090 2.78  880
## 38   1 13.05 1.65 2.55 18.0  98 2.45 2.43 0.29 1.44  4.250000 1.120 2.51 1105
## 39   1 13.07 1.50 2.10 15.5  98 2.40 2.64 0.28 1.37  3.700000 1.180 2.69 1020
## 40   1 14.22 3.99 2.51 13.2 128 3.00 3.04 0.20 2.08  5.100000 0.890 3.53  760
## 41   1 13.56 1.71 2.31 16.2 117 3.15 3.29 0.34 2.34  6.130000 0.950 3.38  795
## 42   1 13.41 3.84 2.12 18.8  90 2.45 2.68 0.27 1.48  4.280000 0.910 3.00 1035
## 43   1 13.88 1.89 2.59 15.0 101 3.25 3.56 0.17 1.70  5.430000 0.880 3.56 1095
## 44   1 13.24 3.98 2.29 17.5 103 2.64 2.63 0.32 1.66  4.360000 0.820 3.00  680
## 45   1 13.05 1.77 2.10 17.0 107 3.00 3.00 0.28 2.03  5.040000 0.880 3.35  885
## 46   1 14.21 4.04 2.44 18.9 111 2.85 2.65 0.30 1.25  5.240000 0.870 3.33 1080
## 47   1 14.38 3.59 2.28 16.0 102 3.25 3.17 0.27 2.19  4.900000 1.040 3.44 1065
## 48   1 13.90 1.68 2.12 16.0 101 3.10 3.39 0.21 2.14  6.100000 0.910 3.33  985
## 49   1 14.10 2.02 2.40 18.8 103 2.75 2.92 0.32 2.38  6.200000 1.070 2.75 1060
## 50   1 13.94 1.73 2.27 17.4 108 2.88 3.54 0.32 2.08  8.900000 1.120 3.10 1260
## 51   1 13.05 1.73 2.04 12.4  92 2.72 3.27 0.17 2.91  7.200000 1.120 2.91 1150
## 52   1 13.83 1.65 2.60 17.2  94 2.45 2.99 0.22 2.29  5.600000 1.240 3.37 1265
## 53   1 13.82 1.75 2.42 14.0 111 3.88 3.74 0.32 1.87  7.050000 1.010 3.26 1190
## 54   1 13.77 1.90 2.68 17.1 115 3.00 2.79 0.39 1.68  6.300000 1.130 2.93 1375
## 55   1 13.74 1.67 2.25 16.4 118 2.60 2.90 0.21 1.62  5.850000 0.920 3.20 1060
## 56   1 13.56 1.73 2.46 20.5 116 2.96 2.78 0.20 2.45  6.250000 0.980 3.03 1120
## 57   1 14.22 1.70 2.30 16.3 118 3.20 3.00 0.26 2.03  6.380000 0.940 3.31  970
## 58   1 13.29 1.97 2.68 16.8 102 3.00 3.23 0.31 1.66  6.000000 1.070 2.84 1270
## 59   1 13.72 1.43 2.50 16.7 108 3.40 3.67 0.19 2.04  6.800000 0.890 2.87 1285
## 60   2 12.37 0.94 1.36 10.6  88 1.98 0.57 0.28 0.42  1.950000 1.050 1.82  520
## 61   2 12.33 1.10 2.28 16.0 101 2.05 1.09 0.63 0.41  3.270000 1.250 1.67  680
## 62   2 12.64 1.36 2.02 16.8 100 2.02 1.41 0.53 0.62  5.750000 0.980 1.59  450
## 63   2 13.67 1.25 1.92 18.0  94 2.10 1.79 0.32 0.73  3.800000 1.230 2.46  630
## 64   2 12.37 1.13 2.16 19.0  87 3.50 3.10 0.19 1.87  4.450000 1.220 2.87  420
## 65   2 12.17 1.45 2.53 19.0 104 1.89 1.75 0.45 1.03  2.950000 1.450 2.23  355
## 66   2 12.37 1.21 2.56 18.1  98 2.42 2.65 0.37 2.08  4.600000 1.190 2.30  678
## 67   2 13.11 1.01 1.70 15.0  78 2.98 3.18 0.26 2.28  5.300000 1.120 3.18  502
## 68   2 12.37 1.17 1.92 19.6  78 2.11 2.00 0.27 1.04  4.680000 1.120 3.48  510
## 69   2 13.34 0.94 2.36 17.0 110 2.53 1.30 0.55 0.42  3.170000 1.020 1.93  750
## 70   2 12.21 1.19 1.75 16.8 151 1.85 1.28 0.14 2.50  2.850000 1.280 3.07  718
## 71   2 12.29 1.61 2.21 20.4 103 1.10 1.02 0.37 1.46  3.050000 0.906 1.82  870
## 72   2 13.86 1.51 2.67 25.0  86 2.95 2.86 0.21 1.87  3.380000 1.360 3.16  410
## 73   2 13.49 1.66 2.24 24.0  87 1.88 1.84 0.27 1.03  3.740000 0.980 2.78  472
## 74   2 12.99 1.67 2.60 30.0 139 3.30 2.89 0.21 1.96  3.350000 1.310 3.50  985
## 75   2 11.96 1.09 2.30 21.0 101 3.38 2.14 0.13 1.65  3.210000 0.990 3.13  886
## 76   2 11.66 1.88 1.92 16.0  97 1.61 1.57 0.34 1.15  3.800000 1.230 2.14  428
## 77   2 13.03 0.90 1.71 16.0  86 1.95 2.03 0.24 1.46  4.600000 1.190 2.48  392
## 78   2 11.84 2.89 2.23 18.0 112 1.72 1.32 0.43 0.95  2.650000 0.960 2.52  500
## 79   2 12.33 0.99 1.95 14.8 136 1.90 1.85 0.35 2.76  3.400000 1.060 2.31  750
## 80   2 12.70 3.87 2.40 23.0 101 2.83 2.55 0.43 1.95  2.570000 1.190 3.13  463
## 81   2 12.00 0.92 2.00 19.0  86 2.42 2.26 0.30 1.43  2.500000 1.380 3.12  278
## 82   2 12.72 1.81 2.20 18.8  86 2.20 2.53 0.26 1.77  3.900000 1.160 3.14  714
## 83   2 12.08 1.13 2.51 24.0  78 2.00 1.58 0.40 1.40  2.200000 1.310 2.72  630
## 84   2 13.05 3.86 2.32 22.5  85 1.65 1.59 0.61 1.62  4.800000 0.840 2.01  515
## 85   2 11.84 0.89 2.58 18.0  94 2.20 2.21 0.22 2.35  3.050000 0.790 3.08  520
## 86   2 12.67 0.98 2.24 18.0  99 2.20 1.94 0.30 1.46  2.620000 1.230 3.16  450
## 87   2 12.16 1.61 2.31 22.8  90 1.78 1.69 0.43 1.56  2.450000 1.330 2.26  495
## 88   2 11.65 1.67 2.62 26.0  88 1.92 1.61 0.40 1.34  2.600000 1.360 3.21  562
## 89   2 11.64 2.06 2.46 21.6  84 1.95 1.69 0.48 1.35  2.800000 1.000 2.75  680
## 90   2 12.08 1.33 2.30 23.6  70 2.20 1.59 0.42 1.38  1.740000 1.070 3.21  625
## 91   2 12.08 1.83 2.32 18.5  81 1.60 1.50 0.52 1.64  2.400000 1.080 2.27  480
## 92   2 12.00 1.51 2.42 22.0  86 1.45 1.25 0.50 1.63  3.600000 1.050 2.65  450
## 93   2 12.69 1.53 2.26 20.7  80 1.38 1.46 0.58 1.62  3.050000 0.960 2.06  495
## 94   2 12.29 2.83 2.22 18.0  88 2.45 2.25 0.25 1.99  2.150000 1.150 3.30  290
## 95   2 11.62 1.99 2.28 18.0  98 3.02 2.26 0.17 1.35  3.250000 1.160 2.96  345
## 96   2 12.47 1.52 2.20 19.0 162 2.50 2.27 0.32 3.28  2.600000 1.160 2.63  937
## 97   2 11.81 2.12 2.74 21.5 134 1.60 0.99 0.14 1.56  2.500000 0.950 2.26  625
## 98   2 12.29 1.41 1.98 16.0  85 2.55 2.50 0.29 1.77  2.900000 1.230 2.74  428
## 99   2 12.37 1.07 2.10 18.5  88 3.52 3.75 0.24 1.95  4.500000 1.040 2.77  660
## 100  2 12.29 3.17 2.21 18.0  88 2.85 2.99 0.45 2.81  2.300000 1.420 2.83  406
## 101  2 12.08 2.08 1.70 17.5  97 2.23 2.17 0.26 1.40  3.300000 1.270 2.96  710
## 102  2 12.60 1.34 1.90 18.5  88 1.45 1.36 0.29 1.35  2.450000 1.040 2.77  562
## 103  2 12.34 2.45 2.46 21.0  98 2.56 2.11 0.34 1.31  2.800000 0.800 3.38  438
## 104  2 11.82 1.72 1.88 19.5  86 2.50 1.64 0.37 1.42  2.060000 0.940 2.44  415
## 105  2 12.51 1.73 1.98 20.5  85 2.20 1.92 0.32 1.48  2.940000 1.040 3.57  672
## 106  2 12.42 2.55 2.27 22.0  90 1.68 1.84 0.66 1.42  2.700000 0.860 3.30  315
## 107  2 12.25 1.73 2.12 19.0  80 1.65 2.03 0.37 1.63  3.400000 1.000 3.17  510
## 108  2 12.72 1.75 2.28 22.5  84 1.38 1.76 0.48 1.63  3.300000 0.880 2.42  488
## 109  2 12.22 1.29 1.94 19.0  92 2.36 2.04 0.39 2.08  2.700000 0.860 3.02  312
## 110  2 11.61 1.35 2.70 20.0  94 2.74 2.92 0.29 2.49  2.650000 0.960 3.26  680
## 111  2 11.46 3.74 1.82 19.5 107 3.18 2.58 0.24 3.58  2.900000 0.750 2.81  562
## 112  2 12.52 2.43 2.17 21.0  88 2.55 2.27 0.26 1.22  2.000000 0.900 2.78  325
## 113  2 11.76 2.68 2.92 20.0 103 1.75 2.03 0.60 1.05  3.800000 1.230 2.50  607
## 114  2 11.41 0.74 2.50 21.0  88 2.48 2.01 0.42 1.44  3.080000 1.100 2.31  434
## 115  2 12.08 1.39 2.50 22.5  84 2.56 2.29 0.43 1.04  2.900000 0.930 3.19  385
## 116  2 11.03 1.51 2.20 21.5  85 2.46 2.17 0.52 2.01  1.900000 1.710 2.87  407
## 117  2 11.82 1.47 1.99 20.8  86 1.98 1.60 0.30 1.53  1.950000 0.950 3.33  495
## 118  2 12.42 1.61 2.19 22.5 108 2.00 2.09 0.34 1.61  2.060000 1.060 2.96  345
## 119  2 12.77 3.43 1.98 16.0  80 1.63 1.25 0.43 0.83  3.400000 0.700 2.12  372
## 120  2 12.00 3.43 2.00 19.0  87 2.00 1.64 0.37 1.87  1.280000 0.930 3.05  564
## 121  2 11.45 2.40 2.42 20.0  96 2.90 2.79 0.32 1.83  3.250000 0.800 3.39  625
## 122  2 11.56 2.05 3.23 28.5 119 3.18 5.08 0.47 1.87  6.000000 0.930 3.69  465
## 123  2 12.42 4.43 2.73 26.5 102 2.20 2.13 0.43 1.71  2.080000 0.920 3.12  365
## 124  2 13.05 5.80 2.13 21.5  86 2.62 2.65 0.30 2.01  2.600000 0.730 3.10  380
## 125  2 11.87 4.31 2.39 21.0  82 2.86 3.03 0.21 2.91  2.800000 0.750 3.64  380
## 126  2 12.07 2.16 2.17 21.0  85 2.60 2.65 0.37 1.35  2.760000 0.860 3.28  378
## 127  2 12.43 1.53 2.29 21.5  86 2.74 3.15 0.39 1.77  3.940000 0.690 2.84  352
## 128  2 11.79 2.13 2.78 28.5  92 2.13 2.24 0.58 1.76  3.000000 0.970 2.44  466
## 129  2 12.37 1.63 2.30 24.5  88 2.22 2.45 0.40 1.90  2.120000 0.890 2.78  342
## 130  2 12.04 4.30 2.38 22.0  80 2.10 1.75 0.42 1.35  2.600000 0.790 2.57  580
## 131  3 12.86 1.35 2.32 18.0 122 1.51 1.25 0.21 0.94  4.100000 0.760 1.29  630
## 132  3 12.88 2.99 2.40 20.0 104 1.30 1.22 0.24 0.83  5.400000 0.740 1.42  530
## 133  3 12.81 2.31 2.40 24.0  98 1.15 1.09 0.27 0.83  5.700000 0.660 1.36  560
## 134  3 12.70 3.55 2.36 21.5 106 1.70 1.20 0.17 0.84  5.000000 0.780 1.29  600
## 135  3 12.51 1.24 2.25 17.5  85 2.00 0.58 0.60 1.25  5.450000 0.750 1.51  650
## 136  3 12.60 2.46 2.20 18.5  94 1.62 0.66 0.63 0.94  7.100000 0.730 1.58  695
## 137  3 12.25 4.72 2.54 21.0  89 1.38 0.47 0.53 0.80  3.850000 0.750 1.27  720
## 138  3 12.53 5.51 2.64 25.0  96 1.79 0.60 0.63 1.10  5.000000 0.820 1.69  515
## 139  3 13.49 3.59 2.19 19.5  88 1.62 0.48 0.58 0.88  5.700000 0.810 1.82  580
## 140  3 12.84 2.96 2.61 24.0 101 2.32 0.60 0.53 0.81  4.920000 0.890 2.15  590
## 141  3 12.93 2.81 2.70 21.0  96 1.54 0.50 0.53 0.75  4.600000 0.770 2.31  600
## 142  3 13.36 2.56 2.35 20.0  89 1.40 0.50 0.37 0.64  5.600000 0.700 2.47  780
## 143  3 13.52 3.17 2.72 23.5  97 1.55 0.52 0.50 0.55  4.350000 0.890 2.06  520
## 144  3 13.62 4.95 2.35 20.0  92 2.00 0.80 0.47 1.02  4.400000 0.910 2.05  550
## 145  3 12.25 3.88 2.20 18.5 112 1.38 0.78 0.29 1.14  8.210000 0.650 2.00  855
## 146  3 13.16 3.57 2.15 21.0 102 1.50 0.55 0.43 1.30  4.000000 0.600 1.68  830
## 147  3 13.88 5.04 2.23 20.0  80 0.98 0.34 0.40 0.68  4.900000 0.580 1.33  415
## 148  3 12.87 4.61 2.48 21.5  86 1.70 0.65 0.47 0.86  7.650000 0.540 1.86  625
## 149  3 13.32 3.24 2.38 21.5  92 1.93 0.76 0.45 1.25  8.420000 0.550 1.62  650
## 150  3 13.08 3.90 2.36 21.5 113 1.41 1.39 0.34 1.14  9.400000 0.570 1.33  550
## 151  3 13.50 3.12 2.62 24.0 123 1.40 1.57 0.22 1.25  8.600000 0.590 1.30  500
## 152  3 12.79 2.67 2.48 22.0 112 1.48 1.36 0.24 1.26 10.800000 0.480 1.47  480
## 153  3 13.11 1.90 2.75 25.5 116 2.20 1.28 0.26 1.56  7.100000 0.610 1.33  425
## 154  3 13.23 3.30 2.28 18.5  98 1.80 0.83 0.61 1.87 10.520000 0.560 1.51  675
## 155  3 12.58 1.29 2.10 20.0 103 1.48 0.58 0.53 1.40  7.600000 0.580 1.55  640
## 156  3 13.17 5.19 2.32 22.0  93 1.74 0.63 0.61 1.55  7.900000 0.600 1.48  725
## 157  3 13.84 4.12 2.38 19.5  89 1.80 0.83 0.48 1.56  9.010000 0.570 1.64  480
## 158  3 12.45 3.03 2.64 27.0  97 1.90 0.58 0.63 1.14  7.500000 0.670 1.73  880
## 159  3 14.34 1.68 2.70 25.0  98 2.80 1.31 0.53 2.70 13.000000 0.570 1.96  660
## 160  3 13.48 1.67 2.64 22.5  89 2.60 1.10 0.52 2.29 11.750000 0.570 1.78  620
## 161  3 12.36 3.83 2.38 21.0  88 2.30 0.92 0.50 1.04  7.650000 0.560 1.58  520
## 162  3 13.69 3.26 2.54 20.0 107 1.83 0.56 0.50 0.80  5.880000 0.960 1.82  680
## 163  3 12.85 3.27 2.58 22.0 106 1.65 0.60 0.60 0.96  5.580000 0.870 2.11  570
## 164  3 12.96 3.45 2.35 18.5 106 1.39 0.70 0.40 0.94  5.280000 0.680 1.75  675
## 165  3 13.78 2.76 2.30 22.0  90 1.35 0.68 0.41 1.03  9.580000 0.700 1.68  615
## 166  3 13.73 4.36 2.26 22.5  88 1.28 0.47 0.52 1.15  6.620000 0.780 1.75  520
## 167  3 13.45 3.70 2.60 23.0 111 1.70 0.92 0.43 1.46 10.680000 0.850 1.56  695
## 168  3 12.82 3.37 2.30 19.5  88 1.48 0.66 0.40 0.97 10.260000 0.720 1.75  685
## 169  3 13.58 2.58 2.69 24.5 105 1.55 0.84 0.39 1.54  8.660000 0.740 1.80  750
## 170  3 13.40 4.60 2.86 25.0 112 1.98 0.96 0.27 1.11  8.500000 0.670 1.92  630
## 171  3 12.20 3.03 2.32 19.0  96 1.25 0.49 0.40 0.73  5.500000 0.660 1.83  510
## 172  3 12.77 2.39 2.28 19.5  86 1.39 0.51 0.48 0.64  9.899999 0.570 1.63  470
## 173  3 14.16 2.51 2.48 20.0  91 1.68 0.70 0.44 1.24  9.700000 0.620 1.71  660
## 174  3 13.71 5.65 2.45 20.5  95 1.68 0.61 0.52 1.06  7.700000 0.640 1.74  740
## 175  3 13.40 3.91 2.48 23.0 102 1.80 0.75 0.43 1.41  7.300000 0.700 1.56  750
## 176  3 13.27 4.28 2.26 20.0 120 1.59 0.69 0.43 1.35 10.200000 0.590 1.56  835
## 177  3 13.17 2.59 2.37 20.0 120 1.65 0.68 0.53 1.46  9.300000 0.600 1.62  840
## 178  3 14.13 4.10 2.74 24.5  96 2.05 0.76 0.56 1.35  9.200000 0.610 1.60  560

A Matrix Scatterplot

library("carData")
library("car")
wine[2:6]
##        V2   V3   V4   V5  V6
## 1   14.23 1.71 2.43 15.6 127
## 2   13.20 1.78 2.14 11.2 100
## 3   13.16 2.36 2.67 18.6 101
## 4   14.37 1.95 2.50 16.8 113
## 5   13.24 2.59 2.87 21.0 118
## 6   14.20 1.76 2.45 15.2 112
## 7   14.39 1.87 2.45 14.6  96
## 8   14.06 2.15 2.61 17.6 121
## 9   14.83 1.64 2.17 14.0  97
## 10  13.86 1.35 2.27 16.0  98
## 11  14.10 2.16 2.30 18.0 105
## 12  14.12 1.48 2.32 16.8  95
## 13  13.75 1.73 2.41 16.0  89
## 14  14.75 1.73 2.39 11.4  91
## 15  14.38 1.87 2.38 12.0 102
## 16  13.63 1.81 2.70 17.2 112
## 17  14.30 1.92 2.72 20.0 120
## 18  13.83 1.57 2.62 20.0 115
## 19  14.19 1.59 2.48 16.5 108
## 20  13.64 3.10 2.56 15.2 116
## 21  14.06 1.63 2.28 16.0 126
## 22  12.93 3.80 2.65 18.6 102
## 23  13.71 1.86 2.36 16.6 101
## 24  12.85 1.60 2.52 17.8  95
## 25  13.50 1.81 2.61 20.0  96
## 26  13.05 2.05 3.22 25.0 124
## 27  13.39 1.77 2.62 16.1  93
## 28  13.30 1.72 2.14 17.0  94
## 29  13.87 1.90 2.80 19.4 107
## 30  14.02 1.68 2.21 16.0  96
## 31  13.73 1.50 2.70 22.5 101
## 32  13.58 1.66 2.36 19.1 106
## 33  13.68 1.83 2.36 17.2 104
## 34  13.76 1.53 2.70 19.5 132
## 35  13.51 1.80 2.65 19.0 110
## 36  13.48 1.81 2.41 20.5 100
## 37  13.28 1.64 2.84 15.5 110
## 38  13.05 1.65 2.55 18.0  98
## 39  13.07 1.50 2.10 15.5  98
## 40  14.22 3.99 2.51 13.2 128
## 41  13.56 1.71 2.31 16.2 117
## 42  13.41 3.84 2.12 18.8  90
## 43  13.88 1.89 2.59 15.0 101
## 44  13.24 3.98 2.29 17.5 103
## 45  13.05 1.77 2.10 17.0 107
## 46  14.21 4.04 2.44 18.9 111
## 47  14.38 3.59 2.28 16.0 102
## 48  13.90 1.68 2.12 16.0 101
## 49  14.10 2.02 2.40 18.8 103
## 50  13.94 1.73 2.27 17.4 108
## 51  13.05 1.73 2.04 12.4  92
## 52  13.83 1.65 2.60 17.2  94
## 53  13.82 1.75 2.42 14.0 111
## 54  13.77 1.90 2.68 17.1 115
## 55  13.74 1.67 2.25 16.4 118
## 56  13.56 1.73 2.46 20.5 116
## 57  14.22 1.70 2.30 16.3 118
## 58  13.29 1.97 2.68 16.8 102
## 59  13.72 1.43 2.50 16.7 108
## 60  12.37 0.94 1.36 10.6  88
## 61  12.33 1.10 2.28 16.0 101
## 62  12.64 1.36 2.02 16.8 100
## 63  13.67 1.25 1.92 18.0  94
## 64  12.37 1.13 2.16 19.0  87
## 65  12.17 1.45 2.53 19.0 104
## 66  12.37 1.21 2.56 18.1  98
## 67  13.11 1.01 1.70 15.0  78
## 68  12.37 1.17 1.92 19.6  78
## 69  13.34 0.94 2.36 17.0 110
## 70  12.21 1.19 1.75 16.8 151
## 71  12.29 1.61 2.21 20.4 103
## 72  13.86 1.51 2.67 25.0  86
## 73  13.49 1.66 2.24 24.0  87
## 74  12.99 1.67 2.60 30.0 139
## 75  11.96 1.09 2.30 21.0 101
## 76  11.66 1.88 1.92 16.0  97
## 77  13.03 0.90 1.71 16.0  86
## 78  11.84 2.89 2.23 18.0 112
## 79  12.33 0.99 1.95 14.8 136
## 80  12.70 3.87 2.40 23.0 101
## 81  12.00 0.92 2.00 19.0  86
## 82  12.72 1.81 2.20 18.8  86
## 83  12.08 1.13 2.51 24.0  78
## 84  13.05 3.86 2.32 22.5  85
## 85  11.84 0.89 2.58 18.0  94
## 86  12.67 0.98 2.24 18.0  99
## 87  12.16 1.61 2.31 22.8  90
## 88  11.65 1.67 2.62 26.0  88
## 89  11.64 2.06 2.46 21.6  84
## 90  12.08 1.33 2.30 23.6  70
## 91  12.08 1.83 2.32 18.5  81
## 92  12.00 1.51 2.42 22.0  86
## 93  12.69 1.53 2.26 20.7  80
## 94  12.29 2.83 2.22 18.0  88
## 95  11.62 1.99 2.28 18.0  98
## 96  12.47 1.52 2.20 19.0 162
## 97  11.81 2.12 2.74 21.5 134
## 98  12.29 1.41 1.98 16.0  85
## 99  12.37 1.07 2.10 18.5  88
## 100 12.29 3.17 2.21 18.0  88
## 101 12.08 2.08 1.70 17.5  97
## 102 12.60 1.34 1.90 18.5  88
## 103 12.34 2.45 2.46 21.0  98
## 104 11.82 1.72 1.88 19.5  86
## 105 12.51 1.73 1.98 20.5  85
## 106 12.42 2.55 2.27 22.0  90
## 107 12.25 1.73 2.12 19.0  80
## 108 12.72 1.75 2.28 22.5  84
## 109 12.22 1.29 1.94 19.0  92
## 110 11.61 1.35 2.70 20.0  94
## 111 11.46 3.74 1.82 19.5 107
## 112 12.52 2.43 2.17 21.0  88
## 113 11.76 2.68 2.92 20.0 103
## 114 11.41 0.74 2.50 21.0  88
## 115 12.08 1.39 2.50 22.5  84
## 116 11.03 1.51 2.20 21.5  85
## 117 11.82 1.47 1.99 20.8  86
## 118 12.42 1.61 2.19 22.5 108
## 119 12.77 3.43 1.98 16.0  80
## 120 12.00 3.43 2.00 19.0  87
## 121 11.45 2.40 2.42 20.0  96
## 122 11.56 2.05 3.23 28.5 119
## 123 12.42 4.43 2.73 26.5 102
## 124 13.05 5.80 2.13 21.5  86
## 125 11.87 4.31 2.39 21.0  82
## 126 12.07 2.16 2.17 21.0  85
## 127 12.43 1.53 2.29 21.5  86
## 128 11.79 2.13 2.78 28.5  92
## 129 12.37 1.63 2.30 24.5  88
## 130 12.04 4.30 2.38 22.0  80
## 131 12.86 1.35 2.32 18.0 122
## 132 12.88 2.99 2.40 20.0 104
## 133 12.81 2.31 2.40 24.0  98
## 134 12.70 3.55 2.36 21.5 106
## 135 12.51 1.24 2.25 17.5  85
## 136 12.60 2.46 2.20 18.5  94
## 137 12.25 4.72 2.54 21.0  89
## 138 12.53 5.51 2.64 25.0  96
## 139 13.49 3.59 2.19 19.5  88
## 140 12.84 2.96 2.61 24.0 101
## 141 12.93 2.81 2.70 21.0  96
## 142 13.36 2.56 2.35 20.0  89
## 143 13.52 3.17 2.72 23.5  97
## 144 13.62 4.95 2.35 20.0  92
## 145 12.25 3.88 2.20 18.5 112
## 146 13.16 3.57 2.15 21.0 102
## 147 13.88 5.04 2.23 20.0  80
## 148 12.87 4.61 2.48 21.5  86
## 149 13.32 3.24 2.38 21.5  92
## 150 13.08 3.90 2.36 21.5 113
## 151 13.50 3.12 2.62 24.0 123
## 152 12.79 2.67 2.48 22.0 112
## 153 13.11 1.90 2.75 25.5 116
## 154 13.23 3.30 2.28 18.5  98
## 155 12.58 1.29 2.10 20.0 103
## 156 13.17 5.19 2.32 22.0  93
## 157 13.84 4.12 2.38 19.5  89
## 158 12.45 3.03 2.64 27.0  97
## 159 14.34 1.68 2.70 25.0  98
## 160 13.48 1.67 2.64 22.5  89
## 161 12.36 3.83 2.38 21.0  88
## 162 13.69 3.26 2.54 20.0 107
## 163 12.85 3.27 2.58 22.0 106
## 164 12.96 3.45 2.35 18.5 106
## 165 13.78 2.76 2.30 22.0  90
## 166 13.73 4.36 2.26 22.5  88
## 167 13.45 3.70 2.60 23.0 111
## 168 12.82 3.37 2.30 19.5  88
## 169 13.58 2.58 2.69 24.5 105
## 170 13.40 4.60 2.86 25.0 112
## 171 12.20 3.03 2.32 19.0  96
## 172 12.77 2.39 2.28 19.5  86
## 173 14.16 2.51 2.48 20.0  91
## 174 13.71 5.65 2.45 20.5  95
## 175 13.40 3.91 2.48 23.0 102
## 176 13.27 4.28 2.26 20.0 120
## 177 13.17 2.59 2.37 20.0 120
## 178 14.13 4.10 2.74 24.5  96
scatterplotMatrix(wine[2:6])

A Scatterplot with the Data Points Labelled by their Group

plot(wine$V4, wine$V5)

plot(wine$V4, wine$V5)
text(wine$V4, wine$V5, wine$V1, cex=0.7, pos=4, col="red")

A Profile Plot

library("RColorBrewer")
makeProfilePlot <- function(mylist,names)
  {
     require(RColorBrewer)
     # find out how many variables we want to include
     numvariables <- length(mylist)
     # choose 'numvariables' random colours
     colours <- brewer.pal(numvariables,"Set1")
     # find out the minimum and maximum values of the variables:
     mymin <- 1e+20
     mymax <- 1e-20
     for (i in 1:numvariables)
     {
        vectori <- mylist[[i]]
        mini <- min(vectori)
        maxi <- max(vectori)
        if (mini < mymin) { mymin <- mini }
        if (maxi > mymax) { mymax <- maxi }
     }
     # plot the variables
     for (i in 1:numvariables)
     {
        vectori <- mylist[[i]]
        namei <- names[i]
        colouri <- colours[i]
        if (i == 1) { plot(vectori,col=colouri,type="l",ylim=c(mymin,mymax)) }
        else         { points(vectori, col=colouri,type="l")                                     }
        lastxval <- length(vectori)
        lastyval <- vectori[length(vectori)]
        text((lastxval-10),(lastyval),namei,col="black",cex=0.6)
     }
  }
names <- c("V2","V3","V4","V5","V6")
mylist <- list(wine$V2,wine$V3,wine$V4,wine$V5,wine$V6)
makeProfilePlot(mylist,names)

Calculating Summary Statistics for Multivariate Data

sapply(wine[2:14],mean)
##          V2          V3          V4          V5          V6          V7 
##  13.0006180   2.3363483   2.3665169  19.4949438  99.7415730   2.2951124 
##          V8          V9         V10         V11         V12         V13 
##   2.0292697   0.3618539   1.5908989   5.0580899   0.9574494   2.6116854 
##         V14 
## 746.8932584
sapply(wine[2:14],sd)
##          V2          V3          V4          V5          V6          V7 
##   0.8118265   1.1171461   0.2743440   3.3395638  14.2824835   0.6258510 
##          V8          V9         V10         V11         V12         V13 
##   0.9988587   0.1244533   0.5723589   2.3182859   0.2285716   0.7099904 
##         V14 
## 314.9074743

Means and Variances Per Group

cultivar2wine <- wine[wine$V1=="2",]
sapply(cultivar2wine[2:14],mean)
##         V2         V3         V4         V5         V6         V7         V8 
##  12.278732   1.932676   2.244789  20.238028  94.549296   2.258873   2.080845 
##         V9        V10        V11        V12        V13        V14 
##   0.363662   1.630282   3.086620   1.056282   2.785352 519.507042
cultivar2wine <- wine[wine$V1=="2",]
sapply(cultivar2wine[2:14], sd)
##          V2          V3          V4          V5          V6          V7 
##   0.5379642   1.0155687   0.3154673   3.3497704  16.7534975   0.5453611 
##          V8          V9         V10         V11         V12         V13 
##   0.7057008   0.1239613   0.6020678   0.9249293   0.2029368   0.4965735 
##         V14 
## 157.2112204
printMeanAndSdByGroup <- function(variables,groupvariable)
  {
     # find the names of the variables
     variablenames <- c(names(groupvariable),names(as.data.frame(variables)))
     # within each group, find the mean of each variable
     groupvariable <- groupvariable[,1] # ensures groupvariable is not a list
     means <- aggregate(as.matrix(variables) ~ groupvariable, FUN = mean)
     names(means) <- variablenames
     print(paste("Means:"))
     print(means)
     # within each group, find the standard deviation of each variable:
     sds <- aggregate(as.matrix(variables) ~ groupvariable, FUN = sd)
     names(sds) <- variablenames
     print(paste("Standard deviations:"))
     print(sds)
     # within each group, find the number of samples:
     samplesizes <- aggregate(as.matrix(variables) ~ groupvariable, FUN = length)
     names(samplesizes) <- variablenames
     print(paste("Sample sizes:"))
     print(samplesizes)
}

printMeanAndSdByGroup(wine[2:14],wine[1])
## [1] "Means:"
##   V1       V2       V3       V4       V5       V6       V7        V8       V9
## 1  1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2  2 12.27873 1.932676 2.244789 20.23803  94.5493 2.258873 2.0808451 0.363662
## 3  3 13.15375 3.333750 2.437083 21.41667  99.3125 1.678750 0.7814583 0.447500
##        V10      V11       V12      V13       V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352  519.5070
## 3 1.153542 7.396250 0.6827083 1.683542  629.8958
## [1] "Standard deviations:"
##   V1        V2        V3        V4       V5       V6        V7        V8
## 1  1 0.4621254 0.6885489 0.2271660 2.546322 10.49895 0.3389614 0.3974936
## 2  2 0.5379642 1.0155687 0.3154673 3.349770 16.75350 0.5453611 0.7057008
## 3  3 0.5302413 1.0879057 0.1846902 2.258161 10.89047 0.3569709 0.2935041
##           V9       V10       V11       V12       V13      V14
## 1 0.07004924 0.4121092 1.2385728 0.1164826 0.3570766 221.5208
## 2 0.12396128 0.6020678 0.9249293 0.2029368 0.4965735 157.2112
## 3 0.12413959 0.4088359 2.3109421 0.1144411 0.2721114 115.0970
## [1] "Sample sizes:"
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1  1 59 59 59 59 59 59 59 59  59  59  59  59  59
## 2  2 71 71 71 71 71 71 71 71  71  71  71  71  71
## 3  3 48 48 48 48 48 48 48 48  48  48  48  48  48
## [1] "Means:"
##   V1       V2       V3       V4       V5       V6       V7        V8       V9
## 1  1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2  2 12.27873 1.932676 2.244789 20.23803  94.5493 2.258873 2.0808451 0.363662
## 3  3 13.15375 3.333750 2.437083 21.41667  99.3125 1.678750 0.7814583 0.447500
##        V10      V11       V12      V13       V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352  519.5070
## 3 1.153542 7.396250 0.6827083 1.683542  629.8958
## [1] "Standard deviations:"
##   V1        V2        V3        V4       V5       V6        V7        V8
## 1  1 0.4621254 0.6885489 0.2271660 2.546322 10.49895 0.3389614 0.3974936
## 2  2 0.5379642 1.0155687 0.3154673 3.349770 16.75350 0.5453611 0.7057008
## 3  3 0.5302413 1.0879057 0.1846902 2.258161 10.89047 0.3569709 0.2935041
##           V9       V10       V11       V12       V13      V14
## 1 0.07004924 0.4121092 1.2385728 0.1164826 0.3570766 221.5208
## 2 0.12396128 0.6020678 0.9249293 0.2029368 0.4965735 157.2112
## 3 0.12413959 0.4088359 2.3109421 0.1144411 0.2721114 115.0970
## [1] "Sample sizes:"
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1  1 59 59 59 59 59 59 59 59  59  59  59  59  59
## 2  2 71 71 71 71 71 71 71 71  71  71  71  71  71
## 3  3 48 48 48 48 48 48 48 48  48  48  48  48  48

Between-groups Variance and Within-groups Variance for a Variable

calcWithinGroupsVariance <- function(variable,groupvariable)
  {
     # find out how many values the group variable can take
     groupvariable2 <- as.factor(groupvariable[[1]])
     levels <- levels(groupvariable2)
     numlevels <- length(levels)
     # get the mean and standard deviation for each group:
     numtotal <- 0
     denomtotal <- 0
     for (i in 1:numlevels)
     {
        leveli <- levels[i]
        levelidata <- variable[groupvariable==leveli,]
        levelilength <- length(levelidata)
        # get the standard deviation for group i:
        sdi <- sd(levelidata)
        numi <- (levelilength - 1)*(sdi * sdi)
        denomi <- levelilength
        numtotal <- numtotal + numi
        denomtotal <- denomtotal + denomi
     }
     # calculate the within-groups variance
     Vw <- numtotal / (denomtotal - numlevels)
     return(Vw)
  }
calcWithinGroupsVariance(wine[2],wine[1])
## [1] 0.2620525
calcBetweenGroupsVariance <- function(variable,groupvariable)
  {
     # find out how many values the group variable can take
     groupvariable2 <- as.factor(groupvariable[[1]])
     levels <- levels(groupvariable2)
     numlevels <- length(levels)
     # calculate the overall grand mean:
     grandmean <- mean(wine$V2)
      
     # get the mean and standard deviation for each group:
     numtotal <- 0
     denomtotal <- 0
     for (i in 1:numlevels)
     {
        leveli <- levels[i]
        levelidata <- variable[groupvariable==leveli,]
        levelilength <- length(levelidata)
        # get the mean and standard deviation for group i:
        meani <- mean(levelidata)
        sdi <- sd(levelidata)
        numi <- levelilength * ((meani - grandmean)^2)
        denomi <- levelilength
        numtotal <- numtotal + numi
        denomtotal <- denomtotal + denomi
     }
     # calculate the between-groups variance
     Vb <- numtotal / (numlevels - 1)
     Vb <- Vb[[1]]
     return(Vb)
  }
calcBetweenGroupsVariance(wine[2],wine[1])
## [1] 35.39742
35.39742/0.2620525
## [1] 135.0776
calcSeparations <- function(variables,groupvariable)
  {
     # find out how many variables we have
     variables <- as.data.frame(variables)
     numvariables <- length(variables)
     # find the variable names
     variablenames <- colnames(variables)
     # calculate the separation for each variable
     for (i in 1:numvariables)
     {
        variablei <- variables[i]
        variablename <- variablenames[i]
        Vw <- calcWithinGroupsVariance(variablei, groupvariable)
        Vb <- calcBetweenGroupsVariance(variablei, groupvariable)
        sep <- Vb/Vw
        print(paste("variable",variablename,"Vw=",Vw,"Vb=",Vb,"separation=",sep))
     }
  }
calcSeparations(wine[2:14],wine[1])
## [1] "variable V2 Vw= 0.262052469153907 Vb= 35.3974249602692 separation= 135.0776242428"
## [1] "variable V3 Vw= 0.887546796746581 Vb= 10154.4606409588 separation= 11441.0425210043"
## [1] "variable V4 Vw= 0.0660721013425184 Vb= 10065.3651082674 separation= 152339.109907954"
## [1] "variable V5 Vw= 8.00681118121156 Vb= 4040.10461181253 separation= 504.583475290745"
## [1] "variable V6 Vw= 180.65777316441 Vb= 671880.903309069 separation= 3719.08106438141"
## [1] "variable V7 Vw= 0.191270475224227 Vb= 10218.0270550471 separation= 53421.8730991727"
## [1] "variable V8 Vw= 0.274707514337437 Vb= 10777.2342568213 separation= 39231.6689363768"
## [1] "variable V9 Vw= 0.0119117022132797 Vb= 14217.0422061125 separation= 1193535.73079276"
## [1] "variable V10 Vw= 0.246172943795542 Vb= 11593.6224025303 separation= 47095.4371499059"
## [1] "variable V11 Vw= 2.28492308133354 Vb= 5890.16197758506 separation= 2577.83818882314"
## [1] "variable V12 Vw= 0.0244876469432414 Vb= 12910.854863443 separation= 527239.505427713"
## [1] "variable V13 Vw= 0.160778729560982 Vb= 9636.30640975892 separation= 59935.2068278657"
## [1] "variable V14 Vw= 29707.6818705169 Vb= 54112090.6081053 separation= 1821.48478780528"

Between-groups Covariance and Within-groups Covariance for Two Variables

calcWithinGroupsCovariance <- function(variable1,variable2,groupvariable)
  {
     # find out how many values the group variable can take
     groupvariable2 <- as.factor(groupvariable[[1]])
     levels <- levels(groupvariable2)
     numlevels <- length(levels)
     # get the covariance of variable 1 and variable 2 for each group:
     Covw <- 0
     for (i in 1:numlevels)
     {
        leveli <- levels[i]
        levelidata1 <- variable1[groupvariable==leveli,]
        levelidata2 <- variable2[groupvariable==leveli,]
        mean1 <- mean(levelidata1)
        mean2 <- mean(levelidata2)
        levelilength <- length(levelidata1)
        # get the covariance for this group:
        term1 <- 0
        for (j in 1:levelilength)
        {
           term1 <- term1 + ((levelidata1[j] - mean1)*(levelidata2[j] - mean2))
        }
        Cov_groupi <- term1 # covariance for this group
        Covw <- Covw + Cov_groupi
     }
     totallength <- nrow(variable1)
     Covw <- Covw / (totallength - numlevels)
     return(Covw)
  }
calcWithinGroupsCovariance(wine[8],wine[11],wine[1])
## [1] 0.2866783
calcBetweenGroupsCovariance <- function(variable1,variable2,groupvariable)
  {
     # find out how many values the group variable can take
     groupvariable2 <- as.factor(groupvariable[[1]])
     levels <- levels(groupvariable2)
     numlevels <- length(levels)
     # calculate the grand means
     variable1mean <- mean(wine$V8)
     variable2mean <- mean(wine$V11)
     # calculate the between-groups covariance
     Covb <- 0
     for (i in 1:numlevels)
     {
        leveli <- levels[i]
        levelidata1 <- variable1[groupvariable==leveli,]
        levelidata2 <- variable2[groupvariable==leveli,]
        mean1 <- mean(levelidata1)
        mean2 <- mean(levelidata2)
        levelilength <- length(levelidata1)
        term1 <- (mean1 - variable1mean)*(mean2 - variable2mean)*(levelilength)
        Covb <- Covb + term1
     }
     Covb <- Covb / (numlevels - 1)
     Covb <- Covb[[1]]
     return(Covb)
  }
calcBetweenGroupsCovariance(wine[8],wine[11],wine[1])
## [1] -60.41077

Calculating Correlations for Multivariate Data

cor.test(wine$V2, wine$V3)
## 
##  Pearson's product-moment correlation
## 
## data:  wine$V2 and wine$V3
## t = 1.2579, df = 176, p-value = 0.2101
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05342959  0.23817474
## sample estimates:
##        cor 
## 0.09439694
mosthighlycorrelated <- function(mydataframe,numtoreport)
  {
     # find the correlations
     cormatrix <- cor(mydataframe)
     # set the correlations on the diagonal or lower triangle to zero,
     # so they will not be reported as the highest ones:
     diag(cormatrix) <- 0
     cormatrix[lower.tri(cormatrix)] <- 0
     # flatten the matrix into a dataframe for easy sorting
     fm <- as.data.frame(as.table(cormatrix))
     # assign human-friendly names
     names(fm) <- c("First.Variable", "Second.Variable","Correlation")
     # sort and print the top n correlations
     head(fm[order(abs(fm$Correlation),decreasing=T),],n=numtoreport)
  }
mosthighlycorrelated(wine[2:14], 10)
##     First.Variable Second.Variable Correlation
## 84              V7              V8   0.8645635
## 150             V8             V13   0.7871939
## 149             V7             V13   0.6999494
## 111             V8             V10   0.6526918
## 157             V2             V14   0.6437200
## 110             V7             V10   0.6124131
## 154            V12             V13   0.5654683
## 132             V3             V12  -0.5612957
## 118             V2             V11   0.5463642
## 137             V8             V12   0.5434786

Standardising Variables

standardisedconcentrations <- as.data.frame(scale(wine[2:14]))
sapply(standardisedconcentrations,mean)
##            V2            V3            V4            V5            V6 
## -8.591766e-16 -6.776446e-17  8.045176e-16 -7.720494e-17 -4.073935e-17 
##            V7            V8            V9           V10           V11 
## -1.395560e-17  6.958263e-17 -1.042186e-16 -1.221369e-16  3.649376e-17 
##           V12           V13           V14 
##  2.093741e-16  3.003459e-16 -1.034429e-16
sapply(standardisedconcentrations,sd)
##  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 
##   1   1   1   1   1   1   1   1   1   1   1   1   1

Principal Component Analysis

standardisedconcentrations <- as.data.frame(scale(wine[2:14])) # standardise the variables
wine.pca <- prcomp(standardisedconcentrations)                 # do a PCA
summary(wine.pca)
## Importance of components:
##                          PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.169 1.5802 1.2025 0.95863 0.92370 0.80103 0.74231
## Proportion of Variance 0.362 0.1921 0.1112 0.07069 0.06563 0.04936 0.04239
## Cumulative Proportion  0.362 0.5541 0.6653 0.73599 0.80162 0.85098 0.89337
##                            PC8     PC9   PC10    PC11    PC12    PC13
## Standard deviation     0.59034 0.53748 0.5009 0.47517 0.41082 0.32152
## Proportion of Variance 0.02681 0.02222 0.0193 0.01737 0.01298 0.00795
## Cumulative Proportion  0.92018 0.94240 0.9617 0.97907 0.99205 1.00000
wine.pca$sdev
##  [1] 2.1692972 1.5801816 1.2025273 0.9586313 0.9237035 0.8010350 0.7423128
##  [8] 0.5903367 0.5374755 0.5009017 0.4751722 0.4108165 0.3215244
sum((wine.pca$sdev)^2)
## [1] 13

Deciding How Many Principal Components to Retain

screeplot(wine.pca, type="lines")

(wine.pca$sdev)^2
##  [1] 4.7058503 2.4969737 1.4460720 0.9189739 0.8532282 0.6416570 0.5510283
##  [8] 0.3484974 0.2888799 0.2509025 0.2257886 0.1687702 0.1033779

Loadings for the Principal Components

wine.pca$rotation[,1]
##           V2           V3           V4           V5           V6           V7 
## -0.144329395  0.245187580  0.002051061  0.239320405 -0.141992042 -0.394660845 
##           V8           V9          V10          V11          V12          V13 
## -0.422934297  0.298533103 -0.313429488  0.088616705 -0.296714564 -0.376167411 
##          V14 
## -0.286752227
sum((wine.pca$rotation[,1])^2)
## [1] 1
calcpc <- function(variables,loadings)
  {
     # find the number of samples in the data set
     as.data.frame(variables)
     numsamples <- nrow(variables)
     # make a vector to store the component
     pc <- numeric(numsamples)
     # find the number of variables
     numvariables <- length(variables)
     # calculate the value of the component for each sample
     for (i in 1:numsamples)
     {
        valuei <- 0
        for (j in 1:numvariables)
        {
           valueij <- variables[i,j]
           loadingj <- loadings[j]
           valuei <- valuei + (valueij * loadingj)
        }
        pc[i] <- valuei
     }
     return(pc)
}
calcpc(standardisedconcentrations, wine.pca$rotation[,1])
##   [1] -3.30742097 -2.20324981 -2.50966069 -3.74649719 -1.00607049 -3.04167373
##   [7] -2.44220051 -2.05364379 -2.50381135 -2.74588238 -3.46994837 -1.74981688
##  [13] -2.10751729 -3.44842921 -4.30065228 -2.29870383 -2.16584568 -1.89362947
##  [19] -3.53202167 -2.07865856 -3.11561376 -1.08351361 -2.52809263 -1.64036108
##  [25] -1.75662066 -0.98729406 -1.77028387 -1.23194878 -2.18225047 -2.24976267
##  [31] -2.49318704 -2.66987964 -1.62399801 -1.89733870 -1.40642118 -1.89847087
##  [37] -1.38096669 -1.11905070 -1.49796891 -2.52268490 -2.58081526 -0.66660159
##  [43] -3.06216898 -0.46090897 -2.09544094 -1.13297020 -2.71893118 -2.81340300
##  [49] -2.00419725 -2.69987528 -3.20587409 -2.85091773 -3.49574328 -2.21853316
##  [55] -2.14094846 -2.46238340 -2.73380617 -2.16762631 -3.13054925  0.92596992
##  [61]  1.53814123  1.83108449 -0.03052074 -2.04449433  0.60796583 -0.89769555
##  [67] -2.24218226 -0.18286818  0.81051865 -1.97006319  1.56779366 -1.65301884
##  [73]  0.72333196 -2.55501977 -1.82741266  0.86555129 -0.36897357  1.45327752
##  [79] -1.25937829 -0.37509228 -0.75992026 -1.03166776  0.49348469  2.53183508
##  [85] -0.83297044 -0.78568828  0.80456258  0.55647288  1.11197430  0.55415961
##  [91]  1.34548982  1.56008180  1.92711944 -0.74456561 -0.95476209 -2.53670943
##  [97]  0.54242248 -1.02814946 -2.24557492 -1.40624916 -0.79547585  0.54798592
## [103]  0.16072037  0.65793897 -0.39125074  1.76751314  0.36523707  1.61611371
## [109] -0.08230361 -1.57383547 -1.41657326  0.27791878  1.29947929  0.45578615
## [115]  0.49279573 -0.48071836  0.25217752  0.10692601  2.42616867  0.54953935
## [121] -0.73754141 -1.33256273  1.17377592  0.46103449 -0.97572169  0.09653741
## [127] -0.03837888  1.59266578  0.47821593  1.78779033  1.32336859  2.37779336
## [133]  2.92867865  2.14077227  2.36320318  3.05522315  3.90473898  3.92539034
## [139]  3.08557209  2.36779237  2.77099630  2.28012931  2.97723506  2.36851341
## [145]  2.20364930  2.61823528  4.26859758  3.57256360  2.79916760  2.89150275
## [151]  2.31420887  2.54265841  1.80744271  2.75238051  2.72945105  3.59472857
## [157]  2.88169708  3.38261413  1.04523342  1.60538369  3.13428951  2.23385546
## [163]  2.83966343  2.59019044  2.94100316  3.52010248  2.39934228  2.92084537
## [169]  2.17527658  2.37423037  3.20258311  3.66757294  2.45862032  3.36104305
## [175]  2.59463669  2.67030685  2.38030254  3.19973210
wine.pca$x[,1]
##   [1] -3.30742097 -2.20324981 -2.50966069 -3.74649719 -1.00607049 -3.04167373
##   [7] -2.44220051 -2.05364379 -2.50381135 -2.74588238 -3.46994837 -1.74981688
##  [13] -2.10751729 -3.44842921 -4.30065228 -2.29870383 -2.16584568 -1.89362947
##  [19] -3.53202167 -2.07865856 -3.11561376 -1.08351361 -2.52809263 -1.64036108
##  [25] -1.75662066 -0.98729406 -1.77028387 -1.23194878 -2.18225047 -2.24976267
##  [31] -2.49318704 -2.66987964 -1.62399801 -1.89733870 -1.40642118 -1.89847087
##  [37] -1.38096669 -1.11905070 -1.49796891 -2.52268490 -2.58081526 -0.66660159
##  [43] -3.06216898 -0.46090897 -2.09544094 -1.13297020 -2.71893118 -2.81340300
##  [49] -2.00419725 -2.69987528 -3.20587409 -2.85091773 -3.49574328 -2.21853316
##  [55] -2.14094846 -2.46238340 -2.73380617 -2.16762631 -3.13054925  0.92596992
##  [61]  1.53814123  1.83108449 -0.03052074 -2.04449433  0.60796583 -0.89769555
##  [67] -2.24218226 -0.18286818  0.81051865 -1.97006319  1.56779366 -1.65301884
##  [73]  0.72333196 -2.55501977 -1.82741266  0.86555129 -0.36897357  1.45327752
##  [79] -1.25937829 -0.37509228 -0.75992026 -1.03166776  0.49348469  2.53183508
##  [85] -0.83297044 -0.78568828  0.80456258  0.55647288  1.11197430  0.55415961
##  [91]  1.34548982  1.56008180  1.92711944 -0.74456561 -0.95476209 -2.53670943
##  [97]  0.54242248 -1.02814946 -2.24557492 -1.40624916 -0.79547585  0.54798592
## [103]  0.16072037  0.65793897 -0.39125074  1.76751314  0.36523707  1.61611371
## [109] -0.08230361 -1.57383547 -1.41657326  0.27791878  1.29947929  0.45578615
## [115]  0.49279573 -0.48071836  0.25217752  0.10692601  2.42616867  0.54953935
## [121] -0.73754141 -1.33256273  1.17377592  0.46103449 -0.97572169  0.09653741
## [127] -0.03837888  1.59266578  0.47821593  1.78779033  1.32336859  2.37779336
## [133]  2.92867865  2.14077227  2.36320318  3.05522315  3.90473898  3.92539034
## [139]  3.08557209  2.36779237  2.77099630  2.28012931  2.97723506  2.36851341
## [145]  2.20364930  2.61823528  4.26859758  3.57256360  2.79916760  2.89150275
## [151]  2.31420887  2.54265841  1.80744271  2.75238051  2.72945105  3.59472857
## [157]  2.88169708  3.38261413  1.04523342  1.60538369  3.13428951  2.23385546
## [163]  2.83966343  2.59019044  2.94100316  3.52010248  2.39934228  2.92084537
## [169]  2.17527658  2.37423037  3.20258311  3.66757294  2.45862032  3.36104305
## [175]  2.59463669  2.67030685  2.38030254  3.19973210
wine.pca$rotation[,2]
##           V2           V3           V4           V5           V6           V7 
##  0.483651548  0.224930935  0.316068814 -0.010590502  0.299634003  0.065039512 
##           V8           V9          V10          V11          V12          V13 
## -0.003359812  0.028779488  0.039301722  0.529995672 -0.279235148 -0.164496193 
##          V14 
##  0.364902832
sum((wine.pca$rotation[,2])^2)
## [1] 1

Scatterplots of the Principal Components

plot(wine.pca$x[,1],wine.pca$x[,2]) # make a scatterplot
text(wine.pca$x[,1],wine.pca$x[,2], wine$V1, cex=0.7, pos=4, col="red") # add labels

printMeanAndSdByGroup(standardisedconcentrations,wine[1])
## [1] "Means:"
##   V1         V2         V3         V4         V5          V6          V7
## 1  1  0.9166093 -0.2915199  0.3246886 -0.7359212  0.46192317  0.87090552
## 2  2 -0.8892116 -0.3613424 -0.4437061  0.2225094 -0.36354162 -0.05790375
## 3  3  0.1886265  0.8928122  0.2572190  0.5754413 -0.03004191 -0.98483874
##            V8          V9        V10        V11        V12        V13
## 1  0.95419225 -0.57735640  0.5388633  0.2028288  0.4575567  0.7691811
## 2  0.05163434  0.01452785  0.0688079 -0.8503999  0.4323908  0.2446043
## 3 -1.24923710  0.68817813 -0.7641311  1.0085728 -1.2019916 -1.3072623
##          V14
## 1  1.1711967
## 2 -0.7220731
## 3 -0.3715295
## [1] "Standard deviations:"
##   V1        V2        V3        V4        V5        V6        V7        V8
## 1  1 0.5692415 0.6163463 0.8280333 0.7624716 0.7350927 0.5416007 0.3979478
## 2  2 0.6626591 0.9090742 1.1498967 1.0030563 1.1730101 0.8713912 0.7065071
## 3  3 0.6531461 0.9738258 0.6732065 0.6761844 0.7625055 0.5703767 0.2938394
##          V9       V10       V11       V12       V13       V14
## 1 0.5628555 0.7200189 0.5342623 0.5096112 0.5029315 0.7034472
## 2 0.9960462 1.0519061 0.3989712 0.8878480 0.6994087 0.4992299
## 3 0.9974790 0.7142999 0.9968323 0.5006795 0.3832607 0.3654948
## [1] "Sample sizes:"
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1  1 59 59 59 59 59 59 59 59  59  59  59  59  59
## 2  2 71 71 71 71 71 71 71 71  71  71  71  71  71
## 3  3 48 48 48 48 48 48 48 48  48  48  48  48  48

Linear Discriminant Analysis

library("MASS")                                                # load the MASS package
wine.lda <- lda(wine$V1 ~ wine$V2 + wine$V3 + wine$V4 + wine$V5 + wine$V6 + wine$V7 + wine$V8 + wine$V9 + wine$V10 + wine$V11 + wine$V12 + wine$V13 + wine$V14)
wine.lda
## Call:
## lda(wine$V1 ~ wine$V2 + wine$V3 + wine$V4 + wine$V5 + wine$V6 + 
##     wine$V7 + wine$V8 + wine$V9 + wine$V10 + wine$V11 + wine$V12 + 
##     wine$V13 + wine$V14)
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3314607 0.3988764 0.2696629 
## 
## Group means:
##    wine$V2  wine$V3  wine$V4  wine$V5  wine$V6  wine$V7   wine$V8  wine$V9
## 1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2 12.27873 1.932676 2.244789 20.23803  94.5493 2.258873 2.0808451 0.363662
## 3 13.15375 3.333750 2.437083 21.41667  99.3125 1.678750 0.7814583 0.447500
##   wine$V10 wine$V11  wine$V12 wine$V13  wine$V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352  519.5070
## 3 1.153542 7.396250 0.6827083 1.683542  629.8958
## 
## Coefficients of linear discriminants:
##                   LD1           LD2
## wine$V2  -0.403399781  0.8717930699
## wine$V3   0.165254596  0.3053797325
## wine$V4  -0.369075256  2.3458497486
## wine$V5   0.154797889 -0.1463807654
## wine$V6  -0.002163496 -0.0004627565
## wine$V7   0.618052068 -0.0322128171
## wine$V8  -1.661191235 -0.4919980543
## wine$V9  -1.495818440 -1.6309537953
## wine$V10  0.134092628 -0.3070875776
## wine$V11  0.355055710  0.2532306865
## wine$V12 -0.818036073 -1.5156344987
## wine$V13 -1.157559376  0.0511839665
## wine$V14 -0.002691206  0.0028529846
## 
## Proportion of trace:
##    LD1    LD2 
## 0.6875 0.3125
wine.lda$scaling[,1]
##      wine$V2      wine$V3      wine$V4      wine$V5      wine$V6      wine$V7 
## -0.403399781  0.165254596 -0.369075256  0.154797889 -0.002163496  0.618052068 
##      wine$V8      wine$V9     wine$V10     wine$V11     wine$V12     wine$V13 
## -1.661191235 -1.495818440  0.134092628  0.355055710 -0.818036073 -1.157559376 
##     wine$V14 
## -0.002691206
calclda <- function(variables,loadings)
  {
     # find the number of samples in the data set
     as.data.frame(variables)
     numsamples <- nrow(variables)
     # make a vector to store the discriminant function
     ld <- numeric(numsamples)
     # find the number of variables
     numvariables <- length(variables)
     # calculate the value of the discriminant function for each sample
     for (i in 1:numsamples)
     {
        valuei <- 0
        for (j in 1:numvariables)
        {
           valueij <- variables[i,j]
           loadingj <- loadings[j]
           valuei <- valuei + (valueij * loadingj)
        }
        ld[i] <- valuei
     }
     # standardise the discriminant function so that its mean value is 0:
     ld <- as.data.frame(scale(ld, center=TRUE, scale=FALSE))
     ld <- ld[[1]]
     return(ld)
  }
calclda(wine[2:14], wine.lda$scaling[,1])
##   [1] -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934
##   [7] -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646
##  [13] -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262
##  [19] -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961
##  [25] -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573
##  [31] -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996
##  [37] -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062
##  [43] -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356
##  [49] -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293
##  [55] -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551  1.58618749
##  [61]  0.79967216  2.38015446 -0.45917726 -0.50726885  0.39398359 -0.92256616
##  [67] -1.95549377 -0.34732815  0.20371212 -0.24831914  1.17987999 -1.07718925
##  [73]  0.64100179 -1.74684421 -0.34721117  1.14274222  0.18665882  0.90052500
##  [79] -0.70709551 -0.59562833 -0.55761818 -1.80430417  0.23077079  2.03482711
##  [85] -0.62113021 -1.03372742  0.76598781  0.35042568  0.15324508 -0.14962842
##  [91]  0.48079504  1.39689016  0.91972331 -0.59102937  0.49411386 -1.62614426
##  [97]  2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340  0.02594549
## [103] -0.21887407  1.36437640 -1.12901245 -0.21263094 -0.77946884  0.61546732
## [109]  0.22550192 -2.03869851  0.79274716  0.30229545 -0.50664882  0.99837397
## [115] -0.21954922 -0.37131517  0.05545894 -0.09137874  1.79755252 -0.17405009
## [121] -1.17870281 -3.21054390  0.62605202  0.03366613 -0.69930080 -0.72061079
## [127] -0.51933512  1.17030045  0.10824791  1.12319783  2.24632419  3.28527755
## [133]  4.07236441  3.86691235  3.45088333  3.71583899  3.92220510  4.85161020
## [139]  3.54993389  3.76889174  2.66942250  2.32491492  3.17712883  2.88964418
## [145]  3.78325562  3.04411324  4.70697017  4.85021393  4.98359184  4.86968293
## [151]  4.59869190  5.67447884  5.32986123  5.03401031  4.52080087  5.09783710
## [157]  5.04368277  4.86980829  5.61316558  5.67046737  5.37413513  3.09975377
## [163]  3.35888137  3.04007194  4.94861303  4.54504458  5.27255844  5.13016117
## [169]  4.30468082  5.08336782  4.06743571  5.74212961  4.48205140  4.29150758
## [175]  4.50329623  5.04747033  4.27615505  5.53808610
wine.lda.values <- predict(wine.lda, wine[2:14])
wine.lda.values$x[,1] # contains the values for the first discriminant function
##           1           2           3           4           5           6 
## -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934 
##           7           8           9          10          11          12 
## -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646 
##          13          14          15          16          17          18 
## -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262 
##          19          20          21          22          23          24 
## -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961 
##          25          26          27          28          29          30 
## -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573 
##          31          32          33          34          35          36 
## -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996 
##          37          38          39          40          41          42 
## -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062 
##          43          44          45          46          47          48 
## -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356 
##          49          50          51          52          53          54 
## -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293 
##          55          56          57          58          59          60 
## -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551  1.58618749 
##          61          62          63          64          65          66 
##  0.79967216  2.38015446 -0.45917726 -0.50726885  0.39398359 -0.92256616 
##          67          68          69          70          71          72 
## -1.95549377 -0.34732815  0.20371212 -0.24831914  1.17987999 -1.07718925 
##          73          74          75          76          77          78 
##  0.64100179 -1.74684421 -0.34721117  1.14274222  0.18665882  0.90052500 
##          79          80          81          82          83          84 
## -0.70709551 -0.59562833 -0.55761818 -1.80430417  0.23077079  2.03482711 
##          85          86          87          88          89          90 
## -0.62113021 -1.03372742  0.76598781  0.35042568  0.15324508 -0.14962842 
##          91          92          93          94          95          96 
##  0.48079504  1.39689016  0.91972331 -0.59102937  0.49411386 -1.62614426 
##          97          98          99         100         101         102 
##  2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340  0.02594549 
##         103         104         105         106         107         108 
## -0.21887407  1.36437640 -1.12901245 -0.21263094 -0.77946884  0.61546732 
##         109         110         111         112         113         114 
##  0.22550192 -2.03869851  0.79274716  0.30229545 -0.50664882  0.99837397 
##         115         116         117         118         119         120 
## -0.21954922 -0.37131517  0.05545894 -0.09137874  1.79755252 -0.17405009 
##         121         122         123         124         125         126 
## -1.17870281 -3.21054390  0.62605202  0.03366613 -0.69930080 -0.72061079 
##         127         128         129         130         131         132 
## -0.51933512  1.17030045  0.10824791  1.12319783  2.24632419  3.28527755 
##         133         134         135         136         137         138 
##  4.07236441  3.86691235  3.45088333  3.71583899  3.92220510  4.85161020 
##         139         140         141         142         143         144 
##  3.54993389  3.76889174  2.66942250  2.32491492  3.17712883  2.88964418 
##         145         146         147         148         149         150 
##  3.78325562  3.04411324  4.70697017  4.85021393  4.98359184  4.86968293 
##         151         152         153         154         155         156 
##  4.59869190  5.67447884  5.32986123  5.03401031  4.52080087  5.09783710 
##         157         158         159         160         161         162 
##  5.04368277  4.86980829  5.61316558  5.67046737  5.37413513  3.09975377 
##         163         164         165         166         167         168 
##  3.35888137  3.04007194  4.94861303  4.54504458  5.27255844  5.13016117 
##         169         170         171         172         173         174 
##  4.30468082  5.08336782  4.06743571  5.74212961  4.48205140  4.29150758 
##         175         176         177         178 
##  4.50329623  5.04747033  4.27615505  5.53808610
groupStandardise <- function(variables, groupvariable)
  {
     # find out how many variables we have
     variables <- as.data.frame(variables)
     numvariables <- length(variables)
     # find the variable names
     variablenames <- colnames(variables)
     # calculate the group-standardised version of each variable
     for (i in 1:numvariables)
     {
        variablei <- variables[i]
        variablei_name <- variablenames[i]
        variablei_Vw <- calcWithinGroupsVariance(variablei, groupvariable)
        variablei_mean <- mean(wine$V2) + mean(wine$V3) + mean(wine$V4) + mean(wine$V5) + mean(wine$V6) + mean(wine$V7) + mean(wine$V8) + mean(wine$V9) + mean(wine$V10) + mean(wine$V11) + mean(wine$V12) + mean(wine$V13) + mean(wine$V14)
        variablei_new <- (variablei - variablei_mean)/(sqrt(variablei_Vw))
        data_length <- nrow(variablei)
        if (i == 1) { variables_new <- data.frame(row.names=seq(1,data_length)) }
        variables_new[`variablei_name`] <- variablei_new
     }
     return(variables_new)
  }
groupstandardisedconcentrations <- groupStandardise(wine[2:14], wine[1])
wine.lda2 <- lda(wine$V1 ~ groupstandardisedconcentrations$V2 + groupstandardisedconcentrations$V3 + groupstandardisedconcentrations$V4 + groupstandardisedconcentrations$V5 + groupstandardisedconcentrations$V6 + groupstandardisedconcentrations$V7 + groupstandardisedconcentrations$V8 + groupstandardisedconcentrations$V9 + groupstandardisedconcentrations$V10 + groupstandardisedconcentrations$V11 + groupstandardisedconcentrations$V12 + groupstandardisedconcentrations$V13 + groupstandardisedconcentrations$V14)
wine.lda2
## Call:
## lda(wine$V1 ~ groupstandardisedconcentrations$V2 + groupstandardisedconcentrations$V3 + 
##     groupstandardisedconcentrations$V4 + groupstandardisedconcentrations$V5 + 
##     groupstandardisedconcentrations$V6 + groupstandardisedconcentrations$V7 + 
##     groupstandardisedconcentrations$V8 + groupstandardisedconcentrations$V9 + 
##     groupstandardisedconcentrations$V10 + groupstandardisedconcentrations$V11 + 
##     groupstandardisedconcentrations$V12 + groupstandardisedconcentrations$V13 + 
##     groupstandardisedconcentrations$V14)
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3314607 0.3988764 0.2696629 
## 
## Group means:
##   groupstandardisedconcentrations$V2 groupstandardisedconcentrations$V3
## 1                          -1728.804                          -951.8414
## 2                          -1731.667                          -951.9242
## 3                          -1729.958                          -950.4370
##   groupstandardisedconcentrations$V4 groupstandardisedconcentrations$V5
## 1                          -3486.869                          -311.5955
## 2                          -3487.689                          -310.4644
## 3                          -3486.941                          -310.0478
##   groupstandardisedconcentrations$V6 groupstandardisedconcentrations$V7
## 1                          -58.95429                          -2048.492
## 2                          -59.83144                          -2049.821
## 3                          -59.47706                          -2051.148
##   groupstandardisedconcentrations$V8 groupstandardisedconcentrations$V9
## 1                          -1709.047                          -8232.009
## 2                          -1710.767                          -8231.334
## 3                          -1713.247                          -8230.566
##   groupstandardisedconcentrations$V10 groupstandardisedconcentrations$V11
## 1                           -1807.565                           -590.9047
## 2                           -1808.108                           -592.5200
## 3                           -1809.068                           -589.6690
##   groupstandardisedconcentrations$V12 groupstandardisedconcentrations$V13
## 1                           -5736.485                           -2233.521
## 2                           -5736.522                           -2234.450
## 3                           -5738.909                           -2237.198
##   groupstandardisedconcentrations$V14
## 1                            1.258849
## 2                           -2.200234
## 3                           -1.559777
## 
## Coefficients of linear discriminants:
##                                             LD1          LD2
## groupstandardisedconcentrations$V2  -0.20650463  0.446280119
## groupstandardisedconcentrations$V3   0.15568586  0.287697336
## groupstandardisedconcentrations$V4  -0.09486893  0.602988809
## groupstandardisedconcentrations$V5   0.43802089 -0.414203541
## groupstandardisedconcentrations$V6  -0.02907934 -0.006219863
## groupstandardisedconcentrations$V7   0.27030186 -0.014088108
## groupstandardisedconcentrations$V8  -0.87067265 -0.257868714
## groupstandardisedconcentrations$V9  -0.16325474 -0.178003512
## groupstandardisedconcentrations$V10  0.06653116 -0.152364015
## groupstandardisedconcentrations$V11  0.53670086  0.382782544
## groupstandardisedconcentrations$V12 -0.12801061 -0.237174509
## groupstandardisedconcentrations$V13 -0.46414916  0.020523349
## groupstandardisedconcentrations$V14 -0.46385409  0.491738050
## 
## Proportion of trace:
##    LD1    LD2 
## 0.6875 0.3125
wine.lda.values <- predict(wine.lda, wine[2:14])
wine.lda.values$x[,1] # values for the first discriminant function, using the unstandardised data
##           1           2           3           4           5           6 
## -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934 
##           7           8           9          10          11          12 
## -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646 
##          13          14          15          16          17          18 
## -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262 
##          19          20          21          22          23          24 
## -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961 
##          25          26          27          28          29          30 
## -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573 
##          31          32          33          34          35          36 
## -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996 
##          37          38          39          40          41          42 
## -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062 
##          43          44          45          46          47          48 
## -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356 
##          49          50          51          52          53          54 
## -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293 
##          55          56          57          58          59          60 
## -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551  1.58618749 
##          61          62          63          64          65          66 
##  0.79967216  2.38015446 -0.45917726 -0.50726885  0.39398359 -0.92256616 
##          67          68          69          70          71          72 
## -1.95549377 -0.34732815  0.20371212 -0.24831914  1.17987999 -1.07718925 
##          73          74          75          76          77          78 
##  0.64100179 -1.74684421 -0.34721117  1.14274222  0.18665882  0.90052500 
##          79          80          81          82          83          84 
## -0.70709551 -0.59562833 -0.55761818 -1.80430417  0.23077079  2.03482711 
##          85          86          87          88          89          90 
## -0.62113021 -1.03372742  0.76598781  0.35042568  0.15324508 -0.14962842 
##          91          92          93          94          95          96 
##  0.48079504  1.39689016  0.91972331 -0.59102937  0.49411386 -1.62614426 
##          97          98          99         100         101         102 
##  2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340  0.02594549 
##         103         104         105         106         107         108 
## -0.21887407  1.36437640 -1.12901245 -0.21263094 -0.77946884  0.61546732 
##         109         110         111         112         113         114 
##  0.22550192 -2.03869851  0.79274716  0.30229545 -0.50664882  0.99837397 
##         115         116         117         118         119         120 
## -0.21954922 -0.37131517  0.05545894 -0.09137874  1.79755252 -0.17405009 
##         121         122         123         124         125         126 
## -1.17870281 -3.21054390  0.62605202  0.03366613 -0.69930080 -0.72061079 
##         127         128         129         130         131         132 
## -0.51933512  1.17030045  0.10824791  1.12319783  2.24632419  3.28527755 
##         133         134         135         136         137         138 
##  4.07236441  3.86691235  3.45088333  3.71583899  3.92220510  4.85161020 
##         139         140         141         142         143         144 
##  3.54993389  3.76889174  2.66942250  2.32491492  3.17712883  2.88964418 
##         145         146         147         148         149         150 
##  3.78325562  3.04411324  4.70697017  4.85021393  4.98359184  4.86968293 
##         151         152         153         154         155         156 
##  4.59869190  5.67447884  5.32986123  5.03401031  4.52080087  5.09783710 
##         157         158         159         160         161         162 
##  5.04368277  4.86980829  5.61316558  5.67046737  5.37413513  3.09975377 
##         163         164         165         166         167         168 
##  3.35888137  3.04007194  4.94861303  4.54504458  5.27255844  5.13016117 
##         169         170         171         172         173         174 
##  4.30468082  5.08336782  4.06743571  5.74212961  4.48205140  4.29150758 
##         175         176         177         178 
##  4.50329623  5.04747033  4.27615505  5.53808610
wine.lda.values2 <- predict(wine.lda2, groupstandardisedconcentrations)
wine.lda.values2$x[,1] # values for the first discriminant function, using the standardised data
##           1           2           3           4           5           6 
## -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934 
##           7           8           9          10          11          12 
## -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646 
##          13          14          15          16          17          18 
## -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262 
##          19          20          21          22          23          24 
## -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961 
##          25          26          27          28          29          30 
## -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573 
##          31          32          33          34          35          36 
## -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996 
##          37          38          39          40          41          42 
## -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062 
##          43          44          45          46          47          48 
## -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356 
##          49          50          51          52          53          54 
## -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293 
##          55          56          57          58          59          60 
## -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551  1.58618749 
##          61          62          63          64          65          66 
##  0.79967216  2.38015446 -0.45917726 -0.50726885  0.39398359 -0.92256616 
##          67          68          69          70          71          72 
## -1.95549377 -0.34732815  0.20371212 -0.24831914  1.17987999 -1.07718925 
##          73          74          75          76          77          78 
##  0.64100179 -1.74684421 -0.34721117  1.14274222  0.18665882  0.90052500 
##          79          80          81          82          83          84 
## -0.70709551 -0.59562833 -0.55761818 -1.80430417  0.23077079  2.03482711 
##          85          86          87          88          89          90 
## -0.62113021 -1.03372742  0.76598781  0.35042568  0.15324508 -0.14962842 
##          91          92          93          94          95          96 
##  0.48079504  1.39689016  0.91972331 -0.59102937  0.49411386 -1.62614426 
##          97          98          99         100         101         102 
##  2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340  0.02594549 
##         103         104         105         106         107         108 
## -0.21887407  1.36437640 -1.12901245 -0.21263094 -0.77946884  0.61546732 
##         109         110         111         112         113         114 
##  0.22550192 -2.03869851  0.79274716  0.30229545 -0.50664882  0.99837397 
##         115         116         117         118         119         120 
## -0.21954922 -0.37131517  0.05545894 -0.09137874  1.79755252 -0.17405009 
##         121         122         123         124         125         126 
## -1.17870281 -3.21054390  0.62605202  0.03366613 -0.69930080 -0.72061079 
##         127         128         129         130         131         132 
## -0.51933512  1.17030045  0.10824791  1.12319783  2.24632419  3.28527755 
##         133         134         135         136         137         138 
##  4.07236441  3.86691235  3.45088333  3.71583899  3.92220510  4.85161020 
##         139         140         141         142         143         144 
##  3.54993389  3.76889174  2.66942250  2.32491492  3.17712883  2.88964418 
##         145         146         147         148         149         150 
##  3.78325562  3.04411324  4.70697017  4.85021393  4.98359184  4.86968293 
##         151         152         153         154         155         156 
##  4.59869190  5.67447884  5.32986123  5.03401031  4.52080087  5.09783710 
##         157         158         159         160         161         162 
##  5.04368277  4.86980829  5.61316558  5.67046737  5.37413513  3.09975377 
##         163         164         165         166         167         168 
##  3.35888137  3.04007194  4.94861303  4.54504458  5.27255844  5.13016117 
##         169         170         171         172         173         174 
##  4.30468082  5.08336782  4.06743571  5.74212961  4.48205140  4.29150758 
##         175         176         177         178 
##  4.50329623  5.04747033  4.27615505  5.53808610

Separation Achieved by the Discriminant Functions

wine.lda.values <- predict(wine.lda, standardisedconcentrations)
calcSeparations(wine.lda.values$x,wine[1])
## [1] "variable LD1 Vw= 0.999999999999999 Vb= 15837.082234555 separation= 15837.082234555"
## [1] "variable LD2 Vw= 1 Vb= 15403.6710754822 separation= 15403.6710754822"
wine.lda
## Call:
## lda(wine$V1 ~ wine$V2 + wine$V3 + wine$V4 + wine$V5 + wine$V6 + 
##     wine$V7 + wine$V8 + wine$V9 + wine$V10 + wine$V11 + wine$V12 + 
##     wine$V13 + wine$V14)
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3314607 0.3988764 0.2696629 
## 
## Group means:
##    wine$V2  wine$V3  wine$V4  wine$V5  wine$V6  wine$V7   wine$V8  wine$V9
## 1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2 12.27873 1.932676 2.244789 20.23803  94.5493 2.258873 2.0808451 0.363662
## 3 13.15375 3.333750 2.437083 21.41667  99.3125 1.678750 0.7814583 0.447500
##   wine$V10 wine$V11  wine$V12 wine$V13  wine$V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352  519.5070
## 3 1.153542 7.396250 0.6827083 1.683542  629.8958
## 
## Coefficients of linear discriminants:
##                   LD1           LD2
## wine$V2  -0.403399781  0.8717930699
## wine$V3   0.165254596  0.3053797325
## wine$V4  -0.369075256  2.3458497486
## wine$V5   0.154797889 -0.1463807654
## wine$V6  -0.002163496 -0.0004627565
## wine$V7   0.618052068 -0.0322128171
## wine$V8  -1.661191235 -0.4919980543
## wine$V9  -1.495818440 -1.6309537953
## wine$V10  0.134092628 -0.3070875776
## wine$V11  0.355055710  0.2532306865
## wine$V12 -0.818036073 -1.5156344987
## wine$V13 -1.157559376  0.0511839665
## wine$V14 -0.002691206  0.0028529846
## 
## Proportion of trace:
##    LD1    LD2 
## 0.6875 0.3125
(wine.lda$svd)^2
## [1] 794.6522 361.2410

A Stacked Histogram of the LDA Values

ldahist(data = wine.lda.values$x[,1], g=wine$V1)

ldahist(data = wine.lda.values$x[,2], g=wine$V1)

Scatterplots of the Discriminant Functions

plot(wine.lda.values$x[,1],wine.lda.values$x[,2]) # make a scatterplot
text(wine.lda.values$x[,1],wine.lda.values$x[,2],wine$V1,cex=0.7,pos=4,col="red") # add labels

Allocation Rules and Misclassification Rate

printMeanAndSdByGroup(wine.lda.values$x,wine[1])
## [1] "Means:"
##   V1         LD1       LD2
## 1  1 -3.42248851  1.691674
## 2  2 -0.07972623 -2.472656
## 3  3  4.32473717  1.578120
## [1] "Standard deviations:"
##   V1       LD1       LD2
## 1  1 0.9394626 1.0176394
## 2  2 1.0839318 0.9973165
## 3  3 0.9404188 0.9818673
## [1] "Sample sizes:"
##   V1 LD1 LD2
## 1  1  59  59
## 2  2  71  71
## 3  3  48  48
calcAllocationRuleAccuracy <- function(ldavalue, groupvariable, cutoffpoints)
  {
     # find out how many values the group variable can take
     groupvariable2 <- as.factor(groupvariable[[1]])
     levels <- levels(groupvariable2)
     numlevels <- length(levels)
     # calculate the number of true positives and false negatives for each group
     numlevels <- length(levels)
     for (i in 1:numlevels)
     {
        leveli <- levels[i]
        levelidata <- ldavalue[groupvariable==leveli]
        # see how many of the samples from this group are classified in each group
        for (j in 1:numlevels)
        {
           levelj <- levels[j]
           if (j == 1)
           {
              cutoff1 <- cutoffpoints[1]
              cutoff2 <- "NA"
              results <- summary(levelidata <= cutoff1)
           }
           else if (j == numlevels)
           {
              cutoff1 <- cutoffpoints[(numlevels-1)]
              cutoff2 <- "NA"
              results <- summary(levelidata > cutoff1)
           }
           else
           {
              cutoff1 <- cutoffpoints[(j-1)]
              cutoff2 <- cutoffpoints[(j)]
              results <- summary(levelidata > cutoff1 & levelidata <= cutoff2)
           }
           trues <- results["TRUE"]
           trues <- trues[[1]]
           print(paste("Number of samples of group",leveli,"classified as group",levelj," : ",
              trues,"(cutoffs:",cutoff1,",",cutoff2,")"))
        }
     }
  }
calcAllocationRuleAccuracy(wine.lda.values$x[,1], wine[1], c(-1.751107, 2.122505))
## [1] "Number of samples of group 1 classified as group 1  :  56 (cutoffs: -1.751107 , NA )"
## [1] "Number of samples of group 1 classified as group 2  :  3 (cutoffs: -1.751107 , 2.122505 )"
## [1] "Number of samples of group 1 classified as group 3  :  NA (cutoffs: 2.122505 , NA )"
## [1] "Number of samples of group 2 classified as group 1  :  5 (cutoffs: -1.751107 , NA )"
## [1] "Number of samples of group 2 classified as group 2  :  65 (cutoffs: -1.751107 , 2.122505 )"
## [1] "Number of samples of group 2 classified as group 3  :  1 (cutoffs: 2.122505 , NA )"
## [1] "Number of samples of group 3 classified as group 1  :  NA (cutoffs: -1.751107 , NA )"
## [1] "Number of samples of group 3 classified as group 2  :  NA (cutoffs: -1.751107 , 2.122505 )"
## [1] "Number of samples of group 3 classified as group 3  :  48 (cutoffs: 2.122505 , NA )"