# UCI wine dataset: concentrations of 13 chemicals (columns V2-V14) measured in
# wines from a single Italian region, produced from three different cultivars
# (the cultivar label is in column V1). The file has no header row, so the
# columns get the default names V1..V14.
wine <- read.table(
  file = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
  header = FALSE,
  sep = ","
)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 1065
## 2 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 1050
## 3 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 1185
## 4 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 1480
## 5 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 735
## 6 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 1450
# Summary Statistics ----
# Average concentration of each chemical (columns V2-V14), returned as a named
# numeric vector with one entry per column.
vapply(wine[2:14], mean, FUN.VALUE = numeric(1))
## V2 V3 V4 V5 V6 V7
## 13.0006180 2.3363483 2.3665169 19.4949438 99.7415730 2.2951124
## V8 V9 V10 V11 V12 V13
## 2.0292697 0.3618539 1.5908989 5.0580899 0.9574494 2.6116854
## V14
## 746.8932584
# Spread (standard deviation) of each chemical's concentration across the
# samples, returned as a named numeric vector with one entry per column.
vapply(wine[2:14], sd, FUN.VALUE = numeric(1))
## V2 V3 V4 V5 V6 V7
## 0.8118265 1.1171461 0.2743440 3.3395638 14.2824835 0.6258510
## V8 V9 V10 V11 V12 V13
## 0.9988587 0.1244533 0.5723589 2.3182859 0.2285716 0.7099904
## V14
## 314.9074743
# The standard deviations printed above differ widely between chemicals, so
# the raw columns are not directly comparable. Centre each chemical column and
# divide it by its standard deviation before running the PCA.
standardisedconcentrations <- as.data.frame(
  scale(wine[2:14], center = TRUE, scale = TRUE)
)

# The input is already standardised, so prcomp() only applies its default
# centring and does no additional rescaling.
wine.pca <- prcomp(standardisedconcentrations, center = TRUE, scale. = FALSE)
