wine <- read.table("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data", sep=",")
#View(wine)
str(wine)
'data.frame':	178 obs. of  14 variables:
 $ V1 : int  1 1 1 1 1 1 1 1 1 1 ...
 $ V2 : num  14.2 13.2 13.2 14.4 13.2 ...
 $ V3 : num  1.71 1.78 2.36 1.95 2.59 1.76 1.87 2.15 1.64 1.35 ...
 $ V4 : num  2.43 2.14 2.67 2.5 2.87 2.45 2.45 2.61 2.17 2.27 ...
 $ V5 : num  15.6 11.2 18.6 16.8 21 15.2 14.6 17.6 14 16 ...
 $ V6 : int  127 100 101 113 118 112 96 121 97 98 ...
 $ V7 : num  2.8 2.65 2.8 3.85 2.8 3.27 2.5 2.6 2.8 2.98 ...
 $ V8 : num  3.06 2.76 3.24 3.49 2.69 3.39 2.52 2.51 2.98 3.15 ...
 $ V9 : num  0.28 0.26 0.3 0.24 0.39 0.34 0.3 0.31 0.29 0.22 ...
 $ V10: num  2.29 1.28 2.81 2.18 1.82 1.97 1.98 1.25 1.98 1.85 ...
 $ V11: num  5.64 4.38 5.68 7.8 4.32 6.75 5.25 5.05 5.2 7.22 ...
 $ V12: num  1.04 1.05 1.03 0.86 1.04 1.05 1.02 1.06 1.08 1.01 ...
 $ V13: num  3.92 3.4 3.17 3.45 2.93 2.85 3.58 3.58 2.85 3.55 ...
 $ V14: int  1065 1050 1185 1480 735 1450 1290 1295 1045 1045 ...
# Convert the class label (V1) to a factor
wine$V1 <- as.factor(wine$V1)
str(wine)
'data.frame':	178 obs. of  14 variables:
 $ V1 : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
 $ V2 : num  14.2 13.2 13.2 14.4 13.2 ...
 $ V3 : num  1.71 1.78 2.36 1.95 2.59 1.76 1.87 2.15 1.64 1.35 ...
 $ V4 : num  2.43 2.14 2.67 2.5 2.87 2.45 2.45 2.61 2.17 2.27 ...
 $ V5 : num  15.6 11.2 18.6 16.8 21 15.2 14.6 17.6 14 16 ...
 $ V6 : int  127 100 101 113 118 112 96 121 97 98 ...
 $ V7 : num  2.8 2.65 2.8 3.85 2.8 3.27 2.5 2.6 2.8 2.98 ...
 $ V8 : num  3.06 2.76 3.24 3.49 2.69 3.39 2.52 2.51 2.98 3.15 ...
 $ V9 : num  0.28 0.26 0.3 0.24 0.39 0.34 0.3 0.31 0.29 0.22 ...
 $ V10: num  2.29 1.28 2.81 2.18 1.82 1.97 1.98 1.25 1.98 1.85 ...
 $ V11: num  5.64 4.38 5.68 7.8 4.32 6.75 5.25 5.05 5.2 7.22 ...
 $ V12: num  1.04 1.05 1.03 0.86 1.04 1.05 1.02 1.06 1.08 1.01 ...
 $ V13: num  3.92 3.4 3.17 3.45 2.93 2.85 3.58 3.58 2.85 3.55 ...
 $ V14: int  1065 1050 1185 1480 735 1450 1290 1295 1045 1045 ...
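V1 is taken here to be the cultivar label, following the UCI description of the data set. A quick tabulation shows how the 178 wines split across the three classes; this is just a sanity check, not part of the output above.

# Number of observations in each class (V1 assumed to be the cultivar label)
table(wine$V1)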
library(car)
# Scatterplot matrix of the first three measurement variables (car package)
scatterplotMatrix(wine[2:4], pch=19)
# The same scatterplot matrix with base R
pairs(wine[2:4], pch=19)
# Scatter plot of two individual variables
plot(wine$V2, wine$V3, pch=19)
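Since V1 is now a factor, the same scatter plot can be coloured by class to see whether the cultivars separate on these two measurements. This is an optional variation; the axis labels assume the UCI attribute order, in which V2 is alcohol and V3 is malic acid.

# Colour points by cultivar; V2 = alcohol and V3 = malic acid is an assumption
# based on the UCI attribute order
plot(wine$V2, wine$V3, pch=19, col=wine$V1,
     xlab="V2 (alcohol)", ylab="V3 (malic acid)")
legend("topright", legend=levels(wine$V1), pch=19, col=1:3)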
# Mean vector
sapply(wine[2:14], mean)
         V2          V3          V4          V5          V6          V7 
 13.0006180   2.3363483   2.3665169  19.4949438  99.7415730   2.2951124 
         V8          V9         V10         V11         V12         V13 
  2.0292697   0.3618539   1.5908989   5.0580899   0.9574494   2.6116854 
        V14 
746.8932584 
# Mean of a single variable
mean(wine$V2)
[1] 13.00062
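For the means in particular, colMeans() on the same columns should give an identical vector to the sapply() call above; the choice between the two is just a matter of style.

# Equivalent to sapply(wine[2:14], mean) for a numeric data frame
colMeans(wine[2:14])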
# Variance vector
sapply(wine[2:14], var)
          V2           V3           V4           V5           V6           V7 
6.590623e-01 1.248015e+00 7.526464e-02 1.115269e+01 2.039893e+02 3.916895e-01 
          V8           V9          V10          V11          V12          V13 
9.977187e-01 1.548863e-02 3.275947e-01 5.374449e+00 5.224496e-02 5.040864e-01 
         V14 
9.916672e+04 
# Standard deviation vector
sapply(wine[2:14], sd)
         V2          V3          V4          V5          V6          V7 
  0.8118265   1.1171461   0.2743440   3.3395638  14.2824835   0.6258510 
         V8          V9         V10         V11         V12         V13 
  0.9988587   0.1244533   0.5723589   2.3182859   0.2285716   0.7099904 
        V14 
314.9074743 
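As a small consistency check, each standard deviation should equal the square root of the corresponding variance, so the comparison below should return TRUE.

# Standard deviations are the square roots of the variances
all.equal(sapply(wine[2:14], sd), sqrt(sapply(wine[2:14], var)))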
# Variance-covariance matrix
cov(wine[2:4])
           V2         V3         V4
V2 0.65906233 0.08561131 0.04711516
V3 0.08561131 1.24801540 0.05027704
V4 0.04711516 0.05027704 0.07526464
# Correlation matrix
cor(wine[2:4])
           V2         V3        V4
V2 1.00000000 0.09439694 0.2115446
V3 0.09439694 1.00000000 0.1640455
V4 0.21154460 0.16404547 1.0000000
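The same correlation matrix can be obtained by rescaling the covariance matrix with cov2cor(), which divides each covariance by the product of the two standard deviations.

# Rescale the variance-covariance matrix into the correlation matrix
cov2cor(cov(wine[2:4]))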
cor.test(wine$V2,wine$V3)
	Pearson's product-moment correlation

data:  wine$V2 and wine$V3
t = 1.2579, df = 176, p-value = 0.2101
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.05342959  0.23817474
sample estimates:
       cor 
0.09439694 
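The t statistic in this output follows directly from the sample correlation r and the number of complete pairs n via t = r * sqrt(n - 2) / sqrt(1 - r^2). Recomputing it by hand (assuming n = 178, hence df = 176) reproduces the 1.2579 above.

# Recompute the test statistic from the correlation coefficient
r <- cor(wine$V2, wine$V3)
n <- nrow(wine)
r * sqrt(n - 2) / sqrt(1 - r^2)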
# Standardize a single variable
stdV2 <- scale(wine$V2)
mean(stdV2)
[1] -8.591766e-16
sd(stdV2)
[1] 1
# Standardize all the measurement variables
stddf <- as.data.frame(scale(wine[2:14]))
head(stddf)
          V2          V3         V4         V5         V6        V7        V8
1  1.5143408 -0.56066822  0.2313998 -1.1663032 1.90852151 0.8067217 1.0319081
2  0.2455968 -0.49800856 -0.8256672 -2.4838405 0.01809398 0.5670481 0.7315653
3  0.1963252  0.02117152  1.1062139 -0.2679823 0.08810981 0.8067217 1.2121137
4  1.6867914 -0.34583508  0.4865539 -0.8069748 0.92829983 2.4844372 1.4623994
5  0.2948684  0.22705328  1.8352256  0.4506745 1.27837900 0.8067217 0.6614853
6  1.4773871 -0.51591132  0.3043010 -1.2860793 0.85828399 1.5576991 1.3622851
          V9        V10        V11        V12       V13         V14
1 -0.6577078  1.2214385  0.2510088  0.3611585 1.8427215  1.01015939
2 -0.8184106 -0.5431887 -0.2924962  0.4049085 1.1103172  0.96252635
3 -0.4970050  2.1299594  0.2682629  0.3174085 0.7863692  1.39122370
4 -0.9791134  1.0292513  1.1827317 -0.4263410 1.1807407  2.32800680
5  0.2261576  0.4002753 -0.3183774  0.3611585 0.4483365 -0.03776747
6 -0.1755994  0.6623487  0.7298108  0.4049085 0.3356589  2.23274072
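After scaling, every column of stddf should have mean zero (up to floating-point error) and standard deviation one, which can be checked across all columns at once.

# Column means are numerically zero; standard deviations are one
round(sapply(stddf, mean), 10)
sapply(stddf, sd)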