Reading Multivariate Analysis Data into R
wine <- read.csv("C:/multi/wine.data", header=FALSE)
View(wine)
A Matrix Scatterplot
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
wine[2:6]
scatterplotMatrix(wine[2:6])
A Scatterplot with the Data Points Labelled by their Group
plot(wine$V4, wine$V5)
plot(wine$V4, wine$V5)
text<-text(wine$V4, wine$V5, wine$V1, cex=0.7, pos=4, col="red")
A Profile Plot
library(RColorBrewer)
makeProfilePlot <- function(mylist,names)
{
require(RColorBrewer)
# find out how many variables we want to include
numvariables <- length(mylist)
# choose 'numvariables' random colours
colours <- brewer.pal(numvariables,"Set1")
# find out the minimum and maximum values of the variables:
mymin <- 1e+20
mymax <- 1e-20
for (i in 1:numvariables)
{
vectori <- mylist[[i]]
mini <- min(vectori)
maxi <- max(vectori)
if (mini < mymin) { mymin <- mini }
if (maxi > mymax) { mymax <- maxi }
}
# plot the variables
for (i in 1:numvariables)
{
vectori <- mylist[[i]]
namei <- names[i]
colouri <- colours[i]
if (i == 1) { plot(vectori,col=colouri,type="l",ylim=c(mymin,mymax)) }
else { points(vectori, col=colouri,type="l") }
lastxval <- length(vectori)
lastyval <- vectori[length(vectori)]
text((lastxval-10),(lastyval),namei,col="black",cex=0.6)
}
}
library(RColorBrewer)
names <- c("V2","V3","V4","V5","V6")
mylist <- list(wine$V2,wine$V3,wine$V4,wine$V5,wine$V6)
makeProfilePlot(mylist,names)
Calculating Summary Statistics for Multivariate Data
sapply(wine[2:14],mean)
## V2 V3 V4 V5 V6 V7
## 13.0006180 2.3363483 2.3665169 19.4949438 99.7415730 2.2951124
## V8 V9 V10 V11 V12 V13
## 2.0292697 0.3618539 1.5908989 5.0580899 0.9574494 2.6116854
## V14
## 746.8932584
sapply(wine[2:14],sd)
## V2 V3 V4 V5 V6 V7
## 0.8118265 1.1171461 0.2743440 3.3395638 14.2824835 0.6258510
## V8 V9 V10 V11 V12 V13
## 0.9988587 0.1244533 0.5723589 2.3182859 0.2285716 0.7099904
## V14
## 314.9074743
Means and Variances Per Group
cultivar2wine <- wine[wine$V1=="2",]
sapply(cultivar2wine[2:14],mean)
## V2 V3 V4 V5 V6 V7 V8
## 12.278732 1.932676 2.244789 20.238028 94.549296 2.258873 2.080845
## V9 V10 V11 V12 V13 V14
## 0.363662 1.630282 3.086620 1.056282 2.785352 519.507042
sapply(cultivar2wine[2:14],sd)
## V2 V3 V4 V5 V6 V7
## 0.5379642 1.0155687 0.3154673 3.3497704 16.7534975 0.5453611
## V8 V9 V10 V11 V12 V13
## 0.7057008 0.1239613 0.6020678 0.9249293 0.2029368 0.4965735
## V14
## 157.2112204
printMeanAndSdByGroup <- function(variables,groupvariable)
{
# find the names of the variables
variablenames <- c(names(groupvariable),names(as.data.frame(variables)))
# within each group, find the mean of each variable
groupvariable <- groupvariable[,1] # ensures groupvariable is not a list
means <- aggregate(as.matrix(variables) ~ groupvariable, FUN = mean)
names(means) <- variablenames
print(paste("Means:"))
print(means)
# within each group, find the standard deviation of each variable:
sds <- aggregate(as.matrix(variables) ~ groupvariable, FUN = sd)
names(sds) <- variablenames
print(paste("Standard deviations:"))
print(sds)
# within each group, find the number of samples:
samplesizes <- aggregate(as.matrix(variables) ~ groupvariable, FUN = length)
names(samplesizes) <- variablenames
print(paste("Sample sizes:"))
print(samplesizes)
}
printMeanAndSdByGroup(wine[2:14],wine[1])
## [1] "Means:"
## V1 V2 V3 V4 V5 V6 V7 V8 V9
## 1 1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2 2 12.27873 1.932676 2.244789 20.23803 94.5493 2.258873 2.0808451 0.363662
## 3 3 13.15375 3.333750 2.437083 21.41667 99.3125 1.678750 0.7814583 0.447500
## V10 V11 V12 V13 V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352 519.5070
## 3 1.153542 7.396250 0.6827083 1.683542 629.8958
## [1] "Standard deviations:"
## V1 V2 V3 V4 V5 V6 V7 V8
## 1 1 0.4621254 0.6885489 0.2271660 2.546322 10.49895 0.3389614 0.3974936
## 2 2 0.5379642 1.0155687 0.3154673 3.349770 16.75350 0.5453611 0.7057008
## 3 3 0.5302413 1.0879057 0.1846902 2.258161 10.89047 0.3569709 0.2935041
## V9 V10 V11 V12 V13 V14
## 1 0.07004924 0.4121092 1.2385728 0.1164826 0.3570766 221.5208
## 2 0.12396128 0.6020678 0.9249293 0.2029368 0.4965735 157.2112
## 3 0.12413959 0.4088359 2.3109421 0.1144411 0.2721114 115.0970
## [1] "Sample sizes:"
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1 1 59 59 59 59 59 59 59 59 59 59 59 59 59
## 2 2 71 71 71 71 71 71 71 71 71 71 71 71 71
## 3 3 48 48 48 48 48 48 48 48 48 48 48 48 48
Between-groups Variance and Within-groups Variance for a Variable
calcWithinGroupsVariance <- function(variable,groupvariable)
{
# find out how many values the group variable can take
groupvariable2 <- as.factor(groupvariable[[1]])
levels <- levels(groupvariable2)
numlevels <- length(levels)
# get the mean and standard deviation for each group:
numtotal <- 0
denomtotal <- 0
for (i in 1:numlevels)
{
leveli <- levels[i]
levelidata <- variable[groupvariable==leveli,]
levelilength <- length(levelidata)
# get the standard deviation for group i:
sdi <- sd(levelidata)
numi <- (levelilength - 1)*(sdi * sdi)
denomi <- levelilength
numtotal <- numtotal + numi
denomtotal <- denomtotal + denomi
}
# calculate the within-groups variance
Vw <- numtotal / (denomtotal - numlevels)
return(Vw)
}
calcWithinGroupsVariance(wine[2],wine[1])
## [1] 0.2620525
calcBetweenGroupsVariance <- function(variable,groupvariable)
{
# find out how many values the group variable can take
groupvariable2 <- as.factor(groupvariable[[1]])
levels <- levels(groupvariable2)
numlevels <- length(levels)
# calculate the overall grand mean:
grandmean <- mean(wine$V2)
# get the mean and standard deviation for each group:
numtotal <- 0
denomtotal <- 0
for (i in 1:numlevels)
{
leveli <- levels[i]
levelidata <- variable[groupvariable==leveli,]
levelilength <- length(levelidata)
# get the mean and standard deviation for group i:
meani <- mean(levelidata)
sdi <- sd(levelidata)
numi <- levelilength * ((meani - grandmean)^2)
denomi <- levelilength
numtotal <- numtotal + numi
denomtotal <- denomtotal + denomi
}
# calculate the between-groups variance
Vb <- numtotal / (numlevels - 1)
Vb <- Vb[[1]]
return(Vb)
}
calcBetweenGroupsVariance (wine[2],wine[1])
## [1] 35.39742
35.39742/0.2620525
## [1] 135.0776
calcSeparations <- function(variables,groupvariable)
{
# find out how many variables we have
variables <- as.data.frame(variables)
numvariables <- length(variables)
# find the variable names
variablenames <- colnames(variables)
# calculate the separation for each variable
for (i in 1:numvariables)
{
variablei <- variables[i]
variablename <- variablenames[i]
Vw <- calcWithinGroupsVariance(variablei, groupvariable)
Vb <- calcBetweenGroupsVariance(variablei, groupvariable)
sep <- Vb/Vw
print(paste("variable",variablename,"Vw=",Vw,"Vb=",Vb,"separation=",sep))
}
}
calcSeparations(wine[2:14],wine[1])
## [1] "variable V2 Vw= 0.262052469153907 Vb= 35.3974249602692 separation= 135.0776242428"
## [1] "variable V3 Vw= 0.887546796746581 Vb= 10154.4606409588 separation= 11441.0425210043"
## [1] "variable V4 Vw= 0.0660721013425184 Vb= 10065.3651082674 separation= 152339.109907954"
## [1] "variable V5 Vw= 8.00681118121156 Vb= 4040.10461181253 separation= 504.583475290745"
## [1] "variable V6 Vw= 180.65777316441 Vb= 671880.903309069 separation= 3719.08106438141"
## [1] "variable V7 Vw= 0.191270475224227 Vb= 10218.0270550471 separation= 53421.8730991727"
## [1] "variable V8 Vw= 0.274707514337437 Vb= 10777.2342568213 separation= 39231.6689363768"
## [1] "variable V9 Vw= 0.0119117022132797 Vb= 14217.0422061125 separation= 1193535.73079276"
## [1] "variable V10 Vw= 0.246172943795542 Vb= 11593.6224025303 separation= 47095.4371499059"
## [1] "variable V11 Vw= 2.28492308133354 Vb= 5890.16197758506 separation= 2577.83818882314"
## [1] "variable V12 Vw= 0.0244876469432414 Vb= 12910.854863443 separation= 527239.505427713"
## [1] "variable V13 Vw= 0.160778729560982 Vb= 9636.30640975892 separation= 59935.2068278657"
## [1] "variable V14 Vw= 29707.6818705169 Vb= 54112090.6081053 separation= 1821.48478780528"
Between-groups Covariance and Within-groups Covariance for Two Variables
calcWithinGroupsCovariance <- function(variable1,variable2,groupvariable)
{
# find out how many values the group variable can take
groupvariable2 <- as.factor(groupvariable[[1]])
levels <- levels(groupvariable2)
numlevels <- length(levels)
# get the covariance of variable 1 and variable 2 for each group:
Covw <- 0
for (i in 1:numlevels)
{
leveli <- levels[i]
levelidata1 <- variable1[groupvariable==leveli,]
levelidata2 <- variable2[groupvariable==leveli,]
mean1 <- mean(levelidata1)
mean2 <- mean(levelidata2)
levelilength <- length(levelidata1)
# get the covariance for this group:
term1 <- 0
for (j in 1:levelilength)
{
term1 <- term1 + ((levelidata1[j] - mean1)*(levelidata2[j] - mean2))
}
Cov_groupi <- term1 # covariance for this group
Covw <- Covw + Cov_groupi
}
totallength <- nrow(variable1)
Covw <- Covw / (totallength - numlevels)
return(Covw)
}
calcWithinGroupsCovariance(wine[8],wine[11],wine[1])
## [1] 0.2866783
calcBetweenGroupsCovariance <- function(variable1,variable2,groupvariable)
{
# find out how many values the group variable can take
groupvariable2 <- as.factor(groupvariable[[1]])
levels <- levels(groupvariable2)
numlevels <- length(levels)
# calculate the grand means
variable1mean <- mean(wine$V8)
variable2mean <- mean(wine$V11)
# calculate the between-groups covariance
Covb <- 0
for (i in 1:numlevels)
{
leveli <- levels[i]
levelidata1 <- variable1[groupvariable==leveli,]
levelidata2 <- variable2[groupvariable==leveli,]
mean1 <- mean(levelidata1)
mean2 <- mean(levelidata2)
levelilength <- length(levelidata1)
term1 <- (mean1 - variable1mean)*(mean2 - variable2mean)*(levelilength)
Covb <- Covb + term1
}
Covb <- Covb / (numlevels - 1)
Covb <- Covb[[1]]
return(Covb)
}
calcBetweenGroupsCovariance(wine[8],wine[11],wine[1])
## [1] -60.41077
Calculating Correlations for Multivariate Data
cor.test(wine$V2, wine$V3)
##
## Pearson's product-moment correlation
##
## data: wine$V2 and wine$V3
## t = 1.2579, df = 176, p-value = 0.2101
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05342959 0.23817474
## sample estimates:
## cor
## 0.09439694
mosthighlycorrelated <- function(mydataframe,numtoreport)
{
# find the correlations
cormatrix <- cor(mydataframe)
# set the correlations on the diagonal or lower triangle to zero,
# so they will not be reported as the highest ones:
diag(cormatrix) <- 0
cormatrix[lower.tri(cormatrix)] <- 0
# flatten the matrix into a dataframe for easy sorting
fm <- as.data.frame(as.table(cormatrix))
# assign human-friendly names
names(fm) <- c("First.Variable", "Second.Variable","Correlation")
# sort and print the top n correlations
head(fm[order(abs(fm$Correlation),decreasing=T),],n=numtoreport)
}
mosthighlycorrelated(wine[2:14], 10)
Standardising Variables
standardisedconcentrations <- as.data.frame(scale(wine[2:14]))
sapply(standardisedconcentrations,mean)
## V2 V3 V4 V5 V6
## -8.591766e-16 -6.776446e-17 8.045176e-16 -7.720494e-17 -4.073935e-17
## V7 V8 V9 V10 V11
## -1.395560e-17 6.958263e-17 -1.042186e-16 -1.221369e-16 3.649376e-17
## V12 V13 V14
## 2.093741e-16 3.003459e-16 -1.034429e-16
sapply(standardisedconcentrations,sd)
## V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1 1 1 1 1 1 1 1 1 1 1 1 1
Principal Component Analysis
standardisedconcentrations <- as.data.frame(scale(wine[2:14]))
wine.pca <- prcomp(standardisedconcentrations)
summary(wine.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.169 1.5802 1.2025 0.95863 0.92370 0.80103 0.74231
## Proportion of Variance 0.362 0.1921 0.1112 0.07069 0.06563 0.04936 0.04239
## Cumulative Proportion 0.362 0.5541 0.6653 0.73599 0.80162 0.85098 0.89337
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.59034 0.53748 0.5009 0.47517 0.41082 0.32152
## Proportion of Variance 0.02681 0.02222 0.0193 0.01737 0.01298 0.00795
## Cumulative Proportion 0.92018 0.94240 0.9617 0.97907 0.99205 1.00000
wine.pca$sdev
## [1] 2.1692972 1.5801816 1.2025273 0.9586313 0.9237035 0.8010350 0.7423128
## [8] 0.5903367 0.5374755 0.5009017 0.4751722 0.4108165 0.3215244
sum((wine.pca$sdev)^2)
## [1] 13
Deciding How Many Principal Components to Retain
screeplot(wine.pca, type="lines")
(wine.pca$sdev)^2
## [1] 4.7058503 2.4969737 1.4460720 0.9189739 0.8532282 0.6416570 0.5510283
## [8] 0.3484974 0.2888799 0.2509025 0.2257886 0.1687702 0.1033779
Loadings for the Principal Components
wine.pca$rotation[,1]
## V2 V3 V4 V5 V6 V7
## -0.144329395 0.245187580 0.002051061 0.239320405 -0.141992042 -0.394660845
## V8 V9 V10 V11 V12 V13
## -0.422934297 0.298533103 -0.313429488 0.088616705 -0.296714564 -0.376167411
## V14
## -0.286752227
sum((wine.pca$rotation[,1])^2)
## [1] 1
calcpc <- function(variables,loadings)
{
# find the number of samples in the data set
as.data.frame(variables)
numsamples <- nrow(variables)
# make a vector to store the component
pc <- numeric(numsamples)
# find the number of variables
numvariables <- length(variables)
# calculate the value of the component for each sample
for (i in 1:numsamples)
{
valuei <- 0
for (j in 1:numvariables)
{
valueij <- variables[i,j]
loadingj <- loadings[j]
valuei <- valuei + (valueij * loadingj)
}
pc[i] <- valuei
}
return(pc)
}
calcpc(standardisedconcentrations, wine.pca$rotation[,1])
## [1] -3.30742097 -2.20324981 -2.50966069 -3.74649719 -1.00607049 -3.04167373
## [7] -2.44220051 -2.05364379 -2.50381135 -2.74588238 -3.46994837 -1.74981688
## [13] -2.10751729 -3.44842921 -4.30065228 -2.29870383 -2.16584568 -1.89362947
## [19] -3.53202167 -2.07865856 -3.11561376 -1.08351361 -2.52809263 -1.64036108
## [25] -1.75662066 -0.98729406 -1.77028387 -1.23194878 -2.18225047 -2.24976267
## [31] -2.49318704 -2.66987964 -1.62399801 -1.89733870 -1.40642118 -1.89847087
## [37] -1.38096669 -1.11905070 -1.49796891 -2.52268490 -2.58081526 -0.66660159
## [43] -3.06216898 -0.46090897 -2.09544094 -1.13297020 -2.71893118 -2.81340300
## [49] -2.00419725 -2.69987528 -3.20587409 -2.85091773 -3.49574328 -2.21853316
## [55] -2.14094846 -2.46238340 -2.73380617 -2.16762631 -3.13054925 0.92596992
## [61] 1.53814123 1.83108449 -0.03052074 -2.04449433 0.60796583 -0.89769555
## [67] -2.24218226 -0.18286818 0.81051865 -1.97006319 1.56779366 -1.65301884
## [73] 0.72333196 -2.55501977 -1.82741266 0.86555129 -0.36897357 1.45327752
## [79] -1.25937829 -0.37509228 -0.75992026 -1.03166776 0.49348469 2.53183508
## [85] -0.83297044 -0.78568828 0.80456258 0.55647288 1.11197430 0.55415961
## [91] 1.34548982 1.56008180 1.92711944 -0.74456561 -0.95476209 -2.53670943
## [97] 0.54242248 -1.02814946 -2.24557492 -1.40624916 -0.79547585 0.54798592
## [103] 0.16072037 0.65793897 -0.39125074 1.76751314 0.36523707 1.61611371
## [109] -0.08230361 -1.57383547 -1.41657326 0.27791878 1.29947929 0.45578615
## [115] 0.49279573 -0.48071836 0.25217752 0.10692601 2.42616867 0.54953935
## [121] -0.73754141 -1.33256273 1.17377592 0.46103449 -0.97572169 0.09653741
## [127] -0.03837888 1.59266578 0.47821593 1.78779033 1.32336859 2.37779336
## [133] 2.92867865 2.14077227 2.36320318 3.05522315 3.90473898 3.92539034
## [139] 3.08557209 2.36779237 2.77099630 2.28012931 2.97723506 2.36851341
## [145] 2.20364930 2.61823528 4.26859758 3.57256360 2.79916760 2.89150275
## [151] 2.31420887 2.54265841 1.80744271 2.75238051 2.72945105 3.59472857
## [157] 2.88169708 3.38261413 1.04523342 1.60538369 3.13428951 2.23385546
## [163] 2.83966343 2.59019044 2.94100316 3.52010248 2.39934228 2.92084537
## [169] 2.17527658 2.37423037 3.20258311 3.66757294 2.45862032 3.36104305
## [175] 2.59463669 2.67030685 2.38030254 3.19973210
wine.pca$x[,1]
## [1] -3.30742097 -2.20324981 -2.50966069 -3.74649719 -1.00607049 -3.04167373
## [7] -2.44220051 -2.05364379 -2.50381135 -2.74588238 -3.46994837 -1.74981688
## [13] -2.10751729 -3.44842921 -4.30065228 -2.29870383 -2.16584568 -1.89362947
## [19] -3.53202167 -2.07865856 -3.11561376 -1.08351361 -2.52809263 -1.64036108
## [25] -1.75662066 -0.98729406 -1.77028387 -1.23194878 -2.18225047 -2.24976267
## [31] -2.49318704 -2.66987964 -1.62399801 -1.89733870 -1.40642118 -1.89847087
## [37] -1.38096669 -1.11905070 -1.49796891 -2.52268490 -2.58081526 -0.66660159
## [43] -3.06216898 -0.46090897 -2.09544094 -1.13297020 -2.71893118 -2.81340300
## [49] -2.00419725 -2.69987528 -3.20587409 -2.85091773 -3.49574328 -2.21853316
## [55] -2.14094846 -2.46238340 -2.73380617 -2.16762631 -3.13054925 0.92596992
## [61] 1.53814123 1.83108449 -0.03052074 -2.04449433 0.60796583 -0.89769555
## [67] -2.24218226 -0.18286818 0.81051865 -1.97006319 1.56779366 -1.65301884
## [73] 0.72333196 -2.55501977 -1.82741266 0.86555129 -0.36897357 1.45327752
## [79] -1.25937829 -0.37509228 -0.75992026 -1.03166776 0.49348469 2.53183508
## [85] -0.83297044 -0.78568828 0.80456258 0.55647288 1.11197430 0.55415961
## [91] 1.34548982 1.56008180 1.92711944 -0.74456561 -0.95476209 -2.53670943
## [97] 0.54242248 -1.02814946 -2.24557492 -1.40624916 -0.79547585 0.54798592
## [103] 0.16072037 0.65793897 -0.39125074 1.76751314 0.36523707 1.61611371
## [109] -0.08230361 -1.57383547 -1.41657326 0.27791878 1.29947929 0.45578615
## [115] 0.49279573 -0.48071836 0.25217752 0.10692601 2.42616867 0.54953935
## [121] -0.73754141 -1.33256273 1.17377592 0.46103449 -0.97572169 0.09653741
## [127] -0.03837888 1.59266578 0.47821593 1.78779033 1.32336859 2.37779336
## [133] 2.92867865 2.14077227 2.36320318 3.05522315 3.90473898 3.92539034
## [139] 3.08557209 2.36779237 2.77099630 2.28012931 2.97723506 2.36851341
## [145] 2.20364930 2.61823528 4.26859758 3.57256360 2.79916760 2.89150275
## [151] 2.31420887 2.54265841 1.80744271 2.75238051 2.72945105 3.59472857
## [157] 2.88169708 3.38261413 1.04523342 1.60538369 3.13428951 2.23385546
## [163] 2.83966343 2.59019044 2.94100316 3.52010248 2.39934228 2.92084537
## [169] 2.17527658 2.37423037 3.20258311 3.66757294 2.45862032 3.36104305
## [175] 2.59463669 2.67030685 2.38030254 3.19973210
wine.pca$rotation[,2]
## V2 V3 V4 V5 V6 V7
## 0.483651548 0.224930935 0.316068814 -0.010590502 0.299634003 0.065039512
## V8 V9 V10 V11 V12 V13
## -0.003359812 0.028779488 0.039301722 0.529995672 -0.279235148 -0.164496193
## V14
## 0.364902832
sum((wine.pca$rotation[,2])^2)
## [1] 1
Scatterplots of the Principal Components
plot(wine.pca$x[,1],wine.pca$x[,2])
text(wine.pca$x[,1],wine.pca$x[,2], wine$V1, cex=0.7, pos=4, col="red")
printMeanAndSdByGroup(standardisedconcentrations,wine[1])
## [1] "Means:"
## V1 V2 V3 V4 V5 V6 V7
## 1 1 0.9166093 -0.2915199 0.3246886 -0.7359212 0.46192317 0.87090552
## 2 2 -0.8892116 -0.3613424 -0.4437061 0.2225094 -0.36354162 -0.05790375
## 3 3 0.1886265 0.8928122 0.2572190 0.5754413 -0.03004191 -0.98483874
## V8 V9 V10 V11 V12 V13
## 1 0.95419225 -0.57735640 0.5388633 0.2028288 0.4575567 0.7691811
## 2 0.05163434 0.01452785 0.0688079 -0.8503999 0.4323908 0.2446043
## 3 -1.24923710 0.68817813 -0.7641311 1.0085728 -1.2019916 -1.3072623
## V14
## 1 1.1711967
## 2 -0.7220731
## 3 -0.3715295
## [1] "Standard deviations:"
## V1 V2 V3 V4 V5 V6 V7 V8
## 1 1 0.5692415 0.6163463 0.8280333 0.7624716 0.7350927 0.5416007 0.3979478
## 2 2 0.6626591 0.9090742 1.1498967 1.0030563 1.1730101 0.8713912 0.7065071
## 3 3 0.6531461 0.9738258 0.6732065 0.6761844 0.7625055 0.5703767 0.2938394
## V9 V10 V11 V12 V13 V14
## 1 0.5628555 0.7200189 0.5342623 0.5096112 0.5029315 0.7034472
## 2 0.9960462 1.0519061 0.3989712 0.8878480 0.6994087 0.4992299
## 3 0.9974790 0.7142999 0.9968323 0.5006795 0.3832607 0.3654948
## [1] "Sample sizes:"
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## 1 1 59 59 59 59 59 59 59 59 59 59 59 59 59
## 2 2 71 71 71 71 71 71 71 71 71 71 71 71 71
## 3 3 48 48 48 48 48 48 48 48 48 48 48 48 48
Linear Discriminant Analysis
library("MASS")
wine.lda <- lda(wine$V1 ~ wine$V2 + wine$V3 + wine$V4 + wine$V5 + wine$V6 + wine$V7 +
wine$V8 + wine$V9 + wine$V10 + wine$V11 + wine$V12 + wine$V13 +
wine$V14)
Loadings for the Discriminant Functions
wine.lda
## Call:
## lda(wine$V1 ~ wine$V2 + wine$V3 + wine$V4 + wine$V5 + wine$V6 +
## wine$V7 + wine$V8 + wine$V9 + wine$V10 + wine$V11 + wine$V12 +
## wine$V13 + wine$V14)
##
## Prior probabilities of groups:
## 1 2 3
## 0.3314607 0.3988764 0.2696629
##
## Group means:
## wine$V2 wine$V3 wine$V4 wine$V5 wine$V6 wine$V7 wine$V8 wine$V9
## 1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2 12.27873 1.932676 2.244789 20.23803 94.5493 2.258873 2.0808451 0.363662
## 3 13.15375 3.333750 2.437083 21.41667 99.3125 1.678750 0.7814583 0.447500
## wine$V10 wine$V11 wine$V12 wine$V13 wine$V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352 519.5070
## 3 1.153542 7.396250 0.6827083 1.683542 629.8958
##
## Coefficients of linear discriminants:
## LD1 LD2
## wine$V2 -0.403399781 0.8717930699
## wine$V3 0.165254596 0.3053797325
## wine$V4 -0.369075256 2.3458497486
## wine$V5 0.154797889 -0.1463807654
## wine$V6 -0.002163496 -0.0004627565
## wine$V7 0.618052068 -0.0322128171
## wine$V8 -1.661191235 -0.4919980543
## wine$V9 -1.495818440 -1.6309537953
## wine$V10 0.134092628 -0.3070875776
## wine$V11 0.355055710 0.2532306865
## wine$V12 -0.818036073 -1.5156344987
## wine$V13 -1.157559376 0.0511839665
## wine$V14 -0.002691206 0.0028529846
##
## Proportion of trace:
## LD1 LD2
## 0.6875 0.3125
wine.lda$scaling[,1]
## wine$V2 wine$V3 wine$V4 wine$V5 wine$V6 wine$V7
## -0.403399781 0.165254596 -0.369075256 0.154797889 -0.002163496 0.618052068
## wine$V8 wine$V9 wine$V10 wine$V11 wine$V12 wine$V13
## -1.661191235 -1.495818440 0.134092628 0.355055710 -0.818036073 -1.157559376
## wine$V14
## -0.002691206
calclda <- function(variables,loadings)
{
# find the number of samples in the data set
as.data.frame(variables)
numsamples <- nrow(variables)
# make a vector to store the discriminant function
ld <- numeric(numsamples)
# find the number of variables
numvariables <- length(variables)
# calculate the value of the discriminant function for each sample
for (i in 1:numsamples)
{
valuei <- 0
for (j in 1:numvariables)
{
valueij <- variables[i,j]
loadingj <- loadings[j]
valuei <- valuei + (valueij * loadingj)
}
ld[i] <- valuei
}
# standardise the discriminant function so that its mean value is 0:
ld <- as.data.frame(scale(ld, center=TRUE, scale=FALSE))
ld <- ld[[1]]
return(ld)
}
calclda(wine[2:14], wine.lda$scaling[,1])
## [1] -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934
## [7] -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646
## [13] -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262
## [19] -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961
## [25] -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573
## [31] -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996
## [37] -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062
## [43] -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356
## [49] -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293
## [55] -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551 1.58618749
## [61] 0.79967216 2.38015446 -0.45917726 -0.50726885 0.39398359 -0.92256616
## [67] -1.95549377 -0.34732815 0.20371212 -0.24831914 1.17987999 -1.07718925
## [73] 0.64100179 -1.74684421 -0.34721117 1.14274222 0.18665882 0.90052500
## [79] -0.70709551 -0.59562833 -0.55761818 -1.80430417 0.23077079 2.03482711
## [85] -0.62113021 -1.03372742 0.76598781 0.35042568 0.15324508 -0.14962842
## [91] 0.48079504 1.39689016 0.91972331 -0.59102937 0.49411386 -1.62614426
## [97] 2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340 0.02594549
## [103] -0.21887407 1.36437640 -1.12901245 -0.21263094 -0.77946884 0.61546732
## [109] 0.22550192 -2.03869851 0.79274716 0.30229545 -0.50664882 0.99837397
## [115] -0.21954922 -0.37131517 0.05545894 -0.09137874 1.79755252 -0.17405009
## [121] -1.17870281 -3.21054390 0.62605202 0.03366613 -0.69930080 -0.72061079
## [127] -0.51933512 1.17030045 0.10824791 1.12319783 2.24632419 3.28527755
## [133] 4.07236441 3.86691235 3.45088333 3.71583899 3.92220510 4.85161020
## [139] 3.54993389 3.76889174 2.66942250 2.32491492 3.17712883 2.88964418
## [145] 3.78325562 3.04411324 4.70697017 4.85021393 4.98359184 4.86968293
## [151] 4.59869190 5.67447884 5.32986123 5.03401031 4.52080087 5.09783710
## [157] 5.04368277 4.86980829 5.61316558 5.67046737 5.37413513 3.09975377
## [163] 3.35888137 3.04007194 4.94861303 4.54504458 5.27255844 5.13016117
## [169] 4.30468082 5.08336782 4.06743571 5.74212961 4.48205140 4.29150758
## [175] 4.50329623 5.04747033 4.27615505 5.53808610
wine.lda.values <- predict(wine.lda, wine[2:14])
wine.lda.values$x[,1]
## 1 2 3 4 5 6
## -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934
## 7 8 9 10 11 12
## -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646
## 13 14 15 16 17 18
## -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262
## 19 20 21 22 23 24
## -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961
## 25 26 27 28 29 30
## -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573
## 31 32 33 34 35 36
## -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996
## 37 38 39 40 41 42
## -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062
## 43 44 45 46 47 48
## -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356
## 49 50 51 52 53 54
## -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293
## 55 56 57 58 59 60
## -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551 1.58618749
## 61 62 63 64 65 66
## 0.79967216 2.38015446 -0.45917726 -0.50726885 0.39398359 -0.92256616
## 67 68 69 70 71 72
## -1.95549377 -0.34732815 0.20371212 -0.24831914 1.17987999 -1.07718925
## 73 74 75 76 77 78
## 0.64100179 -1.74684421 -0.34721117 1.14274222 0.18665882 0.90052500
## 79 80 81 82 83 84
## -0.70709551 -0.59562833 -0.55761818 -1.80430417 0.23077079 2.03482711
## 85 86 87 88 89 90
## -0.62113021 -1.03372742 0.76598781 0.35042568 0.15324508 -0.14962842
## 91 92 93 94 95 96
## 0.48079504 1.39689016 0.91972331 -0.59102937 0.49411386 -1.62614426
## 97 98 99 100 101 102
## 2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340 0.02594549
## 103 104 105 106 107 108
## -0.21887407 1.36437640 -1.12901245 -0.21263094 -0.77946884 0.61546732
## 109 110 111 112 113 114
## 0.22550192 -2.03869851 0.79274716 0.30229545 -0.50664882 0.99837397
## 115 116 117 118 119 120
## -0.21954922 -0.37131517 0.05545894 -0.09137874 1.79755252 -0.17405009
## 121 122 123 124 125 126
## -1.17870281 -3.21054390 0.62605202 0.03366613 -0.69930080 -0.72061079
## 127 128 129 130 131 132
## -0.51933512 1.17030045 0.10824791 1.12319783 2.24632419 3.28527755
## 133 134 135 136 137 138
## 4.07236441 3.86691235 3.45088333 3.71583899 3.92220510 4.85161020
## 139 140 141 142 143 144
## 3.54993389 3.76889174 2.66942250 2.32491492 3.17712883 2.88964418
## 145 146 147 148 149 150
## 3.78325562 3.04411324 4.70697017 4.85021393 4.98359184 4.86968293
## 151 152 153 154 155 156
## 4.59869190 5.67447884 5.32986123 5.03401031 4.52080087 5.09783710
## 157 158 159 160 161 162
## 5.04368277 4.86980829 5.61316558 5.67046737 5.37413513 3.09975377
## 163 164 165 166 167 168
## 3.35888137 3.04007194 4.94861303 4.54504458 5.27255844 5.13016117
## 169 170 171 172 173 174
## 4.30468082 5.08336782 4.06743571 5.74212961 4.48205140 4.29150758
## 175 176 177 178
## 4.50329623 5.04747033 4.27615505 5.53808610
groupStandardise <- function(variables, groupvariable)
{
# find out how many variables we have
variables <- as.data.frame(variables)
numvariables <- length(variables)
# find the variable names
variablenames <- colnames(variables)
# calculate the group-standardised version of each variable
for (i in 1:numvariables)
{
variablei <- variables[i]
variablei_name <- variablenames[i]
variablei_Vw <- calcWithinGroupsVariance(variablei, groupvariable)
variablei_mean <- mean(wine$V2)+mean(wine$V3)+mean(wine$V4)+mean(wine$V5)+mean(wine$V6)+mean(wine$V7)+mean(wine$V8)+mean(wine$V9)+mean(wine$V10)+mean(wine$V11)+mean(wine$V12)+mean(wine$V13)+mean(wine$V14)
variablei_new <- (variablei - variablei_mean)/(sqrt(variablei_Vw))
data_length <- nrow(variablei)
if (i == 1) { variables_new <- data.frame(row.names=seq(1,data_length)) }
variables_new[`variablei_name`] <- variablei_new
}
return(variables_new)
}
groupstandardisedconcentrations <- groupStandardise(wine[2:14], wine[1])
wine.lda2 <- lda(wine$V1 ~ groupstandardisedconcentrations$V2 + groupstandardisedconcentrations$V3 +
groupstandardisedconcentrations$V4 + groupstandardisedconcentrations$V5 +
groupstandardisedconcentrations$V6 + groupstandardisedconcentrations$V7 +
groupstandardisedconcentrations$V8 + groupstandardisedconcentrations$V9 +
groupstandardisedconcentrations$V10 + groupstandardisedconcentrations$V11 +
groupstandardisedconcentrations$V12 + groupstandardisedconcentrations$V13 +
groupstandardisedconcentrations$V14)
wine.lda2
## Call:
## lda(wine$V1 ~ groupstandardisedconcentrations$V2 + groupstandardisedconcentrations$V3 +
## groupstandardisedconcentrations$V4 + groupstandardisedconcentrations$V5 +
## groupstandardisedconcentrations$V6 + groupstandardisedconcentrations$V7 +
## groupstandardisedconcentrations$V8 + groupstandardisedconcentrations$V9 +
## groupstandardisedconcentrations$V10 + groupstandardisedconcentrations$V11 +
## groupstandardisedconcentrations$V12 + groupstandardisedconcentrations$V13 +
## groupstandardisedconcentrations$V14)
##
## Prior probabilities of groups:
## 1 2 3
## 0.3314607 0.3988764 0.2696629
##
## Group means:
## groupstandardisedconcentrations$V2 groupstandardisedconcentrations$V3
## 1 -1728.804 -951.8414
## 2 -1731.667 -951.9242
## 3 -1729.958 -950.4370
## groupstandardisedconcentrations$V4 groupstandardisedconcentrations$V5
## 1 -3486.869 -311.5955
## 2 -3487.689 -310.4644
## 3 -3486.941 -310.0478
## groupstandardisedconcentrations$V6 groupstandardisedconcentrations$V7
## 1 -58.95429 -2048.492
## 2 -59.83144 -2049.821
## 3 -59.47706 -2051.148
## groupstandardisedconcentrations$V8 groupstandardisedconcentrations$V9
## 1 -1709.047 -8232.009
## 2 -1710.767 -8231.334
## 3 -1713.247 -8230.566
## groupstandardisedconcentrations$V10 groupstandardisedconcentrations$V11
## 1 -1807.565 -590.9047
## 2 -1808.108 -592.5200
## 3 -1809.068 -589.6690
## groupstandardisedconcentrations$V12 groupstandardisedconcentrations$V13
## 1 -5736.485 -2233.521
## 2 -5736.522 -2234.450
## 3 -5738.909 -2237.198
## groupstandardisedconcentrations$V14
## 1 1.258849
## 2 -2.200234
## 3 -1.559777
##
## Coefficients of linear discriminants:
## LD1 LD2
## groupstandardisedconcentrations$V2 -0.20650463 0.446280119
## groupstandardisedconcentrations$V3 0.15568586 0.287697336
## groupstandardisedconcentrations$V4 -0.09486893 0.602988809
## groupstandardisedconcentrations$V5 0.43802089 -0.414203541
## groupstandardisedconcentrations$V6 -0.02907934 -0.006219863
## groupstandardisedconcentrations$V7 0.27030186 -0.014088108
## groupstandardisedconcentrations$V8 -0.87067265 -0.257868714
## groupstandardisedconcentrations$V9 -0.16325474 -0.178003512
## groupstandardisedconcentrations$V10 0.06653116 -0.152364015
## groupstandardisedconcentrations$V11 0.53670086 0.382782544
## groupstandardisedconcentrations$V12 -0.12801061 -0.237174509
## groupstandardisedconcentrations$V13 -0.46414916 0.020523349
## groupstandardisedconcentrations$V14 -0.46385409 0.491738050
##
## Proportion of trace:
## LD1 LD2
## 0.6875 0.3125
wine.lda.values <- predict(wine.lda, wine[2:14])
wine.lda.values$x[,1]
## 1 2 3 4 5 6
## -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934
## 7 8 9 10 11 12
## -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646
## 13 14 15 16 17 18
## -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262
## 19 20 21 22 23 24
## -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961
## 25 26 27 28 29 30
## -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573
## 31 32 33 34 35 36
## -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996
## 37 38 39 40 41 42
## -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062
## 43 44 45 46 47 48
## -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356
## 49 50 51 52 53 54
## -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293
## 55 56 57 58 59 60
## -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551 1.58618749
## 61 62 63 64 65 66
## 0.79967216 2.38015446 -0.45917726 -0.50726885 0.39398359 -0.92256616
## 67 68 69 70 71 72
## -1.95549377 -0.34732815 0.20371212 -0.24831914 1.17987999 -1.07718925
## 73 74 75 76 77 78
## 0.64100179 -1.74684421 -0.34721117 1.14274222 0.18665882 0.90052500
## 79 80 81 82 83 84
## -0.70709551 -0.59562833 -0.55761818 -1.80430417 0.23077079 2.03482711
## 85 86 87 88 89 90
## -0.62113021 -1.03372742 0.76598781 0.35042568 0.15324508 -0.14962842
## 91 92 93 94 95 96
## 0.48079504 1.39689016 0.91972331 -0.59102937 0.49411386 -1.62614426
## 97 98 99 100 101 102
## 2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340 0.02594549
## 103 104 105 106 107 108
## -0.21887407 1.36437640 -1.12901245 -0.21263094 -0.77946884 0.61546732
## 109 110 111 112 113 114
## 0.22550192 -2.03869851 0.79274716 0.30229545 -0.50664882 0.99837397
## 115 116 117 118 119 120
## -0.21954922 -0.37131517 0.05545894 -0.09137874 1.79755252 -0.17405009
## 121 122 123 124 125 126
## -1.17870281 -3.21054390 0.62605202 0.03366613 -0.69930080 -0.72061079
## 127 128 129 130 131 132
## -0.51933512 1.17030045 0.10824791 1.12319783 2.24632419 3.28527755
## 133 134 135 136 137 138
## 4.07236441 3.86691235 3.45088333 3.71583899 3.92220510 4.85161020
## 139 140 141 142 143 144
## 3.54993389 3.76889174 2.66942250 2.32491492 3.17712883 2.88964418
## 145 146 147 148 149 150
## 3.78325562 3.04411324 4.70697017 4.85021393 4.98359184 4.86968293
## 151 152 153 154 155 156
## 4.59869190 5.67447884 5.32986123 5.03401031 4.52080087 5.09783710
## 157 158 159 160 161 162
## 5.04368277 4.86980829 5.61316558 5.67046737 5.37413513 3.09975377
## 163 164 165 166 167 168
## 3.35888137 3.04007194 4.94861303 4.54504458 5.27255844 5.13016117
## 169 170 171 172 173 174
## 4.30468082 5.08336782 4.06743571 5.74212961 4.48205140 4.29150758
## 175 176 177 178
## 4.50329623 5.04747033 4.27615505 5.53808610
wine.lda.values2 <- predict(wine.lda2, groupstandardisedconcentrations)
wine.lda.values2$x[,1]
## 1 2 3 4 5 6
## -4.70024401 -4.30195811 -3.42071952 -4.20575366 -1.50998168 -4.51868934
## 7 8 9 10 11 12
## -4.52737794 -4.14834781 -3.86082876 -3.36662444 -4.80587907 -3.42807646
## 13 14 15 16 17 18
## -3.66610246 -5.58824635 -5.50131449 -3.18475189 -3.28936988 -2.99809262
## 19 20 21 22 23 24
## -5.24640372 -3.13653106 -3.57747791 -1.69077135 -4.83515033 -3.09588961
## 25 26 27 28 29 30
## -3.32164716 -2.14482223 -3.98242850 -2.68591432 -3.56309464 -3.17301573
## 31 32 33 34 35 36
## -2.99626797 -3.56866244 -3.38506383 -3.52753750 -2.85190852 -2.79411996
## 37 38 39 40 41 42
## -2.75808511 -2.17734477 -3.02926382 -3.27105228 -2.92065533 -2.23721062
## 43 44 45 46 47 48
## -4.69972568 -1.23036133 -2.58203904 -2.58312049 -3.88887889 -3.44975356
## 49 50 51 52 53 54
## -2.34223331 -3.52062596 -3.21840912 -4.38214896 -4.36311727 -3.51917293
## 55 56 57 58 59 60
## -3.12277475 -1.80240540 -2.87378754 -3.61690518 -3.73868551 1.58618749
## 61 62 63 64 65 66
## 0.79967216 2.38015446 -0.45917726 -0.50726885 0.39398359 -0.92256616
## 67 68 69 70 71 72
## -1.95549377 -0.34732815 0.20371212 -0.24831914 1.17987999 -1.07718925
## 73 74 75 76 77 78
## 0.64100179 -1.74684421 -0.34721117 1.14274222 0.18665882 0.90052500
## 79 80 81 82 83 84
## -0.70709551 -0.59562833 -0.55761818 -1.80430417 0.23077079 2.03482711
## 85 86 87 88 89 90
## -0.62113021 -1.03372742 0.76598781 0.35042568 0.15324508 -0.14962842
## 91 92 93 94 95 96
## 0.48079504 1.39689016 0.91972331 -0.59102937 0.49411386 -1.62614426
## 97 98 99 100 101 102
## 2.00044562 -1.00534818 -2.07121314 -1.63815890 -1.05894340 0.02594549
## 103 104 105 106 107 108
## -0.21887407 1.36437640 -1.12901245 -0.21263094 -0.77946884 0.61546732
## 109 110 111 112 113 114
## 0.22550192 -2.03869851 0.79274716 0.30229545 -0.50664882 0.99837397
## 115 116 117 118 119 120
## -0.21954922 -0.37131517 0.05545894 -0.09137874 1.79755252 -0.17405009
## 121 122 123 124 125 126
## -1.17870281 -3.21054390 0.62605202 0.03366613 -0.69930080 -0.72061079
## 127 128 129 130 131 132
## -0.51933512 1.17030045 0.10824791 1.12319783 2.24632419 3.28527755
## 133 134 135 136 137 138
## 4.07236441 3.86691235 3.45088333 3.71583899 3.92220510 4.85161020
## 139 140 141 142 143 144
## 3.54993389 3.76889174 2.66942250 2.32491492 3.17712883 2.88964418
## 145 146 147 148 149 150
## 3.78325562 3.04411324 4.70697017 4.85021393 4.98359184 4.86968293
## 151 152 153 154 155 156
## 4.59869190 5.67447884 5.32986123 5.03401031 4.52080087 5.09783710
## 157 158 159 160 161 162
## 5.04368277 4.86980829 5.61316558 5.67046737 5.37413513 3.09975377
## 163 164 165 166 167 168
## 3.35888137 3.04007194 4.94861303 4.54504458 5.27255844 5.13016117
## 169 170 171 172 173 174
## 4.30468082 5.08336782 4.06743571 5.74212961 4.48205140 4.29150758
## 175 176 177 178
## 4.50329623 5.04747033 4.27615505 5.53808610
Separation Achieved by the Discriminant Functions
wine.lda.values <- predict(wine.lda, standardisedconcentrations)
calcSeparations(wine.lda.values$x,wine[1])
## [1] "variable LD1 Vw= 0.999999999999999 Vb= 15837.082234555 separation= 15837.082234555"
## [1] "variable LD2 Vw= 1 Vb= 15403.6710754822 separation= 15403.6710754822"
wine.lda
## Call:
## lda(wine$V1 ~ wine$V2 + wine$V3 + wine$V4 + wine$V5 + wine$V6 +
## wine$V7 + wine$V8 + wine$V9 + wine$V10 + wine$V11 + wine$V12 +
## wine$V13 + wine$V14)
##
## Prior probabilities of groups:
## 1 2 3
## 0.3314607 0.3988764 0.2696629
##
## Group means:
## wine$V2 wine$V3 wine$V4 wine$V5 wine$V6 wine$V7 wine$V8 wine$V9
## 1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000
## 2 12.27873 1.932676 2.244789 20.23803 94.5493 2.258873 2.0808451 0.363662
## 3 13.15375 3.333750 2.437083 21.41667 99.3125 1.678750 0.7814583 0.447500
## wine$V10 wine$V11 wine$V12 wine$V13 wine$V14
## 1 1.899322 5.528305 1.0620339 3.157797 1115.7119
## 2 1.630282 3.086620 1.0562817 2.785352 519.5070
## 3 1.153542 7.396250 0.6827083 1.683542 629.8958
##
## Coefficients of linear discriminants:
## LD1 LD2
## wine$V2 -0.403399781 0.8717930699
## wine$V3 0.165254596 0.3053797325
## wine$V4 -0.369075256 2.3458497486
## wine$V5 0.154797889 -0.1463807654
## wine$V6 -0.002163496 -0.0004627565
## wine$V7 0.618052068 -0.0322128171
## wine$V8 -1.661191235 -0.4919980543
## wine$V9 -1.495818440 -1.6309537953
## wine$V10 0.134092628 -0.3070875776
## wine$V11 0.355055710 0.2532306865
## wine$V12 -0.818036073 -1.5156344987
## wine$V13 -1.157559376 0.0511839665
## wine$V14 -0.002691206 0.0028529846
##
## Proportion of trace:
## LD1 LD2
## 0.6875 0.3125
(wine.lda$svd)^2
## [1] 794.6522 361.2410
A Stacked Histogram of the LDA Values
ldahist(data = wine.lda.values$x[,1], g=wine$V1)
ldahist(data = wine.lda.values$x[,2], g=wine$V1)
Scatterplots of the Discriminant Functions
plot(wine.lda.values$x[,1],wine.lda.values$x[,2])
text(wine.lda.values$x[,1],wine.lda.values$x[,2],wine$V1,cex=0.7,pos=4,col="red")
Allocation Rules and Misclassification Rate
printMeanAndSdByGroup(wine.lda.values$x,wine[1])
## [1] "Means:"
## V1 LD1 LD2
## 1 1 -3.42248851 1.691674
## 2 2 -0.07972623 -2.472656
## 3 3 4.32473717 1.578120
## [1] "Standard deviations:"
## V1 LD1 LD2
## 1 1 0.9394626 1.0176394
## 2 2 1.0839318 0.9973165
## 3 3 0.9404188 0.9818673
## [1] "Sample sizes:"
## V1 LD1 LD2
## 1 1 59 59
## 2 2 71 71
## 3 3 48 48
calcAllocationRuleAccuracy <- function(ldavalue, groupvariable, cutoffpoints)
{
# find out how many values the group variable can take
groupvariable2 <- as.factor(groupvariable[[1]])
levels <- levels(groupvariable2)
numlevels <- length(levels)
# calculate the number of true positives and false negatives for each group
numlevels <- length(levels)
for (i in 1:numlevels)
{
leveli <- levels[i]
levelidata <- ldavalue[groupvariable==leveli]
# see how many of the samples from this group are classified in each group
for (j in 1:numlevels)
{
levelj <- levels[j]
if (j == 1)
{
cutoff1 <- cutoffpoints[1]
cutoff2 <- "NA"
results <- summary(levelidata <= cutoff1)
}
else if (j == numlevels)
{
cutoff1 <- cutoffpoints[(numlevels-1)]
cutoff2 <- "NA"
results <- summary(levelidata > cutoff1)
}
else
{
cutoff1 <- cutoffpoints[(j-1)]
cutoff2 <- cutoffpoints[(j)]
results <- summary(levelidata > cutoff1 & levelidata <= cutoff2)
}
trues <- results["TRUE"]
trues <- trues[[1]]
print(paste("Number of samples of group",leveli,"classified as group",levelj," : ",
trues,"(cutoffs:",cutoff1,",",cutoff2,")"))
}
}
}
calcAllocationRuleAccuracy(wine.lda.values$x[,1], wine[1], c(-1.751107, 2.122505))
## [1] "Number of samples of group 1 classified as group 1 : 56 (cutoffs: -1.751107 , NA )"
## [1] "Number of samples of group 1 classified as group 2 : 3 (cutoffs: -1.751107 , 2.122505 )"
## [1] "Number of samples of group 1 classified as group 3 : NA (cutoffs: 2.122505 , NA )"
## [1] "Number of samples of group 2 classified as group 1 : 5 (cutoffs: -1.751107 , NA )"
## [1] "Number of samples of group 2 classified as group 2 : 65 (cutoffs: -1.751107 , 2.122505 )"
## [1] "Number of samples of group 2 classified as group 3 : 1 (cutoffs: 2.122505 , NA )"
## [1] "Number of samples of group 3 classified as group 1 : NA (cutoffs: -1.751107 , NA )"
## [1] "Number of samples of group 3 classified as group 2 : NA (cutoffs: -1.751107 , 2.122505 )"
## [1] "Number of samples of group 3 classified as group 3 : 48 (cutoffs: 2.122505 , NA )"