####Q1
library(MASS)
# Number of flowers recorded for each species
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
# Colour assigned to each species, keyed by the species name
lookup <- setNames(
  c("blue", "green", "orange"),
  c("setosa", "versicolor", "virginica")
)
# One colour per row of iris, found by matching the row's species against the
# names of the lookup table
col.ind <- lookup[match(iris$Species, names(lookup))]
# Scatterplot matrix of every column except Species; each point is filled
# with the colour of its species
pairs(iris[names(iris) != "Species"], pch = 21, col = "gray", bg = col.ind)

# Load necessary library
library(MASS)
# Fit a linear discriminant analysis (LDA) model with Species as the response
# and every other column of iris as a predictor (the `.` in the formula).
lda.fit <- lda(Species ~ ., data = iris)
# Print the fitted model. The `##` lines below are the recorded output: the
# call, prior probabilities, group means, discriminant coefficients and
# proportion of trace.
print(lda.fit)
## Call:
## lda(Species ~ ., data = iris)
##
## Prior probabilities of groups:
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa 5.006 3.428 1.462 0.246
## versicolor 5.936 2.770 4.260 1.326
## virginica 6.588 2.974 5.552 2.026
##
## Coefficients of linear discriminants:
## LD1 LD2
## Sepal.Length 0.8293776 0.02410215
## Sepal.Width 1.5344731 2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width -2.8104603 2.83918785
##
## Proportion of trace:
## LD1 LD2
## 0.9912 0.0088
# Show each piece of the fitted model on its own, under a heading of its own
cat("\nPrior probabilities of groups:\n")
##
## Prior probabilities of groups:
print(lda.fit[["prior"]])
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
cat("\nGroup means:\n")
##
## Group means:
print(lda.fit[["means"]])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa 5.006 3.428 1.462 0.246
## versicolor 5.936 2.770 4.260 1.326
## virginica 6.588 2.974 5.552 2.026
cat("\nCoefficients of linear discriminants:\n")
##
## Coefficients of linear discriminants:
print(lda.fit[["scaling"]])
## LD1 LD2
## Sepal.Length 0.8293776 0.02410215
## Sepal.Width 1.5344731 2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width -2.8104603 2.83918785
cat("\nProportion of trace:\n")
##
## Proportion of trace:
# Each squared singular value expressed as a share of their sum
prop_trace <- prop.table(lda.fit[["svd"]]^2)
print(prop_trace)
## [1] 0.991212605 0.008787395
# Sepal width against sepal length, one point per flower, filled with the
# colour of its species
plot(
  Sepal.Width ~ Sepal.Length,
  data = iris,
  pch = 21, col = "gray", bg = col.ind
)
# Overlay the group means of the two sepal variables as larger markers with a
# black outline, filled with the lookup colours
points(
  lda.fit$means[, c("Sepal.Length", "Sepal.Width")],
  pch = 21, cex = 2, col = "black", bg = lookup
)

# Predictions from the fitted model; no newdata argument is supplied
lda.pred <- predict(object = lda.fit)
# First six rows of the discriminant scores (columns LD1 and LD2)
head(lda.pred[["x"]], n = 6L)
## LD1 LD2
## 1 8.061800 0.3004206
## 2 7.128688 -0.7866604
## 3 7.489828 -0.2653845
## 4 6.813201 -0.6706311
## 5 8.132309 0.5144625
## 6 7.701947 1.4617210
# Second discriminant score against the first, filled with the species colour
plot(
  LD2 ~ LD1,
  data = as.data.frame(lda.pred[["x"]]),
  pch = 21, col = "gray", bg = col.ind
)

# Cross-tabulate the predicted class (rows) against the recorded species
# (columns)
table(lda.pred$class, iris$Species, dnn = c("pred", "true"))
## true
## pred setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
# One minus the share of rows whose predicted class equals the recorded species
1 - mean(iris$Species == lda.pred$class)
## [1] 0.02
# Leave-one-out cross-validation. CV is an argument of lda(), not of
# predict(): predict(lda.fit, CV = TRUE) silently ignores it and simply
# repeats the training-set predictions. With CV = TRUE, lda() returns a list
# whose $class and $posterior hold the held-out predictions (there is no $x
# component in this result).
lda.cv <- lda(Species ~ ., data = iris, CV = TRUE)
####Q2
library(MASS)
# Count the observations per species (50 of each, see the output below).
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
# Lookup table that maps each species name to a plotting colour. The names
# must match levels(iris$Species) exactly ("versicolor", not "versicola"),
# otherwise a lookup by name returns NA for that species.
lookup <- c(setosa = "blue", versicolor = "green", virginica = "orange")
# Colour for every row of iris, looked up by species *name*. Indexing with the
# factor itself would use its integer codes, which only gives the right colours
# while the order of `lookup` happens to match the order of the factor levels.
col.ind <- lookup[as.character(iris$Species)]
# Scatterplot matrix of the four measurements (column 5, Species, is dropped);
# each point is filled with the colour of its species.
pairs(iris[-5], pch = 21, col = "gray", bg = col.ind)

# Fit a linear discriminant analysis (LDA) model with Species as the response
# and every other column of iris as a predictor (the `.` in the formula).
lda.fit <- lda(Species ~ ., data = iris)
# Display the fitted model; the `##` lines below are the recorded output.
lda.fit
## Call:
## lda(Species ~ ., data = iris)
##
## Prior probabilities of groups:
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa 5.006 3.428 1.462 0.246
## versicolor 5.936 2.770 4.260 1.326
## virginica 6.588 2.974 5.552 2.026
##
## Coefficients of linear discriminants:
## LD1 LD2
## Sepal.Length 0.8293776 0.02410215
## Sepal.Width 1.5344731 2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width -2.8104603 2.83918785
##
## Proportion of trace:
## LD1 LD2
## 0.9912 0.0088
# Scatterplot of sepal width against sepal length, filled by species colour
plot(
  Sepal.Width ~ Sepal.Length,
  data = iris,
  bg = col.ind, col = "gray", pch = 21
)
# Add the group means of the two sepal variables as larger, black-outlined
# markers filled with the lookup colours
points(
  x = lda.fit$means[, "Sepal.Length"],
  y = lda.fit$means[, "Sepal.Width"],
  bg = lookup, col = "black", cex = 2, pch = 21
)

# Predictions from the fitted model; no newdata argument is supplied
lda.pred <- predict(object = lda.fit)
# Leading rows of the discriminant scores (columns LD1 and LD2)
head(lda.pred[["x"]], n = 6L)
## LD1 LD2
## 1 8.061800 0.3004206
## 2 7.128688 -0.7866604
## 3 7.489828 -0.2653845
## 4 6.813201 -0.6706311
## 5 8.132309 0.5144625
## 6 7.701947 1.4617210
# Observations in discriminant space (LD2 against LD1), filled by species
# colour
plot(
  LD2 ~ LD1,
  data = as.data.frame(lda.pred[["x"]]),
  bg = col.ind, col = "gray", pch = 21
)

# Confusion table: predicted class in rows, recorded species in columns
table(lda.pred$class, iris$Species, dnn = c("pred", "true"))
## true
## pred setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
# One minus the share of rows whose predicted class equals the recorded species
1 - mean(iris$Species == lda.pred$class)
## [1] 0.02