####Q1

library(MASS)

# Count the observations per species (the three classes are balanced)
table(iris[["Species"]])
## 
##     setosa versicolor  virginica 
##         50         50         50
# Species name -> fill colour, keyed by the factor's level names
lookup <- setNames(
  c("blue", "green", "orange"),
  c("setosa", "versicolor", "virginica")
)

# One fill colour per observation, found by matching the species name
col.ind <- lookup[match(iris$Species, names(lookup))]

# Scatterplot matrix of the four measurements (Species column left out)
pairs(iris[names(iris) != "Species"], pch = 21, col = "gray", bg = col.ind)

# Load necessary library
library(MASS)

# Perform Linear Discriminant Analysis (LDA) of Species on all four
# measurements; `.` in the formula stands for every other column of iris
lda.fit <- lda(Species ~ ., data = iris)

# Print the full LDA model summary: prior probabilities, group means,
# coefficients of the linear discriminants and proportion of trace
print(lda.fit)
## Call:
## lda(Species ~ ., data = iris)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa            5.006       3.428        1.462       0.246
## versicolor        5.936       2.770        4.260       1.326
## virginica         6.588       2.974        5.552       2.026
## 
## Coefficients of linear discriminants:
##                     LD1         LD2
## Sepal.Length  0.8293776  0.02410215
## Sepal.Width   1.5344731  2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width  -2.8104603  2.83918785
## 
## Proportion of trace:
##    LD1    LD2 
## 0.9912 0.0088
# Show each component of the fitted model under its own heading
cat("\nPrior probabilities of groups:\n")
## 
## Prior probabilities of groups:
print(lda.fit[["prior"]])
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333
cat("\nGroup means:\n")
## 
## Group means:
print(lda.fit[["means"]])
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa            5.006       3.428        1.462       0.246
## versicolor        5.936       2.770        4.260       1.326
## virginica         6.588       2.974        5.552       2.026
cat("\nCoefficients of linear discriminants:\n")
## 
## Coefficients of linear discriminants:
print(lda.fit[["scaling"]])
##                     LD1         LD2
## Sepal.Length  0.8293776  0.02410215
## Sepal.Width   1.5344731  2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width  -2.8104603  2.83918785
cat("\nProportion of trace:\n")
## 
## Proportion of trace:
# Squared singular values normalised to sum to one; these reproduce the
# "Proportion of trace" row of the model summary, unrounded and unnamed
prop_trace <- prop.table(lda.fit[["svd"]]^2)
print(prop_trace)
## [1] 0.991212605 0.008787395
# Sepal width against sepal length, each point filled with its species colour
plot(Sepal.Width ~ Sepal.Length, data = iris, pch = 21, col = "gray",
     bg = col.ind)
# Overlay the per-species group means (centroids) as larger markers, picking
# the two plotted columns and each centroid's colour by name
points(lda.fit$means[, c("Sepal.Length", "Sepal.Width")], pch = 21, cex = 2,
       col = "black", bg = lookup[rownames(lda.fit$means)])

# Resubstitution predictions: no new data is supplied to predict()
lda.pred <- predict(lda.fit)
# First rows of the discriminant scores (columns LD1 and LD2)
head(lda.pred[["x"]])
##        LD1        LD2
## 1 8.061800  0.3004206
## 2 7.128688 -0.7866604
## 3 7.489828 -0.2653845
## 4 6.813201 -0.6706311
## 5 8.132309  0.5144625
## 6 7.701947  1.4617210
# Observations in discriminant space, filled by true species colour
plot(LD2 ~ LD1, data = as.data.frame(lda.pred[["x"]]), pch = 21,
     col = "gray", bg = col.ind)

# Confusion matrix: predicted class in rows, true species in columns
table(pred = lda.pred[["class"]], true = iris[["Species"]])
##             true
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         1
##   virginica       0          2        49
# Misclassification rate on the data the model was fitted to
1 - mean(iris[["Species"]] == lda.pred[["class"]])
## [1] 0.02
# Leave-one-out cross-validation. CV = TRUE is an argument of lda() itself;
# predict() on an lda fit has no such argument and would silently absorb it
# through `...`, returning the ordinary resubstitution predictions. With
# CV = TRUE, lda() returns the held-out class and posterior probabilities for
# every observation instead of a fitted model.
lda.cv <- lda(Species ~ ., data = iris, CV = TRUE)

# Cross-validated confusion matrix (predicted in rows, true in columns) and
# misclassification rate
table(pred = lda.cv$class, true = iris$Species)
1 - mean(lda.cv$class == iris$Species)

####Q2
library(MASS)
# Count the observations per species
table(iris$Species)
## 
##     setosa versicolor  virginica 
##         50         50         50
# Lookup table mapping each species name to a fill colour. The names must
# match levels(iris$Species) exactly ("versicolor", not "versicola").
lookup <- c(setosa = "blue", versicolor = "green", virginica = "orange")

# Fail early if any species has no colour assigned
stopifnot(all(levels(iris$Species) %in% names(lookup)))

# One colour per observation. Indexing with the species name as character
# (rather than with the factor, which is treated as its integer level codes)
# makes the result depend on the names, not on the order of the entries.
col.ind <- lookup[as.character(iris$Species)]

# Pairwise scatterplot matrix of the four measurements, filled by species
pairs(iris[-5], pch = 21, col = "gray", bg = col.ind)

# Fit LDA of Species on all four measurements (`.` = every other column)
lda.fit <- lda(Species ~ ., data = iris)
# Show the fitted model; a bare object name is auto-printed at top level
lda.fit
## Call:
## lda(Species ~ ., data = iris)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa            5.006       3.428        1.462       0.246
## versicolor        5.936       2.770        4.260       1.326
## virginica         6.588       2.974        5.552       2.026
## 
## Coefficients of linear discriminants:
##                     LD1         LD2
## Sepal.Length  0.8293776  0.02410215
## Sepal.Width   1.5344731  2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width  -2.8104603  2.83918785
## 
## Proportion of trace:
##    LD1    LD2 
## 0.9912 0.0088
# Scatterplot of sepal width against sepal length, filled by species colour
plot(Sepal.Width ~ Sepal.Length, data = iris, pch = 21, col = "gray",
     bg = col.ind)

# Add the group means (centroids) as larger, black-outlined markers; the
# colours in lookup are applied in order, one per row of the means matrix
points(
  x = lda.fit$means[, "Sepal.Length"],
  y = lda.fit$means[, "Sepal.Width"],
  pch = 21, cex = 2, col = "black", bg = lookup
)

# Predict on the training data itself (no new data supplied)
lda.pred <- predict(lda.fit)
# Peek at the first discriminant scores
head(lda.pred[["x"]])
##        LD1        LD2
## 1 8.061800  0.3004206
## 2 7.128688 -0.7866604
## 3 7.489828 -0.2653845
## 4 6.813201 -0.6706311
## 5 8.132309  0.5144625
## 6 7.701947  1.4617210
# Second discriminant against the first, filled by true species colour
plot(LD2 ~ LD1, data = as.data.frame(lda.pred[["x"]]), pch = 21,
     col = "gray", bg = col.ind)

# Cross-tabulate predicted class (rows) against true species (columns)
table(pred = lda.pred[["class"]], true = iris[["Species"]])
##             true
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         1
##   virginica       0          2        49
# Share of training observations that were misclassified
1 - mean(iris[["Species"]] == lda.pred[["class"]])
## [1] 0.02