# Q1: Example from Website ----

library(MASS)
library(ggplot2)
library(caret)
## Loading required package: lattice
# Fill colour for each iris species, keyed by species name.
lookup <- c(setosa = "blue", versicolor = "green", virginica = "orange")

# Map every observation to its species colour. The factor is converted to
# character first so the lookup happens by name; indexing with the factor
# itself uses its integer level codes and would silently depend on `lookup`
# being listed in the same order as levels(iris$Species).
col.ind <- lookup[as.character(iris$Species)]

# Scatterplot matrix of the four measurements (column 5, Species, dropped).
# pch = 21 draws circles with a gray border and a species-coloured fill.
pairs(iris[-5], pch = 21, col = "gray", bg = col.ind)

# Fit a linear discriminant model of Species on all remaining iris columns,
# then display the fitted object.
lda.fit <- lda(Species ~ ., data = iris)
print(lda.fit)
## Call:
## lda(Species ~ ., data = iris)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa            5.006       3.428        1.462       0.246
## versicolor        5.936       2.770        4.260       1.326
## virginica         6.588       2.974        5.552       2.026
## 
## Coefficients of linear discriminants:
##                     LD1         LD2
## Sepal.Length  0.8293776  0.02410215
## Sepal.Width   1.5344731  2.16452123
## Petal.Length -2.2012117 -0.93192121
## Petal.Width  -2.8104603  2.83918785
## 
## Proportion of trace:
##    LD1    LD2 
## 0.9912 0.0088
# Run predict() on the fitted model (no new data is passed) and show the
# first rows of the discriminant scores stored in its `x` component.
lda.pred <- predict(lda.fit)
print(head(lda.pred[["x"]]))
##        LD1        LD2
## 1 8.061800  0.3004206
## 2 7.128688 -0.7866604
## 3 7.489828 -0.2653845
## 4 6.813201 -0.6706311
## 5 8.132309  0.5144625
## 6 7.701947  1.4617210
# Scatter the second discriminant score against the first. The score matrix
# is converted to a data frame so the formula variables resolve by column
# name; points reuse the per-species fill colours in `col.ind`.
plot(
  LD2 ~ LD1,
  data = as.data.frame(lda.pred$x),
  pch = 21, col = "gray", bg = col.ind
)

# Simulate a two-predictor, two-class data set with a fixed seed. The draws
# happen in the order X1, X2, Y so the random stream is consumed in a fixed
# sequence.
set.seed(123)
data <- local({
  n_obs <- 100
  x1 <- rnorm(n_obs)
  x2 <- rnorm(n_obs)
  labels <- sample(c("Class1", "Class2"), n_obs, replace = TRUE)
  data.frame(X1 = x1, X2 = x2, Y = factor(labels))
})

# Fit LDA of the class label on both predictors and display the fit.
lda_model <- lda(Y ~ X1 + X2, data = data)
print(lda_model)
## Call:
## lda(Y ~ X1 + X2, data = data)
## 
## Prior probabilities of groups:
## Class1 Class2 
##    0.5    0.5 
## 
## Group means:
##                X1          X2
## Class1 -0.0725385 -0.15998212
## Class2  0.2533503 -0.05511148
## 
## Coefficients of linear discriminants:
##          LD1
## X1 1.0650581
## X2 0.3499439
# Store the LD1 scores as an ordinary numeric column. predict()$x is a
# one-column matrix here; assigning it directly embeds a matrix inside the
# data frame, so head() labels the column "LD1" while names() reports "lda"
# and str() shows `num [1:100, 1]` with dimnames attached.
data$lda <- unname(predict(lda_model)$x[, "LD1"])

# Scatter of the two predictors; points are coloured by the label column Y.
ggplot(data, aes(x = X1, y = X2, color = Y)) +
  geom_point() +
  labs(title = "LDA Classification")

# Q2: Section 4.7.3 Example ----

library(ISLR)
# Load the Smarket data set. `data` is also the name of the data frame built
# earlier in this script; the call still reaches the data() function because
# R skips non-function objects when resolving a call.
data("Smarket")

# Fit LDA of Direction on the two lag predictors, using only the rows with
# Year < 2005, then display the fit.
lda_fit <- lda(Direction ~ Lag1 + Lag2, data = Smarket, subset = Year < 2005)
print(lda_fit)
## Call:
## lda(Direction ~ Lag1 + Lag2, data = Smarket, subset = Year < 
##     2005)
## 
## Prior probabilities of groups:
##     Down       Up 
## 0.491984 0.508016 
## 
## Group means:
##             Lag1        Lag2
## Down  0.04279022  0.03389409
## Up   -0.03954635 -0.03132544
## 
## Coefficients of linear discriminants:
##             LD1
## Lag1 -0.6420190
## Lag2 -0.5135293
# Rows from 2005, i.e. those excluded by the `Year < 2005` fit.
smarket_2005 <- Smarket[Smarket$Year == 2005, ]

# Predict on those rows and keep the predicted class labels.
lda_pred <- predict(lda_fit, newdata = smarket_2005)
lda_class <- lda_pred$class

# Cross-tabulate predicted class (rows) against observed Direction (columns).
table(lda_class, smarket_2005$Direction)
##          
## lda_class Down  Up
##      Down   35  35
##      Up     76 106
# Column names of the simulated data frame.
names(data)
## [1] "X1"  "X2"  "Y"   "lda"
# First six rows of the same data frame.
head(data, n = 6L)
##            X1          X2      Y        LD1
## 1 -0.56047565 -0.71040656 Class1 -0.9041938
## 2 -0.23017749  0.25688371 Class1 -0.2139097
## 3  1.55870831 -0.24669188 Class1  1.5151344
## 4  0.07050839 -0.34754260 Class1 -0.1051771
## 5  0.12928774 -0.95161857 Class1 -0.2539663
## 6  1.71506499 -0.04502772 Class1  1.7522345
# Compact structure summary: column types and leading values.
utils::str(data)
## 'data.frame':    100 obs. of  4 variables:
##  $ X1 : num  -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
##  $ X2 : num  -0.71 0.257 -0.247 -0.348 -0.952 ...
##  $ Y  : Factor w/ 2 levels "Class1","Class2": 1 1 1 1 1 1 2 1 2 1 ...
##  $ lda: num [1:100, 1] -0.904 -0.214 1.515 -0.105 -0.254 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:100] "1" "2" "3" "4" ...
##   .. ..$ : chr "LD1"
# Refit the two-predictor model. The formula names X1 and X2 explicitly, so
# the extra `lda` column in `data` is not used as a predictor.
lda_model <- lda(Y ~ X1 + X2, data = data)

# Copy of the data restricted to the two predictors and the class label.
data_subset <- data[, c("X1", "X2", "Y")]

# NOTE: the `rm(list = ls())` / `gc()` pair that used to follow was removed.
# It deleted every object in the workspace, including the two created just
# above, without resetting loaded packages or options; restart the R session
# when a clean state is needed. R runs garbage collection automatically, so
# the manual gc() call (and its machine-specific output) is not needed.
## [1] 0.5595238