Overview

With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion

Load JPEG Files

path = "~/DataScience/DATA_605_F23/jpg"

files = list.files(path, pattern = "\\.jpg", full.names = TRUE)
num = length(files)
height = 1200
width = 2500
scale = 20
new_height <- height/scale
new_width <- width/scale

Initialize Array

array_a <- array(rep(0, num * new_height * new_width * 3), dim = c(num, new_height,
    new_width, 3))

for (i in 1:num) {
    temp <- resizeImage(readJPEG(files[i]), new_height, new_width)
    array_a[i, , , ] <- array(temp, dim = c(1, new_height, new_width, 3))
}

In this code block, we take a look at each shoe and then for each shoe separates the rgb values and places the values in the matrix flat.

flat <- matrix(0, num, prod(dim(array_a)))

for (i in 1:num) {
    new_array_a <- readJPEG(files[i])
    r = as.vector(array_a[i, , , 1])
    g = as.vector(array_a[i, , , 2])
    b = as.vector(array_a[i, , , 3])
    flat[i, ] <- t(c(r, g, b))
}
shoes <- as.data.frame(t(flat))

View the Shoes

# plot function from prof's code
plot_jpeg = function(path, add = FALSE) {
    jpg = readJPEG(path, native = TRUE)
    res = dim(jpg)[2:1]
    if (!add) {
        plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
            xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n", xlab = "", ylab = "",
            bty = "n")
    }
    rasterImage(jpg, 1, 1, res[1], res[2])
}

par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))
for (i in 1:num) {
    plot_jpeg(writeJPEG(array_a[i, , , ]))
}

scaled <- scale(shoes, center = TRUE, scale = TRUE)
mean.shoe <- attr(scaled, "scaled:center")
std.shoe <- attr(scaled, "scaled:scale")
Sigma_ <- cor(scaled)

Calculating the eigenvalues and eigenvectors

myeigen <- eigen(Sigma_)

eigenvalues <- myeigen$values
eigenvalues
##  [1] 11.61745316  1.70394885  0.87429472  0.47497591  0.33549455  0.28761630
##  [7]  0.24989310  0.21531927  0.17872717  0.16921863  0.15382298  0.14492412
## [13]  0.14248337  0.12123246  0.11947244  0.11496463  0.09615834
eigenvectors <- myeigen$vectors
head(eigenvectors)
##            [,1]        [,2]       [,3]        [,4]         [,5]        [,6]
## [1,] -0.2525635 -0.05757309 -0.1386619  0.36125792  0.336795812  0.07714345
## [2,] -0.2568621  0.22859824 -0.0981094  0.22827537 -0.029185981  0.12458931
## [3,] -0.1969535 -0.34657657 -0.2315417  0.66768941 -0.157050532  0.23288255
## [4,] -0.2402368  0.30490346 -0.1377157 -0.11201596 -0.321430513  0.32407792
## [5,] -0.2538292  0.23904696 -0.0601217 -0.01981363  0.305521995 -0.12826832
## [6,] -0.2082231 -0.34778476 -0.4348510 -0.33174988  0.002099497 -0.19402087
##             [,7]        [,8]        [,9]        [,10]       [,11]       [,12]
## [1,]  0.51016493  0.45903229  0.10048740 -0.006663671  0.03999231  0.20915917
## [2,]  0.19981140  0.12302108 -0.24425171  0.069491874 -0.02297889 -0.20663168
## [3,] -0.44473001 -0.21792979  0.08398429 -0.013399895 -0.02335518 -0.04777264
## [4,]  0.06421329 -0.11105696 -0.06611799 -0.036340843  0.37657603  0.42599168
## [5,] -0.28851750  0.07371527  0.16898480  0.288445357  0.26809982 -0.12597319
## [6,]  0.02105116  0.06888972  0.15482756 -0.251952500 -0.26124933 -0.14946145
##              [,13]        [,14]       [,15]       [,16]       [,17]
## [1,]  0.0802011231 -0.027992918  0.14739213 -0.31732180  0.08762847
## [2,]  0.2766961639  0.014196963 -0.10190894  0.73418507 -0.12127788
## [3,] -0.0002178602  0.003778655 -0.03005534 -0.07032958  0.03964193
## [4,] -0.2934239130 -0.211658499 -0.12455871  0.03762131  0.34089309
## [5,] -0.3649986827  0.487564095  0.29387861  0.11774417  0.07261458
## [6,] -0.0409279247 -0.079125630  0.10591815  0.23418911  0.49158657

Compare the original image to the eigenshoe.

testing_img <- readJPEG(files[4])

plot(1:2, type = "n", main = "")
rasterImage(testing_img, 1, 1, 2, 2)

scaling <- diag(myeigen$values[1:5]^(-1/2))/(sqrt(nrow(scaled) - 1))
eigenshoes <- scaled %*% myeigen$vectors[, 1:5] %*% scaling
par(mfrow = c(2, 3))
imageShow(array(eigenshoes[, 4], c(60, 125, 3)))

Here, we calculate the cumulative variance explained

cumulative_variance <- cumsum(myeigen$values)/sum(myeigen$values)
cumulative_variance
##  [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
##  [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000

Next is to figure out the number of eigenshoes that accounts 80% of the variability

num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp
## [1] 3
ggplot(as.data.frame(cumulative_variance), aes(x = 1:num, cumulative_variance)) +
    geom_line() + geom_point() + labs(x = "Number of eigenshoes", y = "Cumulative Variance") +
    scale_x_continuous(breaks = seq(1, 17, by = 2)) + theme_minimal()

Now, we can visually see that after 3 eigenshoes accounts for 80% of the variability