#Task ##With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion

#Packages used

library(doParallel)
## Warning: package 'doParallel' was built under R version 4.3.2
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.3.2
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 4.3.2
## Loading required package: parallel
library(foreach)
library(jpeg)
library(iterators)
library(parallel)
library(EBImage)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.3.2
## Loading required package: ggplot2
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.3.2
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.92 loaded
library(OpenImageR)
## Warning: package 'OpenImageR' was built under R version 4.3.2
## 
## Attaching package: 'OpenImageR'
## The following objects are masked from 'package:EBImage':
## 
##     readImage, writeImage
rm(list = ls())
files=list.files(path='C:/Users/Jonathan Burns/Documents/DATA 605/jpeg',pattern="\\.jpg")

#Read the Images into R

height=1200
width=2500
scale=20
plot_jpeg = function(path, add=FALSE) #initialize function
{
  require('jpeg')
  jpg = readJPEG(path, native=T) # read the file
  res = dim(jpg)[2:1] # get the resolution, [x is 2, y is 1]
  if (!add) # initialize an empty plot area if add==FALSE
    plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]), #set the X Limits by size
         asp=1, #aspect ratio
         type='n', #don't plot
         xaxs='i',yaxs='i',#prevents expanding axis windows +6% as normal
         xaxt='n',yaxt='n',xlab='',ylab='', # no axes or labels
         bty='n') # no box around graph
  rasterImage(jpg,1,1,res[1],res[2]) #image, xleft,ybottom,xright,ytop
}

#Resizing the Images in ‘Files’ ##Putting the resized images in array ‘im’

im=array(rep(0,length(files)*height/scale*width/scale*3),
         #set dimension to N, x, y, 3 colors, 4D array)
         dim=c(length(files), height/scale, width/scale,3)) 

for (i in 1:length(files)){
  #define file to be read
  tmp=paste0("C:/Users/Jonathan Burns/Documents/DATA 605/jpeg/", files[i])
  #read the file
  temp=EBImage::resize(readJPEG(tmp),height/scale, width/scale)
  #assign to the array
  im[i,,,]=array(temp,dim=c(1, height/scale, width/scale,3))
}

#Visualize Original JPGs from ‘files’

par(mfrow=c(3,3)) #set graphics to 3 x 3 table
par(mai=c(.3,.3,.3,.3)) #set margins 
for (i in 1:17){  #plot the first images only
plot_jpeg(writeJPEG(im[i,,,]))
}

#Vectorize

flat=matrix(0, 17, prod(dim(im))) 
for (i in 1:17) {
  newim <- readJPEG(paste0("C:/Users/Jonathan Burns/Documents/DATA 605/jpeg/", files[i]))
  r=as.vector(im[i,,,1]); g=as.vector(im[i,,,2]);b=as.vector(im[i,,,3])
  flat[i,] <- t(c(r, g, b))
}
shoes=as.data.frame(t(flat))

#Eigencomponents

#################################################
scaled=scale(shoes, center = TRUE, scale = TRUE)
mean.shoe=attr(scaled, "scaled:center") #saving for classification
std.shoe=attr(scaled, "scaled:scale")  #saving for classification...later
#################################################

#Calculate Co-Variance

cov_shoes <- cor(scaled)
myeigen <- eigen(cov_shoes)
cumsum(myeigen$values) / sum(myeigen$values)
##  [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
##  [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000

#Eigenshoe

windows()
scaling=diag(myeigen$values[1:5]^(-1/2)) / (sqrt(nrow(scaled)-1))
eigenshoes=scaled%*%myeigen$vectors[,1:5]%*%scaling
par(mfrow=c(2,3))
imageShow(array(eigenshoes[,1], c(60,125,3)))

Sys.sleep(10)

#Generate Principal Components

height=1200
width=2500
scale=20
newdata=im
dim(newdata)=c(length(files),height*width*3/scale^2)
mypca=princomp(t(as.matrix(newdata)), scores=TRUE, cor=TRUE)
sum(mypca$sdev^2/sum(mypca$sdev^2)) #verify that sum of variance=1
## [1] 1
mycomponents=mypca$sdev^2/sum(mypca$sdev^2)
sum(mycomponents[1:3]) #first 3 components account for 84% of variability
## [1] 0.8451073

#Scree Plot to visualize the distrubution of variance in the factors For the purposes of this assignment, building and visualizing eigenimagery that accounts for 80% of the variability of the original set of images. Reading the scree plot, 80% or more variability is achieved by using PC[1:3]. PC[1:2] is just under, coming in at 79.4%

fviz_eig(mypca, addlabels = TRUE, ylim = c(0,90))

vars <- get_pca_var(mypca)
head(vars$coord)
##          Dim.1      Dim.2      Dim.3       Dim.4        Dim.5       Dim.6
## [1,] 0.8633211  0.0782201 0.13150542  0.25847126  0.186628097  0.03340035
## [2,] 0.8801689 -0.3013327 0.08835013  0.15705547 -0.018550879  0.06224184
## [3,] 0.6777686  0.4529178 0.22897931  0.44409552 -0.097596000  0.12862747
## [4,] 0.8207247 -0.4003131 0.12676857 -0.08388422 -0.185993060  0.17726256
## [5,] 0.8666247 -0.3134600 0.05680148 -0.01030995  0.181998134 -0.07047314
## [6,] 0.7196414  0.4561963 0.39433761 -0.22535845  0.002947831 -0.10748617
##            Dim.7       Dim.8        Dim.9      Dim.10       Dim.11       Dim.12
## [1,]  0.25593785  0.20954720  0.032847670  0.01125784  0.031348603  0.021979810
## [2,]  0.09114030  0.04647299 -0.090485403 -0.05759761  0.001141095 -0.132043680
## [3,] -0.21564782 -0.10320265  0.034392492  0.02236158 -0.012272042 -0.008579068
## [4,]  0.04550352 -0.04920257 -0.008196665 -0.01687250  0.146359488  0.191228394
## [5,] -0.15058659  0.03398895  0.125413916 -0.12293550  0.029373623  0.094804386
## [6,]  0.01604161  0.03747101  0.013477629  0.14280999 -0.070295718 -0.011238270
##           Dim.13       Dim.14       Dim.15       Dim.16      Dim.17
## [1,]  0.06263059  0.007171977  0.110194949  0.005853876  0.01657369
## [2,] -0.01577211  0.001714993 -0.223485892 -0.088782342 -0.01796484
## [3,] -0.00229346 -0.002558053  0.018429573  0.014827152  0.01181976
## [4,]  0.03267434  0.062477971 -0.025550435  0.034884099  0.08581751
## [5,] -0.11913717 -0.148156996  0.008984605 -0.090933598  0.01625364
## [6,] -0.06092234  0.027210395 -0.043282381 -0.061368691  0.13694393
head(vars$cos2)
##          Dim.1       Dim.2       Dim.3       Dim.4        Dim.5       Dim.6
## [1,] 0.7453232 0.006118384 0.017293676 0.066807391 3.483005e-02 0.001115584
## [2,] 0.7746974 0.090801387 0.007805746 0.024666420 3.441351e-04 0.003874047
## [3,] 0.4593702 0.205134544 0.052431527 0.197220830 9.524979e-03 0.016545027
## [4,] 0.6735890 0.160250572 0.016070271 0.007036563 3.459342e-02 0.031422015
## [5,] 0.7510383 0.098257195 0.003226408 0.000106295 3.312332e-02 0.004966464
## [6,] 0.5178838 0.208115074 0.155502154 0.050786431 8.689706e-06 0.011553278
##             Dim.7       Dim.8        Dim.9       Dim.10       Dim.11
## [1,] 0.0655041850 0.043910030 1.078969e-03 0.0001267390 9.827349e-04
## [2,] 0.0083065549 0.002159738 8.187608e-03 0.0033174849 1.302098e-06
## [3,] 0.0465039829 0.010650787 1.182843e-03 0.0005000404 1.506030e-04
## [4,] 0.0020705699 0.002420893 6.718531e-05 0.0002846811 2.142110e-02
## [5,] 0.0226763220 0.001155249 1.572865e-02 0.0151131375 8.628097e-04
## [6,] 0.0002573334 0.001404077 1.816465e-04 0.0203946920 4.941488e-03
##            Dim.12       Dim.13       Dim.14       Dim.15       Dim.16
## [1,] 0.0004831120 3.922591e-03 5.143725e-05 1.214293e-02 3.426787e-05
## [2,] 0.0174355334 2.487595e-04 2.941202e-06 4.994594e-02 7.882304e-03
## [3,] 0.0000736004 5.259957e-06 6.543636e-06 3.396492e-04 2.198444e-04
## [4,] 0.0365682986 1.067613e-03 3.903497e-03 6.528247e-04 1.216900e-03
## [5,] 0.0089878716 1.419366e-02 2.195050e-02 8.072313e-05 8.268919e-03
## [6,] 0.0001262987 3.711532e-03 7.404056e-04 1.873364e-03 3.766116e-03
##            Dim.17
## [1,] 0.0002746872
## [2,] 0.0003227356
## [3,] 0.0001397067
## [4,] 0.0073646451
## [5,] 0.0002641807
## [6,] 0.0187536393
head(vars$contrib)
##         Dim.1      Dim.2      Dim.3       Dim.4        Dim.5      Dim.6
## [1,] 6.328127  0.3555507  1.9922207 14.40692085 10.783637363  0.4038409
## [2,] 6.577526  5.2766371  0.8992171  5.31927924  0.106546749  1.4024036
## [3,] 3.900258 11.9207491  6.0400794 42.53039758  2.949003276  5.9892942
## [4,] 5.719071  9.3124581  1.8512852  1.51742500 10.710375508 11.3747587
## [5,] 6.376650  5.7099080  0.3716802  0.02292237 10.255222500  1.7978582
## [6,] 4.397064 12.0939532 17.9137518 10.95202312  0.002690397  4.1822826
##           Dim.7      Dim.8      Dim.9      Dim.10       Dim.11      Dim.12
## [1,] 27.5336967 21.4161548 0.65443599  0.08230263 6.834547e-01  0.35580566
## [2,]  3.4915351  1.0533651 4.96609582  2.15433033 9.055597e-04 12.84104120
## [3,] 19.5472482  5.1946878 0.71743957  0.32471957 1.047387e-01  0.05420573
## [4,]  0.8703328  1.1807375 0.04075045  0.18486812 1.489756e+01 26.93207140
## [5,]  9.5316501  0.5634472 9.54002476  9.81426944 6.000513e-01  6.61944931
## [6,]  0.1081662  0.6848076 0.11017550 13.24404029 3.436616e+00  0.09301734
##            Dim.13       Dim.14     Dim.15     Dim.16     Dim.17
## [1,]  3.023017285  0.045177536 11.5536709 0.03384725  0.3481392
## [2,]  0.191711084  0.002583269 47.5222333 7.78555318  0.4090358
## [3,]  0.004053683  0.005747300  0.3231671 0.21714597  0.1770645
## [4,]  0.822775436  3.428456374  0.6211453 1.20196352  9.3339675
## [5,] 10.938609648 19.279205015  0.0768059 8.16742265  0.3348232
## [6,]  2.860360427  0.650301090  1.7824563 3.71988916 23.7684041
head(vars$coord, 3)
##          Dim.1      Dim.2      Dim.3     Dim.4       Dim.5      Dim.6
## [1,] 0.8633211  0.0782201 0.13150542 0.2584713  0.18662810 0.03340035
## [2,] 0.8801689 -0.3013327 0.08835013 0.1570555 -0.01855088 0.06224184
## [3,] 0.6777686  0.4529178 0.22897931 0.4440955 -0.09759600 0.12862747
##           Dim.7       Dim.8       Dim.9      Dim.10       Dim.11       Dim.12
## [1,]  0.2559379  0.20954720  0.03284767  0.01125784  0.031348603  0.021979810
## [2,]  0.0911403  0.04647299 -0.09048540 -0.05759761  0.001141095 -0.132043680
## [3,] -0.2156478 -0.10320265  0.03439249  0.02236158 -0.012272042 -0.008579068
##           Dim.13       Dim.14      Dim.15       Dim.16      Dim.17
## [1,]  0.06263059  0.007171977  0.11019495  0.005853876  0.01657369
## [2,] -0.01577211  0.001714993 -0.22348589 -0.088782342 -0.01796484
## [3,] -0.00229346 -0.002558053  0.01842957  0.014827152  0.01181976

Now that I think about it, this visualization doesnt make much sense, since the originals are jpg and do not have any defined variables to compare, so this is just plotting explained variance of the 17 shoes, not exactly helpful.

fviz_pca_var(mypca, col.var = "black")

A coorplot might be a better option to visualize which shoes are most important. This reinforces the finding from above, but now its spread across all of the variables. The first dimension for most of the shoes represents the largest variability. With some shoes having less dominant 1st dimensions like 3 and 6 for example.

corrplot(vars$cos2, is.corr = FALSE)

#Eigenshoes

mypca2=t(mypca$scores)
dim(mypca2)=c(length(files),height/scale,width/scale,3)
par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:17){  # plotting 17 eigenshoes
plot_jpeg(writeJPEG(mypca2[i,,,], quality=1,bg="white"))
}

a=round(mypca$sdev[1:17]^2/ sum(mypca$sdev^2),3)
cumsum(a)
##  Comp.1  Comp.2  Comp.3  Comp.4  Comp.5  Comp.6  Comp.7  Comp.8  Comp.9 Comp.10 
##   0.693   0.794   0.845   0.872   0.891   0.907   0.921   0.933   0.943   0.952 
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17 
##   0.960   0.968   0.976   0.983   0.989   0.995   1.000

#New Data Set

shoes_v2 <- t(t(eigenshoes)%*%scaled)