Read Pix Set Up Initialization

library(jpeg)
## Warning: package 'jpeg' was built under R version 4.2.2
library(OpenImageR)
## Warning: package 'OpenImageR' was built under R version 4.2.2
library(EBImage)
## 
## Attaching package: 'EBImage'
## The following objects are masked from 'package:OpenImageR':
## 
##     readImage, writeImage
getwd()
## [1] "C:/Users/enidr/OneDrive/Documents/CUNY SPS DATA 605/CUNY SPS DATA 605 Week 4"

Use of Graphics

We add graphics to the data set.

# Prepare for Image Processing
num <- 17
# list.files() interprets `pattern` as a regular expression, so the dot is
# escaped and the match is anchored to the end of the file name.
# head() returns at most `num` names and never pads with NA when the
# directory holds fewer files than requested.
files <- head(list.files("./jpg/", pattern = "\\.jpg$"), num)
# Setting the file path
dim(readJPEG(file.path(getwd(), "jpg", files[1])))
## [1] 1200 2500    3
is.na(getwd())
## [1] FALSE

View Shoes Function

# Set Adj. Parameters
# Image height and width, plus the divisor applied to both when the
# images are resized.
height <- 1200
width <- 2500
scale <- 20
# Draw a JPEG image on the current graphics device.
#
# path: handed unchanged to readJPEG() as the image source.
# add:  FALSE (default) first opens an empty, axis-free plot region sized
#       to the image; TRUE draws onto whatever plot already exists.
plot_jpeg <- function(path, add = FALSE) {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) {
    # initialize an empty plot area if add==FALSE
    plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = 'n',
         xaxs = 'i', yaxs = 'i', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '',
         bty = 'n')
  }
  rasterImage(jpg, 1, 1, res[1], res[2])
}

Load the Data into an Array

Each file is read as an image and stored in a single array. Its dimensions are number of images x ht/scale x wt/scale x channels. There are 3 channels: red, green and blue. Resize from the EBImage package scales each image to the desired dimensions.

# Load
# Pre-allocate one (height/scale) x (width/scale) x 3 slot per listed file.
n_images <- length(files)
img_dim <- c(height / scale, width / scale, 3)
im <- array(0, dim = c(n_images, img_dim))

# Iterate over the files actually listed rather than a hard-coded count,
# so the loop cannot run past the end of `files`.
for (i in seq_len(n_images)) {
  temp <- resize(readJPEG(file.path("jpg", files[i])), img_dim[1], img_dim[2])
  im[i, , , ] <- array(temp, dim = c(1, img_dim))
}

Vectorize

The array built in the previous step now needs to be reshaped into a 2-dimensional matrix, with each image flattened into a single row vector. The dimensions will be (number of images) x (ht/scale * wt/scale * 3).

# Flatten the image stack into one row per image.
# The row length is the pixel count of ONE image, prod(dim(im)[-1]).
# Allocating prod(dim(im)) columns made every row hold its image recycled
# once per image in the stack, with no warning because the lengths divide.
# Because R stores arrays with the first index varying fastest, reshaping
# `im` with one row per image yields, for row i, the red plane followed by
# the green and blue planes, the same order as c(r, g, b).
# NOTE(review): nrow(shoes) drops from 382500 to 22500, so the recorded
# outputs further down will differ after re-knitting.
flat <- matrix(im, nrow = dim(im)[1])
shoes <- as.data.frame(t(flat))
dim(flat)
## [1]     17 382500
dim(shoes)
## [1] 382500     17
str(shoes)
## 'data.frame':    382500 obs. of  17 variables:
##  $ V1 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V2 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V3 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V4 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V5 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V6 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V7 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V8 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V9 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V10: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V11: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V12: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V13: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V14: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V15: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V16: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V17: num  1 1 1 1 1 1 1 1 1 1 ...

Actual Plots

Using the plot_jpeg function to see the images

# Old Shoes
par(mfrow = c(3, 3))
par(mai = c(.3, .3, .3, .3))
# Plot every image held in `im`. The bound follows the array instead of a
# hard-coded 17, and the 3 x 3 layout holds nine panels per page.
for (i in seq_len(dim(im)[1])) {
  plot_jpeg(writeJPEG(im[i, , , ]))
}

Get Eigencomponents from Correlation Structure

We now center and scale the pixel data using the scale function. This normalizes each column (one per image) to mean=0 and sd=1.

# Standardize every column of `shoes`; center and scale both default to TRUE.
scaled <- scale(shoes)
# Keep the per-column centers and scales for classification later.
mean.shoe <- attr(scaled, "scaled:center")
std.shoe <- attr(scaled, "scaled:scale")
dim(scaled)
## [1] 382500     17
str(scaled)
##  num [1:382500, 1:17] 0.651 0.651 0.651 0.651 0.651 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
##  - attr(*, "scaled:center")= Named num [1:17] 0.867 0.755 0.909 0.779 0.762 ...
##   ..- attr(*, "names")= chr [1:17] "V1" "V2" "V3" "V4" ...
##  - attr(*, "scaled:scale")= Named num [1:17] 0.205 0.343 0.194 0.327 0.328 ...
##   ..- attr(*, "names")= chr [1:17] "V1" "V2" "V3" "V4" ...

Calculate Covariance (Correlation)

We are using the correlation matrix. All the preceding work was done to get to this matrix. It is this correlation matrix that will be used to determine the eigencomponents.

# Pearson correlation between the columns of `scaled`.
Sigma_ <- cor(x = scaled, method = "pearson")
dim(Sigma_)
## [1] 17 17
str(Sigma_)
##  num [1:17, 1:17] 1 0.787 0.678 0.654 0.727 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
##   ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
Sigma_
##            V1        V2        V3        V4        V5        V6        V7
## V1  1.0000000 0.7872851 0.6776806 0.6544080 0.7265980 0.6526121 0.6833201
## V2  0.7872851 1.0000000 0.5265192 0.8346666 0.8313697 0.4974713 0.5452394
## V3  0.6776806 0.5265192 1.0000000 0.3996754 0.4560180 0.6677417 0.6925274
## V4  0.6544080 0.8346666 0.3996754 1.0000000 0.7982592 0.4516536 0.5003529
## V5  0.7265980 0.8313697 0.4560180 0.7982592 1.0000000 0.5039991 0.5657749
## V6  0.6526121 0.4974713 0.6677417 0.4516536 0.5039991 1.0000000 0.8766556
## V7  0.6833201 0.5452394 0.6925274 0.5003529 0.5657749 0.8766556 1.0000000
## V8  0.7560167 0.8386777 0.5447951 0.8512430 0.8125317 0.6770704 0.6651375
## V9  0.5943100 0.8226958 0.3142268 0.8543069 0.7992001 0.3968194 0.4426932
## V10 0.7250559 0.8539275 0.4332609 0.7899076 0.8746777 0.4563027 0.5134776
## V11 0.6973755 0.8153650 0.4336216 0.7689457 0.8420656 0.4380275 0.4884842
## V12 0.7403649 0.7045207 0.6556382 0.6127174 0.6556111 0.6497151 0.6954737
## V13 0.6922406 0.5943306 0.6064572 0.5314402 0.5975513 0.6720664 0.6979225
## V14 0.6561369 0.6316040 0.5724038 0.5781606 0.6257459 0.5942177 0.6371818
## V15 0.7046243 0.7113572 0.5662060 0.6572434 0.6939202 0.5563215 0.5942733
## V16 0.7289012 0.6932984 0.5814029 0.6411786 0.7189399 0.6294994 0.6653560
## V17 0.7229931 0.6882444 0.5803742 0.6129113 0.6771056 0.5761203 0.6180556
##            V8        V9       V10       V11       V12       V13       V14
## V1  0.7560167 0.5943100 0.7250559 0.6973755 0.7403649 0.6922406 0.6561369
## V2  0.8386777 0.8226958 0.8539275 0.8153650 0.7045207 0.5943306 0.6316040
## V3  0.5447951 0.3142268 0.4332609 0.4336216 0.6556382 0.6064572 0.5724038
## V4  0.8512430 0.8543069 0.7899076 0.7689457 0.6127174 0.5314402 0.5781606
## V5  0.8125317 0.7992001 0.8746777 0.8420656 0.6556111 0.5975513 0.6257459
## V6  0.6770704 0.3968194 0.4563027 0.4380275 0.6497151 0.6720664 0.5942177
## V7  0.6651375 0.4426932 0.5134776 0.4884842 0.6954737 0.6979225 0.6371818
## V8  1.0000000 0.8189832 0.8034116 0.7634081 0.6949891 0.6342264 0.6425449
## V9  0.8189832 1.0000000 0.8050235 0.7639324 0.5417763 0.4402583 0.4948664
## V10 0.8034116 0.8050235 1.0000000 0.8797837 0.6871696 0.5871662 0.6344487
## V11 0.7634081 0.7639324 0.8797837 1.0000000 0.6973725 0.6136412 0.6736291
## V12 0.6949891 0.5417763 0.6871696 0.6973725 1.0000000 0.7515997 0.7983485
## V13 0.6342264 0.4402583 0.5871662 0.6136412 0.7515997 1.0000000 0.7738524
## V14 0.6425449 0.4948664 0.6344487 0.6736291 0.7983485 0.7738524 1.0000000
## V15 0.6920963 0.5965985 0.7056080 0.7348275 0.8138716 0.7377756 0.8451754
## V16 0.7053563 0.5709788 0.7078820 0.7184022 0.7792859 0.8487052 0.8218953
## V17 0.6784023 0.5544874 0.6970469 0.7274268 0.8390444 0.7541332 0.7958402
##           V15       V16       V17
## V1  0.7046243 0.7289012 0.7229931
## V2  0.7113572 0.6932984 0.6882444
## V3  0.5662060 0.5814029 0.5803742
## V4  0.6572434 0.6411786 0.6129113
## V5  0.6939202 0.7189399 0.6771056
## V6  0.5563215 0.6294994 0.5761203
## V7  0.5942733 0.6653560 0.6180556
## V8  0.6920963 0.7053563 0.6784023
## V9  0.5965985 0.5709788 0.5544874
## V10 0.7056080 0.7078820 0.6970469
## V11 0.7348275 0.7184022 0.7274268
## V12 0.8138716 0.7792859 0.8390444
## V13 0.7377756 0.8487052 0.7541332
## V14 0.8451754 0.8218953 0.7958402
## V15 1.0000000 0.8122591 0.8124942
## V16 0.8122591 1.0000000 0.8180335
## V17 0.8124942 0.8180335 1.0000000

Get the Eigencomponents

We use the correlation matrix to find the eigenvectors and eigenvalues. The decomposition finds the multipliers (eigenvalues) and the vectors (eigenvectors) for the new linear combinations, so that the most variability is on the 1st component and descends thereafter. This restructuring is used to reduce the dimensionality. Each eigenvalue, divided by the sum of all eigenvalues, is the proportion of variability associated with that component. Specifically, the cumulative proportion of variability is what we are after.

# Eigen-decomposition of the correlation matrix, then show the result.
myeigen <- eigen(Sigma_)
print(myeigen)
## eigen() decomposition
## $values
##  [1] 11.77794388  1.72081925  0.86806022  0.46371734  0.32298978  0.27624335
##  [7]  0.23790552  0.20503228  0.16487012  0.15399147  0.14378933  0.13577975
## [13]  0.12975748  0.11385581  0.10510016  0.10124270  0.07890155
## 
## $vectors
##             [,1]        [,2]        [,3]         [,4]         [,5]         [,6]
##  [1,] -0.2515577 -0.05962807 -0.14114605  0.379564498  0.328384491  0.063548478
##  [2,] -0.2564669  0.22970932 -0.09482706  0.230635627 -0.032641499  0.118423122
##  [3,] -0.1974907 -0.34526438 -0.24576573  0.652153338 -0.171726622  0.244730346
##  [4,] -0.2391458  0.30516320 -0.13606194 -0.123183806 -0.327267101  0.337264863
##  [5,] -0.2525203  0.23895414 -0.06096558 -0.015140135  0.320237763 -0.134084234
##  [6,] -0.2096918 -0.34776361 -0.42324640 -0.330938410  0.005186903 -0.204506297
##  [7,] -0.2220439 -0.32176935 -0.36923615 -0.258761028  0.032948289 -0.251123643
##  [8,] -0.2597468  0.13861061 -0.27362524 -0.147491638 -0.159303710  0.115445148
##  [9,] -0.2242754  0.39008169 -0.17677165 -0.152415293 -0.239805816  0.083991770
## [10,] -0.2523894  0.26939880  0.02645111  0.078843036  0.269266509 -0.257207906
## [11,] -0.2504276  0.23813195  0.14578328  0.073261747  0.226873600 -0.269224686
## [12,] -0.2541524 -0.16064493  0.16973475  0.126756162 -0.260586173 -0.363831380
## [13,] -0.2374627 -0.25443032  0.18739393 -0.231917619  0.363004533  0.477340146
## [14,] -0.2431988 -0.17131145  0.34992145 -0.146263113 -0.265657288  0.024086423
## [15,] -0.2531910 -0.06188346  0.32463869  0.002861056 -0.331362672 -0.006033122
## [16,] -0.2571186 -0.11980858  0.24383184 -0.183679152  0.245239312  0.313599800
## [17,] -0.2513643 -0.10507094  0.30609685  0.063192679 -0.061516569 -0.259276337
##              [,7]         [,8]        [,9]        [,10]        [,11]
##  [1,]  0.52472561  0.462775915  0.08089722 -0.028688435 -0.082671317
##  [2,]  0.18685650  0.102633577 -0.22284739  0.146776372 -0.003009252
##  [3,] -0.44212270 -0.227918578  0.08470180 -0.056984171  0.032363352
##  [4,]  0.09329163 -0.108661745 -0.02018674  0.042996293 -0.385973550
##  [5,] -0.30873371  0.075063118  0.30886931  0.313277344 -0.077462977
##  [6,]  0.03288863  0.082753104  0.03319269 -0.363923622  0.185381133
##  [7,] -0.10293093  0.025168032 -0.01883752  0.449235459 -0.162176681
##  [8,]  0.11833803 -0.024123103  0.03766722 -0.430320880 -0.007345780
##  [9,] -0.01801266 -0.145348780  0.08507916  0.182869158  0.335105238
## [10,] -0.16721442 -0.008893103 -0.16124987 -0.017921028  0.043924436
## [11,] -0.28837871 -0.096204767 -0.23846363 -0.437093574 -0.015352133
## [12,]  0.25268251 -0.248712605 -0.45258476  0.273337019  0.012561412
## [13,]  0.06307660 -0.247378906 -0.38601024 -0.004189989  0.162212977
## [14,] -0.25497022  0.410431410 -0.07746391 -0.114629267 -0.513065886
## [15,] -0.07653768  0.415274397  0.11932690  0.048663804  0.597907646
## [16,] -0.05781238 -0.076685492  0.29966011  0.139313424  0.021128977
## [17,]  0.33330232 -0.444466219  0.53287860 -0.143199538 -0.119597335
##               [,12]        [,13]        [,14]       [,15]        [,16]
##  [1,] -0.0596494479  0.173868263  0.021255008  0.33990691  0.018397621
##  [2,]  0.3583439856 -0.043784824  0.005082587 -0.68936372 -0.279026041
##  [3,]  0.0232821235 -0.006366854 -0.007581095  0.05684779  0.046598924
##  [4,] -0.5189611873  0.090706970  0.185160913 -0.07881277  0.109634097
##  [5,] -0.2572829048 -0.330735690 -0.439080915  0.02771388 -0.285787030
##  [6,]  0.0304987443 -0.169126001  0.080641248 -0.13350866 -0.192870142
##  [7,] -0.0371921186  0.404445380  0.076928476 -0.10798016  0.172641737
##  [8,]  0.0008197682 -0.344227162 -0.157967432  0.10736325  0.137979303
##  [9,]  0.4421762647  0.290087257 -0.080299759  0.42913990 -0.091687407
## [10,]  0.1600326150 -0.200829589  0.151174826 -0.06475324  0.725134134
## [11,] -0.1827441887  0.420303198  0.203200555  0.04231127 -0.339738279
## [12,] -0.1165179657 -0.355619561  0.077240978  0.28274188 -0.181693130
## [13,] -0.0596685563  0.120675891 -0.399244171 -0.01585738  0.091551701
## [14,]  0.3522810681  0.005869241 -0.180002374  0.12378054 -0.007728344
## [15,] -0.3278229277  0.085448269 -0.010923197 -0.16580365  0.127717686
## [16,]  0.1412604581 -0.238084563  0.654492594  0.07441734 -0.149990508
## [17,]  0.0804570050  0.182593387 -0.195393055 -0.19831669  0.099936680
##              [,17]
##  [1,]  0.059003318
##  [2,] -0.063955907
##  [3,]  0.042079036
##  [4,]  0.305515426
##  [5,]  0.057863911
##  [6,]  0.487528502
##  [7,] -0.357324326
##  [8,] -0.634619845
##  [9,]  0.159325935
## [10,]  0.207476664
## [11,] -0.144511446
## [12,] -0.008412949
## [13,]  0.060720116
## [14,]  0.093570243
## [15,] -0.075670182
## [16,] -0.126287618
## [17,]  0.048581047
# Running total of the eigenvalues as a fraction of their sum.
with(myeigen, cumsum(values) / sum(values))
##  [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
##  [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000

Eigenshoes

The new shoes (eigenshoes) are a reduced number of images built from the new linear combinations defined by the eigenvectors.

80% of the cumulative variability falls between 2 and 3 components (79.4% and 84.5%, respectively). Here we use 2.

# Two leading components: a diagonal matrix of inverse square-root
# eigenvalues, divided by sqrt(n - 1) where n is the row count of `scaled`.
top2 <- 1:2
inv_sqrt_values <- myeigen$values[top2]^(-1/2)
scaling <- diag(inv_sqrt_values) / sqrt(nrow(scaled) - 1)

scaling
##              [,1]        [,2]
## [1,] 0.0004711401 0.000000000
## [2,] 0.0000000000 0.001232586
# Project the standardized data onto those components and rescale.
eigenshoes <- scaled %*% myeigen$vectors[, top2] %*% scaling
dim(eigenshoes)
## [1] 382500      2
str(eigenshoes)
##  num [1:382500, 1:2] -0.0012 -0.0012 -0.0012 -0.0012 -0.0012 ...

Image 1

# Rebuild the eigenshoes from the first five components.
scaling <- diag(myeigen$values[1:5]^(-1/2)) / (sqrt(nrow(scaled) - 1))
eigenshoes <- scaled %*% myeigen$vectors[, 1:5] %*% scaling
par(mfrow = c(2, 3))
# Derive the image shape from the loading parameters instead of repeating
# 60 x 125 by hand, and take exactly one image's worth of values rather
# than relying on array() to silently drop any surplus.
img_dim <- c(height / scale, width / scale, 3)
imageShow(array(eigenshoes[seq_len(prod(img_dim)), 1], img_dim))

Image 2

# Show the second eigenshoe. The shape comes from the loading parameters
# rather than a hard-coded 60 x 125, and only one image's worth of values
# is passed to array().
eigen_dim <- c(height / scale, width / scale, 3)
imageShow(array(eigenshoes[seq_len(prod(eigen_dim)), 2], eigen_dim))

Generate Principal Components Transform the images

# Generate Variables
height <- 1200
width <- 2500
scale <- 20
# Re-dimension a copy of the image stack as a matrix with one row per file.
newdata <- im
dim(newdata) <- c(length(files), height * width * 3 / scale^2)
# Transpose so the files become the columns, then run a correlation-based
# PCA and keep the scores.
mypca <- princomp(t(newdata), cor = TRUE, scores = TRUE)
################################################################

Eigenshoes Generate Eigenshoes

# Eigenshoes
# Reshape the transposed score matrix into one image-shaped slice per
# component. The dimensions come from the loading parameters, not literals.
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(length(files), height / scale, width / scale, 3)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# Plot one panel per slice; the 5 x 5 layout has room for up to 25 panels.
for (i in seq_len(dim(mypca2)[1])) {
  plot_jpeg(writeJPEG(mypca2[i, , , ], bg = "white")) # complete without reduction
}

# Share of total variance per component.
var_share <- mypca$sdev^2 / sum(mypca$sdev^2)
# `a` keeps its original meaning: the per-component shares rounded to 3 dp.
a <- round(var_share, 3)
# Accumulate first and round last. Summing already-rounded shares lets the
# rounding errors pile up (e.g. 0.907 was printed where the exact value
# 0.9076 rounds to 0.908).
round(cumsum(var_share), 3)
##  Comp.1  Comp.2  Comp.3  Comp.4  Comp.5  Comp.6  Comp.7  Comp.8  Comp.9 Comp.10 
##   0.693   0.794   0.845   0.872   0.891   0.908   0.922   0.934   0.943   0.952 
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17 
##   0.961   0.969   0.977   0.983   0.989   0.995   1.000

New Data Set

# Multiply the transposed eigenshoes by the standardized data, then
# transpose the product.
projection <- t(eigenshoes) %*% scaled
x <- t(projection)

REFERENCES Fulton, Larry. Eigenshoes. https://rpubs.com/R-Minator/eigenshoes.