Read Pix Set Up Initialization
library(jpeg)
## Warning: package 'jpeg' was built under R version 4.2.2
library(OpenImageR)
## Warning: package 'OpenImageR' was built under R version 4.2.2
library(EBImage)
##
## Attaching package: 'EBImage'
## The following objects are masked from 'package:OpenImageR':
##
## readImage, writeImage
getwd()
## [1] "C:/Users/enidr/OneDrive/Documents/CUNY SPS DATA 605/CUNY SPS DATA 605 Week 4"
Use of Graphics
We add graphics to the data set.
# Prepare for Image Processing
num <- 17
# Match only names that END in ".jpg". The dot is escaped so it is a literal
# dot rather than the regex "any character" wildcard, and "$" anchors the
# match to the end of the file name.
files <- list.files("./jpg/", pattern = "\\.jpg$")
# Stop with an explicit failure if the folder holds fewer images than
# requested; indexing past the end would pad `files` with NA instead.
stopifnot(length(files) >= num)
files <- files[seq_len(num)]
# Setting the file path: report the dimensions of the first image
dim(readJPEG(file.path(getwd(), "jpg", files[1])))
## [1] 1200 2500 3
is.na(getwd())
## [1] FALSE
View Shoes Function
# Set Adj. Parameters
height <- 1200 # image height used for resizing
width <- 2500  # image width used for resizing
scale <- 20    # divisor applied to height and width when resizing
# Draw a JPEG on the current graphics device.
#
# Arguments:
#   path  JPEG source, passed unchanged to readJPEG().
#   add   FALSE (default): first open an empty, axis-free plot region sized
#         to the image. TRUE: draw onto the plot that already exists.
#
# Returns whatever rasterImage() returns (it is the last expression).
plot_jpeg <- function(path, add = FALSE) {
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned.
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1]                 # get the resolution, [x, y]
  if (!add) {
    # initialize an empty plot area if add == FALSE
    plot(1, 1,
         xlim = c(1, res[1]), ylim = c(1, res[2]),
         asp = 1, type = "n",
         xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n",
         xlab = "", ylab = "", bty = "n")
  }
  rasterImage(jpg, 1, 1, res[1], res[2])
}
Load the Data into an Array
Load the data into the array file by file, storing each image in its own slice. The array's dimensions are number of images x ht/scale x wt/scale x channel, with 3 channels for red, green and blue. resize() from the EBImage package scales each image to the desired dimensions.
# Load
# Pre-allocate a 4-D array indexed as
# image x (height/scale) x (width/scale) x channel, filled with zeros.
im <- array(0, dim = c(length(files), height / scale, width / scale, 3))
# Iterate over the files actually listed instead of a literal count, so the
# loop always matches the first dimension of `im`.
for (i in seq_along(files)) {
  # Read one JPEG and shrink it to the reduced resolution.
  temp <- resize(readJPEG(file.path("jpg", files[i])),
                 height / scale, width / scale)
  # Store the resized image in slice i.
  im[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, 3))
}
Vectorize
The array built in the previous step now needs to be reshaped into a 2-dimensional matrix, with each image flattened into a single vector. The dimensions will be number of images x (ht/scale * wt/scale * 3).
# Flatten every image in `im` into one row of `flat` (all red values, then
# green, then blue), then transpose so each image is one column of `shoes`.
#
# NOTE(review): the row width prod(dim(im)) also multiplies in the image
# count, so each row is dim(im)[1] times longer than a single image and R
# silently recycles the pixel vector to fill it. prod(dim(im)[-1]) would
# store each image exactly once. The width is left unchanged here because the
# dimensions and scaling values printed in the rest of this document depend
# on it -- confirm and re-run the whole analysis before changing it.
flat <- matrix(0, dim(im)[1], prod(dim(im)))
for (i in seq_len(dim(im)[1])) {
  # as.vector() walks the slice with the channel index varying slowest, so
  # the red, green and blue planes come out back to back. The original
  # per-iteration readJPEG() call was dropped: its result was never used.
  flat[i, ] <- as.vector(im[i, , , ])
}
shoes <- as.data.frame(t(flat))
dim(flat)
## [1] 17 382500
dim(shoes)
## [1] 382500 17
str(shoes)
## 'data.frame': 382500 obs. of 17 variables:
## $ V1 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V2 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V3 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V4 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V5 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V6 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V7 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V8 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V9 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V10: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V11: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V12: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V13: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V14: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V15: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V16: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V17: num 1 1 1 1 1 1 1 1 1 1 ...
Actual Plots
Using the plot_jpeg function to see the images
#Old Shoes
par(mfrow = c(3, 3))
par(mai = c(.3, .3, .3, .3))
# Plot every image held in `im`. The 3x3 layout shows nine per page, so
# later images continue on further pages.
for (i in seq_len(dim(im)[1])) {
  # Encode slice i as JPEG data and hand it to plot_jpeg() for display.
  plot_jpeg(writeJPEG(im[i, , , ]))
}
Get Eigencomponents from Correlation Structure
We are scaling the pixels in the array. Now we need to scale and center data using the scale function. We are normalizing the data to mean=0 and sd=1.
# Standardise every column of `shoes`; scale() centres and scales by default.
scaled <- scale(shoes)
# Keep the per-column spreads and centres that scale() attached to its result.
std.shoe <- attr(scaled, "scaled:scale")   # saving for classification...later
mean.shoe <- attr(scaled, "scaled:center") # saving for classification
dim(scaled)
## [1] 382500 17
str(scaled)
## num [1:382500, 1:17] 0.651 0.651 0.651 0.651 0.651 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
## - attr(*, "scaled:center")= Named num [1:17] 0.867 0.755 0.909 0.779 0.762 ...
## ..- attr(*, "names")= chr [1:17] "V1" "V2" "V3" "V4" ...
## - attr(*, "scaled:scale")= Named num [1:17] 0.205 0.343 0.194 0.327 0.328 ...
## ..- attr(*, "names")= chr [1:17] "V1" "V2" "V3" "V4" ...
Calculate Covariance (Correlation)
We are using the correlation matrix. All the preceding work was done to get to this matrix. It is this correlation matrix that will be used to determine the eigencomponents.
# Correlation between the columns of `scaled`.
Sigma_ <- cor(scaled)
dim(Sigma_)
## [1] 17 17
str(Sigma_)
## num [1:17, 1:17] 1 0.787 0.678 0.654 0.727 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
## ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
Sigma_
## V1 V2 V3 V4 V5 V6 V7
## V1 1.0000000 0.7872851 0.6776806 0.6544080 0.7265980 0.6526121 0.6833201
## V2 0.7872851 1.0000000 0.5265192 0.8346666 0.8313697 0.4974713 0.5452394
## V3 0.6776806 0.5265192 1.0000000 0.3996754 0.4560180 0.6677417 0.6925274
## V4 0.6544080 0.8346666 0.3996754 1.0000000 0.7982592 0.4516536 0.5003529
## V5 0.7265980 0.8313697 0.4560180 0.7982592 1.0000000 0.5039991 0.5657749
## V6 0.6526121 0.4974713 0.6677417 0.4516536 0.5039991 1.0000000 0.8766556
## V7 0.6833201 0.5452394 0.6925274 0.5003529 0.5657749 0.8766556 1.0000000
## V8 0.7560167 0.8386777 0.5447951 0.8512430 0.8125317 0.6770704 0.6651375
## V9 0.5943100 0.8226958 0.3142268 0.8543069 0.7992001 0.3968194 0.4426932
## V10 0.7250559 0.8539275 0.4332609 0.7899076 0.8746777 0.4563027 0.5134776
## V11 0.6973755 0.8153650 0.4336216 0.7689457 0.8420656 0.4380275 0.4884842
## V12 0.7403649 0.7045207 0.6556382 0.6127174 0.6556111 0.6497151 0.6954737
## V13 0.6922406 0.5943306 0.6064572 0.5314402 0.5975513 0.6720664 0.6979225
## V14 0.6561369 0.6316040 0.5724038 0.5781606 0.6257459 0.5942177 0.6371818
## V15 0.7046243 0.7113572 0.5662060 0.6572434 0.6939202 0.5563215 0.5942733
## V16 0.7289012 0.6932984 0.5814029 0.6411786 0.7189399 0.6294994 0.6653560
## V17 0.7229931 0.6882444 0.5803742 0.6129113 0.6771056 0.5761203 0.6180556
## V8 V9 V10 V11 V12 V13 V14
## V1 0.7560167 0.5943100 0.7250559 0.6973755 0.7403649 0.6922406 0.6561369
## V2 0.8386777 0.8226958 0.8539275 0.8153650 0.7045207 0.5943306 0.6316040
## V3 0.5447951 0.3142268 0.4332609 0.4336216 0.6556382 0.6064572 0.5724038
## V4 0.8512430 0.8543069 0.7899076 0.7689457 0.6127174 0.5314402 0.5781606
## V5 0.8125317 0.7992001 0.8746777 0.8420656 0.6556111 0.5975513 0.6257459
## V6 0.6770704 0.3968194 0.4563027 0.4380275 0.6497151 0.6720664 0.5942177
## V7 0.6651375 0.4426932 0.5134776 0.4884842 0.6954737 0.6979225 0.6371818
## V8 1.0000000 0.8189832 0.8034116 0.7634081 0.6949891 0.6342264 0.6425449
## V9 0.8189832 1.0000000 0.8050235 0.7639324 0.5417763 0.4402583 0.4948664
## V10 0.8034116 0.8050235 1.0000000 0.8797837 0.6871696 0.5871662 0.6344487
## V11 0.7634081 0.7639324 0.8797837 1.0000000 0.6973725 0.6136412 0.6736291
## V12 0.6949891 0.5417763 0.6871696 0.6973725 1.0000000 0.7515997 0.7983485
## V13 0.6342264 0.4402583 0.5871662 0.6136412 0.7515997 1.0000000 0.7738524
## V14 0.6425449 0.4948664 0.6344487 0.6736291 0.7983485 0.7738524 1.0000000
## V15 0.6920963 0.5965985 0.7056080 0.7348275 0.8138716 0.7377756 0.8451754
## V16 0.7053563 0.5709788 0.7078820 0.7184022 0.7792859 0.8487052 0.8218953
## V17 0.6784023 0.5544874 0.6970469 0.7274268 0.8390444 0.7541332 0.7958402
## V15 V16 V17
## V1 0.7046243 0.7289012 0.7229931
## V2 0.7113572 0.6932984 0.6882444
## V3 0.5662060 0.5814029 0.5803742
## V4 0.6572434 0.6411786 0.6129113
## V5 0.6939202 0.7189399 0.6771056
## V6 0.5563215 0.6294994 0.5761203
## V7 0.5942733 0.6653560 0.6180556
## V8 0.6920963 0.7053563 0.6784023
## V9 0.5965985 0.5709788 0.5544874
## V10 0.7056080 0.7078820 0.6970469
## V11 0.7348275 0.7184022 0.7274268
## V12 0.8138716 0.7792859 0.8390444
## V13 0.7377756 0.8487052 0.7541332
## V14 0.8451754 0.8218953 0.7958402
## V15 1.0000000 0.8122591 0.8124942
## V16 0.8122591 1.0000000 0.8180335
## V17 0.8124942 0.8180335 1.0000000
Get the Eigencomponents
We use the correlation matrix to find the eigenvectors and eigenvalues. The decomposition finds the multipliers (eigenvalues) and vectors (eigenvectors) of the new linear combinations, ordered so that the most variability is on the 1st component and descends thereafter. This restructuring is used to reduce the dimensionality. Each eigenvalue, divided by the sum of all eigenvalues, is the proportion of variability associated with that component. Specifically, the cumulative proportion of variability is what we are after.
# Eigen-decomposition of the correlation matrix, then display it.
myeigen <- eigen(Sigma_)
print(myeigen)
## eigen() decomposition
## $values
## [1] 11.77794388 1.72081925 0.86806022 0.46371734 0.32298978 0.27624335
## [7] 0.23790552 0.20503228 0.16487012 0.15399147 0.14378933 0.13577975
## [13] 0.12975748 0.11385581 0.10510016 0.10124270 0.07890155
##
## $vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.2515577 -0.05962807 -0.14114605 0.379564498 0.328384491 0.063548478
## [2,] -0.2564669 0.22970932 -0.09482706 0.230635627 -0.032641499 0.118423122
## [3,] -0.1974907 -0.34526438 -0.24576573 0.652153338 -0.171726622 0.244730346
## [4,] -0.2391458 0.30516320 -0.13606194 -0.123183806 -0.327267101 0.337264863
## [5,] -0.2525203 0.23895414 -0.06096558 -0.015140135 0.320237763 -0.134084234
## [6,] -0.2096918 -0.34776361 -0.42324640 -0.330938410 0.005186903 -0.204506297
## [7,] -0.2220439 -0.32176935 -0.36923615 -0.258761028 0.032948289 -0.251123643
## [8,] -0.2597468 0.13861061 -0.27362524 -0.147491638 -0.159303710 0.115445148
## [9,] -0.2242754 0.39008169 -0.17677165 -0.152415293 -0.239805816 0.083991770
## [10,] -0.2523894 0.26939880 0.02645111 0.078843036 0.269266509 -0.257207906
## [11,] -0.2504276 0.23813195 0.14578328 0.073261747 0.226873600 -0.269224686
## [12,] -0.2541524 -0.16064493 0.16973475 0.126756162 -0.260586173 -0.363831380
## [13,] -0.2374627 -0.25443032 0.18739393 -0.231917619 0.363004533 0.477340146
## [14,] -0.2431988 -0.17131145 0.34992145 -0.146263113 -0.265657288 0.024086423
## [15,] -0.2531910 -0.06188346 0.32463869 0.002861056 -0.331362672 -0.006033122
## [16,] -0.2571186 -0.11980858 0.24383184 -0.183679152 0.245239312 0.313599800
## [17,] -0.2513643 -0.10507094 0.30609685 0.063192679 -0.061516569 -0.259276337
## [,7] [,8] [,9] [,10] [,11]
## [1,] 0.52472561 0.462775915 0.08089722 -0.028688435 -0.082671317
## [2,] 0.18685650 0.102633577 -0.22284739 0.146776372 -0.003009252
## [3,] -0.44212270 -0.227918578 0.08470180 -0.056984171 0.032363352
## [4,] 0.09329163 -0.108661745 -0.02018674 0.042996293 -0.385973550
## [5,] -0.30873371 0.075063118 0.30886931 0.313277344 -0.077462977
## [6,] 0.03288863 0.082753104 0.03319269 -0.363923622 0.185381133
## [7,] -0.10293093 0.025168032 -0.01883752 0.449235459 -0.162176681
## [8,] 0.11833803 -0.024123103 0.03766722 -0.430320880 -0.007345780
## [9,] -0.01801266 -0.145348780 0.08507916 0.182869158 0.335105238
## [10,] -0.16721442 -0.008893103 -0.16124987 -0.017921028 0.043924436
## [11,] -0.28837871 -0.096204767 -0.23846363 -0.437093574 -0.015352133
## [12,] 0.25268251 -0.248712605 -0.45258476 0.273337019 0.012561412
## [13,] 0.06307660 -0.247378906 -0.38601024 -0.004189989 0.162212977
## [14,] -0.25497022 0.410431410 -0.07746391 -0.114629267 -0.513065886
## [15,] -0.07653768 0.415274397 0.11932690 0.048663804 0.597907646
## [16,] -0.05781238 -0.076685492 0.29966011 0.139313424 0.021128977
## [17,] 0.33330232 -0.444466219 0.53287860 -0.143199538 -0.119597335
## [,12] [,13] [,14] [,15] [,16]
## [1,] -0.0596494479 0.173868263 0.021255008 0.33990691 0.018397621
## [2,] 0.3583439856 -0.043784824 0.005082587 -0.68936372 -0.279026041
## [3,] 0.0232821235 -0.006366854 -0.007581095 0.05684779 0.046598924
## [4,] -0.5189611873 0.090706970 0.185160913 -0.07881277 0.109634097
## [5,] -0.2572829048 -0.330735690 -0.439080915 0.02771388 -0.285787030
## [6,] 0.0304987443 -0.169126001 0.080641248 -0.13350866 -0.192870142
## [7,] -0.0371921186 0.404445380 0.076928476 -0.10798016 0.172641737
## [8,] 0.0008197682 -0.344227162 -0.157967432 0.10736325 0.137979303
## [9,] 0.4421762647 0.290087257 -0.080299759 0.42913990 -0.091687407
## [10,] 0.1600326150 -0.200829589 0.151174826 -0.06475324 0.725134134
## [11,] -0.1827441887 0.420303198 0.203200555 0.04231127 -0.339738279
## [12,] -0.1165179657 -0.355619561 0.077240978 0.28274188 -0.181693130
## [13,] -0.0596685563 0.120675891 -0.399244171 -0.01585738 0.091551701
## [14,] 0.3522810681 0.005869241 -0.180002374 0.12378054 -0.007728344
## [15,] -0.3278229277 0.085448269 -0.010923197 -0.16580365 0.127717686
## [16,] 0.1412604581 -0.238084563 0.654492594 0.07441734 -0.149990508
## [17,] 0.0804570050 0.182593387 -0.195393055 -0.19831669 0.099936680
## [,17]
## [1,] 0.059003318
## [2,] -0.063955907
## [3,] 0.042079036
## [4,] 0.305515426
## [5,] 0.057863911
## [6,] 0.487528502
## [7,] -0.357324326
## [8,] -0.634619845
## [9,] 0.159325935
## [10,] 0.207476664
## [11,] -0.144511446
## [12,] -0.008412949
## [13,] 0.060720116
## [14,] 0.093570243
## [15,] -0.075670182
## [16,] -0.126287618
## [17,] 0.048581047
# Running total of the eigenvalues as a fraction of their overall sum.
with(myeigen, cumsum(values) / sum(values))
## [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
## [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
Eigenshoes
The new shoes are a reduced set of images built from the new linear combinations; each new shoe corresponds to one eigenvector.
A cumulative 80% of the variability falls between 2 components (79.4%) and 3 components (84.5%). Here we use 2.
# Diagonal matrix holding the inverse square roots of the first two
# eigenvalues, divided by sqrt(number of rows of `scaled` - 1).
scaling <- diag(myeigen$values[1:2]^(-1/2)) / sqrt(nrow(scaled) - 1)
print(scaling)
## [,1] [,2]
## [1,] 0.0004711401 0.000000000
## [2,] 0.0000000000 0.001232586
# Project the scaled data onto the first two eigenvectors and rescale.
eigenshoes <- scaled %*% myeigen$vectors[, 1:2] %*% scaling
dim(eigenshoes)
## [1] 382500 2
str(eigenshoes)
## num [1:382500, 1:2] -0.0012 -0.0012 -0.0012 -0.0012 -0.0012 ...
Image 1
# Recompute the eigenshoes, this time keeping the first five components.
scaling <- diag(myeigen$values[1:5]^(-1/2)) / sqrt(nrow(scaled) - 1)
eigenshoes <- scaled %*% myeigen$vectors[, 1:5] %*% scaling
par(mfrow = c(2, 3))
# Reshape the first eigenshoe to the reduced image size. The size is derived
# from height/width/scale (the settings used when loading the images) instead
# of repeating the literals 60 and 125.
imageShow(array(eigenshoes[, 1], c(height / scale, width / scale, 3)))
Image 2
# Show the second eigenshoe, reshaped to the reduced image size derived from
# height/width/scale rather than the literals 60 and 125.
imageShow(array(eigenshoes[, 2], c(height / scale, width / scale, 3)))
Generate Principal Components Transform the images
#Generate Variables
height <- 1200
width <- 2500
scale <- 20
# Copy the image array and collapse it to two dimensions:
# one row per file, one column per pixel value.
newdata <- im
dim(newdata) <- c(length(files), height * width * 3 / scale^2)
# Principal components on the correlation matrix, with the transposed data
# so that each image is a variable (column); keep the scores.
mypca <- princomp(t(newdata), cor = TRUE, scores = TRUE)
################################################################
Eigenshoes Generate Eigenshoes
#Eigenshoes
# Transpose the PCA scores and reshape them into image-shaped slices:
# component x (height/scale) x (width/scale) x channel.
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(length(files), height / scale, width / scale, 3)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# Plot one eigenshoe per slice of `mypca2` (complete, without reduction).
# The loop bound follows the array instead of a literal count.
for (i in seq_len(dim(mypca2)[1])) {
  plot_jpeg(writeJPEG(mypca2[i, , , ], bg = "white"))
}
# Share of total variance carried by each component, rounded for display.
a <- round(mypca$sdev^2 / sum(mypca$sdev^2), 3)
# Cumulative share: sum the exact shares first and round once at the end.
# Summing already-rounded shares lets the rounding errors accumulate.
round(cumsum(mypca$sdev^2) / sum(mypca$sdev^2), 3)
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 0.693 0.794 0.845 0.872 0.891 0.908 0.922 0.934 0.943 0.952
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## 0.961 0.969 0.977 0.983 0.989 0.995 1.000
New Data Set
# Build the reduced data set: multiply the transposed eigenshoes by the scaled
# data and transpose the product, giving one row per column of `scaled` and
# one column per eigenshoe.
x = t(t(eigenshoes)%*%scaled)
REFERENCES Fulton, Larry. Eigenshoes. https://rpubs.com/R-Minator/eigenshoes.