This code reads in all of the JPEGs and converts them into a format from which we can calculate the eigenvalues.
# Read every file found in jpg/, resize it, and store it as one slice of a
# 4-D array whose dimensions are (image, height, width, colour channel).
files <- list.files(path = "jpg/")
height <- 120
width <- 250
par(mfrow = c(1, 1))
# Pre-allocate the full array so the loop only fills slices in place.
newtemp <- array(0, dim = c(length(files), height, width, 3))
# seq_along() gives an empty sequence when no files are found, whereas
# 1:length(files) would iterate over c(1, 0) and fail on a missing path.
for (i in seq_along(files)) {
  path <- paste0("jpg/", files[i])
  # Progress output: file name, then its index.
  print(files[i])
  print(i)
  temp <- resize(readJPEG(path), height, width)
  newtemp[i, , , ] <- array(temp, dim = c(1, height, width, 3))
}
## [1] "RC_2500x1200_2014_us_53446.jpg"
## [1] 1
## [1] "RC_2500x1200_2014_us_53455.jpg"
## [1] 2
## [1] "RC_2500x1200_2014_us_53469.jpg"
## [1] 3
## [1] "RC_2500x1200_2014_us_53626.jpg"
## [1] 4
## [1] "RC_2500x1200_2014_us_53632.jpg"
## [1] 5
## [1] "RC_2500x1200_2014_us_53649.jpg"
## [1] 6
## [1] "RC_2500x1200_2014_us_53655.jpg"
## [1] 7
## [1] "RC_2500x1200_2014_us_53663.jpg"
## [1] 8
## [1] "RC_2500x1200_2014_us_53697.jpg"
## [1] 9
## [1] "RC_2500x1200_2014_us_54018.jpg"
## [1] 10
## [1] "RC_2500x1200_2014_us_54067.jpg"
## [1] 11
## [1] "RC_2500x1200_2014_us_54106.jpg"
## [1] 12
## [1] "RC_2500x1200_2014_us_54130.jpg"
## [1] 13
## [1] "RC_2500x1200_2014_us_54148.jpg"
## [1] 14
## [1] "RC_2500x1200_2014_us_54157.jpg"
## [1] 15
## [1] "RC_2500x1200_2014_us_54165.jpg"
## [1] 16
## [1] "RC_2500x1200_2014_us_54172.jpg"
## [1] 17
# Flatten each image into one row of finaltemp: all red values, then all
# green, then all blue. A row holds exactly one image, so the column count is
# the product of every dimension of newtemp except the first (image) one.
# Including the image dimension would make each row length(files) times too
# long, and the row assignment would silently recycle the pixels to fill it.
finaltemp <- matrix(0, length(files), prod(dim(newtemp)[-1]))
# extract each color into its own vector so we can view each as a column
for (i in seq_along(files)) {
  red <- as.vector(newtemp[i, , , 1])
  green <- as.vector(newtemp[i, , , 2])
  blue <- as.vector(newtemp[i, , , 3])
  finaltemp[i, ] <- c(red, green, blue)
}
# Transpose so that each image becomes a column of the data frame.
final <- as.data.frame(t(finaltemp))
# Draw a JPEG on the current graphics device.
#
# path: handed unchanged to readJPEG() as the image source.
# add:  when FALSE (the default) an empty, axis-free plot region sized to the
#       image is opened first; when TRUE the image is drawn on the plot that
#       is already open.
plot_jpeg <- function(path, add = FALSE) {
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) { # initialize an empty plot area if add==FALSE
    plot(
      1, 1,
      xlim = c(1, res[1]), ylim = c(1, res[2]),
      asp = 1, type = "n",
      xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n",
      xlab = "", ylab = "", bty = "n"
    )
  }
  rasterImage(jpg, 1, 1, res[1], res[2])
}
# Show every resized image in a 5 x 5 grid of panels with near-zero margins.
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# seq_along() avoids the c(1, 0) sequence that 1:length(files) produces when
# files is empty.
for (i in seq_along(files)) {
  # writeJPEG() is called without a target and its return value is passed
  # straight to plot_jpeg(), which reads it back with readJPEG().
  plot_jpeg(writeJPEG(newtemp[i, , , ]))
}
For the purposes of this project, the first two eigenvalues account for roughly 80% of the variability.
# Centre and scale every column of final, then take the correlation matrix
# of the standardised columns.
scaled <- scale(final, center = TRUE, scale = TRUE)
Sigma <- cor(scaled)
# Request only the first five eigenvalues from eigs().
myeigen <- eigs(Sigma, 5)
# Cumulative share of those five eigenvalues relative to the sum of all
# eigenvalues of Sigma.
cumsum(myeigen$values) / sum(eigen(Sigma)$values)
## [1] 0.6916309 0.7911549 0.8442585 0.8714110 0.8903922
# Work on a copy of the image array, reshaped to one row per image and one
# column per pixel/channel value.
eigim <- newtemp
dim(eigim) <- c(length(files), height * width * 3)
# The matrix is transposed before fitting, so images are the variables and
# pixel values the observations; cor = TRUE bases the PCA on correlations.
mypca <- princomp(x = t(as.matrix(eigim)), cor = TRUE, scores = TRUE)
# Print the fitted object.
mypca
## Call:
## princomp(x = t(as.matrix(eigim)), cor = TRUE, scores = TRUE)
##
## Standard deviations:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## 3.4289539 1.3007336 0.9501380 0.6794057 0.5680501 0.5316181 0.4944501 0.4554398
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16
## 0.4074390 0.3898768 0.3771138 0.3655017 0.3578468 0.3349775 0.3311088 0.3181653
## Comp.17
## 0.2895940
##
## 17 variables and 90000 observations.
The results are pretty similar to the eigenshoes shown in the course: the first image is basically just the shape of a shoe.
# Transpose the component scores and reshape them to the same
# (image, height, width, channel) layout used for the resized images, so each
# component can be drawn as a picture.
pcaScores <- t(mypca$scores)
dim(pcaScores) <- c(length(files), height, width, 3)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# seq_along() avoids the c(1, 0) sequence that 1:length(files) produces when
# files is empty.
for (i in seq_along(files)) {
  # NOTE(review): the jpeg documentation says writeJPEG() clips values outside
  # [0, 1]; PCA scores are not restricted to that range - confirm whether the
  # scores should be rescaled before plotting.
  plot_jpeg(writeJPEG(pcaScores[i, , , ]))
}