Reading Files

This code reads in all of the JPEGs and converts them into a format from which we can calculate the eigenvalues.

# Read every file found in jpg/ and stack the resized images into a single
# 4-D array with dimensions (number of files, height, width, 3).
files <- list.files(path = "jpg/")

# Size that every image is resized to before it is stored.
height <- 120
width <- 250

par(mfrow = c(1, 1))
# Zero-filled container. array() recycles the single 0 over every cell, so
# there is no need to materialise the full vector with rep() first.
newtemp <- array(0, dim = c(length(files), height, width, 3))

# seq_along() gives an empty sequence when no files were found, whereas
# 1:length(files) would iterate over c(1, 0) and try to read "jpg/NA".
for (i in seq_along(files)) {
  path <- paste0("jpg/", files[i])
  print(files[i])
  print(i)
  temp <- resize(readJPEG(path), height, width)
  newtemp[i, , , ] <- array(temp, dim = c(1, height, width, 3))
}
## [1] "RC_2500x1200_2014_us_53446.jpg"
## [1] 1
## [1] "RC_2500x1200_2014_us_53455.jpg"
## [1] 2
## [1] "RC_2500x1200_2014_us_53469.jpg"
## [1] 3
## [1] "RC_2500x1200_2014_us_53626.jpg"
## [1] 4
## [1] "RC_2500x1200_2014_us_53632.jpg"
## [1] 5
## [1] "RC_2500x1200_2014_us_53649.jpg"
## [1] 6
## [1] "RC_2500x1200_2014_us_53655.jpg"
## [1] 7
## [1] "RC_2500x1200_2014_us_53663.jpg"
## [1] 8
## [1] "RC_2500x1200_2014_us_53697.jpg"
## [1] 9
## [1] "RC_2500x1200_2014_us_54018.jpg"
## [1] 10
## [1] "RC_2500x1200_2014_us_54067.jpg"
## [1] 11
## [1] "RC_2500x1200_2014_us_54106.jpg"
## [1] 12
## [1] "RC_2500x1200_2014_us_54130.jpg"
## [1] 13
## [1] "RC_2500x1200_2014_us_54148.jpg"
## [1] 14
## [1] "RC_2500x1200_2014_us_54157.jpg"
## [1] 15
## [1] "RC_2500x1200_2014_us_54165.jpg"
## [1] 16
## [1] "RC_2500x1200_2014_us_54172.jpg"
## [1] 17
# Flatten each image into one row: one row per file, one column per stored
# value (height * width * 3). The column count must leave out the image
# dimension; sizing it with prod(dim(newtemp)) made every row length(files)
# times too long, and the row assignment below then silently recycled each
# image vector to fill it.
finaltemp <- matrix(0, length(files), prod(dim(newtemp)[-1]))

#extract each color into its own vector so we can view each as a column
for (i in seq_along(files)) {
  red <- as.vector(newtemp[i, , , 1])
  green <- as.vector(newtemp[i, , , 2])
  blue <- as.vector(newtemp[i, , , 3])
  finaltemp[i, ] <- c(red, green, blue)
}

# Transpose so that each image becomes a column of the data frame.
final <- as.data.frame(t(finaltemp))

Plot result of ingesting files

# Draw a JPEG on the current graphics device.
#
# path: handed unchanged to readJPEG() as the image source.
# add:  when FALSE (the default) an empty, axis-free plot region sized to the
#       image resolution is set up first; when TRUE the image is drawn onto
#       whatever plot is already active.
plot_jpeg <- function(path, add = FALSE) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) { # initialize an empty plot area if add==FALSE
    plot(
      1, 1,
      xlim = c(1, res[1]), ylim = c(1, res[2]),
      asp = 1, type = 'n',
      xaxs = 'i', yaxs = 'i',
      xaxt = 'n', yaxt = 'n',
      xlab = '', ylab = '',
      bty = 'n'
    )
  }
  rasterImage(jpg, 1, 1, res[1], res[2])
}

# Lay the ingested images out on a 5 x 5 grid with near-zero margins.
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))

# Each stored image is re-encoded with writeJPEG() and the result is passed
# to plot_jpeg(). seq_along() keeps the loop from running when files is empty.
for (i in seq_along(files)) {
  plot_jpeg(writeJPEG(newtemp[i, , , ]))
}

Calculate Eigen Values

For the purposes of this project, the first two eigenvalues cover roughly 80% of the variability.

# Centre and scale every column of `final`, then take the correlation
# matrix of the standardised columns.
scaled <- scale(final, center = TRUE, scale = TRUE)
Sigma <- cor(scaled)

# Five eigenvalues of Sigma from eigs().
myeigen <- eigs(Sigma, k = 5)

# Cumulative sum of those five eigenvalues as a fraction of the sum of all
# eigenvalues of Sigma.
cumsum(myeigen$values) / sum(eigen(Sigma)$values)
## [1] 0.6916309 0.7911549 0.8442585 0.8714110 0.8903922

PrinComp

# Reshape the image stack to one row per file, one column per stored value.
eigim <- array(newtemp, dim = c(length(files), height * width * 3))

# princomp() is given the transpose, so each file is a variable.
mypca <- princomp(t(as.matrix(eigim)), scores = TRUE, cor = TRUE)
mypca
## Call:
## princomp(x = t(as.matrix(eigim)), cor = TRUE, scores = TRUE)
## 
## Standard deviations:
##    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5    Comp.6    Comp.7    Comp.8 
## 3.4289539 1.3007336 0.9501380 0.6794057 0.5680501 0.5316181 0.4944501 0.4554398 
##    Comp.9   Comp.10   Comp.11   Comp.12   Comp.13   Comp.14   Comp.15   Comp.16 
## 0.4074390 0.3898768 0.3771138 0.3655017 0.3578468 0.3349775 0.3311088 0.3181653 
##   Comp.17 
## 0.2895940 
## 
##  17  variables and  90000 observations.

Plot our Eigen Images

The results are pretty similar to the eigenshoes shown in the course: the first image is basically just the shape of a shoe.

# Transpose the PCA scores so there is one row per component, then fold each
# row back into a (height, width, 3) image.
pcaScores <- t(mypca$scores)
dim(pcaScores) <- c(length(files), height, width, 3)

par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))

# NOTE(review): PCA scores are not confined to [0, 1], while writeJPEG()
# documents real-valued input as lying in that range - confirm whether the
# scores should be rescaled before plotting.
# seq_along() keeps the loop from running when files is empty.
for (i in seq_along(files)) {
  plot_jpeg(writeJPEG(pcaScores[i, , , ]))
}