library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(foreach)
library(jpeg)
library(EBImage)
# Collect the file names (names only, not full paths) of the JPEGs in the
# image folder. The pattern is anchored with `$` so that only names ending in
# ".jpg" match; an unanchored "\\.jpg" would also match e.g. "shoe.jpg.bak".
files <- list.files(
  path = '/Users/dirkhartog/Desktop/CUNY_MSDS/DATA_605/WK4/jpg',
  pattern = "\\.jpg$"
)
files
## [1] "RC_2500x1200_2014_us_53446.jpg" "RC_2500x1200_2014_us_53455.jpg"
## [3] "RC_2500x1200_2014_us_53469.jpg" "RC_2500x1200_2014_us_53626.jpg"
## [5] "RC_2500x1200_2014_us_53632.jpg" "RC_2500x1200_2014_us_53649.jpg"
## [7] "RC_2500x1200_2014_us_53655.jpg" "RC_2500x1200_2014_us_53663.jpg"
## [9] "RC_2500x1200_2014_us_53697.jpg" "RC_2500x1200_2014_us_54018.jpg"
## [11] "RC_2500x1200_2014_us_54067.jpg" "RC_2500x1200_2014_us_54106.jpg"
## [13] "RC_2500x1200_2014_us_54130.jpg" "RC_2500x1200_2014_us_54148.jpg"
## [15] "RC_2500x1200_2014_us_54157.jpg" "RC_2500x1200_2014_us_54165.jpg"
## [17] "RC_2500x1200_2014_us_54172.jpg"
################### Adjustable parameters ######################
# Pixel dimensions used for the images and the down-sampling divisor applied
# to both of them. Note that the variable `scale` shares its name with
# base::scale(); a call such as scale(x) still resolves to the function.
height <- 1200
width <- 2500
scale <- 20
#' Draw a JPEG image on the current graphics device.
#'
#' @param path Passed unchanged to jpeg::readJPEG() as its source: a file
#'   name or a raw vector holding JPEG data (e.g. the value of writeJPEG()).
#' @param add If FALSE (the default) a new, empty plot sized to the image is
#'   opened first; if TRUE the image is drawn onto the existing plot.
#' @return Called for its side effect; the value of rasterImage() is returned.
plot_jpeg <- function(path, add = FALSE) {
  # Fail loudly when the package is missing. require() would only return
  # FALSE and leave the readJPEG() call to fail with a less helpful message.
  if (!requireNamespace("jpeg", quietly = TRUE)) {
    stop("Package 'jpeg' is required by plot_jpeg()", call. = FALSE)
  }
  jpg <- jpeg::readJPEG(path, native = TRUE) # TRUE rather than reassignable T
  res <- dim(jpg)[2:1] # resolution as c(x, y); dim() gives c(rows, columns)
  if (!add) {
    # open an empty plot whose coordinate system matches the pixel grid
    plot(1, 1,
      xlim = c(1, res[1]), ylim = c(1, res[2]), # limits follow the image size
      asp = 1, # aspect ratio
      type = 'n', # set up the plot without drawing the point
      xaxs = 'i', yaxs = 'i', # no automatic padding of the axis ranges
      xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', # no axes or labels
      bty = 'n' # no box around the plot
    )
  }
  rasterImage(jpg, 1, 1, res[1], res[2]) # image, xleft, ybottom, xright, ytop
}
################################################################
###################Load#########################
# Read every image, shrink it by `scale`, and stack the results in a 4-D array
# indexed as [image, row, column, colour channel], initialised with zeros.
im <- array(0, dim = c(length(files), height / scale, width / scale, 3))
# seq_along() rather than 1:length(files): with no files, 1:0 would run the
# body for i = 1 and i = 0 and fail trying to read a file that does not exist.
for (i in seq_along(files)) {
  # full path of the file to read
  tmp <- paste0(
    '/Users/dirkhartog/Desktop/CUNY_MSDS/DATA_605/WK4/jpg/',
    files[i]
  )
  # read the file and down-sample it to (height / scale) x (width / scale)
  temp <- EBImage::resize(readJPEG(tmp), height / scale, width / scale)
  # store the result as slice i of the array
  im[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, 3))
}
#################################################
#################################################
# Flatten the image stack so that row i of `flat` holds image i as one long
# vector: all red values, then all green, then all blue. R stores arrays in
# column-major order, so reshaping `im` with one row per image gives exactly
# c(as.vector(im[i,,,1]), as.vector(im[i,,,2]), as.vector(im[i,,,3])) in row i.
#
# The row length is therefore the size of ONE image, prod(dim(im)[-1]).
# Sizing the rows with prod(dim(im)) would make each row length(files) times
# too long, and the row assignment would silently recycle the image to fill it.
# The image count is taken from `im` itself rather than hard-coded, and the
# original files are not re-read here because only `im` is needed.
flat <- matrix(im, nrow = dim(im)[1])
# one column per image, one row per pixel/channel value
shoes <- as.data.frame(t(flat))
#################################################
####Old Shoes##################
par(mfrow = c(3, 3)) # lay plots out on a 3 x 3 grid
par(mai = c(.3, .3, .3, .3)) # set margins
# Plot every down-sampled image. writeJPEG() is called without a target, and
# its result is handed to plot_jpeg() as the image source.
# seq_along() keeps the loop from running when `files` is empty.
for (i in seq_along(files)) {
  plot_jpeg(writeJPEG(im[i, , , ]))
}
#################################################
# Centre and scale every column of `shoes`, keeping the centring and scaling
# vectors that scale() attaches to its result.
scaled <- scale(shoes, center = TRUE, scale = TRUE)
mean.shoe <- attr(scaled, "scaled:center") # saving for classification
std.shoe <- attr(scaled, "scaled:scale") # saving for classification...later
#################################################
# correlation matrix of the standardised columns
Sigma_ <- cor(scaled)
#################################################
myeigen <- eigen(Sigma_)
# running share of the eigenvalue total
with(myeigen, cumsum(values) / sum(values))
## [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
## [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
###################Generate Variables###########################
height <- 1200
width <- 2500
scale <- 20
# Copy the image stack and collapse it to a matrix with one row per file and
# height * width * 3 / scale^2 columns.
newdata <- im
dim(newdata) <- c(length(files), height * width * 3 / scale^2)
# PCA on the correlation matrix of the transposed data, keeping the scores.
mypca <- princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)
with(mypca, sum(sdev^2 / sum(sdev^2))) # verify that sum of variance=1
## [1] 1
# The first two and the first three components account for about 79% and 85% of the variability, respectively.
# share of the total variance carried by each principal component
mycomponents <- mypca$sdev^2 / sum(mypca$sdev^2)
sum(mycomponents[seq_len(3)])
## [1] 0.8451073
sum(mycomponents[seq_len(8)]) # first 8 components account for about 93% of variability
## [1] 0.9336889
###################Eigenshoes###################################
# Transpose the score matrix, then fold it into a 4-D array with the same
# dimensions as the image stack so each slice can be drawn as an image.
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(length(files), height / scale, width / scale, 3)
par(mfrow = c(5, 5)) # 5 x 5 grid of panels
par(mai = c(.001, .001, .001, .001)) # practically no margins
# Plot one eigenshoe per input file. The count follows length(files), as in
# the dim() assignment above, rather than a hard-coded 17.
for (i in seq_along(files)) {
  plot_jpeg(writeJPEG(mypca2[i, , , ], quality = 1, bg = "white"))
}
# share of variance per component, rounded to 3 decimals, then the running sum
a <- round(mypca$sdev[seq_along(files)]^2 / sum(mypca$sdev^2), 3)
cumsum(a)
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 0.693 0.794 0.845 0.872 0.891 0.907 0.921 0.933 0.943 0.952
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## 0.960 0.968 0.976 0.983 0.989 0.995 1.000