library(jpeg)
library(rasterImage)
library(imager)
library(cluster)
library(factoextra)
library(flexclust)
library(fpc)
library(clustertend)
library(cluster)
library(ClusterR)
library(tidyverse)
library(scatterplot3d)
library(rgl)
library(gridExtra)
library(ggplot2)
library(raster)
library(abind)
library(magick)
library(knitr)

Intro

in this project we continue the analysis of the paintings. This time we will be reconstructing the paintings using less dimensions, or in other words, we will try to reduce the size of the image while preserving its quality

names <- c("mona_lisa.jpg", "girl_with_pearl_earring.jpg", "wanderer.jpg", "the_fighting_temaraire.jpg",
           "starry_night.jpg", "marilyn_monroe.jpg", "fangor_2.jpg", "fangor_3.jpg")
#obtaining names of paintings for convenience
short_names <- substr(names, 1, nchar(names) - 4)
par(mfrow = c(2,4))
for (name in names){
  image <- load.image(name)
plot(image, axes = FALSE)
}

The reduction of dimensionality will occur on each of the three dimensions - red, green and blue. Here we can see the intensity, or saturation of each color on the painting. The greener, the more intense/saturated the area

par(mfrow = c(1,3))
for (name in names){
  image <- readJPEG(name)
  red <- image[,,1]
  green <- image[,,2]
  blue <- image[,,3]
  
  plot(raster(red), axes = FALSE, main = "red")
  plot(raster(green), axes = FALSE, main = "green")
  plot(raster(blue), axes = FALSE, main = "blue")
}

We can also display the images in red, green and blue and see which color preserves the painting the most

par(mfrow = c(1,3))
for (name in names){
  image <- load.image(name)
  G(image) <- 0
  B(image) <- 0
  plot(image, axes = FALSE)
  
  image <- load.image(name)
  R(image) <- 0
  B(image) <- 0
  plot(image, axes = FALSE)
  
  image <- load.image(name)
  R(image) <- 0
  G(image) <- 0
  plot(image, axes = FALSE)
  
}

to avoid redundancy, we will create a painting class

setClass("painting", slots = c(name = "character", red_pca = "ANY", green_pca = "ANY",
                               blue_pca = "ANY"))

Here we create a function that will perform pca() on every dimension of the painting

pca_modifier <- function(name){
  new("painting", name = substr(name, 1, nchar(name) - 4), 
            red_pca = prcomp(readJPEG(name)[,,1], center = FALSE),
            green_pca = prcomp(readJPEG(name)[,,2], center = FALSE),
            blue_pca = prcomp(readJPEG(name)[,,3], center = FALSE)
      )
}
marilyn_monroe_pca <- pca_modifier("marilyn_monroe.jpg")
mona_lisa_pca <- pca_modifier("mona_lisa.jpg")
wanderer_pca <- pca_modifier("wanderer.jpg")
fighting_temaraire_pca <- pca_modifier("the_fighting_temaraire.jpg")
starry_night_pca <- pca_modifier("starry_night.jpg")
girl_pca <- pca_modifier("girl_with_pearl_earring.jpg")
fangor_1_pca <- pca_modifier("fangor_2.jpg")
fangor_2_pca <- pca_modifier("fangor_3.jpg")

pca_paintings <- list(mona_lisa_pca, girl_pca, wanderer_pca, fighting_temaraire_pca,
                      starry_night_pca, marilyn_monroe_pca, fangor_1_pca, fangor_2_pca)

We can explore each color, and how many dimensions explain how much variance. Ideally, we want the least amount of dimensions, but dimensions that explain the most variance, aka represent the data with least loss of quality

par(mfrow = c(2,3))
for (painting in pca_paintings){
  red_eigenvalues <- fviz_eig(slot(painting, "red_pca"), 
                              main = paste(slot(painting, "name"), "red"), 
                              barfill = rgb(1,0,0, alpha = 0.3), barcolor = "black")
  green_eigenvalues <- fviz_eig(slot(painting, "green_pca"), 
                              main = paste(slot(painting, "name"), "green"), 
                              barfill = rgb(0.1,0.7,0.1, alpha = 0.4), barcolor = "black")
  blue_eigenvalues <- fviz_eig(slot(painting, "blue_pca"), 
                              main = paste(slot(painting, "name"), "blue"), 
                              barfill = rgb(0,0,1, alpha = 0.3), barcolor = "black")
  
  grid.arrange(red_eigenvalues, green_eigenvalues, blue_eigenvalues, 
               ncol = 3)
  
  
}

## Component and color analysis Here we display the performance of each color. Interestingly, mona lisa looked undoubtedly the worst in blue, and if we look at the graph, blue color needs a lot more components to reach the same level of variance that red and green reach

# Here we display the performance of each color. Interestingly, mona lisa looked undoubtedly the worst in
# blue, and if we look at the graph, blue color needs a lot more components to reach the same
# level of variance that red and green reach
for (painting in pca_paintings){
  x <- summary(slot(painting, "red_pca"))$importance[3,]
  y <- summary(slot(painting, "green_pca"))$importance[3,]
  z <- summary(slot(painting, "blue_pca"))$importance[3,]
  
  df <- data.frame("red" = x,"green" = y,"blue" = z)
  df <- head(df, 500)
  df$index = index = c(1:500)
  
  g <- ggplot() +
    geom_line(data = df, aes(x = index, y = red), col = "red", size = 1) +
    geom_line(data = df, aes(x = index, y = green), col = "darkgreen", size = 1) +
    geom_line(data = df, aes(x = index, y = blue), col = "steelblue", size = 1) +
    ylab("cumulative variance by color") +
    xlab("number of principal components") +
    theme_bw() +
    ggtitle(slot(painting, "name"))
  print(g)
}

## Reconstruction. now we create a reconstructing function, that will display the paintings but using principal components

# now we create a reconstructing function, that will display the paintings but using 
# principal components
reconstructer <- function(painting, component) {
  reconstruction <- abind(slot(painting, "red_pca")$x[,1:component] %*% 
                            t(slot(painting, "red_pca")$rotation[,1:component]),
                          slot(painting, "green_pca")$x[,1:component] %*% 
                            t(slot(painting, "green_pca")$rotation[,1:component]),
                          slot(painting, "blue_pca")$x[,1:component] %*%
                            t(slot(painting, "blue_pca")$rotation[,1:component]),
                          along = 3)
  
  plot(image_read(reconstruction))
  title(paste(as.character(component), "components"))
}

we can see that just with 32 components, the paintings are practically the same.

par(mfrow = c(2,3))
for (painting in pca_paintings) {
  for (i in 1:6) {
  reconstructer(painting, 2**i)
  }
}

Data reduction

Now let’s check how mucn we reduced the sizes of the files with 32 principal components.

new_sizes <- c()
for (painting in pca_paintings) {
reconstruction <- abind(slot(painting, "red_pca")$x[,1:32] %*% 
                            t(slot(painting, "red_pca")$rotation[,1:32]),
                          slot(painting, "green_pca")$x[,1:32] %*% 
                            t(slot(painting, "green_pca")$rotation[,1:32]),
                          slot(painting, "blue_pca")$x[,1:32] %*%
                            t(slot(painting, "blue_pca")$rotation[,1:32]),
                          along = 3)


  painting_reconstruction <- (writeJPEG(reconstruction, "placeholder.jpg"))
  new_sizes <- append(new_sizes, file.info("placeholder.jpg")$size)
}
old_sizes <- c()
for (name in names) {
  old_sizes <- append(old_sizes,file.info(name)$size)
}

We can see that for the most part, the sizes of the files have been largely reduced, sometimes by even two thirds.

for (i in c(1:8)){
  print(paste(short_names[i], 'percentage of original size',round(new_sizes[i] / old_sizes[i], 2)))
}
## [1] "mona_lisa percentage of original size 0.42"
## [1] "girl_with_pearl_earring percentage of original size 0.63"
## [1] "wanderer percentage of original size 0.6"
## [1] "the_fighting_temaraire percentage of original size 0.32"
## [1] "starry_night percentage of original size 0.33"
## [1] "marilyn_monroe percentage of original size 0.91"
## [1] "fangor_2 percentage of original size 0.87"
## [1] "fangor_3 percentage of original size 0.67"

thank you.