Activity 3.1 - SVD

SUBMISSION INSTRUCTIONS

  1. Render to html
  2. Publish your html to RPubs
  1. Submit a link to your published solutions

Problem 1

Reconsider the US air pollution data set:

library(HSAUR2)
Loading required package: tools
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data(USairpollution)
scaled_data <- scale(USairpollution,center=TRUE,scale = FALSE)

A)

Perform singular value decomposition of this data matrix. Then create the matrix \(D\). Describe what this matrix looks like. 1 x 7 matrix

X <- as.matrix(scaled_data) %>% round(1)

components <- svd(X)

D <- components$d
V <- components$v
U <- components$u
D
[1] 5053.600534  769.728180  167.557533   90.549298   68.317199   21.961415
[7]    7.618065

B)

Verify that \(X=UDV^T\) by plotting all the entries of \(X\) versus all the entries of \(UDV^T\) with the 0/1 line.

components
$d
[1] 5053.600534  769.728180  167.557533   90.549298   68.317199   21.961415
[7]    7.618065

$u
              [,1]         [,2]         [,3]          [,4]         [,5]
 [1,]  0.127613895  0.060976058  0.137144748 -0.2143610723 -0.116708956
 [2,]  0.109337494 -0.064130867 -0.337852249 -0.1032430877 -0.199927161
 [3,]  0.028969199  0.011822486  0.014985198  0.0769972560  0.168463387
 [4,] -0.064433625 -0.115466485  0.047829656 -0.2085587433  0.073036225
 [5,]  0.030631556  0.067054421  0.249207480  0.3168724483 -0.277244316
 [6,]  0.135284857  0.092133746  0.187733839  0.0102141549 -0.044163011
 [7,] -0.789174067  0.184644308 -0.078144463 -0.0079450019  0.058353762
 [8,]  0.022241932  0.139828800  0.044352996  0.1960319269 -0.036493534
 [9,] -0.095343745  0.382123744  0.102646096  0.0653254667 -0.072881614
[10,]  0.036912074 -0.118685618  0.165065408 -0.0437295893 -0.121707238
[11,] -0.057837284 -0.054049620 -0.223741435  0.1767954121  0.126176996
[12,]  0.014591618  0.071836723 -0.235858387  0.0766905069 -0.237743746
[13,]  0.107395322  0.033146038 -0.077230192  0.0274862850 -0.098592867
[14,] -0.211204804 -0.257464952  0.148719426 -0.0438197621 -0.209811031
[15,]  0.070884186  0.363243789 -0.013161804 -0.0421962585  0.110472628
[16,] -0.124077442 -0.328144684  0.034588610  0.0962591233  0.161108620
[17,] -0.005414574 -0.217405340  0.129197192 -0.1218935883 -0.064868758
[18,]  0.056453887 -0.232505197  0.118673429 -0.0006585299  0.227786092
[19,]  0.025797678  0.012335005 -0.103575719  0.1265816707  0.012706645
[20,]  0.118997422  0.082042472 -0.094241336  0.1363168459  0.212619807
[21,]  0.025939375 -0.143945096  0.124521291 -0.1012180173  0.022958963
[22,]  0.015276875 -0.133826568 -0.001763723  0.1123783584  0.156276850
[23,]  0.074197973  0.007961283  0.091877520  0.2468080072  0.332854475
[24,] -0.029944288 -0.000509752  0.019681217  0.1680716564 -0.229518223
[25,] -0.051752689  0.098160230  0.066935579  0.1410104073 -0.286924228
[26,]  0.048762322 -0.030227399  0.047588937  0.0930451688  0.092592999
[27,]  0.070932700 -0.019444235  0.015290278  0.1960550903  0.290440732
[28,]  0.093266186 -0.067941457  0.069561889 -0.1215736718  0.110782199
[29,]  0.076076146 -0.028155257 -0.092532082  0.0339222877 -0.091237354
[30,] -0.359905893 -0.068401695  0.021234469 -0.1399306640  0.076115637
[31,]  0.038372230 -0.216866182 -0.421389744 -0.1710443121 -0.069829258
[32,]  0.028456245 -0.020194047  0.241314022 -0.3139932057 -0.090246079
[33,]  0.077293288  0.285393185  0.056537711 -0.5155545253  0.186877587
[34,]  0.080633398  0.032667089  0.011420715  0.0097611612  0.083818995
[35,]  0.106362734  0.086224059 -0.189097996 -0.1034390282 -0.194435246
[36,] -0.013760028 -0.112555912 -0.269502349  0.0007031895 -0.088369737
[37,]  0.022580519 -0.003219513  0.290776686  0.0848208215 -0.180892989
[38,] -0.044990370  0.278306695 -0.139241222 -0.0501680403  0.119649752
[39,] -0.017037617 -0.160885605  0.047135075 -0.0936789311  0.016324068
[40,]  0.093760012 -0.019198697 -0.191936680  0.0646621864 -0.007119477
[41,]  0.127805123  0.123192755 -0.010825302 -0.0825986690  0.072151776
              [,6]          [,7]
 [1,]  0.003287629  5.765297e-02
 [2,] -0.135144557 -5.293395e-02
 [3,]  0.015872881  1.827659e-02
 [4,]  0.131385695 -2.198249e-03
 [5,] -0.053336671 -2.773372e-01
 [6,] -0.234201714  2.729958e-01
 [7,] -0.018351732  6.832705e-02
 [8,] -0.055665209  3.359880e-01
 [9,] -0.252376847 -2.236366e-01
[10,]  0.016786194  1.330964e-01
[11,] -0.042691217 -2.359820e-01
[12,] -0.089534650  5.120902e-02
[13,]  0.260815993 -1.127492e-01
[14,]  0.037655884  1.582492e-02
[15,]  0.164737854  1.219750e-01
[16,] -0.063949778 -2.355919e-01
[17,]  0.153452093  2.663066e-02
[18,] -0.085153752 -4.225718e-02
[19,]  0.217871217  3.442502e-02
[20,]  0.163282233  1.845880e-01
[21,]  0.067767212  1.509871e-01
[22,]  0.165200887  7.801722e-02
[23,] -0.372896608 -1.801622e-01
[24,]  0.214884359 -1.147786e-01
[25,]  0.068488105 -6.253695e-05
[26,]  0.022192234  1.962888e-01
[27,] -0.028627888  6.732709e-02
[28,]  0.012933632 -2.254871e-01
[29,]  0.207916799 -1.002313e-01
[30,]  0.066580606  5.879044e-02
[31,] -0.482160151  1.055944e-01
[32,] -0.153986378 -9.668315e-02
[33,]  0.062916188 -2.494529e-01
[34,]  0.013269627  2.120197e-01
[35,] -0.077693175  3.649631e-02
[36,]  0.078529622  1.142461e-01
[37,] -0.194409133 -1.466739e-02
[38,] -0.040203088 -4.650939e-02
[39,]  0.033585983  3.509942e-03
[40,]  0.177311254 -3.860125e-01
[41,]  0.090648578  4.492731e-02

$v
              [,1]         [,2]         [,3]        [,4]         [,5]
[1,] -0.0168607524  0.099835666  0.208833223 -0.95891661  0.151513780
[2,]  0.0011417794 -0.025814318 -0.071628250  0.10994885  0.477979999
[3,] -0.6968327948  0.710249100 -0.067197536  0.07318090  0.009663595
[4,] -0.7170284535 -0.692912516  0.056678012 -0.04905136 -0.010746556
[5,] -0.0004067534  0.001011673  0.005385869  0.01506196 -0.025434614
[6,]  0.0004280077 -0.001194686  0.265586278  0.16192756  0.832873194
[7,] -0.0028836965  0.069155411  0.934326011  0.18488187 -0.232476114
             [,6]          [,7]
[1,] -0.054111370 -2.705005e-02
[2,] -0.852497526 -1.640002e-01
[3,] -0.002155860  1.134406e-03
[4,]  0.002918418 -5.545382e-05
[5,]  0.176466852 -9.838473e-01
[6,]  0.453326609  6.371038e-02
[7,] -0.183469886 -1.888041e-02
X_recreation <- (components$u %*% diag(components$d) %*% t(components$v)) %>% round(1)
X_recreation
      [,1]  [,2]   [,3]   [,4] [,5]  [,6]  [,7]
 [1,]   16  -8.2 -419.1 -492.6 -0.6  -3.4  21.1
 [2,]  -19   1.0 -417.1 -364.6 -0.5 -29.0 -55.9
 [3,]   -6   5.7  -95.1 -111.6 -0.3  11.6   1.1
 [4,]   17  -0.8  161.9  296.4  0.2   4.5  -2.9
 [5,]  -19  -8.7  -72.1 -145.6  3.0  -0.7  52.1
 [6,]    1  -0.6 -428.1 -537.6 -2.9   4.0  34.1
 [7,]   80  -5.2 2880.9 2760.4  1.0  -2.3   8.1
 [8,]   -7  -1.8   -1.1 -155.6 -2.3   2.3  18.1
 [9,]   35  -6.1  543.9  142.4  1.5  -1.8  41.1
[10,]   -4  -4.3 -197.1  -68.6 -0.8   0.2  20.1
[11,]  -21  10.4  177.9  235.4  1.5  -0.8 -35.9
[12,]  -13  -3.9   -9.1  -93.6 -0.4 -23.8 -27.9
[13,]  -13  -6.8 -359.1 -407.6  1.8  -5.9 -10.9
[14,]    5  -5.9  600.9  904.4  0.7  -5.8  15.1
[15,]   26  -6.7  -51.1 -450.6 -0.4   6.6  13.1
[16,]  -20  13.1  257.9  624.4  1.4  11.4 -10.9
[17,]   -2  -3.5 -102.1  137.4  0.3   2.0   7.1
[18,]  -16  12.6 -327.1  -79.6 -0.6  17.7   2.1
[19,]  -16  -1.3  -82.1 -101.6  0.6   0.2 -14.9
[20,]  -17   5.2 -372.1 -476.6 -1.2  11.8 -13.9
[21,]    0  -0.2 -172.1  -15.6 -1.1   6.3   9.1
[22,]  -20   5.8 -126.1   15.4 -0.2  12.3  -8.9
[23,]  -20  19.7 -256.1 -273.6 -0.4  23.0  14.1
[24,]  -14 -10.1  105.9  108.4  2.4  -7.7   9.1
[25,]   -1 -12.3  235.9  135.4  1.2 -10.8  23.1
[26,]  -12   3.6 -188.1 -160.6 -1.5   9.2   5.1
[27,]  -21  12.5 -259.1 -247.6 -1.0  20.0  -0.9
[28,]    1   3.5 -367.1 -300.6  1.2   7.9   2.1
[29,]  -16  -4.3 -282.1 -261.6  1.5  -6.6 -15.9
[30,]   39  -1.2 1228.9 1341.4  0.2   3.2   1.1
[31,]  -20  14.5 -250.1  -26.6 -3.4 -29.7 -77.9
[32,]   31  -5.4 -116.1  -88.6  0.0  -0.5  33.1
[33,]   64  -5.8 -120.1 -429.6  1.2   6.0  11.1
[34,]   -4   2.0 -266.1 -309.6 -1.8   5.8   1.1
[35,]   -2  -4.8 -326.1 -432.6 -0.7 -21.6 -24.9
[36,]  -18   0.9  -10.1  107.4 -0.7 -16.1 -46.9
[37,]   -1  -4.7  -84.1  -77.6  0.0   2.0  50.1
[38,]   26   0.1  311.9   13.4  0.1  -0.9  -8.9
[39,]   -1   1.5  -29.1  148.4 -0.1   2.1  -2.9
[40,]  -22   0.8 -338.1 -331.6  3.3  -6.2 -31.9
[41,]    6  -1.8 -383.1 -528.6 -0.4   3.5   0.1
X
               SO2  temp   manu  popul wind precip predays
Albany          16  -8.2 -419.1 -492.6 -0.6   -3.4    21.1
Albuquerque    -19   1.0 -417.1 -364.6 -0.5  -29.0   -55.9
Atlanta         -6   5.7  -95.1 -111.6 -0.3   11.6     1.1
Baltimore       17  -0.8  161.9  296.4  0.2    4.5    -2.9
Buffalo        -19  -8.7  -72.1 -145.6  3.0   -0.7    52.1
Charleston       1  -0.6 -428.1 -537.6 -2.9    4.0    34.1
Chicago         80  -5.2 2880.9 2760.4  1.0   -2.3     8.1
Cincinnati      -7  -1.8   -1.1 -155.6 -2.3    2.3    18.1
Cleveland       35  -6.1  543.9  142.4  1.5   -1.8    41.1
Columbus        -4  -4.3 -197.1  -68.6 -0.8    0.2    20.1
Dallas         -21  10.4  177.9  235.4  1.5   -0.8   -35.9
Denver         -13  -3.9   -9.1  -93.6 -0.4  -23.8   -27.9
Des Moines     -13  -6.8 -359.1 -407.6  1.8   -5.9   -10.9
Detroit          5  -5.9  600.9  904.4  0.7   -5.8    15.1
Hartford        26  -6.7  -51.1 -450.6 -0.4    6.6    13.1
Houston        -20  13.1  257.9  624.4  1.4   11.4   -10.9
Indianapolis    -2  -3.5 -102.1  137.4  0.3    2.0     7.1
Jacksonville   -16  12.6 -327.1  -79.6 -0.6   17.7     2.1
Kansas City    -16  -1.3  -82.1 -101.6  0.6    0.2   -14.9
Little Rock    -17   5.2 -372.1 -476.6 -1.2   11.8   -13.9
Louisville       0  -0.2 -172.1  -15.6 -1.1    6.3     9.1
Memphis        -20   5.8 -126.1   15.4 -0.2   12.3    -8.9
Miami          -20  19.7 -256.1 -273.6 -0.4   23.0    14.1
Milwaukee      -14 -10.1  105.9  108.4  2.4   -7.7     9.1
Minneapolis     -1 -12.3  235.9  135.4  1.2  -10.8    23.1
Nashville      -12   3.6 -188.1 -160.6 -1.5    9.2     5.1
New Orleans    -21  12.5 -259.1 -247.6 -1.0   20.0    -0.9
Norfolk          1   3.5 -367.1 -300.6  1.2    7.9     2.1
Omaha          -16  -4.3 -282.1 -261.6  1.5   -6.6   -15.9
Philadelphia    39  -1.2 1228.9 1341.4  0.2    3.2     1.1
Phoenix        -20  14.5 -250.1  -26.6 -3.4  -29.7   -77.9
Pittsburgh      31  -5.4 -116.1  -88.6  0.0   -0.5    33.1
Providence      64  -5.8 -120.1 -429.6  1.2    6.0    11.1
Richmond        -4   2.0 -266.1 -309.6 -1.8    5.8     1.1
Salt Lake City  -2  -4.8 -326.1 -432.6 -0.7  -21.6   -24.9
San Francisco  -18   0.9  -10.1  107.4 -0.7  -16.1   -46.9
Seattle         -1  -4.7  -84.1  -77.6  0.0    2.0    50.1
St. Louis       26   0.1  311.9   13.4  0.1   -0.9    -8.9
Washington      -1   1.5  -29.1  148.4 -0.1    2.1    -2.9
Wichita        -22   0.8 -338.1 -331.6  3.3   -6.2   -31.9
Wilmington       6  -1.8 -383.1 -528.6 -0.4    3.5     0.1
attr(,"scaled:center")
       SO2       temp       manu      popul       wind     precip    predays 
 30.048780  55.763415 463.097561 608.609756   9.443902  36.769024 113.902439 

C)

Consider low-dimensional approximations of the data matrix. What is the fewest number of dimensions required to yield a correlation between the entries of \(X\) and \(\tilde X\) of at least 0.9? K = 2

k <- 2
(Xtilde2 <- U[,1:k] %*% diag(D[1:k]) %*% t(V[,1:k])) %>% round(1)
       [,1] [,2]   [,3]   [,4] [,5] [,6]  [,7]
 [1,]  -6.2 -0.5 -416.1 -494.9 -0.2  0.2   1.4
 [2,] -14.2  1.9 -420.1 -362.0 -0.3  0.3  -5.0
 [3,]  -1.6 -0.1  -95.6 -111.3 -0.1  0.1   0.2
 [4,]  -3.4  1.9  163.8  295.1  0.0  0.0  -5.2
 [5,]   2.5 -1.2  -71.2 -146.8  0.0  0.0   3.1
 [6,]  -4.4 -1.1 -426.0 -539.4 -0.2  0.2   2.9
 [7,]  81.4 -8.2 2880.0 2761.2  1.8 -1.9  21.3
 [8,]   8.9 -2.7   -1.9 -155.2  0.1 -0.1   7.1
 [9,]  37.5 -8.1  544.7  141.7  0.5 -0.6  21.7
[10,] -12.3  2.6 -194.9  -70.5 -0.2  0.2  -6.9
[11,]   0.8  0.7  174.1  238.4  0.1 -0.1  -2.0
[12,]   4.3 -1.3  -12.1  -91.2  0.0  0.0   3.6
[13,]  -6.6  0.0 -360.1 -406.8 -0.2  0.2   0.2
[14,]  -1.8  3.9  603.0  902.6  0.2 -0.2 -10.6
[15,]  21.9 -6.8  -51.0 -450.6  0.1 -0.2  18.3
[16,] -14.6  5.8  257.5  624.6  0.0  0.0 -15.7
[17,] -16.2  4.3  -99.8  135.6 -0.2  0.2 -11.5
[18,] -22.7  4.9 -325.9  -80.6 -0.3  0.3 -13.2
[19,]  -1.3 -0.1  -84.1 -100.1  0.0  0.0   0.3
[20,]  -3.8 -0.9 -374.2 -475.0 -0.2  0.2   2.6
[21,] -13.3  3.0 -170.0  -17.2 -0.2  0.2  -8.0
[22,] -11.6  2.7 -127.0   16.0 -0.1  0.2  -7.3
[23,]  -5.7  0.3 -256.9 -273.1 -0.1  0.2  -0.7
[24,]   2.5 -0.2  105.2  108.8  0.1 -0.1   0.4
[25,]  12.0 -2.2  235.9  135.2  0.2 -0.2   6.0
[26,]  -6.5  0.9 -188.2 -160.6 -0.1  0.1  -2.3
[27,]  -7.5  0.8 -260.4 -246.7 -0.2  0.2  -2.1
[28,] -13.2  1.9 -365.6 -301.7 -0.2  0.3  -5.0
[29,]  -8.6  1.0 -283.3 -260.7 -0.2  0.2  -2.6
[30,]  25.4 -0.7 1230.0 1340.6  0.7 -0.7   1.6
[31,] -19.9  4.5 -253.7  -23.4 -0.2  0.3 -12.1
[32,]  -4.0  0.6 -111.2  -92.3 -0.1  0.1  -1.5
[33,]  15.3 -5.2 -116.2 -432.3  0.1 -0.1  14.1
[34,]  -4.4 -0.2 -266.1 -309.6 -0.1  0.1   0.6
[35,]  -2.4 -1.1 -327.4 -431.4 -0.2  0.2   3.0
[36,]  -7.5  2.2  -13.1  109.9 -0.1  0.1  -5.8
[37,]  -2.2  0.2  -81.3  -80.1  0.0  0.1  -0.5
[38,]  25.2 -5.8  310.6   14.6  0.3 -0.4  15.5
[39,] -10.9  3.1  -28.0  147.5 -0.1  0.1  -8.3
[40,]  -9.5  0.9 -340.7 -329.5 -0.2  0.2  -2.4
[41,]  -1.4 -1.7 -382.7 -528.8 -0.2  0.2   4.7
cor(as.vector(X), as.vector(Xtilde2))
[1] 0.9992072
k <- 3
(Xtilde3 <- U[,1:k] %*% diag(D[1:k]) %*% t(V[,1:k])) %>% round(1)
       [,1] [,2]   [,3]   [,4] [,5]  [,6]  [,7]
 [1,]  -1.4 -2.1 -417.6 -493.6 -0.1   6.3  22.9
 [2,] -26.1  6.0 -416.3 -365.2 -0.6 -14.7 -57.9
 [3,]  -1.0 -0.2  -95.7 -111.1  0.0   0.7   2.6
 [4,]  -1.7  1.3  163.2  295.5  0.1   2.1   2.3
 [5,]  11.3 -4.1  -74.0 -144.4  0.2  11.1  42.1
 [6,]   2.1 -3.3 -428.2 -537.6  0.0   8.6  32.3
 [7,]  78.7 -7.3 2880.9 2760.4  1.7  -5.4   9.1
 [8,]  10.4 -3.2   -2.4 -154.8  0.1   1.9  14.1
 [9,]  41.1 -9.4  543.5  142.7  0.6   4.0  37.8
[10,]  -6.5  0.6 -196.7  -68.9  0.0   7.5  19.0
[11,]  -7.1  3.4  176.6  236.3 -0.1 -10.0 -37.1
[12,]  -4.0  1.5   -9.5  -93.4 -0.2 -10.5 -33.3
[13,]  -9.3  0.9 -359.2 -407.6 -0.3  -3.2 -11.9
[14,]   3.4  2.1  601.3  904.0  0.4   6.4  12.7
[15,]  21.4 -6.7  -50.9 -450.7  0.1  -0.8  16.2
[16,] -13.4  5.4  257.2  624.9  0.0   1.6 -10.2
[17,] -11.7  2.7 -101.2  136.8  0.0   5.9   8.7
[18,] -18.5  3.5 -327.2  -79.4 -0.2   5.6   5.4
[19,]  -4.9  1.1  -82.9 -101.0 -0.1  -4.6 -15.9
[20,]  -7.1  0.2 -373.1 -475.8 -0.3  -4.0 -12.1
[21,]  -8.9  1.5 -171.4  -16.0 -0.1   5.7  11.5
[22,] -11.6  2.8 -126.9   16.0 -0.1   0.1  -7.6
[23,]  -2.5 -0.8 -258.0 -272.2 -0.1   4.2  13.7
[24,]   3.2 -0.4  104.9  109.0  0.1   0.8   3.5
[25,]  14.3 -3.1  235.2  135.8  0.2   2.8  16.5
[26,]  -4.8  0.3 -188.8 -160.1 -0.1   2.3   5.1
[27,]  -7.0  0.6 -260.6 -246.5 -0.1   0.9   0.3
[28,] -10.7  1.1 -366.4 -301.1 -0.2   3.4   5.9
[29,] -11.9  2.1 -282.3 -261.5 -0.3  -3.9 -17.1
[30,]  26.2 -1.0 1229.8 1340.8  0.7   0.2   4.9
[31,] -34.7  9.6 -248.9  -27.4 -0.6 -18.5 -78.1
[32,]   4.5 -2.3 -114.0  -90.1  0.1  10.8  36.3
[33,]  17.3 -5.9 -116.8 -431.8  0.1   2.4  22.9
[34,]  -4.0 -0.3 -266.2 -309.5 -0.1   0.7   2.4
[35,]  -9.1  1.2 -325.3 -433.2 -0.3  -8.3 -26.6
[36,] -16.9  5.4  -10.0  107.3 -0.3 -11.9 -48.0
[37,]   8.0 -3.3  -84.6  -77.3  0.2  13.0  45.0
[38,]  20.3 -4.1  312.2   13.3  0.2  -6.5  -6.3
[39,]  -9.3  2.5  -28.5  148.0  0.0   2.2  -0.9
[40,] -16.2  3.2 -338.5 -331.3 -0.4  -8.3 -32.4
[41,]  -1.8 -1.6 -382.6 -528.9 -0.2  -0.3   3.0
cor(as.vector(X), as.vector(Xtilde3))
[1] 0.9997438

D)

Find \(\Sigma\), the covariance matrix of this data set. Then perform eigen-decomposition of this matrix. Verify that

  • The eigenvectors of \(\Sigma\) equal the columns of \(V\)
  • The eigenvalues of \(\Sigma\) equal the diagonals of \(D^2/(n-1)\)
covar <- cov(scaled_data)
eig_matrix <- eigen(covar)
eig_matrix
eigen() decomposition
$values
[1] 6.384720e+05 1.481204e+04 7.019599e+02 2.050019e+02 1.167047e+02
[6] 1.205705e+01 1.448704e+00

$vectors
              [,1]         [,2]         [,3]        [,4]         [,5]
[1,]  0.0168607518 -0.099835625  0.208775573  0.95883106 -0.152191203
[2,] -0.0011417794  0.025814390 -0.071600745 -0.11014784 -0.477854201
[3,]  0.6968327936 -0.710249079 -0.067182201 -0.07319788 -0.009643654
[4,]  0.7170284512  0.692912523  0.056666935  0.04906669  0.010735457
[5,]  0.0004067530 -0.001011680  0.005386606 -0.01506609  0.025401917
[6,] -0.0004336922  0.001225937  0.265807619 -0.16261712 -0.832729325
[7,]  0.0028836950 -0.069155051  0.934279828 -0.18459052  0.232812295
             [,6]          [,7]
[1,] -0.053952911 -2.704138e-02
[2,] -0.852534945 -1.640507e-01
[3,] -0.002153023  1.136208e-03
[4,]  0.002915751 -5.682006e-05
[5,]  0.176541256 -9.838347e-01
[6,]  0.453206155  6.376788e-02
[7,] -0.183568735 -1.891454e-02
D <- components$d
(D**2)/(9-1)
[1] 3.192360e+06 7.406018e+04 3.509441e+03 1.024897e+03 5.834050e+02
[6] 6.028797e+01 7.254365e+00
V
              [,1]         [,2]         [,3]        [,4]         [,5]
[1,] -0.0168607524  0.099835666  0.208833223 -0.95891661  0.151513780
[2,]  0.0011417794 -0.025814318 -0.071628250  0.10994885  0.477979999
[3,] -0.6968327948  0.710249100 -0.067197536  0.07318090  0.009663595
[4,] -0.7170284535 -0.692912516  0.056678012 -0.04905136 -0.010746556
[5,] -0.0004067534  0.001011673  0.005385869  0.01506196 -0.025434614
[6,]  0.0004280077 -0.001194686  0.265586278  0.16192756  0.832873194
[7,] -0.0028836965  0.069155411  0.934326011  0.18488187 -0.232476114
             [,6]          [,7]
[1,] -0.054111370 -2.705005e-02
[2,] -0.852497526 -1.640002e-01
[3,] -0.002155860  1.134406e-03
[4,]  0.002918418 -5.545382e-05
[5,]  0.176466852 -9.838473e-01
[6,]  0.453326609  6.371038e-02
[7,] -0.183469886 -1.888041e-02

Problem 2

In this problem we explore how “high” a low-dimensional SVD approximation of an image has to be before you can recognize it.

.Rdata objects are essentially R workspace memory snapshots that, when loaded, load any type of R object that you want into your global environment. The command below, when executed, will load three objects into your memory: mysteryU4, mysteryD4, and mysteryV4. These are the first \(k\) vectors and singular values of an SVD I performed on a 700-pixels-tall \(\times\) 600-pixels-wide image of a well-known villain.

library(magick)
Linking to ImageMagick 6.9.13.29
Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
Disabled features: fontconfig, x11
load('Data/mystery_person_k4.Rdata')

A)

Write a function that takes SVD ingredients u, d and v and renders the \(700 \times 600\) image produced by this approximation using functions from the magick package. Use your function to determine whether a 4-dimensional approximation to this image is enough for you to tell who the mystery villain is. Recall that you will likely need to rescale your recomposed approximation so that all pixels are in [0,1].

k <- 4
recompose_villian <- mysteryU4[,1:k] %*% diag(mysteryD4[1:k]) %*% t(mysteryV4[,1:k])
R <- max(recompose_villian)-min(recompose_villian)
recomposed_scaled_villian <- (recompose_villian-min(recompose_villian))/R

(recomposed_scaled_villian
  %>% as.raster
  %>% image_read
 )

B)

I’m giving you slightly higher-dimensional approximations (\(k=10\) and \(k=50\), respectively) in the objects below:

load('Data/mystery_person_k10.Rdata')
load('Data/mystery_person_k50.Rdata')

Create both of the images produced by these approximations. At what point can you tell who the mystery villain is? The evil professor big data

k <- 10
recompose_villian <- mysteryU10[,1:k] %*% diag(mysteryD10[1:k]) %*% t(mysteryV10[,1:k])
R <- max(recompose_villian)-min(recompose_villian)
recomposed_scaled_villian <- (recompose_villian-min(recompose_villian))/R

(recomposed_scaled_villian
  %>% as.raster
  %>% image_read
 )

k <- 50
recompose_villian <- mysteryU50[,1:k] %*% diag(mysteryD50[1:k]) %*% t(mysteryV50[,1:k])
R <- max(recompose_villian)-min(recompose_villian)
recomposed_scaled_villian <- (recompose_villian-min(recompose_villian))/R

(recomposed_scaled_villian
  %>% as.raster
  %>% image_read
 )

C)

How many numbers need to be stored in memory for each of the following:

  • A full \(700\times 600\) image? 420,000
  • A 4-dimensional approximation? \((700\times 4 + 4 + 4^2 = 2820)\)
  • A 10-dimensional approximation? \((700\times 10 + 10 + 10^2 = 7110)\)
  • A 50-dimensional approximation? \((700\times 50 + 50 + 50^2 = 37550)\)