For this project I chose to use Singular Value Decomposition (SVD) matrix factorization within the context of an item-item collaborative filtering recommender system.
I used the Jester data set, which contains aggregated rating data for up to 100 jokes across thousands of users.

library(recommenderlab)
# Load the Jester ratings file: one row per user, with ratings for 100 jokes.
# NOTE(review): read.csv() defaults to header = TRUE, so the first line of the
# file is consumed as column names -- confirm the CSV really has a header row,
# otherwise the first user's ratings are silently dropped.
jesterdf <- read.csv("jester-data-1.csv")
# Drop the first column, which is not a joke rating (per the Jester data
# description it holds the number of jokes the user rated -- TODO confirm).
jesterdf[, 1] <- NULL

# Ratings in this dataset range from -10 to +10; 99 marks an unrated joke.
# Recode those entries as 0 so they are treated as a neutral rating.
jesterdf[jesterdf == 99] <- 0

# Label the remaining columns with the joke number (1, 2, ..., one per column).
# Assigning the sequence directly avoids creating a variable named `colnames`,
# which would shadow the base function of the same name.
colnames(jesterdf) <- seq_len(ncol(jesterdf))


Now that the data is loaded, compute the item-item similarity matrix that underlies the recommenderlab item-item collaborative filtering approach.

# Turn the cleaned data frame into a numeric matrix, then into the
# recommenderlab rating class
R <- as.matrix(jesterdf)
r <- as(R, "realRatingMatrix")

# Item-item cosine similarity, computed from the first 100 rows (users) only,
# and converted to a plain matrix in the same step
simjester <- as.matrix(
  similarity(r[seq_len(100), ], method = "cosine", which = "items")
)
# Show the first six rows of the similarity matrix
head(simjester, n = 6)
##           1         2         3         4         5         6         7
## 1 0.0000000 0.4395643 0.4249140 0.3727608 0.2558314 0.1405758 0.3475624
## 2 0.4395643 0.0000000 0.2864470 0.3497866 0.1878873 0.1998267 0.3336379
## 3 0.4249140 0.2864470 0.0000000 0.5247078 0.2860770 0.1845423 0.2996709
## 4 0.3727608 0.3497866 0.5247078 0.0000000 0.3064756 0.2655054 0.2899915
## 5 0.2558314 0.1878873 0.2860770 0.3064756 0.0000000 0.1607973 0.1218406
## 6 0.1405758 0.1998267 0.1845423 0.2655054 0.1607973 0.0000000 0.1321282
##             8          9         10        11        12          13
## 1  0.40644398 0.33205506 0.34727011 0.4269052 0.2831891  0.08257706
## 2  0.24448036 0.27231250 0.16798512 0.3611745 0.2656873  0.39555597
## 3  0.17917971 0.34154414 0.31375016 0.6060165 0.2985383  0.11749435
## 4  0.14844152 0.47620406 0.22758891 0.3854993 0.1528981  0.18986393
## 5  0.35675808 0.18179176 0.24846694 0.2381203 0.1580268  0.17903892
## 6 -0.06501615 0.04532291 0.03876957 0.2740760 0.3760787 -0.02516843
##          14          15          16          17          18         19
## 1 0.2857586  0.01289400 -0.03953702  0.12366778 -0.02994520 0.13734865
## 2 0.3511557  0.04377305 -0.08610570  0.02257109 -0.02904188 0.10576472
## 3 0.3276654  0.12499783  0.07193521  0.11352312  0.05436360 0.13047187
## 4 0.2206366  0.16512569  0.17870407  0.14961327  0.07799061 0.05426031
## 5 0.2895551  0.05977706  0.02274650  0.23433193  0.22873900 0.10835878
## 6 0.3973681 -0.31616385 -0.18765631 -0.02947378  0.08322846 0.35955349
##           20        21        22        23         24        25        26
## 1 0.11606097 0.2210128 0.2482934 0.2599006 0.16210053 0.4048808 0.2986862
## 2 0.11461843 0.3088328 0.2548945 0.3707202 0.03826959 0.3294228 0.2570957
## 3 0.14806698 0.2451156 0.3155065 0.3481317 0.17594039 0.5068059 0.3982167
## 4 0.18292135 0.1640771 0.2961456 0.2193293 0.49687969 0.4175197 0.2884400
## 5 0.22941384 0.2109126 0.1636257 0.2886296 0.16404395 0.1889658 0.2366035
## 6 0.09595459 0.4662852 0.2334745 0.2142393 0.10543768 0.3255688 0.3630010
##           27         28         29        30        31         32
## 1 0.12683472 0.19981773 0.27375831 0.2292546 0.2606423 0.18041813
## 2 0.30515702 0.06838973 0.33131422 0.3152154 0.3111069 0.11392642
## 3 0.20066362 0.21903917 0.34248093 0.2759196 0.1841701 0.16327904
## 4 0.01173081 0.22072033 0.03745812 0.4279334 0.1542700 0.05040128
## 5 0.23098300 0.23879201 0.02613059 0.1166838 0.1998264 0.11726045
## 6 0.39467846 0.38272300 0.20608795 0.4617996 0.5038228 0.28917730
##           33         34        35          36        37        38
## 1 0.29325128 0.19253712 0.2128451  0.17528977 0.3760118 0.3899436
## 2 0.36645562 0.20122689 0.3049941  0.20085579 0.4026337 0.3559577
## 3 0.47709841 0.17936166 0.3044496  0.22395073 0.4364265 0.4402060
## 4 0.33630609 0.17056791 0.1391569 -0.02559309 0.6506767 0.3188761
## 5 0.09090592 0.02838061 0.1784218  0.16863152 0.2043580 0.2873129
## 6 0.08394651 0.26315642 0.4262442  0.32590646 0.1763755 0.3058636
##           39        40         41         42           43          44
## 1 0.09453386 0.5441656 0.45216461 0.02554126  0.268413785  0.02736405
## 2 0.17468833 0.3919674 0.41824684 0.15193438 -0.004024159  0.22985128
## 3 0.17285544 0.5919134 0.36165841 0.07703658  0.193420271  0.28790565
## 4 0.18260868 0.4601645 0.42743487 0.09195502  0.367132780  0.45879147
## 5 0.36933484 0.3257997 0.19731037 0.13942454  0.403343799  0.16527443
## 6 0.57760069 0.1526484 0.06109139 0.44258597  0.147383607 -0.02943206
##          45        46         47        48         49          50
## 1 0.1625393 0.2597376 0.15924750 0.3010008 0.17703014  0.11646299
## 2 0.1897275 0.2779266 0.16736304 0.2670573 0.16469678  0.21653487
## 3 0.2830701 0.2935667 0.09837036 0.3644929 0.09451148  0.22305423
## 4 0.1879673 0.1340365 0.19014108 0.3077853 0.04462822 -0.04350821
## 5 0.1354051 0.2973150 0.15407386 0.2657065 0.16731833  0.02089771
## 6 0.2939979 0.2817491 0.55793000 0.3451230 0.44064491  0.32668096
##          51        52           53        54        55         56
## 1 0.3134830 0.2170295 0.0818743987 0.2189972 0.2946886 0.27338008
## 2 0.2914467 0.3531921 0.1629546657 0.2525394 0.1099182 0.30132271
## 3 0.4530512 0.4657236 0.0862863191 0.1401217 0.1695407 0.19371951
## 4 0.5057642 0.4822371 0.0002037858 0.1674058 0.2924497 0.08281139
## 5 0.2339323 0.2017145 0.0388014708 0.2511922 0.0846692 0.19585495
## 6 0.2541159 0.4687612 0.4518692966 0.2572189 0.3349934 0.37878770
##           57         58        59         60        61         62
## 1 0.14626342 0.04164413 0.1509995 0.40128770 0.3093822 0.22905094
## 2 0.29022855 0.19329812 0.2301673 0.25815658 0.2694100 0.22933454
## 3 0.25449781 0.19271670 0.2418131 0.45581364 0.1744140 0.05202851
## 4 0.62339587 0.57619340 0.2989209 0.43448909 0.0570256 0.01666584
## 5 0.10845809 0.22080214 0.1734944 0.14532625 0.2572304 0.14985245
## 6 0.08098882 0.11025491 0.3822097 0.06871624 0.4642782 0.40682182
##          63         64        65        66        67         68         69
## 1 0.2985771 0.28994530 0.3398867 0.2086863 0.4115091 0.10704452 0.16055010
## 2 0.2516777 0.30899047 0.2706617 0.1765883 0.1685053 0.13786008 0.10467567
## 3 0.1862233 0.27059213 0.4053198 0.1567393 0.2593603 0.09551333 0.10537713
## 4 0.2604404 0.46957827 0.2626264 0.1979441 0.5058753 0.09517487 0.05018873
## 5 0.0997726 0.17997809 0.2327499 0.1892915 0.1314505 0.19837197 0.03461529
## 6 0.3691181 0.01281013 0.3243530 0.4518212 0.3113032 0.50262643 0.44025461
##          70          71           72         73        74          75
## 1 0.1978082 -0.04525169  0.163433046 0.10576745 0.2114968 -0.03032197
## 2 0.2101386  0.07433689  0.179936490 0.16414152 0.1888246 -0.13758150
## 3 0.3071758  0.03464057 -0.004967610 0.09316952 0.2925640  0.09861653
## 4 0.3249120  0.09367013  0.001110791 0.13847712 0.3357485  0.25339948
## 5 0.0481126  0.17667442  0.052040950 0.14334627 0.2509061  0.11269178
## 6 0.4307216  0.17277061  0.285405238 0.18047785 0.1131694 -0.00271214
##           76          77          78          79          80        81
## 1 0.23195895  0.07667396  0.03323488  0.02104895 -0.01787629 0.4458802
## 2 0.02414987  0.24648903  0.10129019  0.11099796  0.11750118 0.1001534
## 3 0.37545023 -0.11391475  0.06170825 -0.04594455 -0.11289195 0.3884740
## 4 0.32582805  0.08454319  0.11514861  0.06206323  0.07117609 0.3416116
## 5 0.13813701  0.16241818 -0.06730685  0.05102862  0.17058804 0.2311745
## 6 0.27085098  0.23179076  0.33666903  0.16684135  0.33366799 0.2302327
##            82        83         84         85         86          87
## 1  0.16360731 0.2693224 0.18906989 0.16701265 0.37221959  0.02690194
## 2 -0.04989387 0.2694905 0.04632873 0.16644727 0.07038068 -0.04154067
## 3  0.10819431 0.1306376 0.06193234 0.06001095 0.12851688  0.09781543
## 4  0.23270034 0.2625194 0.24472692 0.11198194 0.19349913  0.09394957
## 5 -0.03305863 0.3270794 0.10662290 0.19738218 0.11003719  0.09792109
## 6  0.35147616 0.2613340 0.40026671 0.22136663 0.08756471  0.36336141
##            88          89          90          91          92           93
## 1  0.23525454  0.12517598 -0.01875062  0.24093068 0.003185035  0.230227603
## 2  0.11012968  0.09252558  0.11498090  0.20436111 0.115461476  0.153656620
## 3  0.12022548  0.08327957  0.09947107  0.03224014 0.052803767 -0.055363540
## 4 -0.05217219 -0.11251237  0.28923929 -0.13819448 0.064785890 -0.173953728
## 5 -0.01349688  0.06201759  0.22994716  0.06387780 0.025897510 -0.006095602
## 6  0.31635519  0.22677687  0.23375409  0.16025132 0.519361897  0.348347915
##            94        95           96          97         98         99
## 1  0.31475117 0.1814723 -0.099482559  0.40786880 0.06666792 0.14069068
## 2  0.09388688 0.2169415 -0.104498210  0.24165438 0.04511819 0.19975144
## 3 -0.03709201 0.1699300  0.003613705  0.12284761 0.11730475 0.04118937
## 4  0.23370115 0.1176007 -0.027966041 -0.01465989 0.09398252 0.07980947
## 5  0.27260665 0.2413827 -0.038614510  0.26861235 0.07159713 0.19005827
## 6  0.16109969 0.2942596  0.471269408  0.01251777 0.06541690 0.23891101
##           100
## 1 0.046733950
## 2 0.301178225
## 3 0.255774101
## 4 0.237323533
## 5 0.008345025
## 6 0.212482029


Perform Singular Value Decomposition on the item-item similarity matrix to determine underlying relationships in the recommendation data set.

# Decompose the item-item similarity matrix; the result holds the
# components `d`, `u` and `v`
s <- svd(simjester)
# First row of the left singular vector matrix ('U')
s[["u"]][1, ]
##   [1] -0.0969030190 -0.0722065656  0.0804084473 -0.2107449071  0.0168595214
##   [6]  0.0897631405  0.0358530439 -0.0189268406  0.0593783877 -0.1969800722
##  [11]  0.0459185922 -0.0272669322 -0.0277245783 -0.1485223680  0.0023215814
##  [16]  0.0022755667  0.2575619763 -0.0495567573 -0.0673956612 -0.0194665148
##  [21] -0.0052851723  0.0829636083  0.0770772756 -0.1815535593 -0.0251889765
##  [26] -0.0354655386 -0.1085228705  0.0918109223  0.0101944411  0.0638671472
##  [31] -0.1365888082 -0.0702195744 -0.0407469734 -0.0618247186  0.0920132228
##  [36]  0.0289873934 -0.0998326583 -0.0323572270 -0.1676584642  0.0006283951
##  [41]  0.1726330157 -0.0785435669 -0.0352099838 -0.0620035501 -0.0916049862
##  [46]  0.0460683213 -0.0321127522  0.0766960843  0.0519635220 -0.0358256531
##  [51] -0.2014334088  0.0303678870  0.0823305937 -0.1756351454 -0.0292385287
##  [56]  0.0243360557  0.1209871402  0.1364194752 -0.0719660063  0.1479560554
##  [61]  0.0148147026  0.1279322889 -0.1118124931 -0.2347718364  0.1974167818
##  [66] -0.1199634633 -0.0068976807 -0.1163952338 -0.1301298737 -0.0176983865
##  [71] -0.0896446783 -0.1258444334  0.0231088942  0.0658222903 -0.1199411798
##  [76]  0.0302401292  0.0723947102  0.2022125086  0.1621688193 -0.0475213605
##  [81]  0.0798959106  0.0920103369 -0.0677649014  0.0736826076 -0.0298175663
##  [86]  0.0130153786  0.0240679612 -0.1698049795 -0.0006057124  0.0176887205
##  [91]  0.0718552452 -0.1362245620 -0.0251610135 -0.1271243713  0.0353694657
##  [96]  0.0416407926  0.1065737952 -0.0298761082  0.1735228611 -0.0345426591
# First five singular values (the diagonal of the middle matrix)
head(s[["d"]], n = 5)
## [1] 21.622125  8.679419  5.500342  3.135774  2.657444
# First row of the right singular vector matrix ('V'), displayed as a
# single-row matrix
v <- s[["v"]][1, ]
matrix(v, nrow = 1)
##             [,1]        [,2]       [,3]       [,4]       [,5]       [,6]
## [1,] -0.09690302 -0.07220657 0.08040845 -0.2107449 0.01685952 0.08976314
##            [,7]        [,8]       [,9]      [,10]      [,11]       [,12]
## [1,] 0.03585304 -0.01892684 0.05937839 -0.1969801 0.04591859 -0.02726693
##           [,13]     [,14]        [,15]        [,16]     [,17]      [,18]
## [1,] 0.02772458 0.1485224 -0.002321581 -0.002275567 -0.257562 0.04955676
##           [,19]      [,20]       [,21]       [,22]       [,23]     [,24]
## [1,] 0.06739566 0.01946651 0.005285172 -0.08296361 -0.07707728 0.1815536
##           [,25]      [,26]     [,27]       [,28]       [,29]       [,30]
## [1,] 0.02518898 0.03546554 0.1085229 -0.09181092 -0.01019444 -0.06386715
##          [,31]      [,32]      [,33]      [,34]       [,35]       [,36]
## [1,] 0.1365888 0.07021957 0.04074697 0.06182472 -0.09201322 -0.02898739
##           [,37]      [,38]     [,39]         [,40]     [,41]      [,42]
## [1,] 0.09983266 0.03235723 0.1676585 -0.0006283951 -0.172633 0.07854357
##           [,43]      [,44]      [,45]       [,46]       [,47]       [,48]
## [1,] 0.03520998 0.06200355 0.09160499 -0.04606832 -0.03211275 -0.07669608
##            [,49]      [,50]     [,51]       [,52]       [,53]     [,54]
## [1,] -0.05196352 0.03582565 0.2014334 -0.03036789 -0.08233059 0.1756351
##           [,55]       [,56]      [,57]      [,58]      [,59]      [,60]
## [1,] 0.02923853 -0.02433606 -0.1209871 -0.1364195 0.07196601 -0.1479561
##           [,61]      [,62]     [,63]     [,64]      [,65]     [,66]
## [1,] -0.0148147 -0.1279323 0.1118125 0.2347718 -0.1974168 0.1199635
##            [,67]     [,68]      [,69]      [,70]      [,71]      [,72]
## [1,] 0.006897681 0.1163952 -0.1301299 0.01769839 0.08964468 -0.1258444
##            [,73]       [,74]     [,75]       [,76]       [,77]     [,78]
## [1,] -0.02310889 -0.06582229 0.1199412 -0.03024013 -0.07239471 0.2022125
##           [,79]      [,80]      [,81]       [,82]      [,83]       [,84]
## [1,] -0.1621688 0.04752136 0.07989591 -0.09201034 -0.0677649 -0.07368261
##           [,85]       [,86]       [,87]     [,88]        [,89]      [,90]
## [1,] 0.02981757 -0.01301538 -0.02406796 -0.169805 0.0006057124 0.01768872
##            [,91]     [,92]      [,93]      [,94]       [,95]      [,96]
## [1,] -0.07185525 0.1362246 0.02516101 -0.1271244 -0.03536947 0.04164079
##           [,97]       [,98]     [,99]     [,100]
## [1,] -0.1065738 -0.02987611 0.1735229 0.03454266


An interesting observation is that dimension reduction is possible: the singular values in the ‘d’ vector fall off quickly, so the greatest share of the variation is contained in the first five values noted above. Multiplying the first five columns of the left matrix by the diagonal matrix of those five singular values, and then by the transpose of the first five columns of the right matrix, would generate a matrix similar to the original similarity matrix (the input to `svd()`, not the result ‘s’). If this were greyscale image data, for example, these few components would be enough to generate a recognizable image.
The ‘v’ (right) matrix should expose concepts, or reveal underlying structure, in the original similarity matrix. In this case, with the large number of variables, it is difficult to ascertain how much of that structure is readily apparent in this dataset.