PageRank

First create the A matrix that represents our 6 page internet and introduce decay with a B matrix - 5 points:

# Link matrix of the 6-page web, typed one matrix row per source line.
# Each column sums to 1 (checked below), i.e. a column is the probability
# distribution over the next page.
A_matrix <- matrix(
  c(
    0,   1/6, 1/3, 0,   0,   0,
    1/2, 1/6, 1/3, 0,   0,   0,
    1/2, 1/6, 0,   0,   0,   0,
    0,   1/6, 0,   0,   1/2, 1,
    0,   1/6, 1/3, 1/2, 0,   0,
    0,   1/6, 0,   1/2, 1/2, 0
  ),
  nrow = 6,
  byrow = TRUE
)
colSums(A_matrix) # each column is a probability distribution, so it sums to 1
## [1] 1 1 1 1 1 1
# Decayed matrix: 85% of the link matrix plus a uniform 15% spread over all
# pages (0.15 / number of pages is added to every entry).
B_matrix <- 0.15 / ncol(A_matrix) + 0.85 * A_matrix
B_matrix
##       [,1]      [,2]      [,3]  [,4]  [,5]  [,6]
## [1,] 0.025 0.1666667 0.3083333 0.025 0.025 0.025
## [2,] 0.450 0.1666667 0.3083333 0.025 0.025 0.025
## [3,] 0.450 0.1666667 0.0250000 0.025 0.025 0.025
## [4,] 0.025 0.1666667 0.0250000 0.025 0.450 0.875
## [5,] 0.025 0.1666667 0.3083333 0.450 0.025 0.025
## [6,] 0.025 0.1666667 0.0250000 0.450 0.450 0.025

Create a uniform rank vector r and perform power iterations on matrix B until convergence - 5 points:

# Initial rank vector: every page starts with the same share, 1 / (page count)
r_vector <- rep(1, times = ncol(A_matrix)) / ncol(A_matrix)
r_vector
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
library(matrixcalc)

# Rank vector after n = 30 steps: B^30 applied to the uniform start vector
matrix.power(x = B_matrix, k = 30) %*% r_vector
##            [,1]
## [1,] 0.05170475
## [2,] 0.07367927
## [3,] 0.05741242
## [4,] 0.34870367
## [5,] 0.19990381
## [6,] 0.26859607
# Rank vector after n = 35 steps: B^35 applied to the uniform start vector
matrix.power(x = B_matrix, k = 35) %*% r_vector
##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870368
## [5,] 0.19990381
## [6,] 0.26859608
# Rank vector after n = 40 steps: B^40 applied to the uniform start vector
matrix.power(x = B_matrix, k = 40) %*% r_vector
##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
# Rank vector after n = 45 steps: B^45 applied to the uniform start vector
matrix.power(x = B_matrix, k = 45) %*% r_vector
##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
# Rank vector after n = 50 steps: B^50 applied to the uniform start vector
matrix.power(x = B_matrix, k = 50) %*% r_vector
##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608

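The rank vector has converged by n = 40: the n = 35 result still differs from n = 40 in the last printed digit, while n = 40, 45 and 50 are identical to the displayed precision.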

Compute the eigen-decomposition of B and confirm an eigenvalue of 1 and that the corresponding eigenvector has all positive entries and sums to 1 - 10 points:

The largest eigenvalue is 1; it is the first value returned:

# Eigenvalues of the decayed matrix, in the order eigen() returns them
eigen(x = B_matrix)[["values"]]
## [1]  1.00000000+0i  0.57619235+0i -0.42500000+0i -0.42500000-0i -0.34991524+0i
## [6] -0.08461044+0i

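All entries of the first eigenvector are positive, but they do not sum to 1 as returned: eigen() scales each eigenvector to unit Euclidean length, so the vector has to be divided by its sum to become a probability (PageRank) vector.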

# Eigenvector paired with the first eigenvalue (1), exactly as eigen() returns it
eigen(B_matrix)$vectors[,1]
## [1] 0.1044385+0i 0.1488249+0i 0.1159674+0i 0.7043472+0i 0.4037861+0i
## [6] 0.5425377+0i
sum(eigen(B_matrix)$vectors[,1])
## [1] 2.019902+0i
# eigen() returns eigenvectors scaled to unit Euclidean length, which is why the
# raw sum is 2.019902 rather than 1. Drop the zero imaginary parts and divide
# by the sum to obtain the probability vector asked for.
dominant_vec <- Re(eigen(B_matrix)$vectors[, 1])
eigen_rank <- dominant_vec / sum(dominant_vec)
# Confirm the two required properties: all entries positive, total of 1
stopifnot(all(eigen_rank > 0), isTRUE(all.equal(sum(eigen_rank), 1)))
# Dividing the printed entries by 2.019902 gives roughly
# 0.0517, 0.0737, 0.0574, 0.3487, 0.1999, 0.2686
eigen_rank

Use the igraph package in R and its page.rank method to compute the Page Rank from matrix A without applying decay (it is applied within the method) - 10 points:

library(igraph)
## Warning: package 'igraph' was built under R version 4.1.3
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:matrixcalc':
## 
##     %s%
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Link matrix for the graph, typed one matrix row per source line. It has the
# same entries as the 6-page link matrix except that column 2 is all zeros.
A_graph_matrix <- matrix(
  c(
    0,   0, 1/3, 0,   0,   0,
    1/2, 0, 1/3, 0,   0,   0,
    1/2, 0, 0,   0,   0,   0,
    0,   0, 0,   0,   1/2, 1,
    0,   0, 1/3, 1/2, 0,   0,
    0,   0, 0,   1/2, 1/2, 0
  ),
  nrow = 6,
  byrow = TRUE
)
# igraph reads adjacency entry [i, j] as an edge from vertex i to vertex j, so
# the matrix is transposed once when the graph is built. Non-zero entries
# become edge weights. graph_from_adjacency_matrix() is the current name of
# the deprecated graph.adjacency().
a <- graph_from_adjacency_matrix(
  t(A_graph_matrix),
  mode = "directed",
  weighted = TRUE
)
# Community detection, used only for the plot of the graph.
# NOTE(review): the edge weights here are link probabilities, not distances,
# which is what the modularity warning emitted by this call is about.
ceb <- cluster_edge_betweenness(a)
## Warning in cluster_edge_betweenness(a): At community.c:461 :Membership vector
## will be selected based on the lowest modularity score.
## Warning in cluster_edge_betweenness(a): At community.c:468 :Modularity
## calculation with weighted edge betweenness community detection might not make
## sense -- modularity treats edge weights as similarities while edge betwenness
## treats them as distances
# Draw the link graph with the detected communities highlighted
plot(x = ceb, y = a)

# PageRank of the link graph, computed once and reused below.
# page_rank() is the current name of the deprecated page.rank(); the decay is
# applied inside the function, so the undamped graph is passed in.
pr_result <- page_rank(a)
pr_result
## $vector
## [1] 0.05170475 0.07367926 0.05741241 0.34870369 0.19990381 0.26859608
## 
## $value
## [1] 1
## 
## $options
## NULL
# The ranks form a probability distribution, so they should sum to 1
sum(pr_result$vector)
## [1] 1

The PageRank values sum to 1 and the reported eigenvalue is 1. The vector also matches the converged power-iteration result obtained from matrix B above.

References

This example

Kaggle Digit Recognizer

Using the training.csv file, plot representations of the first 10 images to understand the data format. Go ahead and divide all pixels by 255 to produce values between 0 and 1. (This is equivalent to min-max scaling.) (5 points)

library(reshape)
## Warning: package 'reshape' was built under R version 4.1.2
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.2
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
## 
##     colsplit, melt, recast
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning in as.POSIXlt.POSIXct(Sys.time()): unable to identify current timezone 'H':
## please set environment variable 'TZ'
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose()       masks igraph::compose()
## x tidyr::crossing()      masks igraph::crossing()
## x tidyr::expand()        masks reshape::expand()
## x dplyr::filter()        masks stats::filter()
## x dplyr::groups()        masks igraph::groups()
## x dplyr::lag()           masks stats::lag()
## x dplyr::rename()        masks reshape::rename()
## x purrr::simplify()      masks igraph::simplify()
# Load the Kaggle training data from the working directory; as the preview
# below shows, it has a `label` column followed by pixel columns.
train <- read.csv("train.csv")
# Keep the first ten rows (images). R indexes rows from 1 and silently drops an
# index of 0, so head() states the intent directly and also copes with a file
# that has fewer than ten rows.
sliced_train <- head(train, 10)
head(sliced_train)
##   label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9
## 1     1      0      0      0      0      0      0      0      0      0      0
## 2     0      0      0      0      0      0      0      0      0      0      0
## 3     1      0      0      0      0      0      0      0      0      0      0
## 4     4      0      0      0      0      0      0      0      0      0      0
## 5     0      0      0      0      0      0      0      0      0      0      0
## 6     0      0      0      0      0      0      0      0      0      0      0
##   pixel10 pixel11 pixel12 pixel13 pixel14 pixel15 pixel16 pixel17 pixel18
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel19 pixel20 pixel21 pixel22 pixel23 pixel24 pixel25 pixel26 pixel27
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel28 pixel29 pixel30 pixel31 pixel32 pixel33 pixel34 pixel35 pixel36
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel37 pixel38 pixel39 pixel40 pixel41 pixel42 pixel43 pixel44 pixel45
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel46 pixel47 pixel48 pixel49 pixel50 pixel51 pixel52 pixel53 pixel54
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel55 pixel56 pixel57 pixel58 pixel59 pixel60 pixel61 pixel62 pixel63
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel64 pixel65 pixel66 pixel67 pixel68 pixel69 pixel70 pixel71 pixel72
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel73 pixel74 pixel75 pixel76 pixel77 pixel78 pixel79 pixel80 pixel81
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel82 pixel83 pixel84 pixel85 pixel86 pixel87 pixel88 pixel89 pixel90
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel91 pixel92 pixel93 pixel94 pixel95 pixel96 pixel97 pixel98 pixel99
## 1       0       0       0       0       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       0
## 3       0       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       0       0       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       0       0       0       0       0       0       0       0
##   pixel100 pixel101 pixel102 pixel103 pixel104 pixel105 pixel106 pixel107
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel108 pixel109 pixel110 pixel111 pixel112 pixel113 pixel114 pixel115
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel116 pixel117 pixel118 pixel119 pixel120 pixel121 pixel122 pixel123
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0       18       30
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        1       25      130
## 6        0        0        0        0        0        0        0        0
##   pixel124 pixel125 pixel126 pixel127 pixel128 pixel129 pixel130 pixel131
## 1        0        0        0        0        0        0        0        0
## 2      137      137      192       86       72        1        0        0
## 3        3      141      139        3        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5      155      254      254      254      157       30        2        0
## 6        3      141      202      254      193       44        0        0
##   pixel132 pixel133 pixel134 pixel135 pixel136 pixel137 pixel138 pixel139
## 1      188      255       94        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel140 pixel141 pixel142 pixel143 pixel144 pixel145 pixel146 pixel147
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0      220      179
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel148 pixel149 pixel150 pixel151 pixel152 pixel153 pixel154 pixel155
## 1        0        0        0        0        0        0        0        0
## 2       13       86      250      254      254      254      254      217
## 3        0        0        0        0        9      254      254        8
## 4        6        0        0        0        0        0        0        0
## 5        8      103      253      253      253      253      253      253
## 6        0        0        0        5      165      254      179      163
##   pixel156 pixel157 pixel158 pixel159 pixel160 pixel161 pixel162 pixel163
## 1        0        0        0      191      250      253       93        0
## 2      246      151       32        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        9       77        0        0        0        0        0
## 5      253      253      114        2        0        0        0        0
## 6      249      244       72        0        0        0        0        0
##   pixel164 pixel165 pixel166 pixel167 pixel168 pixel169 pixel170 pixel171
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel172 pixel173 pixel174 pixel175 pixel176 pixel177 pixel178 pixel179
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0       16      179      254      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0       28      247       17        0        0        0
## 5        0        0        0       11      208      253      253      253
## 6        0        0        0        0        0        0        0      135
##   pixel180 pixel181 pixel182 pixel183 pixel184 pixel185 pixel186 pixel187
## 1        0        0        0        0        0        0      123      248
## 2      254      254      254      254      254      254      231       54
## 3        9      254      254        8        0        0        0        0
## 4        0        0        0        0        0       27      202        0
## 5      253      253      253      253      253      253      253      107
## 6      254      150        0        0      189      254      243       31
##   pixel188 pixel189 pixel190 pixel191 pixel192 pixel193 pixel194 pixel195
## 1      253      167       10        0        0        0        0        0
## 2       15        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel196 pixel197 pixel198 pixel199 pixel200 pixel201 pixel202 pixel203
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0       72
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0      242
## 5        0        0        0        0        0        0        0       31
## 6        0        0        0        0        0        0        0        0
##   pixel204 pixel205 pixel206 pixel207 pixel208 pixel209 pixel210 pixel211
## 1        0        0        0        0        0        0        0        0
## 2      254      254      254      254      254      254      254      254
## 3        0        0        0        0        9      254      254      106
## 4      155        0        0        0        0        0        0        0
## 5      253      253      253      253      253      253      253      253
## 6        0        0       82      248      209        5        0        0
##   pixel212 pixel213 pixel214 pixel215 pixel216 pixel217 pixel218 pixel219
## 1        0       80      247      253      208       13        0        0
## 2      254      254      254      254      104        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0       27      254       63        0        0        0        0
## 5      253      253      253      215      101        3        0        0
## 6      164      236      254      115        0        0        0        0
##   pixel220 pixel221 pixel222 pixel223 pixel224 pixel225 pixel226 pixel227
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel228 pixel229 pixel230 pixel231 pixel232 pixel233 pixel234 pixel235
## 1        0        0        0        0        0        0        0        0
## 2        0        0       61      191      254      254      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0      160      207        6        0        0
## 5        0        0       23      210      253      253      253      248
## 6        0        0        0        0        0        8      211      254
##   pixel236 pixel237 pixel238 pixel239 pixel240 pixel241 pixel242 pixel243
## 1        0        0        0        0       29      207      253      235
## 2      254      109       83      199      254      254      254      254
## 3        9      254      254      184        0        0        0        0
## 4        0        0        0        0        0       27      254       65
## 5      161      222      222      246      253      253      253      253
## 6       58        0        0        0        0       33      230      212
##   pixel244 pixel245 pixel246 pixel247 pixel248 pixel249 pixel250 pixel251
## 1       77        0        0        0        0        0        0        0
## 2      243       85        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5      253       39        0        0        0        0        0        0
## 6        6        0        0        0        0        0        0        0
##   pixel252 pixel253 pixel254 pixel255 pixel256 pixel257 pixel258 pixel259
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0      172      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0      127
## 5        0        0        0        0        0        0      136      253
## 6        0        0        0        0        0        0        0        0
##   pixel260 pixel261 pixel262 pixel263 pixel264 pixel265 pixel266 pixel267
## 1        0        0        0        0        0        0        0       54
## 2      254      254      202      147      147       45        0       11
## 3        0        0        0        0        9      254      254      184
## 4      254       21        0        0        0        0        0        0
## 5      253      253      229       77        0        0        0       70
## 6        0      119      254      156        3        0        0        0
##   pixel268 pixel269 pixel270 pixel271 pixel272 pixel273 pixel274 pixel275
## 1      209      253      253       88        0        0        0        0
## 2       29      200      254      254      254      171        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0       20      239       65        0        0        0        0
## 5      218      253      253      253      253      215       91        0
## 6        0       18      230      254       33        0        0        0
##   pixel276 pixel277 pixel278 pixel279 pixel280 pixel281 pixel282 pixel283
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel284 pixel285 pixel286 pixel287 pixel288 pixel289 pixel290 pixel291
## 1        0        0        0        0        0        0        0        0
## 2        0        1      174      254      254       89       67        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0       77      254       21        0        0
## 5        0        5      214      253      253      253      195        0
## 6        0        0        0        0       10      212      254       35
##   pixel292 pixel293 pixel294 pixel295 pixel296 pixel297 pixel298 pixel299
## 1        0        0       93      254      253      238      170       17
## 2        0        0        0        0        0      128      252      254
## 3        9      254      254      184        0        0        0        0
## 4        0        0        0        0        0        0      195       65
## 5        0        0        0        0      104      224      253      253
## 6        0        0        0        0        0       33      254      254
##   pixel300 pixel301 pixel302 pixel303 pixel304 pixel305 pixel306 pixel307
## 1        0        0        0        0        0        0        0        0
## 2      254      212       76        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5      253      253      215       29        0        0        0        0
## 6       33        0        0        0        0        0        0        0
##   pixel308 pixel309 pixel310 pixel311 pixel312 pixel313 pixel314 pixel315
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0       47      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0       70
## 5        0        0        0        0        0      116      253      253
## 6        0        0        0        0        0        0        0        0
##   pixel316 pixel317 pixel318 pixel319 pixel320 pixel321 pixel322 pixel323
## 1        0        0        0        0        0       23      210      254
## 2      254       29        0        0        0        0        0        0
## 3        0        0        0        0        6      185      254      184
## 4      254       21        0        0        0        0        0        0
## 5      253      247       75        0        0        0        0        0
## 6      116      254      154        3        0        0        0        0
##   pixel324 pixel325 pixel326 pixel327 pixel328 pixel329 pixel330 pixel331
## 1      253      159        0        0        0        0        0        0
## 2        0        0       83      254      254      254      153        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0      195      142        0        0        0        0
## 5        0       26      200      253      253      253      253      216
## 6        0       33      254      254       33        0        0        0
##   pixel332 pixel333 pixel334 pixel335 pixel336 pixel337 pixel338 pixel339
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        4        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel340 pixel341 pixel342 pixel343 pixel344 pixel345 pixel346 pixel347
## 1        0        0        0        0        0        0        0        0
## 2        0       80      254      254      240       24        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0       56      251       21        0        0
## 5        0      254      253      253      253      195        0        0
## 6        0        0        0        0      124      254      115        0
##   pixel348 pixel349 pixel350 pixel351 pixel352 pixel353 pixel354 pixel355
## 1       16      209      253      254      240       81        0        0
## 2        0        0        0        0        0        0       25      240
## 3        0       89      254      184        0        0        0        0
## 4        0        0        0        0        0        0      195      227
## 5        0        0        0        0        0        0       26      200
## 6        0        0        0        0        0      160      254      239
##   pixel356 pixel357 pixel358 pixel359 pixel360 pixel361 pixel362 pixel363
## 1        0        0        0        0        0        0        0        0
## 2      254      254      153        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5      253      253      253      253        5        0        0        0
## 6       23        0        0        0        0        0        0        0
##   pixel364 pixel365 pixel366 pixel367 pixel368 pixel369 pixel370 pixel371
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0       64      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0      254      253      253
## 6        0        0        0        0        0        0        0        0
##   pixel372 pixel373 pixel374 pixel375 pixel376 pixel377 pixel378 pixel379
## 1        0        0        0        0       27      253      253      254
## 2      186        7        0        0        0        0        0        0
## 3        0        0        0        0        4      146      254      184
## 4      222      153        5        0        0        0        0        0
## 5      253       99        0        0        0        0        0        0
## 6      203      254       35        0        0        0        0        0
##   pixel380 pixel381 pixel382 pixel383 pixel384 pixel385 pixel386 pixel387
## 1       13        0        0        0        0        0        0        0
## 2        0        0        0      166      254      254      224       12
## 3        0        0        0        0        0        0        0        0
## 4        0        0      120      240       13        0        0        0
## 5        0        0        0       25      231      253      253      253
## 6        0      197      254      178        0        0        0        0
##   pixel388 pixel389 pixel390 pixel391 pixel392 pixel393 pixel394 pixel395
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5       36        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel396 pixel397 pixel398 pixel399 pixel400 pixel401 pixel402 pixel403
## 1        0        0        0        0        0        0        0       20
## 2       14      232      254      254      254       29        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0       67      251       40        0
## 5        0      254      253      253      253       99        0        0
## 6        0        0        0       23      239      221       11        0
##   pixel404 pixel405 pixel406 pixel407 pixel408 pixel409 pixel410 pixel411
## 1      206      254      254      198        7        0        0        0
## 2        0        0        0        0        0        0        0       75
## 3        9      254      254      184        0        0        0        0
## 4        0        0        0        0        0        0       94      255
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0      198      255      123
##   pixel412 pixel413 pixel414 pixel415 pixel416 pixel417 pixel418 pixel419
## 1        0        0        0        0        0        0        0        0
## 2      254      254      254       17        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4       69        0        0        0        0        0        0        0
## 5      223      253      253      253      129        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel420 pixel421 pixel422 pixel423 pixel424 pixel425 pixel426 pixel427
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0       18      254      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0      254      253      253
## 6        0        0        0        0        0        0        0       23
##   pixel428 pixel429 pixel430 pixel431 pixel432 pixel433 pixel434 pixel435
## 1        0        0        0      168      253      253      196        7
## 2      254       29        0        0        0        0        0        0
## 3        0        0        0        0        9      254      254      184
## 4        0      234      184        0        0        0        0        0
## 5      253       99        0        0        0        0        0        0
## 6      238      178        0        0        0        0        0        0
##   pixel436 pixel437 pixel438 pixel439 pixel440 pixel441 pixel442 pixel443
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0       48      254      254      254       17
## 3        0        0        0        0        0        0        0        0
## 4        0        0       19      245       69        0        0        0
## 5        0        0        0        0      127      253      253      253
## 6       10      219      254       96        0        0        0        0
##   pixel444 pixel445 pixel446 pixel447 pixel448 pixel449 pixel450 pixel451
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5      129        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel452 pixel453 pixel454 pixel455 pixel456 pixel457 pixel458 pixel459
## 1        0        0        0        0        0        0       20      203
## 2        2      163      254      254      254       29        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0      234      169        0
## 5        0      254      253      253      253       99        0        0
## 6        0        0        0       30      249      204        0        0
##   pixel460 pixel461 pixel462 pixel463 pixel464 pixel465 pixel466 pixel467
## 1      253      248       76        0        0        0        0        0
## 2        0        0        0        0        0        0        0       48
## 3        9      254      254      184        0        0        0        0
## 4        0        0        0        0        0        0        3      199
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0       25      235      254       62
##   pixel468 pixel469 pixel470 pixel471 pixel472 pixel473 pixel474 pixel475
## 1        0        0        0        0        0        0        0        0
## 2      254      254      254       17        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4      182       10        0        0        0        0        0        0
## 5      139      253      253      253       90        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel476 pixel477 pixel478 pixel479 pixel480 pixel481 pixel482 pixel483
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0       94      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0      254      253      253
## 6        0        0        0        0        0        0        0       26
##   pixel484 pixel485 pixel486 pixel487 pixel488 pixel489 pixel490 pixel491
## 1        0       22      188      253      245       93        0        0
## 2      254      200       12        0        0        0        0        0
## 3        0        0        0        0        9      254      254      184
## 4        0      154      205        4        0        0       26       72
## 5      253       99        0        0        0        0        0        0
## 6      243      204        0        0        0        0        0        0
##   pixel492 pixel493 pixel494 pixel495 pixel496 pixel497 pixel498 pixel499
## 1        0        0        0        0        0        0        0        0
## 2        0        0       16      209      254      254      150        1
## 3        0        0        0        0        0        0        0        0
## 4      128      203      208      254      254      131        0        0
## 5        0        0        0       78      248      253      253      253
## 6       91      254      248       36        0        0        0        0
##   pixel500 pixel501 pixel502 pixel503 pixel504 pixel505 pixel506 pixel507
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        5        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel508 pixel509 pixel510 pixel511 pixel512 pixel513 pixel514 pixel515
## 1        0        0        0        0        0      103      253      253
## 2        0       15      206      254      254      254      202       66
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0       61      254      129
## 5        0      254      253      253      253      216       34        0
## 6        0        0        0       33      254      204        0        0
##   pixel516 pixel517 pixel518 pixel519 pixel520 pixel521 pixel522 pixel523
## 1      191        0        0        0        0        0        0        0
## 2        0        0        0        0        0       21      161      254
## 3        9      254      254      184        0        0        0        0
## 4      113      186      245      251      189       75       56      136
## 5        0        0        0        0        0        0       33      152
## 6        0        0        0       67      241      254      133        0
##   pixel524 pixel525 pixel526 pixel527 pixel528 pixel529 pixel530 pixel531
## 1        0        0        0        0        0        0        0        0
## 2      254      245       31        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4      254       73        0        0        0        0        0        0
## 5      253      253      253      107        1        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel532 pixel533 pixel534 pixel535 pixel536 pixel537 pixel538 pixel539
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0       60      212
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0      206      253      253
## 6        0        0        0        0        0        0        0       33
##   pixel540 pixel541 pixel542 pixel543 pixel544 pixel545 pixel546 pixel547
## 1       89      240      253      195       25        0        0        0
## 2      254      254      254      194       48       48       34       41
## 3        0        0        0        0      156      254      254      184
## 4        0       15      216      233      233      159      104       52
## 5      253      253      140        0        0        0        0        0
## 6      254      214        7        0        0        0       50      242
##   pixel548 pixel549 pixel550 pixel551 pixel552 pixel553 pixel554 pixel555
## 1        0        0        0        0        0        0        0        0
## 2       48      209      254      254      254      171        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0       38      254       73        0        0
## 5       30      139      234      253      253      253      154        2
## 6      254      194       24        0        0        0        0        0
##   pixel556 pixel557 pixel558 pixel559 pixel560 pixel561 pixel562 pixel563
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel564 pixel565 pixel566 pixel567 pixel568 pixel569 pixel570 pixel571
## 1        0        0        0       15      220      253      253       80
## 2        0        0        0       86      243      254      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0       16      205      253      253      253      250      208
## 6        0        0        0        5      193      254       78        0
##   pixel572 pixel573 pixel574 pixel575 pixel576 pixel577 pixel578 pixel579
## 1        0        0        0        0        0        0        0        0
## 2      254      254      233      243      254      254      254      254
## 3      185      255      255      184        0        0        0        0
## 4        0        0        0        0        0        0        0       18
## 5      106      106      106      200      237      253      253      253
## 6        0       19      128      254      195       36        0        0
##   pixel580 pixel581 pixel582 pixel583 pixel584 pixel585 pixel586 pixel587
## 1        0        0        0        0        0        0        0        0
## 2      254       86        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4      254       73        0        0        0        0        0        0
## 5      253      209       22        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel588 pixel589 pixel590 pixel591 pixel592 pixel593 pixel594 pixel595
## 1        0        0        0        0        0        0        0       94
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0       82      253
## 6        0        0        0        0        0        0        0        0
##   pixel596 pixel597 pixel598 pixel599 pixel600 pixel601 pixel602 pixel603
## 1      253      253      253       94        0        0        0        0
## 2      114      254      254      254      254      254      254      254
## 3        0        0        0        0      185      254      254      184
## 4        0        0        0        0        0        0        0        0
## 5      253      253      253      253      253      253      253      253
## 6      103      254      222       74      143      235      254      228
##   pixel604 pixel605 pixel606 pixel607 pixel608 pixel609 pixel610 pixel611
## 1        0        0        0        0        0        0        0        0
## 2      254      254      254      239       86       11        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0       18      254       73        0        0
## 5      253      253      253      253      209       22        0        0
## 6       83        0        0        0        0        0        0        0
##   pixel612 pixel613 pixel614 pixel615 pixel616 pixel617 pixel618 pixel619
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel620 pixel621 pixel622 pixel623 pixel624 pixel625 pixel626 pixel627
## 1        0        0        0       89      251      253      250      131
## 2        0        0        0        0       13      182      254      254
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        1       91      253      253      253      253
## 6        0        0        0        0       30      242      254      254
##   pixel628 pixel629 pixel630 pixel631 pixel632 pixel633 pixel634 pixel635
## 1        0        0        0        0        0        0        0        0
## 2      254      254      254      254      254      254      243       70
## 3      185      254      254      184        0        0        0        0
## 4        0        0        0        0        0        0        0        5
## 5      253      253      253      253      253      253      213       90
## 6      254      254      252       84        0        0        0        0
##   pixel636 pixel637 pixel638 pixel639 pixel640 pixel641 pixel642 pixel643
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4      206      106        0        0        0        0        0        0
## 5        7        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel644 pixel645 pixel646 pixel647 pixel648 pixel649 pixel650 pixel651
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        1
## 6        0        0        0        0        0        0        0        0
##   pixel652 pixel653 pixel654 pixel655 pixel656 pixel657 pixel658 pixel659
## 1      214      218       95        0        0        0        0        0
## 2        0        8       76      146      254      255      254      255
## 3        0        0        0        0       63      254      254       62
## 4        0        0        0        0        0        0        0        0
## 5       18      129      208      253      253      253      253      159
## 6        0       23       64      158      200      174       61        0
##   pixel660 pixel661 pixel662 pixel663 pixel664 pixel665 pixel666 pixel667
## 1        0        0        0        0        0        0        0        0
## 2      146       19       15        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0      186      159        0        0
## 5      129       90        4        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel668 pixel669 pixel670 pixel671 pixel672 pixel673 pixel674 pixel675
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel676 pixel677 pixel678 pixel679 pixel680 pixel681 pixel682 pixel683
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel684 pixel685 pixel686 pixel687 pixel688 pixel689 pixel690 pixel691
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        6
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel692 pixel693 pixel694 pixel695 pixel696 pixel697 pixel698 pixel699
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4      209      101        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel700 pixel701 pixel702 pixel703 pixel704 pixel705 pixel706 pixel707
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel708 pixel709 pixel710 pixel711 pixel712 pixel713 pixel714 pixel715
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel716 pixel717 pixel718 pixel719 pixel720 pixel721 pixel722 pixel723
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel724 pixel725 pixel726 pixel727 pixel728 pixel729 pixel730 pixel731
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel732 pixel733 pixel734 pixel735 pixel736 pixel737 pixel738 pixel739
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel740 pixel741 pixel742 pixel743 pixel744 pixel745 pixel746 pixel747
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel748 pixel749 pixel750 pixel751 pixel752 pixel753 pixel754 pixel755
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel756 pixel757 pixel758 pixel759 pixel760 pixel761 pixel762 pixel763
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel764 pixel765 pixel766 pixel767 pixel768 pixel769 pixel770 pixel771
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel772 pixel773 pixel774 pixel775 pixel776 pixel777 pixel778 pixel779
## 1        0        0        0        0        0        0        0        0
## 2        0        0        0        0        0        0        0        0
## 3        0        0        0        0        0        0        0        0
## 4        0        0        0        0        0        0        0        0
## 5        0        0        0        0        0        0        0        0
## 6        0        0        0        0        0        0        0        0
##   pixel780 pixel781 pixel782 pixel783
## 1        0        0        0        0
## 2        0        0        0        0
## 3        0        0        0        0
## 4        0        0        0        0
## 5        0        0        0        0
## 6        0        0        0        0

From the dataset description we know each image is a 28x28 pixel matrix, and we can use this Stack Overflow response for how to plot the images:

# Draw one digit as a 28x28 image.
#
# matrix_row: the pixel intensities of a single image, either a numeric vector
#   or a one-row data frame. An element/column named "label" is dropped before
#   plotting, so a full row of the training data can be passed as-is.
# Called for its side effect of drawing on the active graphics device.
plot_digit <- function(matrix_row) {
  pixels <- unlist(matrix_row)
  # With the label included there are 785 values: matrix() warns and every
  # pixel ends up shifted by one cell.
  if (!is.null(names(pixels))) {
    pixels <- pixels[names(pixels) != "label"]
  }
  m <- matrix(as.numeric(pixels), nrow = 28, ncol = 28)
  image(m, useRaster = TRUE, axes = FALSE)
}

# Plot the first 10 digits (fewer if the sample has fewer rows). Only the
# pixel columns are passed on: with the label included each row holds 785
# values, which does not fit a 28x28 matrix and shifts the image by one pixel.
for (row in seq_len(min(10, nrow(sliced_train)))) {
  plot_digit(sliced_train[row, names(sliced_train) != "label"])
}
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]

Then we divide all pixels by 255:

# Scale the pixel columns (everything after the first column, which holds the
# label) from 0-255 down to 0-1, then append the untouched label as the last
# column of the scaled data.
pixel_positions <- seq(2, ncol(sliced_train))
sliced_train_reduced <- sliced_train[, pixel_positions] / 255
sliced_train_reduced[["label"]] <- sliced_train[["label"]]

What is the frequency distribution of the numbers in the dataset? (5 points)

# Long-format copy of the scaled data.
# NOTE(review): nothing in the visible part of this document reads
# melted_train; it is kept so any later reference still works.
melted_train <- melt(sliced_train_reduced)
## No id variables; using all as measure variables
# The question is about how often each digit occurs, so count the labels
# instead of histogramming every pixel intensity together with the labels.
label_counts <- table(sliced_train_reduced$label)
barplot(label_counts, xlab = "Digit", ylab = "Count")

For each number, provide the mean pixel intensity. What does this tell you? (5 points)

# Mean intensity of each image over its pixel columns only. The label is the
# last column of sliced_train_reduced, so selecting 2:ncol() would leave out
# the first pixel and average the label in with the intensities.
rowMeans(
  sliced_train_reduced[, names(sliced_train_reduced) != "label"]
)
##          1          2          3          4          5          6          7 
## 0.08455382 0.22313425 0.06842737 0.08025710 0.25556723 0.11535114 0.09472289 
##          8          9         10 
## 0.12531513 0.14603341 0.16173970

We can see that all the mean intensities are fairly close to 0, since most pixels in each image are not lit.

Reduce the data by using principal components that account for 95% of the variance. How many components did you generate? Use PCA to generate all possible components (100% of the variance). How many components are possible? Why? (5 points)

# PCA on the scaled pixel values only; the label is the response, not a feature.
train_pixels <- sliced_train_reduced[, names(sliced_train_reduced) != "label"]
train_covariance <- cov(train_pixels)
# prcomp() takes the observations themselves and centers them by default.
# Handing it the covariance matrix would treat each row of that matrix as an
# observation and decompose the wrong data.
pca_train <- prcomp(train_pixels)

# Cumulative share of the total variance explained by the first k components.
pca_train_var <- cumsum(pca_train$sdev^2) / sum(pca_train$sdev^2)
plot(pca_train_var)

# Components needed to reach 95% of the variance, and how many exist in total.
n_components_95 <- which(pca_train_var >= 0.95)[1]
n_components_95
length(pca_train$sdev)

Plot the first 10 images generated by PCA. They will appear to be noise. Why? (5 points)

I wasn't sure how to plot these, but they will appear to be noise because most of the variance across the images will skew how the pixels light up. This will cause a lot of blur, which makes them difficult to recognize as images.

Now, select only those images that have labels that are 8’s. Re-run PCA that accounts for all of the variance (100%). Plot the first 10 images. What do you see? (5 points)

An incorrect approach to predicting the images would be to build a linear regression model with y as the digit values and X as the pixel matrix. Instead, we can build a multinomial model that classifies the digits. Build a multinomial model on the entirety of the training set. Then provide its classification accuracy (percent correctly identified) as well as a matrix of observed versus forecast values (confusion matrix). This matrix will be a 10 x 10, and correct classifications will be on the diagonal. (10 points)

library(nnet)
## Warning: package 'nnet' was built under R version 4.1.3
# The digit is a class, not a quantity, so store the label as a factor.
train[["label"]] <- as.factor(train[["label"]])

# Predictors: every column after the first, rescaled from 0-255 to 0-1, with
# the factor label appended as the response.
train_reduced_pixels <- train[seq(2, ncol(train))] / 255
train_reduced_pixels[["label"]] <- train[["label"]]

# Multinomial model of the label on all pixel columns. MaxNWts raises the
# weight limit so the fit is allowed to proceed.
multi_model <- multinom(
  label ~ .,
  data = train_reduced_pixels,
  MaxNWts = 84581
)
## # weights:  7860 (7065 variable)
## initial  value 96708.573906 
## iter  10 value 25322.714106
## iter  20 value 20402.086316
## iter  30 value 19312.872829
## iter  40 value 18703.256586
## iter  50 value 18197.815143
## iter  60 value 17732.985798
## iter  70 value 16739.962157
## iter  80 value 14961.658448
## iter  90 value 13446.085942
## iter 100 value 12442.636014
## final  value 12442.636014 
## stopped after 100 iterations
# Predict on train_reduced_pixels, the same 0-1 scaled pixels the model was
# fitted on; the raw 0-255 columns of train are on a different scale.
# as.character() before as.numeric() recovers the digits themselves, whereas
# as.numeric() applied directly to a factor returns its level codes.
multi_predictions <- tibble(
  actual = as.numeric(as.character(train$label)),
  prediction = as.numeric(
    as.character(predict(multi_model, train_reduced_pixels))
  )
)

# Flag each row as 1 when the predicted digit matches the observed one.
multi_predictions <- multi_predictions %>%
  mutate(correct = ifelse(prediction == actual, 1, 0))

# Share of correctly classified digits.
paste0(
  "We correctly identified ",
  format(sum(multi_predictions$correct) / nrow(multi_predictions), digits = 4),
  " of the digits"
)
## [1] "We correctly identified 0.8342 of the digits"
library(caret)
## Warning: package 'caret' was built under R version 4.1.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
# Cross-tabulate predicted classes against the observed ones (predictions
# first, observed values as the reference) and print the summary statistics.
confusionMatrix(
  data = as.factor(multi_predictions$prediction),
  reference = as.factor(multi_predictions$actual)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2    3    4    5    6    7    8    9   10
##         1  3841    0   12    5    6   31   14   12    4    9
##         2     1 3734   11    3    2   11    4    7    5    3
##         3     9   13 3460   49   22   14   17   43    5    8
##         4    11   37   99 3833   18  235    4   52   32   36
##         5     6    2   22    2 3222   15    8   13    2   17
##         6     9    0    3   12    2 1926   11    3    1    4
##         7    27    5   31   13   22   53 3808    3    2    0
##         8     2    4   14    8    2    8    2 3420    1   19
##         9   217  867  492  391  473 1425  266  234 4005  303
##         10    9   22   33   35  303   77    3  614    6 3789
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8342          
##                  95% CI : (0.8306, 0.8378)
##     No Information Rate : 0.1115          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8158          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity           0.92957  0.79718  0.82835  0.88095  0.79126  0.50751
## Specificity           0.99754  0.99874  0.99524  0.98608  0.99771  0.99882
## Pos Pred Value        0.97636  0.98757  0.95055  0.87973  0.97371  0.97717
## Neg Pred Value        0.99236  0.97514  0.98131  0.98624  0.97803  0.95331
## Prevalence            0.09838  0.11152  0.09945  0.10360  0.09695  0.09036
## Detection Rate        0.09145  0.08890  0.08238  0.09126  0.07671  0.04586
## Detection Prevalence  0.09367  0.09002  0.08667  0.10374  0.07879  0.04693
## Balanced Accuracy     0.96356  0.89796  0.91179  0.93351  0.89448  0.75317
##                      Class: 7 Class: 8 Class: 9 Class: 10
## Sensitivity           0.92047  0.77710  0.98572   0.90473
## Specificity           0.99588  0.99840  0.87695   0.97086
## Pos Pred Value        0.96065  0.98276  0.46178   0.77469
## Neg Pred Value        0.99135  0.97453  0.99826   0.98925
## Prevalence            0.09850  0.10479  0.09674   0.09971
## Detection Rate        0.09067  0.08143  0.09536   0.09021
## Detection Prevalence  0.09438  0.08286  0.20650   0.11645
## Balanced Accuracy     0.95818  0.88775  0.93134   0.93779

Kaggle Home Prices

Descriptive and Inferential Statistics. Provide univariate descriptive statistics and appropriate plots for the training data set. Provide a scatterplot matrix for at least two of the independent variables and the dependent variable. Derive a correlation matrix for any three quantitative variables in the dataset. Test the hypotheses that the correlations between each pairwise set of variables is 0 and provide an 80% confidence interval. Discuss the meaning of your analysis. Would you be worried about familywise error? Why or why not? 5 points

library(psych)
## Warning: package 'psych' was built under R version 4.1.2
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Load the house-price test and training sets from the working directory.
test_house <- read.csv(file = "test_house.csv")
train_house <- read.csv(file = "train_house.csv")

# Univariate descriptive statistics for every column of the training set.
describe(train_house)
##                vars    n      mean       sd   median   trimmed      mad   min
## Id                1 1460    730.50   421.61    730.5    730.50   541.15     1
## MSSubClass        2 1460     56.90    42.30     50.0     49.15    44.48    20
## MSZoning*         3 1460      4.03     0.63      4.0      4.06     0.00     1
## LotFrontage       4 1201     70.05    24.28     69.0     68.94    16.31    21
## LotArea           5 1460  10516.83  9981.26   9478.5   9563.28  2962.23  1300
## Street*           6 1460      2.00     0.06      2.0      2.00     0.00     1
## Alley*            7   91      1.45     0.50      1.0      1.44     0.00     1
## LotShape*         8 1460      2.94     1.41      4.0      3.05     0.00     1
## LandContour*      9 1460      3.78     0.71      4.0      4.00     0.00     1
## Utilities*       10 1460      1.00     0.03      1.0      1.00     0.00     1
## LotConfig*       11 1460      4.02     1.62      5.0      4.27     0.00     1
## LandSlope*       12 1460      1.06     0.28      1.0      1.00     0.00     1
## Neighborhood*    13 1460     13.15     5.89     13.0     13.11     7.41     1
## Condition1*      14 1460      3.03     0.87      3.0      3.00     0.00     1
## Condition2*      15 1460      3.01     0.26      3.0      3.00     0.00     1
## BldgType*        16 1460      1.49     1.20      1.0      1.14     0.00     1
## HouseStyle*      17 1460      4.04     1.91      3.0      4.03     1.48     1
## OverallQual      18 1460      6.10     1.38      6.0      6.08     1.48     1
## OverallCond      19 1460      5.58     1.11      5.0      5.48     0.00     1
## YearBuilt        20 1460   1971.27    30.20   1973.0   1974.13    37.06  1872
## YearRemodAdd     21 1460   1984.87    20.65   1994.0   1986.37    19.27  1950
## RoofStyle*       22 1460      2.41     0.83      2.0      2.26     0.00     1
## RoofMatl*        23 1460      2.08     0.60      2.0      2.00     0.00     1
## Exterior1st*     24 1460     10.62     3.20     13.0     10.93     1.48     1
## Exterior2nd*     25 1460     11.34     3.54     14.0     11.65     2.97     1
## MasVnrType*      26 1452      2.76     0.62      3.0      2.73     0.00     1
## MasVnrArea       27 1452    103.69   181.07      0.0     63.15     0.00     0
## ExterQual*       28 1460      3.54     0.69      4.0      3.65     0.00     1
## ExterCond*       29 1460      4.73     0.73      5.0      4.95     0.00     1
## Foundation*      30 1460      2.40     0.72      2.0      2.46     1.48     1
## BsmtQual*        31 1423      3.26     0.87      3.0      3.43     1.48     1
## BsmtCond*        32 1423      3.81     0.66      4.0      4.00     0.00     1
## BsmtExposure*    33 1422      3.27     1.15      4.0      3.46     0.00     1
## BsmtFinType1*    34 1423      3.73     1.83      3.0      3.79     2.97     1
## BsmtFinSF1       35 1460    443.64   456.10    383.5    386.08   568.58     0
## BsmtFinType2*    36 1422      5.71     0.94      6.0      5.98     0.00     1
## BsmtFinSF2       37 1460     46.55   161.32      0.0      1.38     0.00     0
## BsmtUnfSF        38 1460    567.24   441.87    477.5    519.29   426.99     0
## TotalBsmtSF      39 1460   1057.43   438.71    991.5   1036.70   347.67     0
## Heating*         40 1460      2.04     0.30      2.0      2.00     0.00     1
## HeatingQC*       41 1460      2.54     1.74      1.0      2.42     0.00     1
## CentralAir*      42 1460      1.93     0.25      2.0      2.00     0.00     1
## Electrical*      43 1459      4.68     1.05      5.0      5.00     0.00     1
## X1stFlrSF        44 1460   1162.63   386.59   1087.0   1129.99   347.67   334
## X2ndFlrSF        45 1460    346.99   436.53      0.0    285.36     0.00     0
## LowQualFinSF     46 1460      5.84    48.62      0.0      0.00     0.00     0
## GrLivArea        47 1460   1515.46   525.48   1464.0   1467.67   483.33   334
## BsmtFullBath     48 1460      0.43     0.52      0.0      0.39     0.00     0
## BsmtHalfBath     49 1460      0.06     0.24      0.0      0.00     0.00     0
## FullBath         50 1460      1.57     0.55      2.0      1.56     0.00     0
## HalfBath         51 1460      0.38     0.50      0.0      0.34     0.00     0
## BedroomAbvGr     52 1460      2.87     0.82      3.0      2.85     0.00     0
## KitchenAbvGr     53 1460      1.05     0.22      1.0      1.00     0.00     0
## KitchenQual*     54 1460      3.34     0.83      4.0      3.50     0.00     1
## TotRmsAbvGrd     55 1460      6.52     1.63      6.0      6.41     1.48     2
## Functional*      56 1460      6.75     0.98      7.0      7.00     0.00     1
## Fireplaces       57 1460      0.61     0.64      1.0      0.53     1.48     0
## FireplaceQu*     58  770      3.73     1.13      3.0      3.80     1.48     1
## GarageType*      59 1379      3.28     1.79      2.0      3.11     0.00     1
## GarageYrBlt      60 1379   1978.51    24.69   1980.0   1981.07    31.13  1900
## GarageFinish*    61 1379      2.18     0.81      2.0      2.23     1.48     1
## GarageCars       62 1460      1.77     0.75      2.0      1.77     0.00     0
## GarageArea       63 1460    472.98   213.80    480.0    469.81   177.91     0
## GarageQual*      64 1379      4.86     0.61      5.0      5.00     0.00     1
## GarageCond*      65 1379      4.90     0.52      5.0      5.00     0.00     1
## PavedDrive*      66 1460      2.86     0.50      3.0      3.00     0.00     1
## WoodDeckSF       67 1460     94.24   125.34      0.0     71.76     0.00     0
## OpenPorchSF      68 1460     46.66    66.26     25.0     33.23    37.06     0
## EnclosedPorch    69 1460     21.95    61.12      0.0      3.87     0.00     0
## X3SsnPorch       70 1460      3.41    29.32      0.0      0.00     0.00     0
## ScreenPorch      71 1460     15.06    55.76      0.0      0.00     0.00     0
## PoolArea         72 1460      2.76    40.18      0.0      0.00     0.00     0
## PoolQC*          73    7      2.14     0.90      2.0      2.14     1.48     1
## Fence*           74  281      2.43     0.86      3.0      2.48     0.00     1
## MiscFeature*     75   54      2.91     0.45      3.0      3.00     0.00     1
## MiscVal          76 1460     43.49   496.12      0.0      0.00     0.00     0
## MoSold           77 1460      6.32     2.70      6.0      6.25     2.97     1
## YrSold           78 1460   2007.82     1.33   2008.0   2007.77     1.48  2006
## SaleType*        79 1460      8.51     1.56      9.0      8.92     0.00     1
## SaleCondition*   80 1460      4.77     1.10      5.0      5.00     0.00     1
## SalePrice        81 1460 180921.20 79442.50 163000.0 170783.29 56338.80 34900
##                   max  range   skew kurtosis      se
## Id               1460   1459   0.00    -1.20   11.03
## MSSubClass        190    170   1.40     1.56    1.11
## MSZoning*           5      4  -1.73     6.25    0.02
## LotFrontage       313    292   2.16    17.34    0.70
## LotArea        215245 213945  12.18   202.26  261.22
## Street*             2      1 -15.49   238.01    0.00
## Alley*              2      1   0.20    -1.98    0.05
## LotShape*           4      3  -0.61    -1.60    0.04
## LandContour*        4      3  -3.16     8.65    0.02
## Utilities*          2      1  38.13  1453.00    0.00
## LotConfig*          5      4  -1.13    -0.59    0.04
## LandSlope*          3      2   4.80    24.47    0.01
## Neighborhood*      25     24   0.02    -1.06    0.15
## Condition1*         9      8   3.01    16.34    0.02
## Condition2*         8      7  13.14   247.54    0.01
## BldgType*           5      4   2.24     3.41    0.03
## HouseStyle*         8      7   0.31    -0.96    0.05
## OverallQual        10      9   0.22     0.09    0.04
## OverallCond         9      8   0.69     1.09    0.03
## YearBuilt        2010    138  -0.61    -0.45    0.79
## YearRemodAdd     2010     60  -0.50    -1.27    0.54
## RoofStyle*          6      5   1.47     0.61    0.02
## RoofMatl*           8      7   8.09    66.28    0.02
## Exterior1st*       15     14  -0.72    -0.37    0.08
## Exterior2nd*       16     15  -0.69    -0.52    0.09
## MasVnrType*         4      3  -0.07    -0.13    0.02
## MasVnrArea       1600   1600   2.66    10.03    4.75
## ExterQual*          4      3  -1.83     3.86    0.02
## ExterCond*          5      4  -2.56     5.29    0.02
## Foundation*         6      5   0.09     1.02    0.02
## BsmtQual*           4      3  -1.31     1.27    0.02
## BsmtCond*           4      3  -3.39    10.14    0.02
## BsmtExposure*       4      3  -1.15    -0.39    0.03
## BsmtFinType1*       6      5  -0.02    -1.39    0.05
## BsmtFinSF1       5644   5644   1.68    11.06   11.94
## BsmtFinType2*       6      5  -3.56    12.32    0.02
## BsmtFinSF2       1474   1474   4.25    20.01    4.22
## BsmtUnfSF        2336   2336   0.92     0.46   11.56
## TotalBsmtSF      6110   6110   1.52    13.18   11.48
## Heating*            6      5   9.83   110.98    0.01
## HeatingQC*          5      4   0.48    -1.51    0.05
## CentralAir*         2      1  -3.52    10.42    0.01
## Electrical*         5      4  -3.06     7.49    0.03
## X1stFlrSF        4692   4358   1.37     5.71   10.12
## X2ndFlrSF        2065   2065   0.81    -0.56   11.42
## LowQualFinSF      572    572   8.99    82.83    1.27
## GrLivArea        5642   5308   1.36     4.86   13.75
## BsmtFullBath        3      3   0.59    -0.84    0.01
## BsmtHalfBath        2      2   4.09    16.31    0.01
## FullBath            3      3   0.04    -0.86    0.01
## HalfBath            2      2   0.67    -1.08    0.01
## BedroomAbvGr        8      8   0.21     2.21    0.02
## KitchenAbvGr        3      3   4.48    21.42    0.01
## KitchenQual*        4      3  -1.42     1.72    0.02
## TotRmsAbvGrd       14     12   0.67     0.87    0.04
## Functional*         7      6  -4.08    16.37    0.03
## Fireplaces          3      3   0.65    -0.22    0.02
## FireplaceQu*        5      4  -0.16    -0.98    0.04
## GarageType*         6      5   0.76    -1.30    0.05
## GarageYrBlt      2010    110  -0.65    -0.42    0.66
## GarageFinish*       3      2  -0.35    -1.41    0.02
## GarageCars          4      4  -0.34     0.21    0.02
## GarageArea       1418   1418   0.18     0.90    5.60
## GarageQual*         5      4  -4.43    18.25    0.02
## GarageCond*         5      4  -5.28    26.77    0.01
## PavedDrive*         3      2  -3.30     9.22    0.01
## WoodDeckSF        857    857   1.54     2.97    3.28
## OpenPorchSF       547    547   2.36     8.44    1.73
## EnclosedPorch     552    552   3.08    10.37    1.60
## X3SsnPorch        508    508  10.28   123.06    0.77
## ScreenPorch       480    480   4.11    18.34    1.46
## PoolArea          738    738  14.80   222.19    1.05
## PoolQC*             3      2  -0.22    -1.90    0.34
## Fence*              4      3  -0.57    -0.88    0.05
## MiscFeature*        4      3  -2.93    10.71    0.06
## MiscVal         15500  15500  24.43   697.64   12.98
## MoSold             12     11   0.21    -0.41    0.07
## YrSold           2010      4   0.10    -1.19    0.03
## SaleType*           9      8  -3.83    14.57    0.04
## SaleCondition*      6      5  -2.74     6.82    0.03
## SalePrice      755000 720100   1.88     6.50 2079.11
# Box plot of the sale-price distribution.
ggplot(train_house, aes(x = SalePrice)) +
  geom_boxplot()

# Sale price against first-floor square footage.
ggplot(train_house, aes(y = SalePrice, x = X1stFlrSF)) +
  geom_point()

# Sale price against lot area.
ggplot(train_house, aes(y = SalePrice, x = LotArea)) +
  geom_point()

# Pairwise Pearson correlation matrix for three quantitative variables.

correlation <- cor(train_house[, c("LotArea", "X1stFlrSF", "X2ndFlrSF")])
correlation
##              LotArea  X1stFlrSF   X2ndFlrSF
## LotArea   1.00000000  0.2994746  0.05098595
## X1stFlrSF 0.29947458  1.0000000 -0.20264618
## X2ndFlrSF 0.05098595 -0.2026462  1.00000000

I’m surprised by the weak, slightly negative correlation (about -0.20) between first- and second-floor square footage; I would have assumed the two would be strongly positively correlated.

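Familywise error is the probability of making at least one false discovery (Type I error) across a set of hypothesis tests. I would be worried about it here: with three pairwise tests each run at the 80% confidence level (α = 0.20), the chance of at least one Type I error would be about 1 − 0.8³ ≈ 0.49 if the tests were independent, and the collinearity among the variables means they are not independent.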

# Test H0: correlation = 0 for each pair of the three variables in the
# correlation matrix, reporting an 80% confidence interval for every pair.
# Previously only the X1stFlrSF / X2ndFlrSF pair was tested; it is kept last
# so the recorded output below still follows its own call.
cor.test(train_house$LotArea, train_house$X1stFlrSF, conf.level = 0.8)
cor.test(train_house$LotArea, train_house$X2ndFlrSF, conf.level = 0.8)
cor.test(train_house$X1stFlrSF, train_house$X2ndFlrSF, conf.level = 0.8)
## 
##  Pearson's product-moment correlation
## 
## data:  train_house$X1stFlrSF and train_house$X2ndFlrSF
## t = -7.9017, df = 1458, p-value = 5.379e-15
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  -0.2346122 -0.1702424
## sample estimates:
##        cor 
## -0.2026462

Linear Algebra and Correlation. Invert your correlation matrix from above. (This is known as the precision matrix and contains variance inflation factors on the diagonal.) Multiply the correlation matrix by the precision matrix, and then multiply the precision matrix by the correlation matrix. Conduct LU decomposition on the matrix. 5 points

# Invert the correlation matrix to obtain the precision matrix.
precision_matrix <- solve(a = correlation)
print(precision_matrix)
##              LotArea  X1stFlrSF  X2ndFlrSF
## LotArea    1.1144421 -0.3600471 -0.1297831
## X1stFlrSF -0.3600471  1.1591459  0.2532538
## X2ndFlrSF -0.1297831  0.2532538  1.0579380
# Precision times correlation: a matrix times its inverse, so the product is
# the identity up to floating-point rounding.
print(precision_matrix %*% correlation)
##                 LotArea     X1stFlrSF X2ndFlrSF
## LotArea    1.000000e+00  1.283695e-16         0
## X1stFlrSF -4.163336e-17  1.000000e+00         0
## X2ndFlrSF  1.387779e-17 -2.775558e-17         1
# Correlation times precision: the same product in the other order, again the
# identity up to floating-point rounding.
print(correlation %*% precision_matrix)
##                LotArea    X1stFlrSF     X2ndFlrSF
## LotArea   1.000000e+00 1.387779e-17 -1.387779e-17
## X1stFlrSF 6.591949e-17 1.000000e+00 -2.775558e-17
## X2ndFlrSF 2.775558e-17 0.000000e+00  1.000000e+00

LU decomposition using the matrixcalc library:

library(matrixcalc)

# Factor the precision matrix into lower (L) and upper (U) triangular parts.
lu.decomposition(precision_matrix)
## $L
##            [,1]      [,2] [,3]
## [1,]  1.0000000 0.0000000    0
## [2,] -0.3230739 1.0000000    0
## [3,] -0.1164556 0.2026462    1
## 
## $U
##          [,1]       [,2]       [,3]
## [1,] 1.114442 -0.3600471 -0.1297831
## [2,] 0.000000  1.0428241  0.2113243
## [3,] 0.000000  0.0000000  1.0000000

Calculus-Based Probability & Statistics. Many times, it makes sense to fit a closed form distribution to data. Select a variable in the Kaggle.com training dataset that is skewed to the right, shift it so that the minimum value is absolutely above zero if necessary. Then load the MASS package and run fitdistr to fit an exponential probability density function. (See https://stat.ethz.ch/R-manual/R-devel/library/MASS/html/fitdistr.html ). Find the optimal value of λ for this distribution, and then take 1000 samples from this exponential distribution using this value (e.g., rexp(1000, λ)). Plot a histogram and compare it with a histogram of your original variable. Using the exponential pdf, find the 5th and 95th percentiles using the cumulative distribution function (CDF). Also generate a 95% confidence interval from the empirical data, assuming normality. Finally, provide the empirical 5th percentile and 95th percentile of the data. Discuss. 10 points

TBD

Modeling. Build some type of multiple regression model and submit your model to the competition board. Provide your complete model summary and results with analysis. Report your Kaggle.com user name and score. 10 points

Since house prices tend to increase with several variables, I’ll attempt a linear regression model fit to multiple variables:

library(dplyr)

# Encode the two categorical predictors as factors before fitting.
train_house[["Utilities"]] <- as.factor(train_house[["Utilities"]])
train_house[["BldgType"]] <- as.factor(train_house[["BldgType"]])

# Multiple linear regression of sale price on size, quality/condition ratings,
# utilities and building type. The fitted object is stored under the name `lm`,
# which later chunks rely on.
lm <- lm(
  formula = SalePrice ~ X1stFlrSF + LotArea + OverallQual + OverallCond +
    Utilities + BldgType,
  data = train_house
)

# Coefficient table, residual summary and fit statistics.
summary(lm)
## 
## Call:
## lm(formula = SalePrice ~ X1stFlrSF + LotArea + OverallQual + 
##     OverallCond + Utilities + BldgType, data = train_house)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -400255  -24732   -1601   19707  346946 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.201e+05  8.381e+03 -14.332  < 2e-16 ***
## X1stFlrSF        5.264e+01  3.572e+00  14.737  < 2e-16 ***
## LotArea          8.127e-01  1.205e-01   6.746 2.18e-11 ***
## OverallQual      3.787e+04  9.663e+02  39.193  < 2e-16 ***
## OverallCond      5.490e+02  1.034e+03   0.531  0.59563    
## UtilitiesNoSeWa -5.534e+04  4.275e+04  -1.295  0.19566    
## BldgType2fmCon  -9.172e+03  7.861e+03  -1.167  0.24347    
## BldgTypeDuplex  -1.463e+04  6.272e+03  -2.332  0.01983 *  
## BldgTypeTwnhs   -1.898e+04  6.817e+03  -2.784  0.00543 ** 
## BldgTypeTwnhsE  -1.859e+04  4.342e+03  -4.280 1.99e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42720 on 1450 degrees of freedom
## Multiple R-squared:  0.7126, Adjusted R-squared:  0.7108 
## F-statistic: 399.5 on 9 and 1450 DF,  p-value: < 2.2e-16
# Predict sale prices for the test set with the fitted model.
predicted_price <- predict(lm, test_house)

# predict() yields NA for test rows with a missing predictor; replace those
# with the mean of the available predictions so every Id gets a price.
predicted_price[is.na(predicted_price)] <- mean(predicted_price, na.rm = TRUE)

# Build the submission table directly. The earlier
# as.data.frame(..., row.names = "SalePrice") call misused `row.names` (it sets
# row labels, not the column name) and raised a warning that R says will become
# an error. Column names and order match the file written before.
predictions <- data.frame(
  SalePrice = predicted_price,
  Id = test_house$Id
)

write.csv(predictions,"C:\\Users\\Santi\\OneDrive\\Documents\\GitHub\\DATA605\\Final\\predictions.csv", row.names = FALSE)

Surprisingly, my best attempt used only 1stFlrSF — just a single-variable model.

Username: sserrot Score: .33063