A_matrix <- matrix(c(0,1/6,1/3,0,0,0,
1/2,1/6,1/3,0,0,0,
1/2,1/6,0,0,0,0,
0,1/6,0,0,1/2,1,
0,1/6,1/3,1/2,0,0,
0,1/6,0,1/2,1/2,0
),nrow = 6)
A_matrix <- t(A_matrix)
colSums(A_matrix) # total probability for any node is 1
## [1] 1 1 1 1 1 1
B_matrix <- .85 * A_matrix + 0.15/ncol(A_matrix)
B_matrix
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.025 0.1666667 0.3083333 0.025 0.025 0.025
## [2,] 0.450 0.1666667 0.3083333 0.025 0.025 0.025
## [3,] 0.450 0.1666667 0.0250000 0.025 0.025 0.025
## [4,] 0.025 0.1666667 0.0250000 0.025 0.450 0.875
## [5,] 0.025 0.1666667 0.3083333 0.450 0.025 0.025
## [6,] 0.025 0.1666667 0.0250000 0.450 0.450 0.025
# Uniform rank based off number of columns
r_vector <- rep(1/ncol(A_matrix),ncol(A_matrix))
r_vector
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
library(matrixcalc)
# n = 30
matrix.power(B_matrix,30) %*% r_vector
## [,1]
## [1,] 0.05170475
## [2,] 0.07367927
## [3,] 0.05741242
## [4,] 0.34870367
## [5,] 0.19990381
## [6,] 0.26859607
# n = 35
matrix.power(B_matrix,35) %*% r_vector
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870368
## [5,] 0.19990381
## [6,] 0.26859608
# n = 40
matrix.power(B_matrix,40) %*% r_vector
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
# n = 45
matrix.power(B_matrix,45) %*% r_vector
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
# n = 50
matrix.power(B_matrix,50) %*% r_vector
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
it seems like there is some convergence at n=35
We see max eigen value of 1 for the first set
eigen(B_matrix)$values
## [1] 1.00000000+0i 0.57619235+0i -0.42500000+0i -0.42500000-0i -0.34991524+0i
## [6] -0.08461044+0i
All those vectors are positive but they do not sum to 1
eigen(B_matrix)$vectors[,1]
## [1] 0.1044385+0i 0.1488249+0i 0.1159674+0i 0.7043472+0i 0.4037861+0i
## [6] 0.5425377+0i
sum(eigen(B_matrix)$vectors[,1])
## [1] 2.019902+0i
library(igraph)
## Warning: package 'igraph' was built under R version 4.1.3
##
## Attaching package: 'igraph'
## The following object is masked from 'package:matrixcalc':
##
## %s%
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
A_graph_matrix <- matrix(c(0,0,1/3,0,0,0,
1/2,0,1/3,0,0,0,
1/2,0,0,0,0,0,
0,0,0,0,1/2,1,
0,0,1/3,1/2,0,0,
0,0,0,1/2,1/2,0
),nrow = 6)
A_graph_matrix <- t(A_graph_matrix)
a <- graph.adjacency(t(A_graph_matrix), weighted=TRUE, mode='directed')
ceb <- cluster_edge_betweenness(a)
## Warning in cluster_edge_betweenness(a): At community.c:461 :Membership vector
## will be selected based on the lowest modularity score.
## Warning in cluster_edge_betweenness(a): At community.c:468 :Modularity
## calculation with weighted edge betweenness community detection might not make
## sense -- modularity treats edge weights as similarities while edge betwenness
## treats them as distances
plot(ceb, a)
page.rank(a)
## $vector
## [1] 0.05170475 0.07367926 0.05741241 0.34870369 0.19990381 0.26859608
##
## $value
## [1] 1
##
## $options
## NULL
sum(page.rank(a)$vector)
## [1] 1
Looks like these values actually add up to 1 and the max eigenvalue is also 1
format. Go ahead and divide all pixels by 255 to produce values between 0 and 1. (This is equivalent to min-max scaling.) (5 points)
library(reshape)
## Warning: package 'reshape' was built under R version 4.1.2
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.2
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
##
## colsplit, melt, recast
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning in as.POSIXlt.POSIXct(Sys.time()): unable to identify current timezone 'H':
## please set environment variable 'TZ'
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose() masks igraph::compose()
## x tidyr::crossing() masks igraph::crossing()
## x tidyr::expand() masks reshape::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::groups() masks igraph::groups()
## x dplyr::lag() masks stats::lag()
## x dplyr::rename() masks reshape::rename()
## x purrr::simplify() masks igraph::simplify()
train <- read.csv("train.csv")
sliced_train <- train[0:10,]
head(sliced_train)
## label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9
## 1 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 1 0 0 0 0 0 0 0 0 0 0
## 4 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## pixel10 pixel11 pixel12 pixel13 pixel14 pixel15 pixel16 pixel17 pixel18
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel19 pixel20 pixel21 pixel22 pixel23 pixel24 pixel25 pixel26 pixel27
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel28 pixel29 pixel30 pixel31 pixel32 pixel33 pixel34 pixel35 pixel36
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel37 pixel38 pixel39 pixel40 pixel41 pixel42 pixel43 pixel44 pixel45
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel46 pixel47 pixel48 pixel49 pixel50 pixel51 pixel52 pixel53 pixel54
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel55 pixel56 pixel57 pixel58 pixel59 pixel60 pixel61 pixel62 pixel63
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel64 pixel65 pixel66 pixel67 pixel68 pixel69 pixel70 pixel71 pixel72
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel73 pixel74 pixel75 pixel76 pixel77 pixel78 pixel79 pixel80 pixel81
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel82 pixel83 pixel84 pixel85 pixel86 pixel87 pixel88 pixel89 pixel90
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel91 pixel92 pixel93 pixel94 pixel95 pixel96 pixel97 pixel98 pixel99
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## pixel100 pixel101 pixel102 pixel103 pixel104 pixel105 pixel106 pixel107
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel108 pixel109 pixel110 pixel111 pixel112 pixel113 pixel114 pixel115
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel116 pixel117 pixel118 pixel119 pixel120 pixel121 pixel122 pixel123
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 18 30
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 1 25 130
## 6 0 0 0 0 0 0 0 0
## pixel124 pixel125 pixel126 pixel127 pixel128 pixel129 pixel130 pixel131
## 1 0 0 0 0 0 0 0 0
## 2 137 137 192 86 72 1 0 0
## 3 3 141 139 3 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 155 254 254 254 157 30 2 0
## 6 3 141 202 254 193 44 0 0
## pixel132 pixel133 pixel134 pixel135 pixel136 pixel137 pixel138 pixel139
## 1 188 255 94 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel140 pixel141 pixel142 pixel143 pixel144 pixel145 pixel146 pixel147
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 220 179
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel148 pixel149 pixel150 pixel151 pixel152 pixel153 pixel154 pixel155
## 1 0 0 0 0 0 0 0 0
## 2 13 86 250 254 254 254 254 217
## 3 0 0 0 0 9 254 254 8
## 4 6 0 0 0 0 0 0 0
## 5 8 103 253 253 253 253 253 253
## 6 0 0 0 5 165 254 179 163
## pixel156 pixel157 pixel158 pixel159 pixel160 pixel161 pixel162 pixel163
## 1 0 0 0 191 250 253 93 0
## 2 246 151 32 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 9 77 0 0 0 0 0
## 5 253 253 114 2 0 0 0 0
## 6 249 244 72 0 0 0 0 0
## pixel164 pixel165 pixel166 pixel167 pixel168 pixel169 pixel170 pixel171
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel172 pixel173 pixel174 pixel175 pixel176 pixel177 pixel178 pixel179
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 16 179 254 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 28 247 17 0 0 0
## 5 0 0 0 11 208 253 253 253
## 6 0 0 0 0 0 0 0 135
## pixel180 pixel181 pixel182 pixel183 pixel184 pixel185 pixel186 pixel187
## 1 0 0 0 0 0 0 123 248
## 2 254 254 254 254 254 254 231 54
## 3 9 254 254 8 0 0 0 0
## 4 0 0 0 0 0 27 202 0
## 5 253 253 253 253 253 253 253 107
## 6 254 150 0 0 189 254 243 31
## pixel188 pixel189 pixel190 pixel191 pixel192 pixel193 pixel194 pixel195
## 1 253 167 10 0 0 0 0 0
## 2 15 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel196 pixel197 pixel198 pixel199 pixel200 pixel201 pixel202 pixel203
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 72
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 242
## 5 0 0 0 0 0 0 0 31
## 6 0 0 0 0 0 0 0 0
## pixel204 pixel205 pixel206 pixel207 pixel208 pixel209 pixel210 pixel211
## 1 0 0 0 0 0 0 0 0
## 2 254 254 254 254 254 254 254 254
## 3 0 0 0 0 9 254 254 106
## 4 155 0 0 0 0 0 0 0
## 5 253 253 253 253 253 253 253 253
## 6 0 0 82 248 209 5 0 0
## pixel212 pixel213 pixel214 pixel215 pixel216 pixel217 pixel218 pixel219
## 1 0 80 247 253 208 13 0 0
## 2 254 254 254 254 104 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 27 254 63 0 0 0 0
## 5 253 253 253 215 101 3 0 0
## 6 164 236 254 115 0 0 0 0
## pixel220 pixel221 pixel222 pixel223 pixel224 pixel225 pixel226 pixel227
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel228 pixel229 pixel230 pixel231 pixel232 pixel233 pixel234 pixel235
## 1 0 0 0 0 0 0 0 0
## 2 0 0 61 191 254 254 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 160 207 6 0 0
## 5 0 0 23 210 253 253 253 248
## 6 0 0 0 0 0 8 211 254
## pixel236 pixel237 pixel238 pixel239 pixel240 pixel241 pixel242 pixel243
## 1 0 0 0 0 29 207 253 235
## 2 254 109 83 199 254 254 254 254
## 3 9 254 254 184 0 0 0 0
## 4 0 0 0 0 0 27 254 65
## 5 161 222 222 246 253 253 253 253
## 6 58 0 0 0 0 33 230 212
## pixel244 pixel245 pixel246 pixel247 pixel248 pixel249 pixel250 pixel251
## 1 77 0 0 0 0 0 0 0
## 2 243 85 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 253 39 0 0 0 0 0 0
## 6 6 0 0 0 0 0 0 0
## pixel252 pixel253 pixel254 pixel255 pixel256 pixel257 pixel258 pixel259
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 172 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 127
## 5 0 0 0 0 0 0 136 253
## 6 0 0 0 0 0 0 0 0
## pixel260 pixel261 pixel262 pixel263 pixel264 pixel265 pixel266 pixel267
## 1 0 0 0 0 0 0 0 54
## 2 254 254 202 147 147 45 0 11
## 3 0 0 0 0 9 254 254 184
## 4 254 21 0 0 0 0 0 0
## 5 253 253 229 77 0 0 0 70
## 6 0 119 254 156 3 0 0 0
## pixel268 pixel269 pixel270 pixel271 pixel272 pixel273 pixel274 pixel275
## 1 209 253 253 88 0 0 0 0
## 2 29 200 254 254 254 171 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 20 239 65 0 0 0 0
## 5 218 253 253 253 253 215 91 0
## 6 0 18 230 254 33 0 0 0
## pixel276 pixel277 pixel278 pixel279 pixel280 pixel281 pixel282 pixel283
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel284 pixel285 pixel286 pixel287 pixel288 pixel289 pixel290 pixel291
## 1 0 0 0 0 0 0 0 0
## 2 0 1 174 254 254 89 67 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 77 254 21 0 0
## 5 0 5 214 253 253 253 195 0
## 6 0 0 0 0 10 212 254 35
## pixel292 pixel293 pixel294 pixel295 pixel296 pixel297 pixel298 pixel299
## 1 0 0 93 254 253 238 170 17
## 2 0 0 0 0 0 128 252 254
## 3 9 254 254 184 0 0 0 0
## 4 0 0 0 0 0 0 195 65
## 5 0 0 0 0 104 224 253 253
## 6 0 0 0 0 0 33 254 254
## pixel300 pixel301 pixel302 pixel303 pixel304 pixel305 pixel306 pixel307
## 1 0 0 0 0 0 0 0 0
## 2 254 212 76 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 253 253 215 29 0 0 0 0
## 6 33 0 0 0 0 0 0 0
## pixel308 pixel309 pixel310 pixel311 pixel312 pixel313 pixel314 pixel315
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 47 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 70
## 5 0 0 0 0 0 116 253 253
## 6 0 0 0 0 0 0 0 0
## pixel316 pixel317 pixel318 pixel319 pixel320 pixel321 pixel322 pixel323
## 1 0 0 0 0 0 23 210 254
## 2 254 29 0 0 0 0 0 0
## 3 0 0 0 0 6 185 254 184
## 4 254 21 0 0 0 0 0 0
## 5 253 247 75 0 0 0 0 0
## 6 116 254 154 3 0 0 0 0
## pixel324 pixel325 pixel326 pixel327 pixel328 pixel329 pixel330 pixel331
## 1 253 159 0 0 0 0 0 0
## 2 0 0 83 254 254 254 153 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 195 142 0 0 0 0
## 5 0 26 200 253 253 253 253 216
## 6 0 33 254 254 33 0 0 0
## pixel332 pixel333 pixel334 pixel335 pixel336 pixel337 pixel338 pixel339
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel340 pixel341 pixel342 pixel343 pixel344 pixel345 pixel346 pixel347
## 1 0 0 0 0 0 0 0 0
## 2 0 80 254 254 240 24 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 56 251 21 0 0
## 5 0 254 253 253 253 195 0 0
## 6 0 0 0 0 124 254 115 0
## pixel348 pixel349 pixel350 pixel351 pixel352 pixel353 pixel354 pixel355
## 1 16 209 253 254 240 81 0 0
## 2 0 0 0 0 0 0 25 240
## 3 0 89 254 184 0 0 0 0
## 4 0 0 0 0 0 0 195 227
## 5 0 0 0 0 0 0 26 200
## 6 0 0 0 0 0 160 254 239
## pixel356 pixel357 pixel358 pixel359 pixel360 pixel361 pixel362 pixel363
## 1 0 0 0 0 0 0 0 0
## 2 254 254 153 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 253 253 253 253 5 0 0 0
## 6 23 0 0 0 0 0 0 0
## pixel364 pixel365 pixel366 pixel367 pixel368 pixel369 pixel370 pixel371
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 64 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 254 253 253
## 6 0 0 0 0 0 0 0 0
## pixel372 pixel373 pixel374 pixel375 pixel376 pixel377 pixel378 pixel379
## 1 0 0 0 0 27 253 253 254
## 2 186 7 0 0 0 0 0 0
## 3 0 0 0 0 4 146 254 184
## 4 222 153 5 0 0 0 0 0
## 5 253 99 0 0 0 0 0 0
## 6 203 254 35 0 0 0 0 0
## pixel380 pixel381 pixel382 pixel383 pixel384 pixel385 pixel386 pixel387
## 1 13 0 0 0 0 0 0 0
## 2 0 0 0 166 254 254 224 12
## 3 0 0 0 0 0 0 0 0
## 4 0 0 120 240 13 0 0 0
## 5 0 0 0 25 231 253 253 253
## 6 0 197 254 178 0 0 0 0
## pixel388 pixel389 pixel390 pixel391 pixel392 pixel393 pixel394 pixel395
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 36 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel396 pixel397 pixel398 pixel399 pixel400 pixel401 pixel402 pixel403
## 1 0 0 0 0 0 0 0 20
## 2 14 232 254 254 254 29 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 67 251 40 0
## 5 0 254 253 253 253 99 0 0
## 6 0 0 0 23 239 221 11 0
## pixel404 pixel405 pixel406 pixel407 pixel408 pixel409 pixel410 pixel411
## 1 206 254 254 198 7 0 0 0
## 2 0 0 0 0 0 0 0 75
## 3 9 254 254 184 0 0 0 0
## 4 0 0 0 0 0 0 94 255
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 198 255 123
## pixel412 pixel413 pixel414 pixel415 pixel416 pixel417 pixel418 pixel419
## 1 0 0 0 0 0 0 0 0
## 2 254 254 254 17 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 69 0 0 0 0 0 0 0
## 5 223 253 253 253 129 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel420 pixel421 pixel422 pixel423 pixel424 pixel425 pixel426 pixel427
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 18 254 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 254 253 253
## 6 0 0 0 0 0 0 0 23
## pixel428 pixel429 pixel430 pixel431 pixel432 pixel433 pixel434 pixel435
## 1 0 0 0 168 253 253 196 7
## 2 254 29 0 0 0 0 0 0
## 3 0 0 0 0 9 254 254 184
## 4 0 234 184 0 0 0 0 0
## 5 253 99 0 0 0 0 0 0
## 6 238 178 0 0 0 0 0 0
## pixel436 pixel437 pixel438 pixel439 pixel440 pixel441 pixel442 pixel443
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 48 254 254 254 17
## 3 0 0 0 0 0 0 0 0
## 4 0 0 19 245 69 0 0 0
## 5 0 0 0 0 127 253 253 253
## 6 10 219 254 96 0 0 0 0
## pixel444 pixel445 pixel446 pixel447 pixel448 pixel449 pixel450 pixel451
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 129 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel452 pixel453 pixel454 pixel455 pixel456 pixel457 pixel458 pixel459
## 1 0 0 0 0 0 0 20 203
## 2 2 163 254 254 254 29 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 234 169 0
## 5 0 254 253 253 253 99 0 0
## 6 0 0 0 30 249 204 0 0
## pixel460 pixel461 pixel462 pixel463 pixel464 pixel465 pixel466 pixel467
## 1 253 248 76 0 0 0 0 0
## 2 0 0 0 0 0 0 0 48
## 3 9 254 254 184 0 0 0 0
## 4 0 0 0 0 0 0 3 199
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 25 235 254 62
## pixel468 pixel469 pixel470 pixel471 pixel472 pixel473 pixel474 pixel475
## 1 0 0 0 0 0 0 0 0
## 2 254 254 254 17 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 182 10 0 0 0 0 0 0
## 5 139 253 253 253 90 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel476 pixel477 pixel478 pixel479 pixel480 pixel481 pixel482 pixel483
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 94 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 254 253 253
## 6 0 0 0 0 0 0 0 26
## pixel484 pixel485 pixel486 pixel487 pixel488 pixel489 pixel490 pixel491
## 1 0 22 188 253 245 93 0 0
## 2 254 200 12 0 0 0 0 0
## 3 0 0 0 0 9 254 254 184
## 4 0 154 205 4 0 0 26 72
## 5 253 99 0 0 0 0 0 0
## 6 243 204 0 0 0 0 0 0
## pixel492 pixel493 pixel494 pixel495 pixel496 pixel497 pixel498 pixel499
## 1 0 0 0 0 0 0 0 0
## 2 0 0 16 209 254 254 150 1
## 3 0 0 0 0 0 0 0 0
## 4 128 203 208 254 254 131 0 0
## 5 0 0 0 78 248 253 253 253
## 6 91 254 248 36 0 0 0 0
## pixel500 pixel501 pixel502 pixel503 pixel504 pixel505 pixel506 pixel507
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel508 pixel509 pixel510 pixel511 pixel512 pixel513 pixel514 pixel515
## 1 0 0 0 0 0 103 253 253
## 2 0 15 206 254 254 254 202 66
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 61 254 129
## 5 0 254 253 253 253 216 34 0
## 6 0 0 0 33 254 204 0 0
## pixel516 pixel517 pixel518 pixel519 pixel520 pixel521 pixel522 pixel523
## 1 191 0 0 0 0 0 0 0
## 2 0 0 0 0 0 21 161 254
## 3 9 254 254 184 0 0 0 0
## 4 113 186 245 251 189 75 56 136
## 5 0 0 0 0 0 0 33 152
## 6 0 0 0 67 241 254 133 0
## pixel524 pixel525 pixel526 pixel527 pixel528 pixel529 pixel530 pixel531
## 1 0 0 0 0 0 0 0 0
## 2 254 245 31 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 254 73 0 0 0 0 0 0
## 5 253 253 253 107 1 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel532 pixel533 pixel534 pixel535 pixel536 pixel537 pixel538 pixel539
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 60 212
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 206 253 253
## 6 0 0 0 0 0 0 0 33
## pixel540 pixel541 pixel542 pixel543 pixel544 pixel545 pixel546 pixel547
## 1 89 240 253 195 25 0 0 0
## 2 254 254 254 194 48 48 34 41
## 3 0 0 0 0 156 254 254 184
## 4 0 15 216 233 233 159 104 52
## 5 253 253 140 0 0 0 0 0
## 6 254 214 7 0 0 0 50 242
## pixel548 pixel549 pixel550 pixel551 pixel552 pixel553 pixel554 pixel555
## 1 0 0 0 0 0 0 0 0
## 2 48 209 254 254 254 171 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 38 254 73 0 0
## 5 30 139 234 253 253 253 154 2
## 6 254 194 24 0 0 0 0 0
## pixel556 pixel557 pixel558 pixel559 pixel560 pixel561 pixel562 pixel563
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel564 pixel565 pixel566 pixel567 pixel568 pixel569 pixel570 pixel571
## 1 0 0 0 15 220 253 253 80
## 2 0 0 0 86 243 254 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 16 205 253 253 253 250 208
## 6 0 0 0 5 193 254 78 0
## pixel572 pixel573 pixel574 pixel575 pixel576 pixel577 pixel578 pixel579
## 1 0 0 0 0 0 0 0 0
## 2 254 254 233 243 254 254 254 254
## 3 185 255 255 184 0 0 0 0
## 4 0 0 0 0 0 0 0 18
## 5 106 106 106 200 237 253 253 253
## 6 0 19 128 254 195 36 0 0
## pixel580 pixel581 pixel582 pixel583 pixel584 pixel585 pixel586 pixel587
## 1 0 0 0 0 0 0 0 0
## 2 254 86 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 254 73 0 0 0 0 0 0
## 5 253 209 22 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel588 pixel589 pixel590 pixel591 pixel592 pixel593 pixel594 pixel595
## 1 0 0 0 0 0 0 0 94
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 82 253
## 6 0 0 0 0 0 0 0 0
## pixel596 pixel597 pixel598 pixel599 pixel600 pixel601 pixel602 pixel603
## 1 253 253 253 94 0 0 0 0
## 2 114 254 254 254 254 254 254 254
## 3 0 0 0 0 185 254 254 184
## 4 0 0 0 0 0 0 0 0
## 5 253 253 253 253 253 253 253 253
## 6 103 254 222 74 143 235 254 228
## pixel604 pixel605 pixel606 pixel607 pixel608 pixel609 pixel610 pixel611
## 1 0 0 0 0 0 0 0 0
## 2 254 254 254 239 86 11 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 18 254 73 0 0
## 5 253 253 253 253 209 22 0 0
## 6 83 0 0 0 0 0 0 0
## pixel612 pixel613 pixel614 pixel615 pixel616 pixel617 pixel618 pixel619
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel620 pixel621 pixel622 pixel623 pixel624 pixel625 pixel626 pixel627
## 1 0 0 0 89 251 253 250 131
## 2 0 0 0 0 13 182 254 254
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 1 91 253 253 253 253
## 6 0 0 0 0 30 242 254 254
## pixel628 pixel629 pixel630 pixel631 pixel632 pixel633 pixel634 pixel635
## 1 0 0 0 0 0 0 0 0
## 2 254 254 254 254 254 254 243 70
## 3 185 254 254 184 0 0 0 0
## 4 0 0 0 0 0 0 0 5
## 5 253 253 253 253 253 253 213 90
## 6 254 254 252 84 0 0 0 0
## pixel636 pixel637 pixel638 pixel639 pixel640 pixel641 pixel642 pixel643
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 206 106 0 0 0 0 0 0
## 5 7 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel644 pixel645 pixel646 pixel647 pixel648 pixel649 pixel650 pixel651
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 1
## 6 0 0 0 0 0 0 0 0
## pixel652 pixel653 pixel654 pixel655 pixel656 pixel657 pixel658 pixel659
## 1 214 218 95 0 0 0 0 0
## 2 0 8 76 146 254 255 254 255
## 3 0 0 0 0 63 254 254 62
## 4 0 0 0 0 0 0 0 0
## 5 18 129 208 253 253 253 253 159
## 6 0 23 64 158 200 174 61 0
## pixel660 pixel661 pixel662 pixel663 pixel664 pixel665 pixel666 pixel667
## 1 0 0 0 0 0 0 0 0
## 2 146 19 15 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 186 159 0 0
## 5 129 90 4 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel668 pixel669 pixel670 pixel671 pixel672 pixel673 pixel674 pixel675
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel676 pixel677 pixel678 pixel679 pixel680 pixel681 pixel682 pixel683
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel684 pixel685 pixel686 pixel687 pixel688 pixel689 pixel690 pixel691
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 6
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel692 pixel693 pixel694 pixel695 pixel696 pixel697 pixel698 pixel699
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 209 101 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel700 pixel701 pixel702 pixel703 pixel704 pixel705 pixel706 pixel707
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel708 pixel709 pixel710 pixel711 pixel712 pixel713 pixel714 pixel715
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel716 pixel717 pixel718 pixel719 pixel720 pixel721 pixel722 pixel723
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel724 pixel725 pixel726 pixel727 pixel728 pixel729 pixel730 pixel731
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel732 pixel733 pixel734 pixel735 pixel736 pixel737 pixel738 pixel739
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel740 pixel741 pixel742 pixel743 pixel744 pixel745 pixel746 pixel747
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel748 pixel749 pixel750 pixel751 pixel752 pixel753 pixel754 pixel755
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel756 pixel757 pixel758 pixel759 pixel760 pixel761 pixel762 pixel763
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel764 pixel765 pixel766 pixel767 pixel768 pixel769 pixel770 pixel771
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel772 pixel773 pixel774 pixel775 pixel776 pixel777 pixel778 pixel779
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## pixel780 pixel781 pixel782 pixel783
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
We know based off the dataset it is a 28x28 matrix and we can use this stackoverflow response for how to plot the images
plot_digit = function(matrix_row) {
m = matrix(matrix_row,28,28)
mode(m) = "numeric"
image(m, useRaster=TRUE, axes=FALSE)
}
for( row in 1:10) {
plot_digit(sliced_train[row,])
}
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
## Warning in matrix(matrix_row, 28, 28): data length [785] is not a sub-multiple
## or multiple of the number of rows [28]
Then we divide all pixels by 255:
sliced_train_reduced <- sliced_train[,2:ncol(sliced_train)]/255 #avoid dividing the label column
sliced_train_reduced$label <- sliced_train$label
melted_train <- melt(sliced_train_reduced)
## No id variables; using all as measure variables
hist(melted_train$value)
rowMeans(sliced_train_reduced[,2:ncol(sliced_train_reduced)])
## 1 2 3 4 5 6 7
## 0.08455382 0.22313425 0.06842737 0.08025710 0.25556723 0.11535114 0.09472289
## 8 9 10
## 0.12531513 0.14603341 0.16173970
We can see that all the numbers are pretty close to 0 since most pixels are not on
variance). How many components are possible? Why? (5 points)
train_covariance <- cov(sliced_train_reduced)
pca_train <- prcomp(train_covariance)
pca_train_var <- cumsum(pca_train$sdev^2)/sum(pca_train$sdev^2)
plot(pca_train_var)
I wasnt sure how to plot these, but it will appear to be noise because most of the variance across the images will skew how the pixel light up. This will cause a lot of blur which makes it difficult to recognize as images.
library(nnet)
## Warning: package 'nnet' was built under R version 4.1.3
train$label <- as.factor(train$label)
train_reduced_pixels <- train[2:ncol(train)]/255
train_reduced_pixels$label <- train$label
multi_model <- multinom(label ~., data = train_reduced_pixels,MaxNWts=84581)
## # weights: 7860 (7065 variable)
## initial value 96708.573906
## iter 10 value 25322.714106
## iter 20 value 20402.086316
## iter 30 value 19312.872829
## iter 40 value 18703.256586
## iter 50 value 18197.815143
## iter 60 value 17732.985798
## iter 70 value 16739.962157
## iter 80 value 14961.658448
## iter 90 value 13446.085942
## iter 100 value 12442.636014
## final value 12442.636014
## stopped after 100 iterations
multi_predictions <- tibble(actual = train$label %>% as.numeric(), prediction = predict(multi_model, train[2:ncol(train)]) %>% as.numeric())
multi_predictions <- multi_predictions %>% mutate(correct = ifelse(prediction == actual, 1, 0))
paste0("We correctly identified ", format(sum(multi_predictions$correct) / nrow(multi_predictions), digits = 4), " of the digits")
## [1] "We correctly identified 0.8342 of the digits"
library(caret)
## Warning: package 'caret' was built under R version 4.1.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
confusionMatrix(multi_predictions$prediction %>% as.factor(),multi_predictions$actual %>% as.factor())
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 6 7 8 9 10
## 1 3841 0 12 5 6 31 14 12 4 9
## 2 1 3734 11 3 2 11 4 7 5 3
## 3 9 13 3460 49 22 14 17 43 5 8
## 4 11 37 99 3833 18 235 4 52 32 36
## 5 6 2 22 2 3222 15 8 13 2 17
## 6 9 0 3 12 2 1926 11 3 1 4
## 7 27 5 31 13 22 53 3808 3 2 0
## 8 2 4 14 8 2 8 2 3420 1 19
## 9 217 867 492 391 473 1425 266 234 4005 303
## 10 9 22 33 35 303 77 3 614 6 3789
##
## Overall Statistics
##
## Accuracy : 0.8342
## 95% CI : (0.8306, 0.8378)
## No Information Rate : 0.1115
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8158
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity 0.92957 0.79718 0.82835 0.88095 0.79126 0.50751
## Specificity 0.99754 0.99874 0.99524 0.98608 0.99771 0.99882
## Pos Pred Value 0.97636 0.98757 0.95055 0.87973 0.97371 0.97717
## Neg Pred Value 0.99236 0.97514 0.98131 0.98624 0.97803 0.95331
## Prevalence 0.09838 0.11152 0.09945 0.10360 0.09695 0.09036
## Detection Rate 0.09145 0.08890 0.08238 0.09126 0.07671 0.04586
## Detection Prevalence 0.09367 0.09002 0.08667 0.10374 0.07879 0.04693
## Balanced Accuracy 0.96356 0.89796 0.91179 0.93351 0.89448 0.75317
## Class: 7 Class: 8 Class: 9 Class: 10
## Sensitivity 0.92047 0.77710 0.98572 0.90473
## Specificity 0.99588 0.99840 0.87695 0.97086
## Pos Pred Value 0.96065 0.98276 0.46178 0.77469
## Neg Pred Value 0.99135 0.97453 0.99826 0.98925
## Prevalence 0.09850 0.10479 0.09674 0.09971
## Detection Rate 0.09067 0.08143 0.09536 0.09021
## Detection Prevalence 0.09438 0.08286 0.20650 0.11645
## Balanced Accuracy 0.95818 0.88775 0.93134 0.93779
library(psych)
## Warning: package 'psych' was built under R version 4.1.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
test_house <- read.csv("test_house.csv")
train_house <- read.csv("train_house.csv")
describe(train_house)
## vars n mean sd median trimmed mad min
## Id 1 1460 730.50 421.61 730.5 730.50 541.15 1
## MSSubClass 2 1460 56.90 42.30 50.0 49.15 44.48 20
## MSZoning* 3 1460 4.03 0.63 4.0 4.06 0.00 1
## LotFrontage 4 1201 70.05 24.28 69.0 68.94 16.31 21
## LotArea 5 1460 10516.83 9981.26 9478.5 9563.28 2962.23 1300
## Street* 6 1460 2.00 0.06 2.0 2.00 0.00 1
## Alley* 7 91 1.45 0.50 1.0 1.44 0.00 1
## LotShape* 8 1460 2.94 1.41 4.0 3.05 0.00 1
## LandContour* 9 1460 3.78 0.71 4.0 4.00 0.00 1
## Utilities* 10 1460 1.00 0.03 1.0 1.00 0.00 1
## LotConfig* 11 1460 4.02 1.62 5.0 4.27 0.00 1
## LandSlope* 12 1460 1.06 0.28 1.0 1.00 0.00 1
## Neighborhood* 13 1460 13.15 5.89 13.0 13.11 7.41 1
## Condition1* 14 1460 3.03 0.87 3.0 3.00 0.00 1
## Condition2* 15 1460 3.01 0.26 3.0 3.00 0.00 1
## BldgType* 16 1460 1.49 1.20 1.0 1.14 0.00 1
## HouseStyle* 17 1460 4.04 1.91 3.0 4.03 1.48 1
## OverallQual 18 1460 6.10 1.38 6.0 6.08 1.48 1
## OverallCond 19 1460 5.58 1.11 5.0 5.48 0.00 1
## YearBuilt 20 1460 1971.27 30.20 1973.0 1974.13 37.06 1872
## YearRemodAdd 21 1460 1984.87 20.65 1994.0 1986.37 19.27 1950
## RoofStyle* 22 1460 2.41 0.83 2.0 2.26 0.00 1
## RoofMatl* 23 1460 2.08 0.60 2.0 2.00 0.00 1
## Exterior1st* 24 1460 10.62 3.20 13.0 10.93 1.48 1
## Exterior2nd* 25 1460 11.34 3.54 14.0 11.65 2.97 1
## MasVnrType* 26 1452 2.76 0.62 3.0 2.73 0.00 1
## MasVnrArea 27 1452 103.69 181.07 0.0 63.15 0.00 0
## ExterQual* 28 1460 3.54 0.69 4.0 3.65 0.00 1
## ExterCond* 29 1460 4.73 0.73 5.0 4.95 0.00 1
## Foundation* 30 1460 2.40 0.72 2.0 2.46 1.48 1
## BsmtQual* 31 1423 3.26 0.87 3.0 3.43 1.48 1
## BsmtCond* 32 1423 3.81 0.66 4.0 4.00 0.00 1
## BsmtExposure* 33 1422 3.27 1.15 4.0 3.46 0.00 1
## BsmtFinType1* 34 1423 3.73 1.83 3.0 3.79 2.97 1
## BsmtFinSF1 35 1460 443.64 456.10 383.5 386.08 568.58 0
## BsmtFinType2* 36 1422 5.71 0.94 6.0 5.98 0.00 1
## BsmtFinSF2 37 1460 46.55 161.32 0.0 1.38 0.00 0
## BsmtUnfSF 38 1460 567.24 441.87 477.5 519.29 426.99 0
## TotalBsmtSF 39 1460 1057.43 438.71 991.5 1036.70 347.67 0
## Heating* 40 1460 2.04 0.30 2.0 2.00 0.00 1
## HeatingQC* 41 1460 2.54 1.74 1.0 2.42 0.00 1
## CentralAir* 42 1460 1.93 0.25 2.0 2.00 0.00 1
## Electrical* 43 1459 4.68 1.05 5.0 5.00 0.00 1
## X1stFlrSF 44 1460 1162.63 386.59 1087.0 1129.99 347.67 334
## X2ndFlrSF 45 1460 346.99 436.53 0.0 285.36 0.00 0
## LowQualFinSF 46 1460 5.84 48.62 0.0 0.00 0.00 0
## GrLivArea 47 1460 1515.46 525.48 1464.0 1467.67 483.33 334
## BsmtFullBath 48 1460 0.43 0.52 0.0 0.39 0.00 0
## BsmtHalfBath 49 1460 0.06 0.24 0.0 0.00 0.00 0
## FullBath 50 1460 1.57 0.55 2.0 1.56 0.00 0
## HalfBath 51 1460 0.38 0.50 0.0 0.34 0.00 0
## BedroomAbvGr 52 1460 2.87 0.82 3.0 2.85 0.00 0
## KitchenAbvGr 53 1460 1.05 0.22 1.0 1.00 0.00 0
## KitchenQual* 54 1460 3.34 0.83 4.0 3.50 0.00 1
## TotRmsAbvGrd 55 1460 6.52 1.63 6.0 6.41 1.48 2
## Functional* 56 1460 6.75 0.98 7.0 7.00 0.00 1
## Fireplaces 57 1460 0.61 0.64 1.0 0.53 1.48 0
## FireplaceQu* 58 770 3.73 1.13 3.0 3.80 1.48 1
## GarageType* 59 1379 3.28 1.79 2.0 3.11 0.00 1
## GarageYrBlt 60 1379 1978.51 24.69 1980.0 1981.07 31.13 1900
## GarageFinish* 61 1379 2.18 0.81 2.0 2.23 1.48 1
## GarageCars 62 1460 1.77 0.75 2.0 1.77 0.00 0
## GarageArea 63 1460 472.98 213.80 480.0 469.81 177.91 0
## GarageQual* 64 1379 4.86 0.61 5.0 5.00 0.00 1
## GarageCond* 65 1379 4.90 0.52 5.0 5.00 0.00 1
## PavedDrive* 66 1460 2.86 0.50 3.0 3.00 0.00 1
## WoodDeckSF 67 1460 94.24 125.34 0.0 71.76 0.00 0
## OpenPorchSF 68 1460 46.66 66.26 25.0 33.23 37.06 0
## EnclosedPorch 69 1460 21.95 61.12 0.0 3.87 0.00 0
## X3SsnPorch 70 1460 3.41 29.32 0.0 0.00 0.00 0
## ScreenPorch 71 1460 15.06 55.76 0.0 0.00 0.00 0
## PoolArea 72 1460 2.76 40.18 0.0 0.00 0.00 0
## PoolQC* 73 7 2.14 0.90 2.0 2.14 1.48 1
## Fence* 74 281 2.43 0.86 3.0 2.48 0.00 1
## MiscFeature* 75 54 2.91 0.45 3.0 3.00 0.00 1
## MiscVal 76 1460 43.49 496.12 0.0 0.00 0.00 0
## MoSold 77 1460 6.32 2.70 6.0 6.25 2.97 1
## YrSold 78 1460 2007.82 1.33 2008.0 2007.77 1.48 2006
## SaleType* 79 1460 8.51 1.56 9.0 8.92 0.00 1
## SaleCondition* 80 1460 4.77 1.10 5.0 5.00 0.00 1
## SalePrice 81 1460 180921.20 79442.50 163000.0 170783.29 56338.80 34900
## max range skew kurtosis se
## Id 1460 1459 0.00 -1.20 11.03
## MSSubClass 190 170 1.40 1.56 1.11
## MSZoning* 5 4 -1.73 6.25 0.02
## LotFrontage 313 292 2.16 17.34 0.70
## LotArea 215245 213945 12.18 202.26 261.22
## Street* 2 1 -15.49 238.01 0.00
## Alley* 2 1 0.20 -1.98 0.05
## LotShape* 4 3 -0.61 -1.60 0.04
## LandContour* 4 3 -3.16 8.65 0.02
## Utilities* 2 1 38.13 1453.00 0.00
## LotConfig* 5 4 -1.13 -0.59 0.04
## LandSlope* 3 2 4.80 24.47 0.01
## Neighborhood* 25 24 0.02 -1.06 0.15
## Condition1* 9 8 3.01 16.34 0.02
## Condition2* 8 7 13.14 247.54 0.01
## BldgType* 5 4 2.24 3.41 0.03
## HouseStyle* 8 7 0.31 -0.96 0.05
## OverallQual 10 9 0.22 0.09 0.04
## OverallCond 9 8 0.69 1.09 0.03
## YearBuilt 2010 138 -0.61 -0.45 0.79
## YearRemodAdd 2010 60 -0.50 -1.27 0.54
## RoofStyle* 6 5 1.47 0.61 0.02
## RoofMatl* 8 7 8.09 66.28 0.02
## Exterior1st* 15 14 -0.72 -0.37 0.08
## Exterior2nd* 16 15 -0.69 -0.52 0.09
## MasVnrType* 4 3 -0.07 -0.13 0.02
## MasVnrArea 1600 1600 2.66 10.03 4.75
## ExterQual* 4 3 -1.83 3.86 0.02
## ExterCond* 5 4 -2.56 5.29 0.02
## Foundation* 6 5 0.09 1.02 0.02
## BsmtQual* 4 3 -1.31 1.27 0.02
## BsmtCond* 4 3 -3.39 10.14 0.02
## BsmtExposure* 4 3 -1.15 -0.39 0.03
## BsmtFinType1* 6 5 -0.02 -1.39 0.05
## BsmtFinSF1 5644 5644 1.68 11.06 11.94
## BsmtFinType2* 6 5 -3.56 12.32 0.02
## BsmtFinSF2 1474 1474 4.25 20.01 4.22
## BsmtUnfSF 2336 2336 0.92 0.46 11.56
## TotalBsmtSF 6110 6110 1.52 13.18 11.48
## Heating* 6 5 9.83 110.98 0.01
## HeatingQC* 5 4 0.48 -1.51 0.05
## CentralAir* 2 1 -3.52 10.42 0.01
## Electrical* 5 4 -3.06 7.49 0.03
## X1stFlrSF 4692 4358 1.37 5.71 10.12
## X2ndFlrSF 2065 2065 0.81 -0.56 11.42
## LowQualFinSF 572 572 8.99 82.83 1.27
## GrLivArea 5642 5308 1.36 4.86 13.75
## BsmtFullBath 3 3 0.59 -0.84 0.01
## BsmtHalfBath 2 2 4.09 16.31 0.01
## FullBath 3 3 0.04 -0.86 0.01
## HalfBath 2 2 0.67 -1.08 0.01
## BedroomAbvGr 8 8 0.21 2.21 0.02
## KitchenAbvGr 3 3 4.48 21.42 0.01
## KitchenQual* 4 3 -1.42 1.72 0.02
## TotRmsAbvGrd 14 12 0.67 0.87 0.04
## Functional* 7 6 -4.08 16.37 0.03
## Fireplaces 3 3 0.65 -0.22 0.02
## FireplaceQu* 5 4 -0.16 -0.98 0.04
## GarageType* 6 5 0.76 -1.30 0.05
## GarageYrBlt 2010 110 -0.65 -0.42 0.66
## GarageFinish* 3 2 -0.35 -1.41 0.02
## GarageCars 4 4 -0.34 0.21 0.02
## GarageArea 1418 1418 0.18 0.90 5.60
## GarageQual* 5 4 -4.43 18.25 0.02
## GarageCond* 5 4 -5.28 26.77 0.01
## PavedDrive* 3 2 -3.30 9.22 0.01
## WoodDeckSF 857 857 1.54 2.97 3.28
## OpenPorchSF 547 547 2.36 8.44 1.73
## EnclosedPorch 552 552 3.08 10.37 1.60
## X3SsnPorch 508 508 10.28 123.06 0.77
## ScreenPorch 480 480 4.11 18.34 1.46
## PoolArea 738 738 14.80 222.19 1.05
## PoolQC* 3 2 -0.22 -1.90 0.34
## Fence* 4 3 -0.57 -0.88 0.05
## MiscFeature* 4 3 -2.93 10.71 0.06
## MiscVal 15500 15500 24.43 697.64 12.98
## MoSold 12 11 0.21 -0.41 0.07
## YrSold 2010 4 0.10 -1.19 0.03
## SaleType* 9 8 -3.83 14.57 0.04
## SaleCondition* 6 5 -2.74 6.82 0.03
## SalePrice 755000 720100 1.88 6.50 2079.11
train_house %>% ggplot(aes(x=SalePrice)) + geom_boxplot()
train_house %>% ggplot(aes(y=SalePrice,x=X1stFlrSF)) + geom_point()
train_house %>% ggplot(aes(y=SalePrice,x=LotArea)) + geom_point()
# correlations
correlation <- train_house %>% select(LotArea, X1stFlrSF, X2ndFlrSF) %>% cor()
correlation
## LotArea X1stFlrSF X2ndFlrSF
## LotArea 1.00000000 0.2994746 0.05098595
## X1stFlrSF 0.29947458 1.0000000 -0.20264618
## X2ndFlrSF 0.05098595 -0.2026462 1.00000000
I’m surprised that there is low correlation between first and second floor square footage as I would have assumed that they would be collinear as well.
Familywise error is the probability of making multiple false discoveries (Type 1 Errors). I think I would be worried about Type 1 errors because of the collinearity in the variables.
cor.test(train_house$X1stFlrSF,train_house$X2ndFlrSF, conf.level=.8)
##
## Pearson's product-moment correlation
##
## data: train_house$X1stFlrSF and train_house$X2ndFlrSF
## t = -7.9017, df = 1458, p-value = 5.379e-15
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
## -0.2346122 -0.1702424
## sample estimates:
## cor
## -0.2026462
precision_matrix <- solve(correlation)
precision_matrix
## LotArea X1stFlrSF X2ndFlrSF
## LotArea 1.1144421 -0.3600471 -0.1297831
## X1stFlrSF -0.3600471 1.1591459 0.2532538
## X2ndFlrSF -0.1297831 0.2532538 1.0579380
precision_matrix %*% correlation
## LotArea X1stFlrSF X2ndFlrSF
## LotArea 1.000000e+00 1.283695e-16 0
## X1stFlrSF -4.163336e-17 1.000000e+00 0
## X2ndFlrSF 1.387779e-17 -2.775558e-17 1
correlation %*% precision_matrix
## LotArea X1stFlrSF X2ndFlrSF
## LotArea 1.000000e+00 1.387779e-17 -1.387779e-17
## X1stFlrSF 6.591949e-17 1.000000e+00 -2.775558e-17
## X2ndFlrSF 2.775558e-17 0.000000e+00 1.000000e+00
LU Decomp using the matrix library:
library(matrixcalc)
lu.decomposition((precision_matrix))
## $L
## [,1] [,2] [,3]
## [1,] 1.0000000 0.0000000 0
## [2,] -0.3230739 1.0000000 0
## [3,] -0.1164556 0.2026462 1
##
## $U
## [,1] [,2] [,3]
## [1,] 1.114442 -0.3600471 -0.1297831
## [2,] 0.000000 1.0428241 0.2113243
## [3,] 0.000000 0.0000000 1.0000000
TBD
Since house prices tend to go up across multiple variables, i’ll attempt a linear regression model fit to multiple variables:
library(dplyr)
train_house$Utilities <- as.factor(train_house$Utilities)
train_house$BldgType <- as.factor(train_house$BldgType)
lm <- lm(SalePrice~X1stFlrSF + LotArea + OverallQual + OverallCond + Utilities + BldgType, data = train_house)
summary(lm)
##
## Call:
## lm(formula = SalePrice ~ X1stFlrSF + LotArea + OverallQual +
## OverallCond + Utilities + BldgType, data = train_house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -400255 -24732 -1601 19707 346946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.201e+05 8.381e+03 -14.332 < 2e-16 ***
## X1stFlrSF 5.264e+01 3.572e+00 14.737 < 2e-16 ***
## LotArea 8.127e-01 1.205e-01 6.746 2.18e-11 ***
## OverallQual 3.787e+04 9.663e+02 39.193 < 2e-16 ***
## OverallCond 5.490e+02 1.034e+03 0.531 0.59563
## UtilitiesNoSeWa -5.534e+04 4.275e+04 -1.295 0.19566
## BldgType2fmCon -9.172e+03 7.861e+03 -1.167 0.24347
## BldgTypeDuplex -1.463e+04 6.272e+03 -2.332 0.01983 *
## BldgTypeTwnhs -1.898e+04 6.817e+03 -2.784 0.00543 **
## BldgTypeTwnhsE -1.859e+04 4.342e+03 -4.280 1.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42720 on 1450 degrees of freedom
## Multiple R-squared: 0.7126, Adjusted R-squared: 0.7108
## F-statistic: 399.5 on 9 and 1450 DF, p-value: < 2.2e-16
predictions <- predict(lm, test_house)
predictions <- as.data.frame(predictions, row.names="SalePrice")
## Warning in as.data.frame.numeric(predictions, row.names = "SalePrice"):
## 'row.names' is not a character vector of length 1459 -- omitting it. Will be an
## error!
predictions$Id <- test_house$Id
predictions <- predictions %>% dplyr::rename("SalePrice" = "predictions")
# replace NAs of missing data with mean of dataset
predictions$SalePrice[is.na(predictions$SalePrice)] <- mean(predictions$SalePrice, na.rm = TRUE)
write.csv(predictions,"C:\\Users\\Santi\\OneDrive\\Documents\\GitHub\\DATA605\\Final\\predictions.csv", row.names = FALSE)
My best attempt was with only using 1stFlrSF suprisingly. Just a single variable model.
Username: sserrot Score: .33063