library(readr)
# Import the Bob Ross episode data set from CSV.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
bobross <- read_csv(
  file = "C:/Users/Lynx/Documents/MSDA/MSDA 622 - Big Data/Homework 1/bob_ross.csv"
)
## Rows: 403 Columns: 69
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): EPISODE, TITLE
## dbl (67): APPLE_FRAME, AURORA_BOREALIS, BARN, BEACH, BOAT, BRIDGE, BUILDING,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Inspect the column names and types of the imported data
str(bobross)
## spc_tbl_ [403 × 69] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ EPISODE : chr [1:403] "S01E01" "S01E02" "S01E03" "S01E04" ...
## $ TITLE : chr [1:403] "\"A WALK IN THE WOODS\"" "\"MT. MCKINLEY\"" "\"EBONY SUNSET\"" "\"WINTER MIST\"" ...
## $ APPLE_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ AURORA_BOREALIS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ BARN : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ BEACH : num [1:403] 0 0 0 0 0 0 0 0 1 0 ...
## $ BOAT : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ BRIDGE : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ BUILDING : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ BUSHES : num [1:403] 1 0 0 1 0 0 0 1 0 1 ...
## $ CABIN : num [1:403] 0 1 1 0 0 1 0 0 0 0 ...
## $ CACTUS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ CIRCLE_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ CIRRUS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ CLIFF : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ CLOUDS : num [1:403] 0 1 0 1 0 0 0 0 1 0 ...
## $ CONIFER : num [1:403] 0 1 1 1 0 1 0 1 0 1 ...
## $ CUMULUS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ DECIDUOUS : num [1:403] 1 0 0 0 1 0 1 0 0 1 ...
## $ DIANE_ANDRE : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ DOCK : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ DOUBLE_OVAL_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ FARM : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ FENCE : num [1:403] 0 0 1 0 0 0 0 0 1 0 ...
## $ FIRE : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ FLORIDA_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ FLOWERS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ FOG : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ FRAMED : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ GRASS : num [1:403] 1 0 0 0 0 0 0 0 0 0 ...
## $ GUEST : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ HALF_CIRCLE_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ HALF_OVAL_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ HILLS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ LAKE : num [1:403] 0 0 0 1 0 1 1 1 0 1 ...
## $ LAKES : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ LIGHTHOUSE : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ MILL : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ MOON : num [1:403] 0 0 0 0 0 1 0 0 0 0 ...
## $ MOUNTAIN : num [1:403] 0 1 1 1 0 1 1 1 0 1 ...
## $ MOUNTAINS : num [1:403] 0 0 1 0 0 1 1 1 0 0 ...
## $ NIGHT : num [1:403] 0 0 0 0 0 1 0 0 0 0 ...
## $ OCEAN : num [1:403] 0 0 0 0 0 0 0 0 1 0 ...
## $ OVAL_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ PALM_TREES : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ PATH : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ PERSON : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ PORTRAIT : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ RECTANGLE_3D_FRAME: num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ RECTANGULAR_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ RIVER : num [1:403] 1 0 0 0 1 0 0 0 0 0 ...
## $ ROCKS : num [1:403] 0 0 0 0 1 0 0 0 0 0 ...
## $ SEASHELL_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ SNOW : num [1:403] 0 1 0 0 0 1 0 0 0 0 ...
## $ SNOWY_MOUNTAIN : num [1:403] 0 1 0 1 0 1 1 0 0 0 ...
## $ SPLIT_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ STEVE_ROSS : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ STRUCTURE : num [1:403] 0 0 1 0 0 1 0 0 0 0 ...
## $ SUN : num [1:403] 0 0 1 0 0 0 0 0 0 0 ...
## $ TOMB_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ TREE : num [1:403] 1 1 1 1 1 1 1 1 0 1 ...
## $ TREES : num [1:403] 1 1 1 1 1 1 1 1 0 1 ...
## $ TRIPLE_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ WATERFALL : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ WAVES : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ WINDMILL : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ WINDOW_FRAME : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## $ WINTER : num [1:403] 0 1 1 0 0 1 0 0 0 0 ...
## $ WOOD_FRAMED : num [1:403] 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. EPISODE = col_character(),
## .. TITLE = col_character(),
## .. APPLE_FRAME = col_double(),
## .. AURORA_BOREALIS = col_double(),
## .. BARN = col_double(),
## .. BEACH = col_double(),
## .. BOAT = col_double(),
## .. BRIDGE = col_double(),
## .. BUILDING = col_double(),
## .. BUSHES = col_double(),
## .. CABIN = col_double(),
## .. CACTUS = col_double(),
## .. CIRCLE_FRAME = col_double(),
## .. CIRRUS = col_double(),
## .. CLIFF = col_double(),
## .. CLOUDS = col_double(),
## .. CONIFER = col_double(),
## .. CUMULUS = col_double(),
## .. DECIDUOUS = col_double(),
## .. DIANE_ANDRE = col_double(),
## .. DOCK = col_double(),
## .. DOUBLE_OVAL_FRAME = col_double(),
## .. FARM = col_double(),
## .. FENCE = col_double(),
## .. FIRE = col_double(),
## .. FLORIDA_FRAME = col_double(),
## .. FLOWERS = col_double(),
## .. FOG = col_double(),
## .. FRAMED = col_double(),
## .. GRASS = col_double(),
## .. GUEST = col_double(),
## .. HALF_CIRCLE_FRAME = col_double(),
## .. HALF_OVAL_FRAME = col_double(),
## .. HILLS = col_double(),
## .. LAKE = col_double(),
## .. LAKES = col_double(),
## .. LIGHTHOUSE = col_double(),
## .. MILL = col_double(),
## .. MOON = col_double(),
## .. MOUNTAIN = col_double(),
## .. MOUNTAINS = col_double(),
## .. NIGHT = col_double(),
## .. OCEAN = col_double(),
## .. OVAL_FRAME = col_double(),
## .. PALM_TREES = col_double(),
## .. PATH = col_double(),
## .. PERSON = col_double(),
## .. PORTRAIT = col_double(),
## .. RECTANGLE_3D_FRAME = col_double(),
## .. RECTANGULAR_FRAME = col_double(),
## .. RIVER = col_double(),
## .. ROCKS = col_double(),
## .. SEASHELL_FRAME = col_double(),
## .. SNOW = col_double(),
## .. SNOWY_MOUNTAIN = col_double(),
## .. SPLIT_FRAME = col_double(),
## .. STEVE_ROSS = col_double(),
## .. STRUCTURE = col_double(),
## .. SUN = col_double(),
## .. TOMB_FRAME = col_double(),
## .. TREE = col_double(),
## .. TREES = col_double(),
## .. TRIPLE_FRAME = col_double(),
## .. WATERFALL = col_double(),
## .. WAVES = col_double(),
## .. WINDMILL = col_double(),
## .. WINDOW_FRAME = col_double(),
## .. WINTER = col_double(),
## .. WOOD_FRAMED = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Association rule mining on the painting-element indicators ----
# EPISODE and TITLE are character identifier columns, not painting elements,
# so remove them and keep only the numeric indicator columns.
bobross$EPISODE <- NULL
bobross$TITLE <- NULL

# apriori() works on a "transactions" object; each row of the indicator
# matrix becomes one transaction.
bobross <- as(as.matrix(bobross), "transactions")

# Keep rules with at least 30% support and 90% confidence. Parameter names
# are written out in full rather than relying on partial matching.
bob_rules <- apriori(
  bobross,
  parameter = list(support = 0.3, confidence = 0.9, target = "rules")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.3 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 120
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 403 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [31 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print every mined rule together with its quality measures
inspect(bob_rules)
## lhs rhs support confidence coverage
## [1] {RIVER} => {TREES} 0.3002481 0.9603175 0.3126551
## [2] {RIVER} => {TREE} 0.3126551 1.0000000 0.3126551
## [3] {GRASS} => {TREE} 0.3374690 0.9577465 0.3523573
## [4] {LAKE} => {TREES} 0.3325062 0.9370629 0.3548387
## [5] {LAKE} => {TREE} 0.3523573 0.9930070 0.3548387
## [6] {MOUNTAIN} => {TREES} 0.3672457 0.9250000 0.3970223
## [7] {MOUNTAIN} => {TREE} 0.3870968 0.9750000 0.3970223
## [8] {DECIDUOUS} => {TREES} 0.5136476 0.9118943 0.5632754
## [9] {DECIDUOUS} => {TREE} 0.5632754 1.0000000 0.5632754
## [10] {CONIFER} => {TREES} 0.5161290 0.9811321 0.5260546
## [11] {CONIFER} => {TREE} 0.5260546 1.0000000 0.5260546
## [12] {TREES} => {TREE} 0.8362283 1.0000000 0.8362283
## [13] {TREE} => {TREES} 0.8362283 0.9335180 0.8957816
## [14] {RIVER, TREES} => {TREE} 0.3002481 1.0000000 0.3002481
## [15] {RIVER, TREE} => {TREES} 0.3002481 0.9603175 0.3126551
## [16] {GRASS, TREES} => {TREE} 0.3126551 1.0000000 0.3126551
## [17] {GRASS, TREE} => {TREES} 0.3126551 0.9264706 0.3374690
## [18] {LAKE, TREES} => {TREE} 0.3325062 1.0000000 0.3325062
## [19] {LAKE, TREE} => {TREES} 0.3325062 0.9436620 0.3523573
## [20] {CONIFER, MOUNTAIN} => {TREES} 0.3126551 0.9767442 0.3200993
## [21] {CONIFER, MOUNTAIN} => {TREE} 0.3200993 1.0000000 0.3200993
## [22] {MOUNTAIN, TREES} => {TREE} 0.3672457 1.0000000 0.3672457
## [23] {MOUNTAIN, TREE} => {TREES} 0.3672457 0.9487179 0.3870968
## [24] {CLOUDS, TREES} => {TREE} 0.3424318 1.0000000 0.3424318
## [25] {CLOUDS, TREE} => {TREES} 0.3424318 0.9387755 0.3647643
## [26] {DECIDUOUS, TREES} => {TREE} 0.5136476 1.0000000 0.5136476
## [27] {DECIDUOUS, TREE} => {TREES} 0.5136476 0.9118943 0.5632754
## [28] {CONIFER, TREES} => {TREE} 0.5161290 1.0000000 0.5161290
## [29] {CONIFER, TREE} => {TREES} 0.5161290 0.9811321 0.5260546
## [30] {CONIFER, MOUNTAIN, TREES} => {TREE} 0.3126551 1.0000000 0.3126551
## [31] {CONIFER, MOUNTAIN, TREE} => {TREES} 0.3126551 0.9767442 0.3200993
## lift count
## [1] 1.148392 121
## [2] 1.116343 126
## [3] 1.069174 136
## [4] 1.120583 134
## [5] 1.108537 142
## [6] 1.106157 148
## [7] 1.088435 156
## [8] 1.090485 207
## [9] 1.116343 227
## [10] 1.173283 208
## [11] 1.116343 212
## [12] 1.116343 337
## [13] 1.116343 337
## [14] 1.116343 121
## [15] 1.148392 121
## [16] 1.116343 126
## [17] 1.107916 126
## [18] 1.116343 134
## [19] 1.128474 134
## [20] 1.168035 126
## [21] 1.116343 129
## [22] 1.116343 148
## [23] 1.134520 148
## [24] 1.116343 138
## [25] 1.122631 138
## [26] 1.116343 207
## [27] 1.090485 207
## [28] 1.116343 208
## [29] 1.173283 208
## [30] 1.116343 126
## [31] 1.168035 126
# Import the NHANES training split from CSV.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
nhanes_train <- read_csv(
  file = "C:/Users/Lynx/Documents/MSDA/MSDA 622 - Big Data/Homework 1/nhanes_train.csv"
)
## Rows: 2871 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): Age, Weight, Height, Pulse
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the column names and types of the training data
str(nhanes_train)
## spc_tbl_ [2,871 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:2871] 80 48 20 18 46 63 42 43 46 50 ...
## $ Weight: num [1:2871] 97.6 120.7 46.8 100.2 112.2 ...
## $ Height: num [1:2871] 176 186 164 177 180 ...
## $ Pulse : num [1:2871] 58 56 78 84 90 66 100 100 54 74 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Weight = col_double(),
## .. Height = col_double(),
## .. Pulse = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Regression model ----
# Ordinary least squares fit of Weight on every other column of the
# training data.
model <- lm(
  formula = Weight ~ .,
  data = nhanes_train
)
summary(model)
##
## Call:
## lm(formula = Weight ~ ., data = nhanes_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.860 -13.096 -3.099 10.227 116.080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -112.64017 6.78932 -16.591 < 2e-16 ***
## Age 0.12471 0.02045 6.097 1.22e-09 ***
## Height 1.01324 0.03522 28.768 < 2e-16 ***
## Pulse 0.24685 0.03019 8.177 4.32e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.67 on 2867 degrees of freedom
## Multiple R-squared: 0.2311, Adjusted R-squared: 0.2303
## F-statistic: 287.2 on 3 and 2867 DF, p-value: < 2.2e-16
#Decision Tree
library(rpart)
library(rpart.plot)
# Fit a regression tree for Weight using all remaining columns as predictors
reg_tree <- rpart(
  formula = Weight ~ .,
  data = nhanes_train
)

# Draw the fitted tree
prp(
  reg_tree,
  digits = 4,
  extra = 1
)
#Bagging Model
library(ipred)
# Bagging fits each of the 100 trees on a bootstrap resample of the training
# data, so fix the RNG seed (value is arbitrary) to make the fit and the
# downstream test MSE reproducible.
set.seed(622)
bag_model <- bagging(
  formula = Weight ~ .,
  data = nhanes_train,
  nbagg = 100
)
bag_model
##
## Bagging regression trees with 100 bootstrap replications
##
## Call: bagging.data.frame(formula = Weight ~ ., data = nhanes_train,
## nbagg = 100)
#Random Forest
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
# Random forests resample rows and candidate split variables at random, so
# fix the RNG seed (value is arbitrary); without it the reported error
# changes from run to run.
set.seed(622)
rf_model <- randomForest(
  formula = Weight ~ .,
  data = nhanes_train,
  importance = TRUE,
  ntree = 200
)
rf_model
##
## Call:
## randomForest(formula = Weight ~ ., data = nhanes_train, importance = TRUE, ntree = 200)
## Type of random forest: regression
## Number of trees: 200
## No. of variables tried at each split: 1
##
## Mean of squared residuals: 272.8998
## % Var explained: 39.73
#Boosting Model
library(gbm)
## Loaded gbm 2.1.8.1
# Gradient-boosted regression trees for Weight (squared-error loss).
# gbm() is called with its default settings for row subsampling, which
# (per the gbm documentation, bag.fraction) draws random subsamples, so fix
# the RNG seed (value is arbitrary) to make the fit reproducible.
set.seed(622)
bob_boost <- gbm(
  formula = Weight ~ .,
  data = nhanes_train,
  distribution = "gaussian",
  n.trees = 200,
  shrinkage = 0.03,
  interaction.depth = 5
)

# Relative influence of each predictor
summary(bob_boost)
## var rel.inf
## Height Height 66.04329
## Pulse Pulse 17.56052
## Age Age 16.39619
# Import the NHANES test split from CSV.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
nhanes_test <- read_csv(
  file = "C:/Users/Lynx/Documents/MSDA/MSDA 622 - Big Data/Homework 1/nhanes_test.csv"
)
## Rows: 2872 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): Age, Weight, Height, Pulse
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Regression MSE: mean squared error of the linear model on the test data
model_pred <- predict(model, newdata = nhanes_test)
mean((nhanes_test[["Weight"]] - model_pred)^2)
## [1] 332.8215
# Decision tree MSE: mean squared error of the regression tree on the test
# data. Uses `<-` for assignment, matching the other MSE computations.
tree_pred <- predict(reg_tree, newdata = nhanes_test)
mean((tree_pred - nhanes_test$Weight)^2)
## [1] 334.6264
# Bagging MSE: mean squared error of the bagged trees on the test data
bag_pred <- predict(
  bag_model,
  newdata = nhanes_test
)
mean((nhanes_test[["Weight"]] - bag_pred)^2)
## [1] 326.7834
# Random forest MSE: mean squared error of the forest on the test data
rf_pred <- predict(rf_model, newdata = nhanes_test)
mean((nhanes_test[["Weight"]] - rf_pred)^2)
## [1] 262.4927
# Boosting MSE: mean squared error of the boosted model on the test data,
# predicting with all 200 trees
boost_pred <- predict(
  bob_boost,
  newdata = nhanes_test,
  n.trees = 200
)
mean((nhanes_test[["Weight"]] - boost_pred)^2)
## [1] 315.4649
Based on the test-set results, the best model is the Random Forest model, which has the lowest Mean Squared Error (262.4927).
# Re-inspect the structure of the training data before summarising its columns
str(nhanes_train)
## spc_tbl_ [2,871 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:2871] 80 48 20 18 46 63 42 43 46 50 ...
## $ Weight: num [1:2871] 97.6 120.7 46.8 100.2 112.2 ...
## $ Height: num [1:2871] 176 186 164 177 180 ...
## $ Pulse : num [1:2871] 58 56 78 84 90 66 100 100 54 74 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Weight = col_double(),
## .. Height = col_double(),
## .. Pulse = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Mean of Age
mean(nhanes_train[["Age"]])
## [1] 45.14699
# Mean of Weight
mean(nhanes_train[["Weight"]])
## [1] 82.63375
# Mean of Height
mean(nhanes_train[["Height"]])
## [1] 169.5465
# Mean of Pulse
mean(nhanes_train[["Pulse"]])
## [1] 72.32045
a) Perform k-means clustering on the data in the nhanes_train CSV file. Create 4 distinct clusters.
library(fpc)
# k-means picks its initial centres at random, so both the partition and the
# cluster numbering can change between runs. Fix the RNG seed (value is
# arbitrary) so the cluster assignments are reproducible.
# NOTE(review): the four columns are clustered on their raw scales; no
# scaling step is visible here — confirm that this is intended.
set.seed(622)
fit <- kmeans(nhanes_train[, 1:4], centers = 4)
fit
## K-means clustering with 4 clusters of sizes 716, 487, 847, 821
##
## Cluster means:
## Age Weight Height Pulse
## 1 30.04888 63.92165 165.1246 75.13966
## 2 48.06571 117.06982 176.2801 75.34292
## 3 64.30224 73.19764 164.9505 68.76033
## 4 36.82095 88.26090 174.1502 71.74178
##
## Clustering vector:
## [1] 3 2 1 4 2 3 4 4 4 3 1 1 4 2 2 4 1 1 1 3 4 1 3 4 3 3 3 2 1 4 4 3 1 2 1 4 3
## [38] 3 4 4 4 3 3 4 4 3 1 3 4 3 3 3 1 2 1 3 2 3 3 3 4 1 1 1 3 1 2 4 4 1 3 3 4 3
## [75] 2 1 1 4 1 4 3 4 2 1 2 1 1 3 3 4 1 3 1 2 2 3 4 1 1 1 3 3 3 1 1 2 3 4 3 1 1
## [112] 1 4 2 3 1 3 4 3 1 1 3 3 3 3 4 4 2 2 4 4 4 3 2 3 1 1 2 2 3 3 3 3 3 4 2 3 4
## [149] 1 3 4 1 4 1 3 3 4 1 4 4 4 1 2 4 2 1 1 4 1 3 4 4 4 1 4 4 1 4 1 3 4 1 4 1 1
## [186] 3 2 3 4 1 3 4 2 4 3 4 3 1 1 3 4 3 4 4 2 3 2 3 3 1 4 4 2 2 4 2 3 1 1 2 4 1
## [223] 1 2 4 4 1 2 3 3 1 4 2 3 4 2 2 3 3 4 1 1 4 3 3 2 1 3 3 3 1 4 3 4 1 2 3 4 3
## [260] 4 3 2 2 3 4 3 2 2 3 1 4 1 4 2 1 4 4 2 4 3 3 3 3 3 3 2 4 1 4 1 4 2 2 1 1 1
## [297] 3 2 2 1 4 2 1 1 4 2 3 4 1 3 3 1 1 4 1 3 1 4 3 1 2 2 4 4 4 1 2 3 1 1 1 3 4
## [334] 1 3 3 3 1 1 2 3 4 4 3 4 3 1 1 4 3 2 3 2 1 2 1 2 1 2 4 3 3 1 4 4 1 3 1 3 1
## [371] 3 1 4 1 4 1 3 4 1 3 4 3 4 4 3 1 4 1 2 1 3 3 3 1 3 2 4 4 1 4 4 4 1 4 3 4 3
## [408] 3 2 1 4 4 3 4 3 2 3 3 1 3 3 3 3 4 4 1 3 2 3 4 2 1 1 1 4 1 1 2 4 1 3 1 4 4
## [445] 1 3 1 4 1 2 3 4 3 3 4 3 2 3 1 1 4 4 4 2 2 4 4 3 4 3 4 3 4 2 4 4 2 4 2 4 1
## [482] 4 3 1 1 3 2 3 4 4 4 1 4 1 3 1 4 4 3 1 3 2 2 4 2 4 4 4 1 3 3 4 3 1 3 3 4 2
## [519] 4 1 1 1 4 4 3 3 1 2 1 4 3 3 3 3 4 2 4 4 1 3 2 2 3 4 1 4 2 1 4 1 3 4 4 3 4
## [556] 2 3 1 2 3 4 4 3 3 4 4 1 3 3 3 2 1 3 2 1 4 2 1 3 4 1 1 2 4 3 1 1 3 2 2 3 1
## [593] 4 4 3 4 3 3 3 1 1 4 1 3 3 1 3 2 1 4 3 1 4 3 3 4 1 4 3 1 1 4 1 4 4 4 2 4 3
## [630] 3 3 4 3 4 2 3 3 2 2 4 3 2 3 1 1 2 4 4 3 4 1 2 3 1 3 4 4 1 4 3 4 1 1 1 1 3
## [667] 4 4 1 1 1 1 2 3 2 1 3 4 2 4 2 4 4 2 2 1 1 1 3 4 3 2 4 4 3 3 1 2 3 4 2 4 1
## [704] 2 2 2 3 3 3 3 1 1 3 3 1 1 1 1 4 3 3 3 3 4 1 1 4 3 2 3 2 2 1 4 1 3 3 4 2 4
## [741] 1 3 2 1 1 3 1 1 3 4 1 4 4 3 3 3 3 3 4 4 1 4 4 3 1 4 1 4 1 3 4 2 2 2 1 4 2
## [778] 4 3 4 1 4 3 3 4 3 4 2 1 1 2 1 1 3 4 3 3 4 3 2 1 4 3 4 3 1 4 3 3 1 1 2 4 2
## [815] 2 4 2 4 1 4 3 1 4 1 4 4 3 2 4 3 3 2 3 4 1 3 3 1 4 4 4 4 1 3 4 3 1 4 4 3 1
## [852] 2 3 4 1 4 1 4 4 1 3 2 3 1 1 3 1 3 1 3 3 2 2 4 3 4 3 3 4 1 4 3 2 3 4 4 4 4
## [889] 2 3 4 4 2 3 1 4 2 3 4 2 3 2 3 4 4 3 3 1 3 4 4 1 4 4 4 4 1 4 1 3 1 4 2 4 1
## [926] 1 2 3 3 1 1 4 3 1 2 3 4 4 3 3 3 4 4 3 4 3 4 1 2 4 1 1 3 3 3 4 2 4 3 4 1 1
## [963] 1 2 1 1 1 2 4 4 3 4 3 1 4 2 1 4 2 2 1 2 3 2 4 3 1 3 1 3 1 4 1 2 1 3 2 4 2
## [1000] 3 1 4 3 2 3 4 1 2 3 4 2 1 1 3 4 4 4 4 3 1 4 2 2 3 4 3 1 2 1 2 3 1 1 2 2 1
## [1037] 2 4 1 3 2 1 1 4 1 3 2 1 3 1 1 1 1 3 2 3 4 4 4 4 4 1 4 3 3 3 1 1 1 3 4 1 1
## [1074] 4 2 3 4 2 3 3 3 2 2 1 4 2 3 1 1 1 1 3 3 2 2 4 1 4 4 1 4 3 4 3 3 4 1 2 4 4
## [1111] 4 1 3 4 2 3 2 2 1 3 2 2 2 4 4 3 1 4 4 3 3 4 1 4 1 1 4 3 2 1 2 4 4 1 3 1 4
## [1148] 3 1 3 3 3 1 1 1 3 3 4 4 2 3 1 1 4 1 2 2 1 3 4 4 2 2 3 3 2 4 2 3 1 2 1 1 3
## [1185] 3 1 3 4 2 2 4 3 4 4 2 2 3 4 4 3 4 3 3 1 4 1 4 3 1 1 2 4 4 4 4 3 4 3 3 2 4
## [1222] 4 4 1 3 3 1 3 1 1 3 2 1 1 2 3 1 2 4 4 4 3 3 1 2 2 1 2 3 3 1 1 1 3 3 1 2 2
## [1259] 2 3 3 3 4 3 1 1 1 3 3 4 3 4 4 1 4 4 3 4 4 3 3 3 3 3 1 4 3 4 1 1 1 1 1 1 1
## [1296] 1 2 4 4 2 4 2 4 3 3 3 3 4 3 3 2 1 2 4 3 4 2 1 3 3 2 4 1 4 1 3 4 3 3 2 2 2
## [1333] 3 3 1 4 2 3 3 2 3 3 3 2 4 4 3 1 2 1 4 1 3 3 4 3 2 4 1 4 3 3 3 2 3 4 3 1 4
## [1370] 4 4 3 1 3 3 4 1 3 1 2 4 1 3 3 3 3 3 1 1 3 4 3 4 4 2 1 3 4 3 3 3 4 3 1 2 1
## [1407] 3 2 4 3 2 4 3 2 4 2 1 4 3 3 1 2 4 3 2 4 4 3 3 3 3 2 3 2 2 1 1 4 3 3 3 1 4
## [1444] 2 4 1 1 3 3 3 2 3 3 1 2 4 1 1 3 4 4 4 3 1 2 1 1 4 1 1 1 1 4 3 2 4 1 1 4 3
## [1481] 3 3 2 2 1 2 4 3 4 1 1 3 2 4 1 1 4 3 2 1 3 3 2 4 3 3 3 2 4 1 3 1 4 4 3 1 3
## [1518] 2 4 3 3 1 3 2 2 3 3 3 1 3 2 4 1 4 4 4 1 3 4 4 3 4 2 1 1 2 1 3 3 4 4 4 1 4
## [1555] 2 3 3 1 4 3 2 3 3 3 1 1 3 3 4 2 4 1 2 2 1 4 3 4 4 2 3 1 2 4 4 1 4 3 1 4 3
## [1592] 3 4 1 4 1 3 1 2 4 3 4 3 3 3 4 3 3 3 4 4 4 2 2 4 3 2 2 3 4 1 4 4 3 4 4 2 3
## [1629] 4 3 4 3 4 1 1 1 4 2 3 1 4 4 3 1 4 4 3 3 4 3 4 1 4 3 2 4 4 1 3 4 4 3 2 3 4
## [1666] 3 3 4 1 4 2 1 1 1 2 3 4 1 2 2 4 1 1 4 1 4 3 3 1 4 2 3 3 3 1 2 2 1 2 2 4 4
## [1703] 2 1 3 3 4 2 1 1 1 4 2 3 3 3 2 3 2 4 4 3 2 3 2 1 4 4 1 4 3 2 3 1 3 1 4 1 2
## [1740] 4 2 2 1 1 3 4 4 3 4 4 4 3 2 3 1 4 4 4 1 3 1 3 3 1 2 4 2 4 1 1 4 3 1 1 1 1
## [1777] 3 2 3 2 3 4 1 4 4 3 4 1 4 3 4 3 4 4 4 1 2 1 3 4 1 3 3 3 3 1 3 2 3 3 3 2 4
## [1814] 3 1 4 4 1 1 4 3 1 1 3 2 3 4 3 3 3 3 3 4 3 4 1 1 3 4 3 2 4 2 4 1 3 1 1 4 1
## [1851] 2 4 3 3 4 3 3 3 3 1 4 2 3 3 4 3 4 1 4 1 4 3 3 2 3 4 1 3 3 3 1 1 4 3 4 4 4
## [1888] 4 1 1 4 3 4 2 1 3 2 3 2 4 4 3 4 3 3 4 1 1 4 2 1 2 1 2 3 4 3 1 3 2 1 4 1 2
## [1925] 4 4 2 3 2 4 3 2 2 3 3 1 3 2 4 4 1 1 2 4 2 1 2 1 2 2 3 3 1 4 4 4 1 3 3 3 4
## [1962] 3 4 1 3 4 1 3 3 3 1 1 1 4 4 3 3 1 3 2 1 4 4 4 3 2 1 1 3 3 1 3 1 4 4 2 4 1
## [1999] 1 4 1 1 3 2 4 4 2 2 4 4 4 3 2 3 1 4 3 4 1 1 4 1 3 4 3 2 2 4 2 1 1 2 4 2 1
## [2036] 3 3 1 4 4 3 4 4 4 4 1 2 2 1 1 4 1 4 2 1 1 2 2 1 1 2 3 3 1 4 2 1 4 2 4 4 2
## [2073] 1 1 4 1 2 2 3 2 1 3 3 1 4 2 2 2 1 3 3 4 1 4 1 2 4 3 2 4 2 4 3 3 4 3 1 4 4
## [2110] 3 1 4 4 1 3 1 4 2 2 1 2 1 1 4 1 4 2 3 3 4 1 3 4 3 1 4 4 4 1 1 3 1 3 1 3 1
## [2147] 1 2 1 4 2 3 2 4 1 1 4 3 3 4 1 2 2 3 4 4 3 3 2 4 4 1 2 2 1 2 3 3 2 3 3 3 4
## [2184] 4 4 4 1 3 3 2 2 1 4 4 3 2 4 1 3 1 3 2 2 4 1 3 3 4 3 2 1 2 4 4 2 4 2 1 1 2
## [2221] 2 1 3 2 1 1 2 1 2 4 4 4 4 1 4 4 2 3 4 4 3 3 2 2 1 3 3 1 1 1 3 3 1 1 4 3 4
## [2258] 1 2 1 1 2 3 3 4 3 2 2 3 1 2 2 1 3 4 4 1 4 4 1 3 1 4 2 4 1 2 1 2 4 2 3 2 4
## [2295] 4 1 3 4 1 3 3 1 3 2 4 4 1 3 4 1 1 3 1 1 1 3 1 2 4 2 3 3 1 4 3 3 4 2 3 1 3
## [2332] 2 4 3 2 4 3 3 3 2 1 1 3 3 4 4 1 4 1 4 3 1 1 4 3 3 3 4 3 4 3 3 2 1 3 1 3 3
## [2369] 2 1 4 4 3 4 2 2 2 4 3 4 1 3 2 3 4 2 3 3 3 1 1 1 4 4 2 2 4 3 3 3 4 4 1 3 1
## [2406] 1 1 1 4 3 1 4 3 1 4 4 4 4 3 4 4 4 4 1 1 1 3 1 2 2 2 2 1 3 1 1 4 4 4 1 3 1
## [2443] 3 4 3 4 3 3 4 1 1 3 4 2 4 3 1 3 3 1 1 2 4 1 1 4 2 4 4 4 1 1 2 2 1 3 4 1 4
## [2480] 4 4 2 1 4 4 4 1 4 2 1 2 1 4 3 3 2 4 3 4 3 1 2 4 2 1 4 1 4 1 2 1 4 3 4 2 3
## [2517] 3 3 4 3 3 3 1 2 4 3 3 1 1 4 4 1 3 2 2 1 4 1 4 2 3 1 4 3 1 3 4 4 4 4 1 3 1
## [2554] 3 3 3 1 4 3 1 2 1 3 1 1 2 2 1 2 4 2 2 2 1 4 3 1 3 4 4 3 4 2 4 3 3 2 2 2 1
## [2591] 3 3 4 3 3 2 3 4 4 3 4 3 4 4 1 1 3 3 4 3 4 4 2 3 1 4 4 4 1 1 3 4 3 4 3 4 4
## [2628] 4 3 2 3 2 4 1 4 3 4 1 3 4 1 3 3 4 3 2 3 1 2 3 4 1 4 4 4 2 1 3 4 3 3 3 1 1
## [2665] 1 2 3 4 2 2 4 1 3 1 3 1 1 2 4 4 2 3 4 3 3 4 1 3 1 3 3 4 2 4 2 3 2 1 1 1 3
## [2702] 4 2 1 3 3 3 2 3 4 3 3 4 4 2 4 4 1 3 1 4 3 3 3 3 3 2 1 4 1 3 2 2 1 4 3 3 2
## [2739] 4 3 4 4 1 2 4 2 4 2 4 2 2 2 3 4 3 4 1 4 1 3 3 3 3 1 1 1 3 3 3 2 4 3 4 2 1
## [2776] 3 1 1 4 4 3 4 4 4 1 3 4 4 4 3 3 1 3 2 3 2 3 3 4 1 3 4 1 1 3 2 2 4 3 4 4 2
## [2813] 1 1 4 3 3 1 1 1 3 4 4 3 1 4 2 2 4 4 3 1 2 4 1 3 4 4 3 4 4 3 3 2 3 4 4 4 4
## [2850] 4 2 3 2 3 2 2 3 4 3 3 1 1 1 2 1 3 3 1 1 3 2
##
## Within cluster sum of squares by cluster:
## [1] 262863.8 365425.0 370405.2 317006.3
## (between_SS / total_SS = 54.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
b) Determine which cluster the data in row 2000 was placed in.
# Look up the cluster label stored for row 2000 of the clustered data
fit$cluster[2000]
## [1] 4