#install.packages("arules") for association analysis
library(arules)
# Read the Bob Ross episode data from CSV into a data frame.
bob_ross <- read.csv(
  file = "G:/Other computers/My Laptop/Documents/Richard 622 last semester/Week 1 and 2/Homework 1/bob_ross.csv"
)
# Preview the first rows to see which columns are not 0/1 indicators.
head(x = bob_ross)
## EPISODE TITLE APPLE_FRAME AURORA_BOREALIS BARN BEACH BOAT
## 1 S01E01 "A WALK IN THE WOODS" 0 0 0 0 0
## 2 S01E02 "MT. MCKINLEY" 0 0 0 0 0
## 3 S01E03 "EBONY SUNSET" 0 0 0 0 0
## 4 S01E04 "WINTER MIST" 0 0 0 0 0
## 5 S01E05 "QUIET STREAM" 0 0 0 0 0
## 6 S01E06 "WINTER MOON" 0 0 0 0 0
## BRIDGE BUILDING BUSHES CABIN CACTUS CIRCLE_FRAME CIRRUS CLIFF CLOUDS CONIFER
## 1 0 0 1 0 0 0 0 0 0 0
## 2 0 0 0 1 0 0 0 0 1 1
## 3 0 0 0 1 0 0 0 0 0 1
## 4 0 0 1 0 0 0 0 0 1 1
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 1 0 0 0 0 0 1
## CUMULUS DECIDUOUS DIANE_ANDRE DOCK DOUBLE_OVAL_FRAME FARM FENCE FIRE
## 1 0 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 1 0
## 4 0 0 0 0 0 0 0 0
## 5 0 1 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## FLORIDA_FRAME FLOWERS FOG FRAMED GRASS GUEST HALF_CIRCLE_FRAME
## 1 0 0 0 0 1 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## HALF_OVAL_FRAME HILLS LAKE LAKES LIGHTHOUSE MILL MOON MOUNTAIN MOUNTAINS
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 1 0
## 3 0 0 0 0 0 0 0 1 1
## 4 0 0 1 0 0 0 0 1 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 1 0 0 0 1 1 1
## NIGHT OCEAN OVAL_FRAME PALM_TREES PATH PERSON PORTRAIT RECTANGLE_3D_FRAME
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 1 0 0 0 0 0 0 0
## RECTANGULAR_FRAME RIVER ROCKS SEASHELL_FRAME SNOW SNOWY_MOUNTAIN SPLIT_FRAME
## 1 0 1 0 0 0 0 0
## 2 0 0 0 0 1 1 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 1 0
## 5 0 1 1 0 0 0 0
## 6 0 0 0 0 1 1 0
## STEVE_ROSS STRUCTURE SUN TOMB_FRAME TREE TREES TRIPLE_FRAME WATERFALL WAVES
## 1 0 0 0 0 1 1 0 0 0
## 2 0 0 0 0 1 1 0 0 0
## 3 0 1 1 0 1 1 0 0 0
## 4 0 0 0 0 1 1 0 0 0
## 5 0 0 0 0 1 1 0 0 0
## 6 0 1 0 0 1 1 0 0 0
## WINDMILL WINDOW_FRAME WINTER WOOD_FRAMED
## 1 0 0 0 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 1 0
# EPISODE and TITLE are identifiers rather than 0/1 indicators, so drop them
# before building transactions (a listed column that is absent is ignored).
id_columns <- c("EPISODE", "TITLE")
bob_ross <- bob_ross[, !(names(bob_ross) %in% id_columns), drop = FALSE]
# Convert the remaining indicator columns to a matrix and then to an arules
# "transactions" object so it can be used for association analysis.
bob_ross <- as(as.matrix(bob_ross), "transactions")
# Mine association rules with at least 30% support and 90% confidence.
# Parameter names are written out in full instead of relying on abbreviations.
bobross_rules <- apriori(
  bob_ross,
  parameter = list(support = 0.3, confidence = 0.9, target = "rules")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.3 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 120
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 403 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [31 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print every mined rule together with its support, confidence, coverage, lift and count.
inspect(bobross_rules)
## lhs rhs support confidence coverage
## [1] {RIVER} => {TREES} 0.3002481 0.9603175 0.3126551
## [2] {RIVER} => {TREE} 0.3126551 1.0000000 0.3126551
## [3] {GRASS} => {TREE} 0.3374690 0.9577465 0.3523573
## [4] {LAKE} => {TREES} 0.3325062 0.9370629 0.3548387
## [5] {LAKE} => {TREE} 0.3523573 0.9930070 0.3548387
## [6] {MOUNTAIN} => {TREES} 0.3672457 0.9250000 0.3970223
## [7] {MOUNTAIN} => {TREE} 0.3870968 0.9750000 0.3970223
## [8] {DECIDUOUS} => {TREES} 0.5136476 0.9118943 0.5632754
## [9] {DECIDUOUS} => {TREE} 0.5632754 1.0000000 0.5632754
## [10] {CONIFER} => {TREES} 0.5161290 0.9811321 0.5260546
## [11] {CONIFER} => {TREE} 0.5260546 1.0000000 0.5260546
## [12] {TREES} => {TREE} 0.8362283 1.0000000 0.8362283
## [13] {TREE} => {TREES} 0.8362283 0.9335180 0.8957816
## [14] {RIVER, TREES} => {TREE} 0.3002481 1.0000000 0.3002481
## [15] {RIVER, TREE} => {TREES} 0.3002481 0.9603175 0.3126551
## [16] {GRASS, TREES} => {TREE} 0.3126551 1.0000000 0.3126551
## [17] {GRASS, TREE} => {TREES} 0.3126551 0.9264706 0.3374690
## [18] {LAKE, TREES} => {TREE} 0.3325062 1.0000000 0.3325062
## [19] {LAKE, TREE} => {TREES} 0.3325062 0.9436620 0.3523573
## [20] {CONIFER, MOUNTAIN} => {TREES} 0.3126551 0.9767442 0.3200993
## [21] {CONIFER, MOUNTAIN} => {TREE} 0.3200993 1.0000000 0.3200993
## [22] {MOUNTAIN, TREES} => {TREE} 0.3672457 1.0000000 0.3672457
## [23] {MOUNTAIN, TREE} => {TREES} 0.3672457 0.9487179 0.3870968
## [24] {CLOUDS, TREES} => {TREE} 0.3424318 1.0000000 0.3424318
## [25] {CLOUDS, TREE} => {TREES} 0.3424318 0.9387755 0.3647643
## [26] {DECIDUOUS, TREES} => {TREE} 0.5136476 1.0000000 0.5136476
## [27] {DECIDUOUS, TREE} => {TREES} 0.5136476 0.9118943 0.5632754
## [28] {CONIFER, TREES} => {TREE} 0.5161290 1.0000000 0.5161290
## [29] {CONIFER, TREE} => {TREES} 0.5161290 0.9811321 0.5260546
## [30] {CONIFER, MOUNTAIN, TREES} => {TREE} 0.3126551 1.0000000 0.3126551
## [31] {CONIFER, MOUNTAIN, TREE} => {TREES} 0.3126551 0.9767442 0.3200993
## lift count
## [1] 1.148392 121
## [2] 1.116343 126
## [3] 1.069174 136
## [4] 1.120583 134
## [5] 1.108537 142
## [6] 1.106157 148
## [7] 1.088435 156
## [8] 1.090485 207
## [9] 1.116343 227
## [10] 1.173283 208
## [11] 1.116343 212
## [12] 1.116343 337
## [13] 1.116343 337
## [14] 1.116343 121
## [15] 1.148392 121
## [16] 1.116343 126
## [17] 1.107916 126
## [18] 1.116343 134
## [19] 1.128474 134
## [20] 1.168035 126
## [21] 1.116343 129
## [22] 1.116343 148
## [23] 1.134520 148
## [24] 1.116343 138
## [25] 1.122631 138
## [26] 1.116343 207
## [27] 1.090485 207
## [28] 1.116343 208
## [29] 1.173283 208
## [30] 1.116343 126
## [31] 1.168035 126
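# Every rule has a lift above 1, so the right-hand side is more likely when the left-hand side is present than it is across all paintings. The lifts are small (about 1.07 to 1.17) because TREE and TREES already appear in roughly 90% and 84% of paintings.
# Bob Ross almost always painted trees; the landscape features that most often appear with them are rivers, grass, lakes, and mountains (plus clouds and deciduous or conifer trees).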
# Read the NHANES training data from CSV into a data frame.
nhanes_train <- read.csv(
  file = "G:/Other computers/My Laptop/Documents/Richard 622 last semester/Week 1 and 2/Homework 1/nhanes_train.csv"
)
# Preview the first rows of the training data.
head(x = nhanes_train)
## Age Weight Height Pulse
## 1 80 97.6 175.8 58
## 2 48 120.7 185.9 56
## 3 20 46.8 163.6 78
## 4 18 100.2 176.9 84
## 5 46 112.2 180.5 90
## 6 63 79.3 182.8 66
# Linear regression of Weight on every other column of the training data.
regmodel <- lm(formula = Weight ~ ., data = nhanes_train)
# Residual summary, coefficient table and overall fit statistics.
summary(object = regmodel)
##
## Call:
## lm(formula = Weight ~ ., data = nhanes_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.860 -13.096 -3.099 10.227 116.080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -112.64017 6.78932 -16.591 < 2e-16 ***
## Age 0.12471 0.02045 6.097 1.22e-09 ***
## Height 1.01324 0.03522 28.768 < 2e-16 ***
## Pulse 0.24685 0.03019 8.177 4.32e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.67 on 2867 degrees of freedom
## Multiple R-squared: 0.2311, Adjusted R-squared: 0.2303
## F-statistic: 287.2 on 3 and 2867 DF, p-value: < 2.2e-16
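# All three predictors (Age, Height, Pulse) are statistically significant (p < 0.001), but the model explains only about 23% of the variance in Weight (R-squared = 0.2311).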
# Libraries needed to run a decision tree below
library(rpart)
library(rpart.plot)
# Fit a regression tree for Weight on the remaining columns, then plot it.
reg_treenhanes <- rpart(Weight ~ ., data = nhanes_train)
# Node labels use 4 significant digits; extra = 1 also shows the number of
# observations in each node (per the rpart.plot documentation).
prp(x = reg_treenhanes, extra = 1, digits = 4)
#install.packages('ipred')
library(ipred)
# Bagged regression trees built from 100 bootstrapped samples.
# Bootstrap resampling is random, so fix the seed to make the fitted ensemble
# (and any test MSE computed from it) reproducible from run to run.
set.seed(622)
nhanes_bag <- bagging(Weight ~ ., data = nhanes_train, nbagg = 100)
nhanes_bag
##
## Bagging regression trees with 100 bootstrap replications
##
## Call: bagging.data.frame(formula = Weight ~ ., data = nhanes_train,
## nbagg = 100)
#install.packages('randomForest')
library(randomForest)
# Random forest of 200 trees predicting Weight from the other columns;
# importance = TRUE additionally stores variable-importance measures.
# The forest is grown from random resamples, so fix the seed to make the
# fitted model (and any test MSE computed from it) reproducible.
set.seed(622)
nhanes_rf <- randomForest(
  Weight ~ .,
  data = nhanes_train,
  importance = TRUE,
  ntree = 200
)
# install.packages("gbm")
library(gbm)
# Gradient boosting with 200 trees and a gaussian (squared-error) loss.
# Interaction depth is how many splits in each tree we want.
# Shrinkage determines how much each tree contributes to the prediction.
# Fix the seed so the fitted model and the relative-influence table below are
# reproducible from run to run.
set.seed(622)
nhanes_boost <- gbm(
  formula = Weight ~ .,
  data = nhanes_train,
  distribution = "gaussian",
  n.trees = 200,
  shrinkage = 0.03,
  interaction.depth = 5
)
# Relative influence of each predictor.
summary(nhanes_boost)
## var rel.inf
## Height Height 65.62662
## Pulse Pulse 17.42341
## Age Age 16.94998
#height has the most influence when determining weight
# Read the held-out NHANES test data; predictions are made on this set.
nhanes_test <- read.csv(
  file = "G:/Other computers/My Laptop/Documents/Richard 622 last semester/Week 1 and 2/Homework 1/nhanes_test.csv"
)
# Score the held-out test set with each fitted model.
# Each prediction vector is stored in full for the MSE comparison; only the
# first few values are previewed (consistently for all five models) instead of
# dumping every prediction to the console.

# Regression
predreg <- predict(regmodel, newdata = nhanes_test)
head(predreg)

# Decision tree
nhanes_test_pred_tree <- predict(reg_treenhanes, newdata = nhanes_test)
head(nhanes_test_pred_tree)

# Bagging
nhanes_bag_pred <- predict(nhanes_bag, newdata = nhanes_test)
head(nhanes_bag_pred)

# Random forest
nhanes_rf_pred <- predict(nhanes_rf, newdata = nhanes_test)
head(nhanes_rf_pred)

# Boosting (predict with all 200 fitted trees)
nhanes_boost_pred <- predict(nhanes_boost, newdata = nhanes_test, n.trees = 200)
head(nhanes_boost_pred)
# Mean squared error (MSE) of each model on the held-out test set.

# Average squared difference between observed and predicted values.
mse <- function(actual, predicted) {
  mean((actual - predicted)^2)
}

# One MSE per model, collected in a named vector so they can be compared
# directly instead of being copied by hand into comments.
test_mse <- c(
  regression = mse(nhanes_test$Weight, predreg),
  decision_tree = mse(nhanes_test$Weight, nhanes_test_pred_tree),
  bagging = mse(nhanes_test$Weight, nhanes_bag_pred),
  random_forest = mse(nhanes_test$Weight, nhanes_rf_pred),
  boosting = mse(nhanes_test$Weight, nhanes_boost_pred)
)
test_mse
# Values recorded from two earlier runs (bagging, random forest and boosting
# changed between those runs):
## regression    332.8215
## decision_tree 334.6264
## bagging       327.1046 / 327.1284
## random_forest 262.1942 / 260.3832
## boosting      314.9958 / 315.2021

# Name of the model with the lowest test MSE.
names(which.min(test_mse))
# (Original run) The best model to predict weight was the Random Forest model
# with an MSE of 262.1942.
# Summary statistics (min, quartiles, median, mean, max) for every column of the training data; the means are used below as the baseline for the clusters
summary(nhanes_train)
## Age Weight Height Pulse
## Min. :16.00 Min. : 39.30 Min. :139.9 Min. : 40.00
## 1st Qu.:30.00 1st Qu.: 67.50 1st Qu.:162.2 1st Qu.: 64.00
## Median :44.00 Median : 79.40 Median :169.7 Median : 72.00
## Mean :45.15 Mean : 82.63 Mean :169.5 Mean : 72.32
## 3rd Qu.:58.00 3rd Qu.: 94.65 3rd Qu.:176.7 3rd Qu.: 80.00
## Max. :80.00 Max. :223.00 Max. :199.9 Max. :122.00
#Age Weight Height Pulse
# Mean :45.15 Mean : 82.63 Mean :169.5 Mean : 72.32
library(fpc)
# K-means with 4 clusters on the first four columns of the training data.
# nhanes_train is a data frame read from CSV, not a packaged dataset, so it is
# not (re)loaded with data() here.
# k-means begins from randomly chosen centres: fix the seed so cluster labels
# and sizes are reproducible, and use several random starts (nstart) so the
# reported solution is the best of them rather than one arbitrary start.
# NOTE(review): the columns are clustered on their raw scales - confirm that
# standardising them first (scale()) is not wanted.
set.seed(622)
fit <- kmeans(nhanes_train[, 1:4], centers = 4, nstart = 25)
fit
## K-means clustering with 4 clusters of sizes 847, 821, 716, 487
##
## Cluster means:
## Age Weight Height Pulse
## 1 64.30224 73.19764 164.9505 68.76033
## 2 36.82095 88.26090 174.1502 71.74178
## 3 30.04888 63.92165 165.1246 75.13966
## 4 48.06571 117.06982 176.2801 75.34292
##
## Clustering vector:
## [1] 1 4 3 2 4 1 2 2 2 1 3 3 2 4 4 2 3 3 3 1 2 3 1 2 1 1 1 4 3 2 2 1 3 4 3 2 1
## [38] 1 2 2 2 1 1 2 2 1 3 1 2 1 1 1 3 4 3 1 4 1 1 1 2 3 3 3 1 3 4 2 2 3 1 1 2 1
## [75] 4 3 3 2 3 2 1 2 4 3 4 3 3 1 1 2 3 1 3 4 4 1 2 3 3 3 1 1 1 3 3 4 1 2 1 3 3
## [112] 3 2 4 1 3 1 2 1 3 3 1 1 1 1 2 2 4 4 2 2 2 1 4 1 3 3 4 4 1 1 1 1 1 2 4 1 2
## [149] 3 1 2 3 2 3 1 1 2 3 2 2 2 3 4 2 4 3 3 2 3 1 2 2 2 3 2 2 3 2 3 1 2 3 2 3 3
## [186] 1 4 1 2 3 1 2 4 2 1 2 1 3 3 1 2 1 2 2 4 1 4 1 1 3 2 2 4 4 2 4 1 3 3 4 2 3
## [223] 3 4 2 2 3 4 1 1 3 2 4 1 2 4 4 1 1 2 3 3 2 1 1 4 3 1 1 1 3 2 1 2 3 4 1 2 1
## [260] 2 1 4 4 1 2 1 4 4 1 3 2 3 2 4 3 2 2 4 2 1 1 1 1 1 1 4 2 3 2 3 2 4 4 3 3 3
## [297] 1 4 4 3 2 4 3 3 2 4 1 2 3 1 1 3 3 2 3 1 3 2 1 3 4 4 2 2 2 3 4 1 3 3 3 1 2
## [334] 3 1 1 1 3 3 4 1 2 2 1 2 1 3 3 2 1 4 1 4 3 4 3 4 3 4 2 1 1 3 2 2 3 1 3 1 3
## [371] 1 3 2 3 2 3 1 2 3 1 2 1 2 2 1 3 2 3 4 3 1 1 1 3 1 4 2 2 3 2 2 2 3 2 1 2 1
## [408] 1 4 3 2 2 1 2 1 4 1 1 3 1 1 1 1 2 2 3 1 4 1 2 4 3 3 3 2 3 3 4 2 3 1 3 2 2
## [445] 3 1 3 2 3 4 1 2 1 1 2 1 4 1 3 3 2 2 2 4 4 2 2 1 2 1 2 1 2 4 2 2 4 2 4 2 3
## [482] 2 1 3 3 1 4 1 2 2 2 3 2 3 1 3 2 2 1 3 1 4 4 2 4 2 2 2 3 1 1 2 1 3 1 1 2 4
## [519] 2 3 3 3 2 2 1 1 3 4 3 2 1 1 1 1 2 4 2 2 3 1 4 4 1 2 3 2 4 3 2 3 1 2 2 1 2
## [556] 4 1 3 4 1 2 2 1 1 2 2 3 1 1 1 4 3 1 4 3 2 4 3 1 2 3 3 4 2 1 3 3 1 4 4 1 3
## [593] 2 2 1 2 1 1 1 3 3 2 3 1 1 3 1 4 3 2 1 3 2 1 1 2 3 2 1 3 3 2 3 2 2 2 4 2 1
## [630] 1 1 2 1 2 4 1 1 4 4 2 1 4 1 3 3 4 2 2 1 2 3 4 1 3 1 2 2 3 2 1 2 3 3 3 3 1
## [667] 2 2 3 3 3 3 4 1 4 3 1 2 4 2 4 2 2 4 4 3 3 3 1 2 1 4 2 2 1 1 3 4 1 2 4 2 3
## [704] 4 4 4 1 1 1 1 3 3 1 1 3 3 3 3 2 1 1 1 1 2 3 3 2 1 4 1 4 4 3 2 3 1 1 2 4 2
## [741] 3 1 4 3 3 1 3 3 1 2 3 2 2 1 1 1 1 1 2 2 3 2 2 1 3 2 3 2 3 1 2 4 4 4 3 2 4
## [778] 2 1 2 3 2 1 1 2 1 2 4 3 3 4 3 3 1 2 1 1 2 1 4 3 2 1 2 1 3 2 1 1 3 3 4 2 4
## [815] 4 2 4 2 3 2 1 3 2 3 2 2 1 4 2 1 1 4 1 2 3 1 1 3 2 2 2 2 3 1 2 1 3 2 2 1 3
## [852] 4 1 2 3 2 3 2 2 3 1 4 1 3 3 1 3 1 3 1 1 4 4 2 1 2 1 1 2 3 2 1 4 1 2 2 2 2
## [889] 4 1 2 2 4 1 3 2 4 1 2 4 1 4 1 2 2 1 1 3 1 2 2 3 2 2 2 2 3 2 3 1 3 2 4 2 3
## [926] 3 4 1 1 3 3 2 1 3 4 1 2 2 1 1 1 2 2 1 2 1 2 3 4 2 3 3 1 1 1 2 4 2 1 2 3 3
## [963] 3 4 3 3 3 4 2 2 1 2 1 3 2 4 3 2 4 4 3 4 1 4 2 1 3 1 3 1 3 2 3 4 3 1 4 2 4
## [1000] 1 3 2 1 4 1 2 3 4 1 2 4 3 3 1 2 2 2 2 1 3 2 4 4 1 2 1 3 4 3 4 1 3 3 4 4 3
## [1037] 4 2 3 1 4 3 3 2 3 1 4 3 1 3 3 3 3 1 4 1 2 2 2 2 2 3 2 1 1 1 3 3 3 1 2 3 3
## [1074] 2 4 1 2 4 1 1 1 4 4 3 2 4 1 3 3 3 3 1 1 4 4 2 3 2 2 3 2 1 2 1 1 2 3 4 2 2
## [1111] 2 3 1 2 4 1 4 4 3 1 4 4 4 2 2 1 3 2 2 1 1 2 3 2 3 3 2 1 4 3 4 2 2 3 1 3 2
## [1148] 1 3 1 1 1 3 3 3 1 1 2 2 4 1 3 3 2 3 4 4 3 1 2 2 4 4 1 1 4 2 4 1 3 4 3 3 1
## [1185] 1 3 1 2 4 4 2 1 2 2 4 4 1 2 2 1 2 1 1 3 2 3 2 1 3 3 4 2 2 2 2 1 2 1 1 4 2
## [1222] 2 2 3 1 1 3 1 3 3 1 4 3 3 4 1 3 4 2 2 2 1 1 3 4 4 3 4 1 1 3 3 3 1 1 3 4 4
## [1259] 4 1 1 1 2 1 3 3 3 1 1 2 1 2 2 3 2 2 1 2 2 1 1 1 1 1 3 2 1 2 3 3 3 3 3 3 3
## [1296] 3 4 2 2 4 2 4 2 1 1 1 1 2 1 1 4 3 4 2 1 2 4 3 1 1 4 2 3 2 3 1 2 1 1 4 4 4
## [1333] 1 1 3 2 4 1 1 4 1 1 1 4 2 2 1 3 4 3 2 3 1 1 2 1 4 2 3 2 1 1 1 4 1 2 1 3 2
## [1370] 2 2 1 3 1 1 2 3 1 3 4 2 3 1 1 1 1 1 3 3 1 2 1 2 2 4 3 1 2 1 1 1 2 1 3 4 3
## [1407] 1 4 2 1 4 2 1 4 2 4 3 2 1 1 3 4 2 1 4 2 2 1 1 1 1 4 1 4 4 3 3 2 1 1 1 3 2
## [1444] 4 2 3 3 1 1 1 4 1 1 3 4 2 3 3 1 2 2 2 1 3 4 3 3 2 3 3 3 3 2 1 4 2 3 3 2 1
## [1481] 1 1 4 4 3 4 2 1 2 3 3 1 4 2 3 3 2 1 4 3 1 1 4 2 1 1 1 4 2 3 1 3 2 2 1 3 1
## [1518] 4 2 1 1 3 1 4 4 1 1 1 3 1 4 2 3 2 2 2 3 1 2 2 1 2 4 3 3 4 3 1 1 2 2 2 3 2
## [1555] 4 1 1 3 2 1 4 1 1 1 3 3 1 1 2 4 2 3 4 4 3 2 1 2 2 4 1 3 4 2 2 3 2 1 3 2 1
## [1592] 1 2 3 2 3 1 3 4 2 1 2 1 1 1 2 1 1 1 2 2 2 4 4 2 1 4 4 1 2 3 2 2 1 2 2 4 1
## [1629] 2 1 2 1 2 3 3 3 2 4 1 3 2 2 1 3 2 2 1 1 2 1 2 3 2 1 4 2 2 3 1 2 2 1 4 1 2
## [1666] 1 1 2 3 2 4 3 3 3 4 1 2 3 4 4 2 3 3 2 3 2 1 1 3 2 4 1 1 1 3 4 4 3 4 4 2 2
## [1703] 4 3 1 1 2 4 3 3 3 2 4 1 1 1 4 1 4 2 2 1 4 1 4 3 2 2 3 2 1 4 1 3 1 3 2 3 4
## [1740] 2 4 4 3 3 1 2 2 1 2 2 2 1 4 1 3 2 2 2 3 1 3 1 1 3 4 2 4 2 3 3 2 1 3 3 3 3
## [1777] 1 4 1 4 1 2 3 2 2 1 2 3 2 1 2 1 2 2 2 3 4 3 1 2 3 1 1 1 1 3 1 4 1 1 1 4 2
## [1814] 1 3 2 2 3 3 2 1 3 3 1 4 1 2 1 1 1 1 1 2 1 2 3 3 1 2 1 4 2 4 2 3 1 3 3 2 3
## [1851] 4 2 1 1 2 1 1 1 1 3 2 4 1 1 2 1 2 3 2 3 2 1 1 4 1 2 3 1 1 1 3 3 2 1 2 2 2
## [1888] 2 3 3 2 1 2 4 3 1 4 1 4 2 2 1 2 1 1 2 3 3 2 4 3 4 3 4 1 2 1 3 1 4 3 2 3 4
## [1925] 2 2 4 1 4 2 1 4 4 1 1 3 1 4 2 2 3 3 4 2 4 3 4 3 4 4 1 1 3 2 2 2 3 1 1 1 2
## [1962] 1 2 3 1 2 3 1 1 1 3 3 3 2 2 1 1 3 1 4 3 2 2 2 1 4 3 3 1 1 3 1 3 2 2 4 2 3
## [1999] 3 2 3 3 1 4 2 2 4 4 2 2 2 1 4 1 3 2 1 2 3 3 2 3 1 2 1 4 4 2 4 3 3 4 2 4 3
## [2036] 1 1 3 2 2 1 2 2 2 2 3 4 4 3 3 2 3 2 4 3 3 4 4 3 3 4 1 1 3 2 4 3 2 4 2 2 4
## [2073] 3 3 2 3 4 4 1 4 3 1 1 3 2 4 4 4 3 1 1 2 3 2 3 4 2 1 4 2 4 2 1 1 2 1 3 2 2
## [2110] 1 3 2 2 3 1 3 2 4 4 3 4 3 3 2 3 2 4 1 1 2 3 1 2 1 3 2 2 2 3 3 1 3 1 3 1 3
## [2147] 3 4 3 2 4 1 4 2 3 3 2 1 1 2 3 4 4 1 2 2 1 1 4 2 2 3 4 4 3 4 1 1 4 1 1 1 2
## [2184] 2 2 2 3 1 1 4 4 3 2 2 1 4 2 3 1 3 1 4 4 2 3 1 1 2 1 4 3 4 2 2 4 2 4 3 3 4
## [2221] 4 3 1 4 3 3 4 3 4 2 2 2 2 3 2 2 4 1 2 2 1 1 4 4 3 1 1 3 3 3 1 1 3 3 2 1 2
## [2258] 3 4 3 3 4 1 1 2 1 4 4 1 3 4 4 3 1 2 2 3 2 2 3 1 3 2 4 2 3 4 3 4 2 4 1 4 2
## [2295] 2 3 1 2 3 1 1 3 1 4 2 2 3 1 2 3 3 1 3 3 3 1 3 4 2 4 1 1 3 2 1 1 2 4 1 3 1
## [2332] 4 2 1 4 2 1 1 1 4 3 3 1 1 2 2 3 2 3 2 1 3 3 2 1 1 1 2 1 2 1 1 4 3 1 3 1 1
## [2369] 4 3 2 2 1 2 4 4 4 2 1 2 3 1 4 1 2 4 1 1 1 3 3 3 2 2 4 4 2 1 1 1 2 2 3 1 3
## [2406] 3 3 3 2 1 3 2 1 3 2 2 2 2 1 2 2 2 2 3 3 3 1 3 4 4 4 4 3 1 3 3 2 2 2 3 1 3
## [2443] 1 2 1 2 1 1 2 3 3 1 2 4 2 1 3 1 1 3 3 4 2 3 3 2 4 2 2 2 3 3 4 4 3 1 2 3 2
## [2480] 2 2 4 3 2 2 2 3 2 4 3 4 3 2 1 1 4 2 1 2 1 3 4 2 4 3 2 3 2 3 4 3 2 1 2 4 1
## [2517] 1 1 2 1 1 1 3 4 2 1 1 3 3 2 2 3 1 4 4 3 2 3 2 4 1 3 2 1 3 1 2 2 2 2 3 1 3
## [2554] 1 1 1 3 2 1 3 4 3 1 3 3 4 4 3 4 2 4 4 4 3 2 1 3 1 2 2 1 2 4 2 1 1 4 4 4 3
## [2591] 1 1 2 1 1 4 1 2 2 1 2 1 2 2 3 3 1 1 2 1 2 2 4 1 3 2 2 2 3 3 1 2 1 2 1 2 2
## [2628] 2 1 4 1 4 2 3 2 1 2 3 1 2 3 1 1 2 1 4 1 3 4 1 2 3 2 2 2 4 3 1 2 1 1 1 3 3
## [2665] 3 4 1 2 4 4 2 3 1 3 1 3 3 4 2 2 4 1 2 1 1 2 3 1 3 1 1 2 4 2 4 1 4 3 3 3 1
## [2702] 2 4 3 1 1 1 4 1 2 1 1 2 2 4 2 2 3 1 3 2 1 1 1 1 1 4 3 2 3 1 4 4 3 2 1 1 4
## [2739] 2 1 2 2 3 4 2 4 2 4 2 4 4 4 1 2 1 2 3 2 3 1 1 1 1 3 3 3 1 1 1 4 2 1 2 4 3
## [2776] 1 3 3 2 2 1 2 2 2 3 1 2 2 2 1 1 3 1 4 1 4 1 1 2 3 1 2 3 3 1 4 4 2 1 2 2 4
## [2813] 3 3 2 1 1 3 3 3 1 2 2 1 3 2 4 4 2 2 1 3 4 2 3 1 2 2 1 2 2 1 1 4 1 2 2 2 2
## [2850] 2 4 1 4 1 4 4 1 2 1 1 3 3 3 4 3 1 1 3 3 1 4
##
## Within cluster sum of squares by cluster:
## [1] 370405.2 317006.3 262863.8 365425.0
## (between_SS / total_SS = 54.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
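#Original run (a different random start, so cluster numbering and sizes differ from the output above): K-means clustering with 4 clusters of sizes 717, 685, 444, 1025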
#Cluster means:
# Age Weight Height Pulse
#1 62.96513 68.54114 162.5576 69.59554
#2 55.77664 95.28380 174.8207 68.92263
#3 34.44595 114.59167 175.8622 78.14414
#4 30.21463 70.19454 168.1748 73.97463
#Compared to the mean of all data
#Age Weight Height Pulse
# Mean :45.15 Mean : 82.63 Mean :169.5 Mean : 72.32
#Using the original-run cluster means listed above: Age - clusters 1 and 2 are above average and clusters 3 and 4 are below average
#Weight: Clusters 1 and 4 are below average and clusters 2 and 3 are above average
#Height: Clusters 1 and 4 are below average and clusters 2 and 3 are above average
#Pulse: Clusters 1 and 2 are below average and clusters 3 and 4 are above average
# b) Determine which cluster the observation in row 2000 was assigned to
fit$cluster[2000]
## [1] 2
# In the original run, row 2000 was placed in cluster 4 ([1] 4); the two recorded runs gave different results