library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Work on a copy of the built-in mtcars data set
data <- datasets::mtcars
# Compact overview of the columns and their types
str(data)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Fit k-means with 4 clusters on the first three columns: mpg, cyl and disp.
# k-means starts from randomly chosen centres, so the RNG seed is fixed to
# make the cluster labels and sizes reproducible from run to run.
# NOTE(review): the columns are clustered unscaled, so disp (largest numeric
# range) likely dominates the distance - confirm this is intended.
set.seed(123)
datakmeans <- kmeans(
  mtcars[, c("mpg", "cyl", "disp")],
  centers = 4,
  nstart = 10
)
# Number of cars assigned to each cluster
datakmeans[["size"]]
## [1] 8 11 4 9
# Centre of each cluster (one row per cluster, one column per variable)
datakmeans[["centers"]]
## mpg cyl disp
## 1 20.50000 5.500000 164.08750
## 2 16.19091 7.818182 311.76364
## 3 13.67500 8.000000 443.00000
## 4 27.34444 4.000000 96.55556
# Scatter plots of fuel economy against cylinder count and horsepower.
# Passing the data frame via `data =` keeps the formula readable, and the
# explicit labels replace the default "data$cyl" / "data$mpg" axis titles.
plot(mpg ~ cyl, data = data,
     xlab = "Number of cylinders", ylab = "Miles per gallon")

plot(mpg ~ hp, data = data,
     xlab = "Gross horsepower", ylab = "Miles per gallon")

# Mean cyl, hp and mpg within each k-means cluster.
# The cluster label of every car is attached as a column, the rows are
# grouped by that label, and each group is reduced to one row of means.
# Plain summarise() replaces the deprecated summarise_each()/funs() pair, so
# the same table is produced without a deprecation warning.
mtcars %>%
  mutate(clusters = datakmeans$cluster) %>%
  group_by(clusters) %>%
  summarise(cyl = mean(cyl), hp = mean(hp), mpg = mean(mpg))
## # A tibble: 4 x 4
## clusters cyl hp mpg
## <int> <dbl> <dbl> <dbl>
## 1 1 5.5 113. 20.5
## 2 2 7.82 201. 16.2
## 3 3 8 206. 13.7
## 4 4 4 83.6 27.3
#Description of clusters (figures refer to the summary table above; k-means cluster
#labels are arbitrary, so the numbering can change between runs):
#Cluster 4: Cars with 4-cylinder engines and the lowest horsepower (about 84). They are designed
#for efficiency and return the most economical figure, about 27.3 mpg. These cars are also light in weight.
#Cluster 1: Cars with mostly 6 cylinders (5.5 on average), which are less heavy and less powerful than
#the 8-cylinder cars. They have moderate horsepower (about 113) and a fairly good 20.5 mpg.
#Clusters 2 & 3: These cars have 8 cylinders (7.8 and 8 on average), high horsepower (about 201 and 206)
#and high weight. Their mpg is worse than that of the cars with fewer cylinders: about 16.2 and 13.7
#respectively. They are made for selective customers who need high horsepower rather than economy.