Loading the Data Libraries
library(data.table)
library(ggplot2)
Loading the data file that contains the places to be clustered
# Read the places file. It has no header row, so fread() assigns the default
# column names (V1, V2).
locations <- fread(
  "C:/Documents/Data Science/Cluster Analysis in Data Mining/PAW2/Places.txt",
  header = FALSE,
  skip = 0,
  na.strings = "NA",
  stringsAsFactors = FALSE
)
# Report how many rows and columns were read.
dim(locations)
## [1] 300 2
Using k-means clustering to partition the places into 3 clusters
# Partition the places into 3 clusters with k-means.
# kmeans() picks its starting centres at random, so the seed is fixed to make
# the cluster numbering (and anything exported from it) repeatable between
# runs. nstart = 25 keeps the best of 25 random starts instead of trusting a
# single start that may settle in a poor local optimum.
set.seed(42)
km <- kmeans(locations, centers = 3, nstart = 25)
# Print cluster sizes, centres, the per-row assignment and the within-cluster
# sums of squares. The cluster numbers themselves are arbitrary labels.
km
## K-means clustering with 3 clusters of sizes 100, 100, 100
##
## Cluster means:
## V1 V2
## 1 -112.07161 33.46049
## 2 -80.84423 35.21710
## 3 -80.52837 43.47625
##
## Clustering vector:
## [1] 1 1 1 1 3 3 2 1 1 2 1 2 3 1 2 2 2 1 2 3 3 3 3 2 1 3 1 3 1 1 3 2 1 1 1
## [36] 2 3 1 3 2 2 2 2 2 2 3 1 2 1 1 3 2 1 2 3 2 1 2 3 2 1 3 2 1 3 2 2 1 3 2
## [71] 3 1 2 3 1 1 1 2 3 1 1 2 1 3 1 1 3 1 3 1 3 3 1 3 3 3 2 2 1 1 1 3 1 1 2
## [106] 2 1 3 1 1 3 2 1 3 1 1 3 1 3 3 2 2 1 3 2 3 2 3 2 3 2 3 2 1 1 3 3 1 1 3
## [141] 2 2 1 3 2 2 2 1 2 2 3 2 2 3 3 3 3 3 2 2 3 1 3 1 2 3 3 3 3 3 3 1 1 2 2
## [176] 1 3 2 2 3 1 3 2 2 3 2 2 3 1 3 2 2 1 1 1 2 1 1 2 1 3 1 2 1 3 2 1 3 2 2
## [211] 3 2 2 1 3 3 1 3 1 3 1 3 3 3 2 2 2 1 2 1 1 2 2 3 3 2 1 2 1 2 1 1 2 2 2
## [246] 2 2 2 2 1 2 2 3 3 3 3 2 1 2 1 3 1 2 3 3 1 3 1 3 1 3 2 1 2 3 3 1 1 3 3
## [281] 2 2 2 2 3 3 2 3 3 1 3 1 2 1 3 1 1 1 3 1
##
## Within cluster sum of squares by cluster:
## [1] 0.03046838 0.20364907 0.02345053
## (between_SS / total_SS = 100.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
Assigning the cluster numbers to each point
# Shift the cluster labels so they start at 0 instead of 1.
a <- km$cluster - 1
# Pair every row with a zero-based id ("0", "1", ...).
# seq_len() replaces 1:nrow(), which counts backwards (1, 0) on an empty table,
# and the "- 1L" is applied to the whole sequence explicitly rather than
# relying on `:` binding tighter than `-`.
# cbind() of a character vector and a numeric vector yields a character matrix
# with columns "generated_uid3" and "a".
df <- cbind(
  "generated_uid3" = sprintf("%01d", seq_len(nrow(locations)) - 1L),
  a
)
# fix() opens an interactive editor on df. Only offer that manual inspection
# when a user is present, so non-interactive runs are not held up by it.
if (interactive()) {
  fix(df)
}
Creating a text file that lists the cluster number assigned to each data point
# Write one "<id> <cluster>" line per data point to Cluster.txt, with no header
# row, no row names and no quoting.
# The separator is spelled out as a single space (write.table()'s default)
# rather than being passed as an empty argument.
write.table(
  df,
  file = "Cluster.txt",
  sep = " ",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)