library(sparcl)

# build the example data: append 10 standard-normal noise features to the
# 4 real iris features, then standardize every column
n <- nrow(iris)
p2 <- 10
set.seed(32611)
iris_noise <- matrix(rnorm(n * p2), ncol = p2)
iris_label <- iris$Species
iris2 <- cbind(iris[, 1:4], iris_noise)
iris2 <- scale(iris2)
iris2 <- as.matrix(iris2)
head(iris2)
##      Sepal.Length Sepal.Width Petal.Length Petal.Width           1          2
## [1,]   -0.8976739  1.01560199    -1.335752   -1.311052  0.01030854 -1.7464111
## [2,]   -1.1392005 -0.13153881    -1.335752   -1.311052 -1.81509350 -1.9595829
## [3,]   -1.3807271  0.32731751    -1.392399   -1.311052 -0.96787668 -1.2379723
## [4,]   -1.5014904  0.09788935    -1.279104   -1.311052  1.16120965  2.3707478
## [5,]   -1.0184372  1.24503015    -1.335752   -1.311052  0.52239836  0.6843673
## [6,]   -0.5353840  1.93331463    -1.165809   -1.048667 -0.57333318 -0.3697858
##                3          4           5          6          7          8
## [1,] -1.03471665  1.0983893  1.00085514 -0.8369544  0.1169829  0.7753944
## [2,]  2.03687842  0.7780456 -0.65013472  0.2511250  0.6936221 -0.6329921
## [3,] -1.17608421 -0.1295774  1.28454096 -1.1016742  0.4541344  1.6009641
## [4,] -0.28916640 -1.2226579 -1.30257012 -0.2677339 -1.5140342  0.1965231
## [5,] -0.01416505  0.3052926  0.01794642 -2.1871388 -0.7748244  0.6647349
## [6,] -3.37907304  0.9565490  0.35119499 -0.4395768 -1.0769236  0.8723202
##               9         10
## [1,]  0.4078628 -0.1761084
## [2,]  0.6461494 -0.2471816
## [3,] -0.2021692  0.4638294
## [4,]  0.7703887 -0.1041819
## [5,] -1.4301400 -0.6130340
## [6,] -0.2326685 -0.8638616
dim(iris2)
## [1] 150  14
## Part 1: apply the sparse k-means algorithm.
## Apply the sparse k-means algorithm to the iris2 data to cluster the samples. Fix the number of
## clusters to K = 3 and the tuning parameter wbounds = 1.9. Draw the feature selection plot similarly to
## https://caleb-huo.github.io/teaching/2018SPRING/lectures/sparseClustering1.html#(29). Also perform
## PCA on iris2, visualize the data using the first two principal components, color each sample
## according to its species, and add an appropriate legend to the result.

# run sparse k-means
km.out <- KMeansSparseCluster(iris2, K = 3, wbounds = 1.9)
## 012
print(km.out)
## Wbound is  1.9 :
## Number of non-zero weights:  4
## Sum of weights:  1.9
## Clustering:  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
## 3 3 3 3 2 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 2 2 2 2 3 2 2 2 2 2 2 
## 3 2 2 2 2 2 3 2 3 2 3 2 2 3 3 2 2 2 2 2 3 3 2 2 2 3 2 2 2 3 2 2 2 3 2 2 3
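# As a quick sanity check (not requested by the problem), cross-tabulate the
# sparse k-means assignments against the true species labels; km.out is a list
# with one element per wbounds value, so the assignments live in km.out[[1]]$Cs.
table(km.out[[1]]$Cs, iris_label)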
# feature selection plot: the nonzero weights should concentrate on the 4 real features
plot(km.out)
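
# The problem also asks for a PCA view of iris2. A minimal sketch using base R's
# prcomp (iris2 is already centered and scaled above); the color and legend
# choices here are ours, not prescribed by the problem.
pca <- prcomp(iris2)
plot(pca$x[, 1], pca$x[, 2], col = as.integer(iris_label) + 1, pch = 19,
     xlab = "PC1", ylab = "PC2", main = "PCA of iris2")
legend("topright", legend = levels(iris_label),
       col = seq_along(levels(iris_label)) + 1, pch = 19)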

## Part 2: apply the sparse hierarchical clustering algorithm.
## Apply the sparse hierarchical clustering algorithm to the iris2 data to cluster the samples. Fix the
## tuning parameter wbound = 1.9 and use complete linkage. Draw the hierarchical tree structure and the
## feature selection plot similarly to
## https://caleb-huo.github.io/teaching/2018SPRING/lectures/sparseClustering2.html#(16). Also draw the
## hierarchical tree structure with each sample colored according to its species, and add an
## appropriate legend to the result.

## sparse hierarchical clustering

sparsehc <- HierarchicalSparseCluster(x = iris2, wbound = 1.9, method = "complete")
## 1234567891011121314
plot(sparsehc)
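
# The selected features can also be read off numerically: sparsehc$ws holds one
# weight per feature, and the 10 noise columns should receive (near-)zero weight.
setNames(round(as.numeric(sparsehc$ws), 3), colnames(iris2))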

# define the species colors up front so that the dendrogram leaves and the
# legend use the same palette; ColorDendrogram() takes the leaf colors via y
iris_label <- factor(iris_label)
n_iris_label <- nlevels(iris_label)
cols_4 <- colorspace::rainbow_hcl(n_iris_label, c = 70, l = 50)
ColorDendrogram(sparsehc$hc, y = cols_4[as.integer(iris_label)])
legend("topleft", legend = levels(iris_label), fill = cols_4)