R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, mean, max) for the
# built-in `cars` data set.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Fix the RNG seed so the random matrix (and the distances printed below)
# can be reproduced exactly.
set.seed(1234)
# 50 standard-normal draws arranged column-wise as 5 rows x 10 columns.
randDat <- matrix(rnorm(5 * 10), nrow = 5, ncol = 10)
# Pairwise distances between the 5 rows using dist()'s default metric.
dist(randDat)
##          1        2        3        4
## 2 4.261667                           
## 3 4.038030 2.060117                  
## 4 3.456732 3.726399 4.037978         
## 5 5.307253 4.415046 4.111230 4.814393
# Same pairwise row distances, now with the Manhattan metric
# (sum of absolute coordinate differences).
dist(randDat,method="manhattan")
##           1         2         3         4
## 2 11.382197                              
## 3 10.016795  4.536827                    
## 4  9.887932  8.845512  8.829131          
## 5 14.683770 10.617871  9.091241 11.362705
# Same pairwise row distances, now with the Minkowski metric of order p = 4.
dist(randDat,method="minkowski", p= 4)
##          1        2        3        4
## 2 2.899494                           
## 3 2.875467 1.653824                  
## 4 2.208297 2.814135 3.453336         
## 5 3.488531 3.192217 3.398721 3.643788
# Hierarchical clustering of the iris measurements.
# The 5th column is dropped before clustering and the remaining columns are
# standardized with scale() so each contributes comparably to the distances.
d<-dist(scale(iris[,-5])) #standardizing
# No linkage is given, so hclust() uses its default (reported as "complete"
# in the printed summary).
h<-hclust(d)
# Print a short description of the fitted tree.
h
## 
## Call:
## hclust(d = d)
## 
## Cluster method   : complete 
## Distance         : euclidean 
## Number of objects: 150
# Draw the dendrogram with the default leaf labels.
plot(h) #Dendrogram

# Redraw with each leaf labelled by its species. A negative `hang` makes the
# labels hang down from height 0 (see ?plot.hclust); `cex` shrinks the text so
# 150 labels stay legible.
plot(h, hang=-0.1, labels=iris[["Species"]], cex=0.5) #cex to decrease the font size

# Cut the tree into 3 groups; the result holds one integer cluster id per
# observation.
clus3<-cutree(h,3)
# Show the cluster id assigned to each of the observations.
clus3
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 2 1 1 1 1 1 1 1 1 3 3 3 2 3 2 3 2 3 2 2 3 2 3 3 3 3 2 2 2 3 3 3 3
##  [75] 3 3 3 3 3 2 2 2 2 3 3 3 3 2 3 2 2 3 2 2 2 3 3 3 2 2 3 3 3 3 3 3 2 3 3 3 3
## [112] 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [149] 3 3
# Sanity check: there is one cluster id per iris row, so the two vectors can
# be cross-tabulated element by element.
length(clus3)
## [1] 150
length(iris$Species)
## [1] 150
# Cross-tabulate cluster ids (rows) against the true species (columns). The
# outer parentheses make the assignment also print its value.
(cm<-table(clus3,iris$Species))
##      
## clus3 setosa versicolor virginica
##     1     49          0         0
##     2      1         21         2
##     3      0         29        48
# Print the cluster-by-species table again (the parenthesised assignment that
# created `cm` has already displayed it once).
cm
##      
## clus3 setosa versicolor virginica
##     1     49          0         0
##     2      1         21         2
##     3      0         29        48
# Misclassification rate, in percent, of the 3-cluster solution against Species.
# Cluster ids are not tied to the order of the species levels, so the diagonal
# of `cm` is only meaningful when cluster k happens to line up with the k-th
# species. Instead, credit each cluster (row of `cm`) with its most frequent
# species; for this table that yields the same 118 correctly grouped flowers
# as the diagonal did, i.e. the same 21.33333 %.
Error <- 100 * (1 - sum(apply(cm, 1, max)) / sum(cm))
Error
## [1] 21.33333
# silhouette() is provided by the cluster package.
library(cluster)
# Silhouette information for the 3-cluster solution, computed from the
# distance object `d`.
sil<-silhouette(clus3,d)
plot(sil) # Plot the silhouettes; a width close to 1 means the observation fits its cluster well

# Compare linkage methods by average silhouette width.
# For every linkage method in `methds` and every number of clusters in `ks`,
# cut the corresponding tree and store the mean silhouette width in `avgS`
# (rows = number of clusters, columns = linkage method). Larger is better.
set.seed(1234) # kept from the original; nothing below draws random numbers
d <- dist(scale(iris[-5]))
methds <- c("complete", "single", "average")
ks <- 2:6
avgS <- matrix(
  NA_real_,
  nrow = length(ks), ncol = length(methds),
  dimnames = list(ks, methds)
)

for (m in seq_along(methds)) {
  # The tree depends only on the linkage method, so build it once per method
  # instead of once per (k, method) pair. `method` is spelled out in full
  # rather than relying on partial argument matching.
  h <- hclust(d, method = methds[m])
  for (k in ks) {
    clusters <- cutree(h, k)
    s <- silhouette(clusters, d)
    # Address the row by its label so the code does not depend on `ks`
    # starting at 2, and the column by name instead of a magic index.
    avgS[as.character(k), m] <- mean(s[, "sil_width"])
  }
}
avgS
##    complete    single   average
## 2 0.4408121 0.5817500 0.5817500
## 3 0.4496185 0.5046456 0.4802669
## 4 0.4106071 0.4067465 0.4067465
## 5 0.3520630 0.3424089 0.3746013
## 6 0.3106991 0.2018867 0.3248248

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.