# ---- Load and inspect the data -------------------------------------------
# Read the wine data from a CSV file picked interactively by the user.
wine <- read.csv(file.choose())
View(wine)
dim(wine)
summary(wine)

# Check whether any pair of variables in the data set has a correlation
# above .85.
cor(wine)

# Drop the first column before running the PCA.
# NOTE(review): presumably this is the wine type/class label -- confirm
# against the CSV.
newWine <- wine[-1]
View(newWine)
cor(newWine)

# ---- Standardize ----------------------------------------------------------
# scale() centers and scales every column; it returns a matrix, so convert
# the result back to a data frame for the later steps.
normdata <- scale(newWine)
View(normdata)
normdata <- as.data.frame(normdata)
# ---- Principal component analysis ----------------------------------------
# Run the PCA on the standardized data (cor = FALSE: use the covariance
# matrix of the already-scaled columns).
pcaObj <- princomp(normdata, cor = FALSE)

# The summary lists the components with their standard deviations, arranged
# in descending order of variance from Comp.1 to Comp.13.
# Each component is a linear combination of the inputs:
#   PC_i = a_i1 * x_1 + a_i2 * x_2 + ... + a_in * x_n
summary(pcaObj)

# Loadings are the weights (a_ij) of each original variable in a component.
loadings(pcaObj)

# The plot shows the importance of the principal components; Comp.1 has the
# highest importance.
plot(pcaObj)

# pcaObj$scores holds the component scores for every record.
pcaObj$scores

# Combine PC1..PC6 with the original data using cbind() (column bind).
wine <- cbind(wine, pcaObj$scores[, 1:6])
View(wine)
dim(wine)
colnames(wine)
# ---- Hierarchical clustering on the principal components -----------------
# Cluster on PC1..PC6 instead of the original columns. The score columns are
# selected by name (they were appended to `wine` as Comp.1..Comp.6) rather
# than by the hard-coded positions 15:20.
clusdata <- wine[, paste0("Comp.", 1:6)]
View(clusdata)

# Normalize the clustering data.
normClus <- scale(clusdata)
View(normClus)

# Hierarchical clustering needs the distance between all records; use the
# Euclidean distance.
distance <- dist(normClus, method = "euclidean")

# Complete linkage uses the maximum distance between clusters; single
# linkage would use the minimum distance.
hirarachyClus <- hclust(distance, method = "complete")

# Open a new graphics device for the dendrogram. dev.new() is the portable
# replacement for the Windows-only windows() device.
dev.new()
plot(hirarachyClus, hang = -1)

# Second column of `wine` together with the six component scores.
newDataSet <- cbind(wine[2], clusdata)
View(newDataSet)
# ---- K-means clustering ---------------------------------------------------
# Combine the normalized component scores with the second column of `wine`.
# NOTE(review): that column is bound in unscaled next to scaled scores, so it
# may dominate the k-means distances -- confirm this is intended.
wineData <- wine[2]
newData1 <- cbind(normClus, wineData)
View(newData1)

# kmeans() starts from random centers, so cluster numbers change between
# runs; fix the seed so that "cluster 3" below refers to the same group on
# every run.
set.seed(123)
kmean <- kmeans(newData1, 5)

# Structure of the k-means result.
str(kmean)
kmean$centers

# Attach the cluster assignment of every record to the data.
clusters <- as.data.frame(kmean$cluster)
View(clusters)
wineCluData <- cbind(newData1, clusters)
View(wineCluData)

# Keep only the records assigned to cluster number 3.
wineClus3 <- wineCluData[kmean$cluster == 3, ]
View(wineClus3)

# Model data: the first seven columns (six component scores plus the column
# taken from wine[2]); the cluster-assignment column is dropped.
# attach() is deliberately not used: the lm() calls in this script pass
# `data =` explicitly, and attaching would only mask global names.
modeldata1 <- wineClus3[, 1:7]
View(modeldata1)
# ---- Regression models ----------------------------------------------------
# Model 1: Alcohol explained by the six principal-component scores, fitted
# on the cluster-3 subset only.
m1 <- lm(
  Alcohol ~ Comp.1 + Comp.2 + Comp.3 + Comp.4 + Comp.5 + Comp.6,
  data = modeldata1
)
summary(m1)
colnames(wine)

# Model 2: Alcohol explained by the original variables, fitted on the full
# data set, for comparison with the component-based model.
m2 <- lm(
  Alcohol ~ Malic + Ash + Alcalinity + Magnesium + Phenols + Flavanoids +
    Nonflavanoids + Proanthocyanins + Color + Hue + Dilution + Proline,
  data = newWine
)
summary(m2)

# Correlation between the columns of the model data.
cor(modeldata1)