- Load Libraries
library(dplyr)
library(broom)
library(iterators)
library(ggplot2)
- Read in buckle strength data
# Load the raw strength measurements (path is relative to the working
# directory). Later steps use the Sample, Pocket and Strength columns.
df <- read.csv(file = "bkstr.csv")
- Create cell codes based on Sample-Pocket combinations (66 in total)
# Number each Sample-Pocket combination 1, 2, ... in sorted group order.
# group_indices() derives the id from the grouping itself, so the numbering
# does not depend on how often (or in what order) mutate() would evaluate a
# stateful iterator, and re-running this step always gives the same ids.
# The result stays grouped by Sample and Pocket.
df <- df %>% group_by(Sample, Pocket)
df$cell <- group_indices(df)
- Look at cell histograms; several cells appear to contain two distinct populations.
# One Strength histogram per cell, to spot cells with more than one
# population.
ggplot(data = df, mapping = aes(x = Strength)) +
  facet_wrap(~ cell) +
  geom_histogram()

- Compile a list of cells with no noticeable grouping issues or concentration breaks
# Cell ids judged (from the histograms) to hold a single population.
# Consecutive ids are written as ranges; the values are unchanged.
onedist <- c(
  22:24,
  31, 33,
  44, 46:47,
  52:57,
  64:66
)
- Assuming 2 groupings per cell, run Euclidean k-means clustering on each cell, create a new cluster variable, and arrange each cell by cluster
# Fit a two-centre k-means to the Strength values of each cell; augment()
# attaches the assigned cluster to every row as `.cluster`.
# kmeans() starts from randomly chosen centres, so fix the seed to make the
# cluster assignment repeatable between runs.
set.seed(1)
# Sort by cell explicitly as well as by cluster: current dplyr's arrange()
# ignores grouping unless .by_group = TRUE, so arrange(.cluster) alone would
# interleave rows from different cells. This keeps each cell's rows together,
# ordered by cluster within the cell. The result stays grouped by cell.
kclusts <- df %>%
  group_by(cell) %>%
  do(augment(kmeans(.$Strength, centers = 2), .)) %>%
  arrange(cell, .cluster)
- Create a new cluster variable accounting for those with no apparent population separation
# Cells listed in `onedist` are treated as one population (cluster 1);
# every other row keeps its k-means cluster.
# NOTE(review): `.cluster` is presumably a factor; ifelse() then yields its
# integer codes -- confirm before swapping in a stricter if_else().
kclusts$new_clust <- ifelse(
  kclusts$cell %in% onedist,
  1,
  kclusts$.cluster
)
- Initialize the regrouping variable, then loop through each cluster entry and regroup based on the Cell-Cluster combination. This splits the initial 66 cells into new, smaller groupings based on data concentrations.
# Give every distinct (cell, new_clust) pair its own integer id, numbered by
# cell and then by cluster within the cell.
#
# The id is built from both columns. Comparing only new_clust with the
# previous row is not enough: a single-population cell (new_clust == 1) is
# followed by the next cell's cluster 1, so the two would share one id.
# Working on whole columns also removes the hard-coded row count and does not
# depend on the current row order of kclusts.
#
# lex.order = TRUE makes `cell` the slowest-varying key; drop = TRUE discards
# pairs that never occur, so the ids run 1, 2, ... without gaps.
kclusts$regroup <- as.integer(
  interaction(
    kclusts$cell,
    kclusts$new_clust,
    drop = TRUE,
    lex.order = TRUE
  )
)
- Examine New DataSet Head and Tail
# Show the first six rows of the clustered data.
head(kclusts, n = 6L)
## Source: local data frame [6 x 7]
## Groups: cell
##
## Sample Pocket Strength cell .cluster new_clust regroup
## 1 1 1 92.4 1 1 1 1
## 2 1 1 92.7 1 1 1 1
## 3 1 1 93.6 1 1 1 1
## 4 1 1 92.8 1 1 1 1
## 5 1 1 92.8 1 1 1 1
## 6 1 1 93.7 1 1 1 1
# Show the last six rows of the clustered data.
tail(kclusts, n = 6L)
## Source: local data frame [6 x 7]
## Groups: cell
##
## Sample Pocket Strength cell .cluster new_clust regroup
## 1 22 3 105.1 66 2 1 99
## 2 22 3 104.1 66 2 1 99
## 3 22 3 103.4 66 2 1 99
## 4 22 3 104.1 66 2 1 99
## 5 22 3 105.5 66 2 1 99
## 6 22 3 103.2 66 2 1 99
- Plot Clusters In Cell(Sample-Pocket) Groupings
# Strength against assigned cluster, coloured by cluster, one panel per cell.
ggplot(
  data = kclusts,
  mapping = aes(x = Strength, y = new_clust, colour = new_clust)
) +
  facet_wrap(~ cell) +
  geom_point()

- Plot New Cell Distributions
# One Strength histogram per regrouped cell.
ggplot(data = kclusts, mapping = aes(x = Strength)) +
  facet_wrap(~ regroup) +
  geom_histogram()
