1. Load Libraries
library(dplyr)
library(broom)
library(iterators)
library(ggplot2)
  1. Read in buckle strength data
# Load the raw strength measurements from the CSV in the working directory.
df <- read.csv(file = "bkstr.csv")
  1. Create cell codes based on Sample-Pocket combinations (66 in total)
# Running counter; one value is drawn per Sample-Pocket group so that every
# row of a group receives the same cell code.
counter <- icount()
df <- mutate(group_by(df, Sample, Pocket), cell = nextElem(counter))
  1. Look at cell histograms, several cells appear to contain two distinct populations.
# One Strength histogram per cell, laid out as a grid of panels.
ggplot(data = df, mapping = aes(x = Strength)) +
  facet_wrap(facets = ~ cell) +
  geom_histogram()

  1. Compile a list of cells with no noticeable grouping issues or concentration breaks
# Cell codes judged (from the histograms) to hold a single population.
onedist <- c(22:24, 31, 33, 44, 46:47, 52:57, 64:66)
  1. Assuming 2 groupings per cell, run Euclidean k-means clustering on each cell, create a new cluster variable, and arrange each cell by cluster
# k-means picks its starting centres at random; fix the seed so the cluster
# assignments are reproducible from run to run.
set.seed(1)

# For every cell, fit a 2-centre k-means on Strength and attach the per-row
# cluster assignment (.cluster) to the cell's rows via augment().
# The final sort names `cell` explicitly: recent dplyr releases ignore the
# grouping in arrange(), so arrange(.cluster) alone would interleave rows from
# different cells, while the later regrouping step needs each cell's rows to
# be contiguous and ordered by cluster.
kclusts <- df %>%
  group_by(cell) %>%
  do(augment(kmeans(.$Strength, centers = 2), .)) %>%
  arrange(cell, .cluster)
  1. Create a new cluster variable accounting for those with no apparent population separation
# Cells listed in onedist are treated as one population (label 1); every other
# row keeps the cluster assigned by k-means.
kclusts$new_clust <- ifelse(kclusts$cell %in% onedist, 1, kclusts$.cluster)
  1. Initialize the regrouping variable, then step through each cluster entry and regroup based on Cell - Cluster combination. This will split the initial 66 cells into new, smaller groupings based on data concentrations
# Assign a running group id to every Cell - Cluster combination.
# A new group starts on the first row and wherever the cell OR the adjusted
# cluster label differs from the previous row. Comparing the cluster label
# alone is not enough: a single-population cell (label 1) followed by cluster 1
# of the next cell would be merged into one group.
# Relies on the rows of each cell/cluster combination being contiguous.
cell_id <- kclusts$cell
clust_id <- kclusts$new_clust
starts_group <- c(
  TRUE,
  head(cell_id, -1) != tail(cell_id, -1) |
    head(clust_id, -1) != tail(clust_id, -1)
)[seq_along(cell_id)]

# The cumulative count of group starts is the group id. Working on whole
# columns also removes the hard-coded row count and copes with an empty table.
kclusts$regroup <- cumsum(starts_group)
  1. Examine New DataSet Head and Tail
# Show the first six rows of the regrouped data.
head(kclusts, n = 6L)
## Source: local data frame [6 x 7]
## Groups: cell
## 
##   Sample Pocket Strength cell .cluster new_clust regroup
## 1      1      1     92.4    1        1         1       1
## 2      1      1     92.7    1        1         1       1
## 3      1      1     93.6    1        1         1       1
## 4      1      1     92.8    1        1         1       1
## 5      1      1     92.8    1        1         1       1
## 6      1      1     93.7    1        1         1       1
# Show the last six rows of the regrouped data.
tail(kclusts, n = 6L)
## Source: local data frame [6 x 7]
## Groups: cell
## 
##   Sample Pocket Strength cell .cluster new_clust regroup
## 1     22      3    105.1   66        2         1      99
## 2     22      3    104.1   66        2         1      99
## 3     22      3    103.4   66        2         1      99
## 4     22      3    104.1   66        2         1      99
## 5     22      3    105.5   66        2         1      99
## 6     22      3    103.2   66        2         1      99
  1. Plot Clusters In Cell(Sample-Pocket) Groupings
# Strength against adjusted cluster label, coloured by label, one panel per
# cell.
ggplot(
  data = kclusts,
  mapping = aes(x = Strength, y = new_clust, colour = new_clust)
) +
  facet_wrap(facets = ~ cell) +
  geom_point()

  1. Plot New Cell Distributions
# One Strength histogram per regrouped Cell - Cluster combination.
ggplot(data = kclusts, mapping = aes(x = Strength)) +
  facet_wrap(facets = ~ regroup) +
  geom_histogram()