getMeans <- function(Cprev, X,K){
  p = dim(X)[2]
  # create a K x P matrix for means
  M = matrix(data = 0, nrow = K, ncol = p)
  
  # combine X and C so we don't have to carry indices
  XC = cbind(X,Cprev)
  Ccol = p + 1 # cluster column number
  
  # form new means based on previous cluster assignments
  for (k in 1:K) {
    in_cluster_k = subset(XC, XC[,Ccol]==k)
    # drop = FALSE keeps the matrix shape even when a cluster has one member
    M[k,] = colMeans(in_cluster_k[, -Ccol, drop = FALSE])
  }
  
  return(M)
}
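# Example (sketch, toy data not from the original): with four 2-D points and
# K = 2, getMeans returns a 2 x 2 matrix whose rows are the per-cluster means:
#   Xtoy <- matrix(c(1, 1,  1, 2,  5, 5,  6, 5), ncol = 2, byrow = TRUE)
#   Ctoy <- matrix(c(1, 1, 2, 2), ncol = 1)
#   getMeans(Ctoy, Xtoy, 2)   # row 1 = (1.0, 1.5), row 2 = (5.5, 5.0)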

reCluster <- function(X,M,K){
  n = dim(X)[1]
  C = matrix(data = 0, nrow = n, ncol = 1)
  
  # for each sample, calculate the distance to each mean and
  # assign the sample to the cluster whose mean is closest
  for (i in 1:n) {
    eucdist = rep(NA_real_, K)
    for (j in 1:K) {
      eucdist[j] = stats::dist(rbind(X[i,],M[j,]))
    }
    C[i] = which.min(eucdist) # base-R equivalent of ramify::argmin
  }
  return(C)
}
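# Note: stats::dist on a 2-row matrix returns the Euclidean distance
# sqrt(sum((x - m)^2)) between the rows, so each sample is assigned to the
# mean that minimizes this quantity.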

KMCiter <- function(Clast, X, K) {
  # one full K-means update: recompute the means, then reassign every sample
  mnew = getMeans(Clast, X, K)
  cnew = reCluster(X, mnew, K)
  return(list(means = mnew, clusters = cnew))
}
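# Example usage (sketch, assuming clast, X, and K are defined as below):
#   step <- KMCiter(clast, X, K)
#   step$means      # K x p matrix of updated cluster means
#   step$clusters   # n x 1 matrix of new cluster assignments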

# Use this to print digits from phi(x) transpose (one "row" of our feature map)
zimage <- function(vec) {
  img <- matrix(vec[1:256], nrow = 16, ncol = 16)
  img <- t(apply(-img, 1, rev))
  image(img, col = grey(seq(0, 1, length = 256)))
}
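# Example usage (sketch): once the per-digit matrices below exist, this shows
# the first training "3"; column 1 (the label) is dropped so only the 256
# pixel values are passed in:
#   zimage(as.numeric(X.train3[1, -1]))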
# Load the training and test data
train <- as.data.frame(read.table(file="zip.train", header=FALSE))
test <- as.data.frame(read.table(file="zip.test", header=FALSE))

# Split the samples by digit and put each subset into matrix form
# (the first column, V1, is the digit label)
X.train1 <- as.matrix(subset(train,V1 == 1)) # take only the #1s
X.train2 <- as.matrix(subset(train,V1 == 2)) # take only the #2s
X.train3 <- as.matrix(subset(train,V1 == 3)) # take only the #3s
X.train4 <- as.matrix(subset(train,V1 == 4)) # take only the #4s
X.train5 <- as.matrix(subset(train,V1 == 5)) # take only the #5s
X.train6 <- as.matrix(subset(train,V1 == 6)) # take only the #6s
X.train7 <- as.matrix(subset(train,V1 == 7)) # take only the #7s
X.train8 <- as.matrix(subset(train,V1 == 8)) # take only the #8s
X.train9 <- as.matrix(subset(train,V1 == 9)) # take only the #9s
X.train0 <- as.matrix(subset(train,V1 == 0)) # take only the #0s
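# Note: each X.trainN matrix above still carries the digit label in its first
# column, so drop column 1 before treating a row as 256 pixel values.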

X.train14 <- rbind(X.train1,X.train4)        # combine the two digits
# row format: <digit, 0-9> <pixel value 1 (of 256)> <pixel 2> ... <pixel 256>
#*****************************************#
## START HERE ##
## set the number of clusters (K) and the number of cycles
#*****************************************#
K = 10       # number of clusters
numIts = 10  # number of cycles (iterations)


# Now get rid of the actual labels so clustering only sees the pixel values
# (clustering is run on the test set here)
X <- as.matrix(test[,-1])
n = dim(X)[1]

# Randomize the initial cluster assignments: each sample gets a label in 1..K
c0 = matrix(sample(1:K, n, replace = TRUE), nrow = n, ncol = 1)
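# (Optional) calling set.seed() before the random assignment above, e.g.
# set.seed(42), makes the initial clustering, and therefore the whole run,
# reproducible.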
plot(c0,
     main="Randomized cluster assignments",
     xlab = "Indices of items",
     ylab = "Cluster Number")

cluster_history = c0
clast = c0

# Step 1: recompute the means from the current assignments
means_last = getMeans(clast,X,K)
means_history = means_last

# Step 2: reassign each sample to its nearest mean, and record the result
cnew = reCluster(X,means_last,K)
cluster_history = cbind(cluster_history,cnew)
clast = cnew
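# K-means alternates these two steps: with assignments fixed, each mean becomes
# the centroid of its cluster; with means fixed, each point moves to its
# nearest centroid. Both steps can only decrease the within-cluster sum of
# squares, sum_k sum_{i in cluster k} ||x_i - m_k||^2.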

p = dim(X)[2]
tempMeans = matrix(data = NA, nrow = K, ncol = p)
for (i in 2:numIts) {
  mnew = getMeans(clast,X,K)
  means_history = cbind(means_history,mnew)
  tempMeans = mnew
  
  cnew = reCluster(X,mnew, K)
  cluster_history = cbind(cluster_history,cnew)

  plot(cnew,
       main = paste("Cluster assignments, iteration", i),
       xlab = "Indices of items",
       ylab = "Cluster Number")
  clast = cnew
}
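# (Sketch) an early-exit convergence check could be added inside the loop,
# just before clast is updated, since K-means has converged once the
# assignments stop changing between iterations:
#   if (all(cnew == clast)) break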

# Look at our clusters: display each cluster mean as a 16x16 digit image
for (i in 1:K) {
  zimage(tempMeans[i,])
}
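# (Optional) calling par(mfrow = c(2, 5)) before the loop above arranges all
# K = 10 mean images in a single 2 x 5 plotting grid; par(mfrow = c(1, 1))
# resets the layout afterwards.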


```