1. Needed packages

library(raster)
library(fpc)
library(rgdal)
library(imager)
library(ggplot2)
library(gridExtra)
library(dplyr)

2. Read image

r <- raster::stack("C:\\Users\\user\\Desktop\\109_2\\Machine Learning\\Final project\\rice2.jpg")
plotRGB(r)

plot(r)

3. k-means cluster

df.r <- as.data.frame(r)
out <- kmeans(df.r, 3)
cl <- out$cluster
cl.df <- cbind(df.r, cl)
head(cl.df, n = 20)
##    rice2.1 rice2.2 rice2.3 cl
## 1      120     108      66  3
## 2      119     107      65  3
## 3      136     121      78  3
## 4      140     126      81  3
## 5      147     128      85  3
## 6      150     132      86  3
## 7      131     115      66  3
## 8      123     109      60  3
## 9      121     113      66  3
## 10     112     110      62  1
## 11     111     113      66  1
## 12      98     105      61  1
## 13      89     100      57  1
## 14      72      87      46  2
## 15      64      79      40  2
## 16      65      80      41  2
## 17      85      93      56  2
## 18      85      92      51  2
## 19      95      97      58  1
## 20     104     107      64  1

4. Create new layer with cluster outcome and draw the plot

4.1 Using “basic plot”

cl.layer <- raster(ncol = ncol(r), nrow = nrow(r), 
                   xmn = extent(r)[1], xmx = extent(r)[2], 
                   ymn = extent(r)[3], ymx = extent(r)[4])
values(cl.layer) <- cl
plot(cl.layer[[1]], rev(topo.colors(3)))

4.2 Using “ggplot”

r_points <- rasterToPoints(cl.layer)
r.df <- as.data.frame(r_points)
all.plot <- ggplot(data = r.df, aes(x=x, y=y)) +
  geom_tile(aes(fill = layer)) +
  xlab("Longtitude") +
  ylab("Latitude") +
  scale_fill_gradient(low = "green", high = "white") 
all.plot

5. Training data

5.1 Using “basic plot”

train <- df.r[c((ncol(r)*49+1):nrow(df.r)),]
out.train <- kmeans(train, 3)
train.layer <- raster(ncol = ncol(r), nrow = round(nrow(train)/ncol(r)), 
                      xmn = extent(r)[1], xmx = ncol(r), 
                      ymn = nrow(r) - round(nrow(train)/ncol(r)) + 1, ymx = nrow(r))
train.cl <- as.vector(out.train$cluster)
values(train.layer) <- train.cl
plot(train.layer[[1]], rev(topo.colors(3)))

5.2 Using “ggplot”

train.pt <- as.data.frame(rasterToPoints(train.layer))
train.plot <- ggplot(data = train.pt, aes(x=x, y=y)) +
  geom_tile(aes(fill = layer)) +
  xlab("Longtitude") +
  ylab("Latitude") +
  scale_fill_gradient(low = "green", high = "white")
train.plot

6. Testing data

6.1 Using “basic plot”

test <- df.r[-c((ncol(r)*49+1):nrow(df.r)),]
center <- out.train$centers
test.cl <- c()
for(i in 1:nrow(test)){
  test.cl[i] <- which.min(c(dist(rbind(center[1,], test[i,])),
                           dist(rbind(center[2,], test[i,])),
                           dist(rbind(center[3,], test[i,]))))
}
test.layer <- raster(ncol = ncol(r), nrow = nrow(r) - round(nrow(train)/ncol(r)), 
                     xmn = extent(r)[1], xmx = ncol(r), 
                     ymn = extent(r)[3], ymx = nrow(r) - round(nrow(train)/ncol(r)))
values(test.layer) <- test.cl
plot(test.layer[[1]], topo.colors(3))

6.2 Using “ggplot”

test.pt <- as.data.frame(rasterToPoints(test.layer))
test.plot <- ggplot(data = test.pt, aes(x=x, y=y)) +
  geom_tile(aes(fill = layer)) +
  xlab("Longtitude") +
  ylab("Latitude") +
  scale_fill_gradient(low = "green", high = "white")
test.plot

7. Comparing all plots

7.1 Using “basic plot”

par(mfrow=c(2,2))
plot(cl.layer[[1]], rev(topo.colors(3)))
plot(train.layer[[1]], rev(topo.colors(3)))
plot(test.layer[[1]], topo.colors(3))

7.2 Using “ggplot”

grid.arrange(all.plot, train.plot, test.plot, ncol = 2, nrow = 2)

8. DBSCAN

df.cor <- cbind(as.data.frame(coordinates(r)), cl.df)
df.rice <- filter(df.cor, cl == 3)[,c(1,2)]
db <- dbscan(df.rice, eps = 1.5, MinPts = 5)
db
## dbscan Pts=9974 MinPts=5 eps=1.5
##          0   1   2    3   4   5   6  7 8 9 10  11 12  13 14 15 16 17 18 19 20
## border 216  17  22  200  32  13  16  5 3 1 10  18  2  15  4  5  5  3  5  4  7
## seed     0 171 375 5240 642 132 305 45 4 4 71 208 18 199  3 16 14  3 29  2 29
## total  216 188 397 5440 674 145 321 50 7 5 81 226 20 214  7 21 19  6 34  6 36
##         21 22 23  24  25 26 27 28 29 30 31  32 33 34 35  36 37 38 39 40 41 42
## border  16  5 10  16   8  3  2  4  4  2  7  12  4  4  4  30  7  2  5  2  6  9
## seed    95 94 73 220 100 11  3 20 15  4 82 107  5  2  3 533 23  2 16  4 22 39
## total  111 99 83 236 108 14  5 24 19  6 89 119  9  6  7 563 30  4 21  6 28 48
##        43 44 45 46 47 48 49 50 51 52 53 54
## border  3  3  4  5  4  2  3  3  5  4  3  4
## seed    2  5 15  4  4  9  6  8 20 71  2 37
## total   5  8 19  9  8 11  9 11 25 75  5 41

8.1 Plot dbscan total

cx <- tapply(df.rice$x, db$cluster, mean)
cy <- tapply(df.rice$y, db$cluster, mean)
plot(db, df.rice, main = "DBSCAN")
points(cx, cy, pch = 7, col = 1)

8.2 Plot dbscan isseed

8.2.1 Using “basic plot” and “tapply”

cx <- tapply(df.rice$x, db$cluster, mean)
cy <- tapply(df.rice$y, db$cluster, mean)
plot(db, df.rice, main = "DBSCAN")
points(cx, cy, pch = 7, col = 1)

8.2.2 Using “ggplot” and “dplyr”

rice.group <- data.frame(x=df.rice$x, y=df.rice$y, group=db$cluster, isseed=db$isseed)
rice.group.isseed <- filter(rice.group, isseed == TRUE)
rice.plot.df <- rice.group.isseed %>%
  group_by(group) %>%
  summarise(center_x = mean(x), center_y = mean(y))
rice.plot.df <- as.data.frame(rice.plot.df)

rice.plot <- ggplot(rice.group.isseed, aes(x=x, y=y)) +
  geom_tile(aes(fill = group)) +
  ggtitle(label = "DBSCAN") +
  xlab("Longtitude") +
  ylab("Latitude") +
  scale_fill_gradient(low = "#8FBC8F", high = "#8FBC8F") +
  geom_point(data = rice.plot.df, aes(x=center_x, y=center_y), color = "red", shape = 7, size = 2) +
  theme(legend.position = 'none')

rice.plot

head(rice.plot.df, n = 20)
##    group   center_x center_y
## 1      1   4.020468 133.6871
## 2      2  26.358667 131.8707
## 3      3  42.572901  49.4395
## 4      4  82.152648 132.5140
## 5      5 127.689394 138.7424
## 6      6 145.700000 128.3000
## 7      7 170.522222 142.2556
## 8      8 165.000000 142.0000
## 9      9  36.000000 132.0000
## 10    10 167.091549 125.4437
## 11    11  95.788462 115.8317
## 12    12  85.388889 123.6111
## 13    13 125.012563 115.3291
## 14    14 110.166667 121.1667
## 15    15  34.875000 115.3125
## 16    16 153.642857 114.2857
## 17    17  63.166667 114.1667
## 18    18 162.293103 111.7069
## 19    19 150.000000 112.5000
## 20    20   2.362069 108.0172