1. Needed packages
library(raster)
library(fpc)
library(rgdal)
library(imager)
library(ggplot2)
library(gridExtra)
library(dplyr)
2. Read image
r <- raster::stack("C:\\Users\\user\\Desktop\\109_2\\Machine Learning\\Final project\\rice2.jpg")
plotRGB(r)

plot(r)

3. k-means cluster
df.r <- as.data.frame(r)
out <- kmeans(df.r, 3)
cl <- out$cluster
cl.df <- cbind(df.r, cl)
head(cl.df, n = 20)
## rice2.1 rice2.2 rice2.3 cl
## 1 120 108 66 3
## 2 119 107 65 3
## 3 136 121 78 3
## 4 140 126 81 3
## 5 147 128 85 3
## 6 150 132 86 3
## 7 131 115 66 3
## 8 123 109 60 3
## 9 121 113 66 3
## 10 112 110 62 1
## 11 111 113 66 1
## 12 98 105 61 1
## 13 89 100 57 1
## 14 72 87 46 2
## 15 64 79 40 2
## 16 65 80 41 2
## 17 85 93 56 2
## 18 85 92 51 2
## 19 95 97 58 1
## 20 104 107 64 1
4. Create new layer with cluster outcome and draw the plot
4.1 Using “basic plot”
cl.layer <- raster(ncol = ncol(r), nrow = nrow(r),
xmn = extent(r)[1], xmx = extent(r)[2],
ymn = extent(r)[3], ymx = extent(r)[4])
values(cl.layer) <- cl
plot(cl.layer[[1]], rev(topo.colors(3)))

4.2 Using “ggplot”
r_points <- rasterToPoints(cl.layer)
r.df <- as.data.frame(r_points)
all.plot <- ggplot(data = r.df, aes(x=x, y=y)) +
geom_tile(aes(fill = layer)) +
xlab("Longtitude") +
ylab("Latitude") +
scale_fill_gradient(low = "green", high = "white")
all.plot

5. Training data
5.1 Using “basic plot”
train <- df.r[c((ncol(r)*49+1):nrow(df.r)),]
out.train <- kmeans(train, 3)
train.layer <- raster(ncol = ncol(r), nrow = round(nrow(train)/ncol(r)),
xmn = extent(r)[1], xmx = ncol(r),
ymn = nrow(r) - round(nrow(train)/ncol(r)) + 1, ymx = nrow(r))
train.cl <- as.vector(out.train$cluster)
values(train.layer) <- train.cl
plot(train.layer[[1]], rev(topo.colors(3)))

5.2 Using “ggplot”
train.pt <- as.data.frame(rasterToPoints(train.layer))
train.plot <- ggplot(data = train.pt, aes(x=x, y=y)) +
geom_tile(aes(fill = layer)) +
xlab("Longtitude") +
ylab("Latitude") +
scale_fill_gradient(low = "green", high = "white")
train.plot

6. Testing data
6.1 Using “basic plot”
test <- df.r[-c((ncol(r)*49+1):nrow(df.r)),]
center <- out.train$centers
test.cl <- c()
for(i in 1:nrow(test)){
test.cl[i] <- which.min(c(dist(rbind(center[1,], test[i,])),
dist(rbind(center[2,], test[i,])),
dist(rbind(center[3,], test[i,]))))
}
test.layer <- raster(ncol = ncol(r), nrow = nrow(r) - round(nrow(train)/ncol(r)),
xmn = extent(r)[1], xmx = ncol(r),
ymn = extent(r)[3], ymx = nrow(r) - round(nrow(train)/ncol(r)))
values(test.layer) <- test.cl
plot(test.layer[[1]], topo.colors(3))

6.2 Using “ggplot”
test.pt <- as.data.frame(rasterToPoints(test.layer))
test.plot <- ggplot(data = test.pt, aes(x=x, y=y)) +
geom_tile(aes(fill = layer)) +
xlab("Longtitude") +
ylab("Latitude") +
scale_fill_gradient(low = "green", high = "white")
test.plot

7. Comparing all plots
7.1 Using “basic plot”
par(mfrow=c(2,2))
plot(cl.layer[[1]], rev(topo.colors(3)))
plot(train.layer[[1]], rev(topo.colors(3)))
plot(test.layer[[1]], topo.colors(3))

7.2 Using “ggplot”
grid.arrange(all.plot, train.plot, test.plot, ncol = 2, nrow = 2)

8. DBSCAN
df.cor <- cbind(as.data.frame(coordinates(r)), cl.df)
df.rice <- filter(df.cor, cl == 3)[,c(1,2)]
db <- dbscan(df.rice, eps = 1.5, MinPts = 5)
db
## dbscan Pts=9974 MinPts=5 eps=1.5
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## border 216 17 22 200 32 13 16 5 3 1 10 18 2 15 4 5 5 3 5 4 7
## seed 0 171 375 5240 642 132 305 45 4 4 71 208 18 199 3 16 14 3 29 2 29
## total 216 188 397 5440 674 145 321 50 7 5 81 226 20 214 7 21 19 6 34 6 36
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
## border 16 5 10 16 8 3 2 4 4 2 7 12 4 4 4 30 7 2 5 2 6 9
## seed 95 94 73 220 100 11 3 20 15 4 82 107 5 2 3 533 23 2 16 4 22 39
## total 111 99 83 236 108 14 5 24 19 6 89 119 9 6 7 563 30 4 21 6 28 48
## 43 44 45 46 47 48 49 50 51 52 53 54
## border 3 3 4 5 4 2 3 3 5 4 3 4
## seed 2 5 15 4 4 9 6 8 20 71 2 37
## total 5 8 19 9 8 11 9 11 25 75 5 41
8.1 Plot dbscan total
cx <- tapply(df.rice$x, db$cluster, mean)
cy <- tapply(df.rice$y, db$cluster, mean)
plot(db, df.rice, main = "DBSCAN")
points(cx, cy, pch = 7, col = 1)

8.2 Plot dbscan isseed
8.2.1 Using “basic plot” and “tapply”
cx <- tapply(df.rice$x, db$cluster, mean)
cy <- tapply(df.rice$y, db$cluster, mean)
plot(db, df.rice, main = "DBSCAN")
points(cx, cy, pch = 7, col = 1)

8.2.2 Using “ggplot” and “dplyr”
rice.group <- data.frame(x=df.rice$x, y=df.rice$y, group=db$cluster, isseed=db$isseed)
rice.group.isseed <- filter(rice.group, isseed == TRUE)
rice.plot.df <- rice.group.isseed %>%
group_by(group) %>%
summarise(center_x = mean(x), center_y = mean(y))
rice.plot.df <- as.data.frame(rice.plot.df)
rice.plot <- ggplot(rice.group.isseed, aes(x=x, y=y)) +
geom_tile(aes(fill = group)) +
ggtitle(label = "DBSCAN") +
xlab("Longtitude") +
ylab("Latitude") +
scale_fill_gradient(low = "#8FBC8F", high = "#8FBC8F") +
geom_point(data = rice.plot.df, aes(x=center_x, y=center_y), color = "red", shape = 7, size = 2) +
theme(legend.position = 'none')
rice.plot

head(rice.plot.df, n = 20)
## group center_x center_y
## 1 1 4.020468 133.6871
## 2 2 26.358667 131.8707
## 3 3 42.572901 49.4395
## 4 4 82.152648 132.5140
## 5 5 127.689394 138.7424
## 6 6 145.700000 128.3000
## 7 7 170.522222 142.2556
## 8 8 165.000000 142.0000
## 9 9 36.000000 132.0000
## 10 10 167.091549 125.4437
## 11 11 95.788462 115.8317
## 12 12 85.388889 123.6111
## 13 13 125.012563 115.3291
## 14 14 110.166667 121.1667
## 15 15 34.875000 115.3125
## 16 16 153.642857 114.2857
## 17 17 63.166667 114.1667
## 18 18 162.293103 111.7069
## 19 19 150.000000 112.5000
## 20 20 2.362069 108.0172