First time I’m working with this kinda of data. So many parts taken together from http://geoscripting-wur.github.io/IntroToRaster/, http://neondataskills.org/R/Raster-Data-In-R/ and http://www.r-bloggers.com/unsupervised-classification-of-a-landsat-image-in-r-the-whole-story-or-part-two/

Load libraries

#setwd("Solar Array Object /")
#install.packages("raster")
#install.packages('sp')
#install.packages('rgdal')

library(raster)
## Warning: package 'raster' was built under R version 3.2.5
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.2.5
## 
## Attaching package: 'raster'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:dplyr':
## 
##     select
library(sp)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.2.5
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3

To get only the files with .tif extension

# list.files(path="train_images/", pattern = glob2rx('*.tif'))

Or if you are familiar with regular expressions

list.files(path = "train_images/", pattern = '^.*\\.tif$')
##   [1] "image1.tif"   "image10.tif"  "image100.tif" "image101.tif"
##   [5] "image102.tif" "image103.tif" "image104.tif" "image105.tif"
##   [9] "image106.tif" "image107.tif" "image108.tif" "image109.tif"
##  [13] "image11.tif"  "image110.tif" "image111.tif" "image112.tif"
##  [17] "image113.tif" "image114.tif" "image115.tif" "image116.tif"
##  [21] "image117.tif" "image118.tif" "image119.tif" "image12.tif" 
##  [25] "image120.tif" "image121.tif" "image122.tif" "image123.tif"
##  [29] "image124.tif" "image125.tif" "image126.tif" "image127.tif"
##  [33] "image128.tif" "image129.tif" "image13.tif"  "image130.tif"
##  [37] "image131.tif" "image132.tif" "image133.tif" "image134.tif"
##  [41] "image135.tif" "image136.tif" "image137.tif" "image138.tif"
##  [45] "image139.tif" "image14.tif"  "image140.tif" "image141.tif"
##  [49] "image142.tif" "image143.tif" "image144.tif" "image145.tif"
##  [53] "image146.tif" "image147.tif" "image148.tif" "image149.tif"
##  [57] "image15.tif"  "image150.tif" "image151.tif" "image152.tif"
##  [61] "image153.tif" "image154.tif" "image155.tif" "image156.tif"
##  [65] "image157.tif" "image158.tif" "image159.tif" "image16.tif" 
##  [69] "image160.tif" "image161.tif" "image162.tif" "image163.tif"
##  [73] "image164.tif" "image165.tif" "image166.tif" "image167.tif"
##  [77] "image168.tif" "image169.tif" "image17.tif"  "image170.tif"
##  [81] "image171.tif" "image172.tif" "image173.tif" "image174.tif"
##  [85] "image175.tif" "image176.tif" "image177.tif" "image178.tif"
##  [89] "image179.tif" "image18.tif"  "image180.tif" "image181.tif"
##  [93] "image182.tif" "image183.tif" "image184.tif" "image185.tif"
##  [97] "image186.tif" "image187.tif" "image188.tif" "image189.tif"
## [101] "image19.tif"  "image190.tif" "image191.tif" "image192.tif"
## [105] "image193.tif" "image194.tif" "image195.tif" "image196.tif"
## [109] "image197.tif" "image198.tif" "image199.tif" "image2.tif"  
## [113] "image20.tif"  "image200.tif" "image21.tif"  "image22.tif" 
## [117] "image23.tif"  "image24.tif"  "image25.tif"  "image26.tif" 
## [121] "image27.tif"  "image28.tif"  "image29.tif"  "image3.tif"  
## [125] "image30.tif"  "image31.tif"  "image32.tif"  "image33.tif" 
## [129] "image34.tif"  "image35.tif"  "image36.tif"  "image37.tif" 
## [133] "image38.tif"  "image39.tif"  "image4.tif"   "image40.tif" 
## [137] "image41.tif"  "image42.tif"  "image43.tif"  "image44.tif" 
## [141] "image45.tif"  "image46.tif"  "image47.tif"  "image48.tif" 
## [145] "image49.tif"  "image5.tif"   "image50.tif"  "image51.tif" 
## [149] "image52.tif"  "image53.tif"  "image54.tif"  "image55.tif" 
## [153] "image56.tif"  "image57.tif"  "image58.tif"  "image59.tif" 
## [157] "image6.tif"   "image60.tif"  "image61.tif"  "image62.tif" 
## [161] "image63.tif"  "image64.tif"  "image65.tif"  "image66.tif" 
## [165] "image67.tif"  "image68.tif"  "image69.tif"  "image7.tif"  
## [169] "image70.tif"  "image71.tif"  "image72.tif"  "image73.tif" 
## [173] "image74.tif"  "image75.tif"  "image76.tif"  "image77.tif" 
## [177] "image78.tif"  "image79.tif"  "image8.tif"   "image80.tif" 
## [181] "image81.tif"  "image82.tif"  "image83.tif"  "image84.tif" 
## [185] "image85.tif"  "image86.tif"  "image87.tif"  "image88.tif" 
## [189] "image89.tif"  "image9.tif"   "image90.tif"  "image91.tif" 
## [193] "image92.tif"  "image93.tif"  "image94.tif"  "image95.tif" 
## [197] "image96.tif"  "image97.tif"  "image98.tif"  "image99.tif"

Retrieve the content of the train_images sub-directory

list <- list.files(path='train_images/', full.names=TRUE)

Check attributes of first element

pic1 <- raster(list[3])
pic1
## class       : RasterLayer 
## band        : 1  (of  3  bands)
## dimensions  : 101, 101, 10201  (nrow, ncol, ncell)
## resolution  : 1, 1  (x, y)
## extent      : 0, 101, 0, 101  (xmin, xmax, ymin, ymax)
## coord. ref. : NA 
## data source : /Users/slackoverflow/stats/kaggle/Solar Array Object /train_images/image100.tif 
## names       : image100 
## values      : 0, 255  (min, max)
  • Nrow, Ncol: the number of rows and columns in the data (imagine a spreadsheet or a matrix).
  • Ncells: the total number of pixels or cells that make up the raster.
  • Resolution: the size of each pixel (in meters in this case). 1 meter pixels means that each pixel represents a 1m x 1m area on the earth’s surface.
  • Extent: the spatial extent of the raster. This value will be in the same coordinate units as the coordinate reference system of the raster.
  • Coord ref: the coordinate reference system string for the raster.

Graphical displays

Plot first .tif picture of the list

plot(pic1)

We can also create a histogram to view the distribution of values in our raster. Note that the max number of pixels that R will plot by default is 100,000. We can tell it to plot more using the maxpixels attribute.

#check out histinfo first to acquire information about binwidth
histinfo <- hist(pic1)

histinfo
## $breaks
##  [1]  20  40  60  80 100 120 140 160 180 200 220 240 260
## 
## $counts
##  [1]  130  604  751 1795 1948  800 2477 1232  318   77   50   19
## 
## $density
##  [1] 6.371924e-04 2.960494e-03 3.681012e-03 8.798157e-03 9.548084e-03
##  [6] 3.921184e-03 1.214097e-02 6.038624e-03 1.558671e-03 3.774140e-04
## [11] 2.450740e-04 9.312812e-05
## 
## $mids
##  [1]  30  50  70  90 110 130 150 170 190 210 230 250
## 
## $xname
## [1] "v"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
hist(pic1, breaks=20, main="Distribution of elevation values", 
     col= "purple")

Basic Raster Math

We can also perform calculations on our raster. For instance, we could multiply all values within the raster by 2.

pic1Doubled <- pic1 * 2
pic1
## class       : RasterLayer 
## band        : 1  (of  3  bands)
## dimensions  : 101, 101, 10201  (nrow, ncol, ncell)
## resolution  : 1, 1  (x, y)
## extent      : 0, 101, 0, 101  (xmin, xmax, ymin, ymax)
## coord. ref. : NA 
## data source : /Users/slackoverflow/stats/kaggle/Solar Array Object /train_images/image100.tif 
## names       : image100 
## values      : 0, 255  (min, max)
pic1Doubled
## class       : RasterLayer 
## dimensions  : 101, 101, 10201  (nrow, ncol, ncell)
## resolution  : 1, 1  (x, y)
## extent      : 0, 101, 0, 101  (xmin, xmax, ymin, ymax)
## coord. ref. : NA 
## data source : in memory
## names       : image100 
## values      : 42, 510  (min, max)
plot(pic1Doubled)

R has an image function that allows you to control the way a raster is rendered on the screen. The plot command in R has a base setting for the number of pixels that it will plot (100,000 pixels). The image command thus might be better for rendering larger rasters.

image(pic1)

Specify the range of values that you want to plot Just plot pixels between 50 and 125 in elevation

image(pic1, zlim=c(50,125))

We can specify the colors too

col <- terrain.colors(6)
image(pic1, zlim=c(50,125), main="Digital Elevation Model (DEM)", col=col)

And the max. amount of pixels

image(pic1, zlim=c(50,125), main="Digital Elevation Model (DEM)", maxpixels=200)

Stack data

# trainStack <- stack(list)
# changed this 14.07.2016 at 23:06 in order to try out something new
trainStack <- brick(list[3])

Plot picture in RGB color mode

?plotRGB
plotRGB(trainStack, 1,2,3)

Crop pictures by drawing a box by clicking upper left and lower right corner in the plot (code is commented out since it requires user’s interaction and thus will create an error while uploading as HTML)

#border <- drawExtent()
#cropped <- crop(trainStack, border)
#plotRGB(cropped, 1,2,3)

Classify the data

The classification itself will be done on a corresponding dataframe:

df <- as.data.frame(trainStack)

Check for any NA’s

table(is.na(df))
## 
## FALSE 
## 30603

K-means clustering

k = 5
clust <- kmeans(df, k, iter.max=1000)
str(clust)
## List of 9
##  $ cluster     : int [1:10201] 3 3 3 3 3 3 3 3 3 3 ...
##  $ centers     : num [1:5, 1:3] 99.1 129.6 59.3 155.3 191.7 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:5] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:3] "image100.1" "image100.2" "image100.3"
##  $ totss       : num 29192305
##  $ withinss    : num [1:5] 692753 240733 634400 519805 675821
##  $ tot.withinss: num 2763512
##  $ betweenss   : num 26428793
##  $ size        : int [1:5] 3493 1359 1403 3257 689
##  $ iter        : int 2
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"

Extract clusters

dfClusters <- clust$cluster

Create an empty raster with same extent than trainStack

clusteredRaster <- raster(trainStack)
clusteredRaster <- setValues(clusteredRaster, dfClusters)
clusteredRaster
## class       : RasterLayer 
## dimensions  : 101, 101, 10201  (nrow, ncol, ncell)
## resolution  : 1, 1  (x, y)
## extent      : 0, 101, 0, 101  (xmin, xmax, ymin, ymax)
## coord. ref. : NA 
## data source : in memory
## names       : layer 
## values      : 1, 5  (min, max)
plot(clusteredRaster)

Plot using image

image(clusteredRaster, axes = FALSE, col=rainbow(k))