#setwd("Solar Array Object /")
#install.packages("raster")
#install.packages('sp')
#install.packages('rgdal')
library(raster)
## Warning: package 'raster' was built under R version 3.2.5
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.2.5
##
## Attaching package: 'raster'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:dplyr':
##
## select
library(sp)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.2.5
## rgdal: version: 1.1-10, (SVN revision 622)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
## Linking to sp version: 1.2-3
To get only the files with .tif extension
# list.files(path="train_images/", pattern = glob2rx('*.tif'))
Or if you are familiar with regular expressions
list.files(path = "train_images/", pattern = '^.*\\.tif$')
## [1] "image1.tif" "image10.tif" "image100.tif" "image101.tif"
## [5] "image102.tif" "image103.tif" "image104.tif" "image105.tif"
## [9] "image106.tif" "image107.tif" "image108.tif" "image109.tif"
## [13] "image11.tif" "image110.tif" "image111.tif" "image112.tif"
## [17] "image113.tif" "image114.tif" "image115.tif" "image116.tif"
## [21] "image117.tif" "image118.tif" "image119.tif" "image12.tif"
## [25] "image120.tif" "image121.tif" "image122.tif" "image123.tif"
## [29] "image124.tif" "image125.tif" "image126.tif" "image127.tif"
## [33] "image128.tif" "image129.tif" "image13.tif" "image130.tif"
## [37] "image131.tif" "image132.tif" "image133.tif" "image134.tif"
## [41] "image135.tif" "image136.tif" "image137.tif" "image138.tif"
## [45] "image139.tif" "image14.tif" "image140.tif" "image141.tif"
## [49] "image142.tif" "image143.tif" "image144.tif" "image145.tif"
## [53] "image146.tif" "image147.tif" "image148.tif" "image149.tif"
## [57] "image15.tif" "image150.tif" "image151.tif" "image152.tif"
## [61] "image153.tif" "image154.tif" "image155.tif" "image156.tif"
## [65] "image157.tif" "image158.tif" "image159.tif" "image16.tif"
## [69] "image160.tif" "image161.tif" "image162.tif" "image163.tif"
## [73] "image164.tif" "image165.tif" "image166.tif" "image167.tif"
## [77] "image168.tif" "image169.tif" "image17.tif" "image170.tif"
## [81] "image171.tif" "image172.tif" "image173.tif" "image174.tif"
## [85] "image175.tif" "image176.tif" "image177.tif" "image178.tif"
## [89] "image179.tif" "image18.tif" "image180.tif" "image181.tif"
## [93] "image182.tif" "image183.tif" "image184.tif" "image185.tif"
## [97] "image186.tif" "image187.tif" "image188.tif" "image189.tif"
## [101] "image19.tif" "image190.tif" "image191.tif" "image192.tif"
## [105] "image193.tif" "image194.tif" "image195.tif" "image196.tif"
## [109] "image197.tif" "image198.tif" "image199.tif" "image2.tif"
## [113] "image20.tif" "image200.tif" "image21.tif" "image22.tif"
## [117] "image23.tif" "image24.tif" "image25.tif" "image26.tif"
## [121] "image27.tif" "image28.tif" "image29.tif" "image3.tif"
## [125] "image30.tif" "image31.tif" "image32.tif" "image33.tif"
## [129] "image34.tif" "image35.tif" "image36.tif" "image37.tif"
## [133] "image38.tif" "image39.tif" "image4.tif" "image40.tif"
## [137] "image41.tif" "image42.tif" "image43.tif" "image44.tif"
## [141] "image45.tif" "image46.tif" "image47.tif" "image48.tif"
## [145] "image49.tif" "image5.tif" "image50.tif" "image51.tif"
## [149] "image52.tif" "image53.tif" "image54.tif" "image55.tif"
## [153] "image56.tif" "image57.tif" "image58.tif" "image59.tif"
## [157] "image6.tif" "image60.tif" "image61.tif" "image62.tif"
## [161] "image63.tif" "image64.tif" "image65.tif" "image66.tif"
## [165] "image67.tif" "image68.tif" "image69.tif" "image7.tif"
## [169] "image70.tif" "image71.tif" "image72.tif" "image73.tif"
## [173] "image74.tif" "image75.tif" "image76.tif" "image77.tif"
## [177] "image78.tif" "image79.tif" "image8.tif" "image80.tif"
## [181] "image81.tif" "image82.tif" "image83.tif" "image84.tif"
## [185] "image85.tif" "image86.tif" "image87.tif" "image88.tif"
## [189] "image89.tif" "image9.tif" "image90.tif" "image91.tif"
## [193] "image92.tif" "image93.tif" "image94.tif" "image95.tif"
## [197] "image96.tif" "image97.tif" "image98.tif" "image99.tif"
Retrieve the content of the train_images sub-directory
list <- list.files(path='train_images/', full.names=TRUE)
Check attributes of first element
pic1 <- raster(list[3])
pic1
## class : RasterLayer
## band : 1 (of 3 bands)
## dimensions : 101, 101, 10201 (nrow, ncol, ncell)
## resolution : 1, 1 (x, y)
## extent : 0, 101, 0, 101 (xmin, xmax, ymin, ymax)
## coord. ref. : NA
## data source : /Users/slackoverflow/stats/kaggle/Solar Array Object /train_images/image100.tif
## names : image100
## values : 0, 255 (min, max)
Plot first .tif picture of the list
plot(pic1)
We can also create a histogram to view the distribution of values in our raster. Note that the max number of pixels that R will plot by default is 100,000. We can tell it to plot more using the maxpixels attribute.
#check out histinfo first to acquire information about binwidth
histinfo <- hist(pic1)
histinfo
## $breaks
## [1] 20 40 60 80 100 120 140 160 180 200 220 240 260
##
## $counts
## [1] 130 604 751 1795 1948 800 2477 1232 318 77 50 19
##
## $density
## [1] 6.371924e-04 2.960494e-03 3.681012e-03 8.798157e-03 9.548084e-03
## [6] 3.921184e-03 1.214097e-02 6.038624e-03 1.558671e-03 3.774140e-04
## [11] 2.450740e-04 9.312812e-05
##
## $mids
## [1] 30 50 70 90 110 130 150 170 190 210 230 250
##
## $xname
## [1] "v"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
hist(pic1, breaks=20, main="Distribution of elevation values",
col= "purple")
We can also perform calculations on our raster. For instance, we could multiply all values within the raster by 2.
pic1Doubled <- pic1 * 2
pic1
## class : RasterLayer
## band : 1 (of 3 bands)
## dimensions : 101, 101, 10201 (nrow, ncol, ncell)
## resolution : 1, 1 (x, y)
## extent : 0, 101, 0, 101 (xmin, xmax, ymin, ymax)
## coord. ref. : NA
## data source : /Users/slackoverflow/stats/kaggle/Solar Array Object /train_images/image100.tif
## names : image100
## values : 0, 255 (min, max)
pic1Doubled
## class : RasterLayer
## dimensions : 101, 101, 10201 (nrow, ncol, ncell)
## resolution : 1, 1 (x, y)
## extent : 0, 101, 0, 101 (xmin, xmax, ymin, ymax)
## coord. ref. : NA
## data source : in memory
## names : image100
## values : 42, 510 (min, max)
plot(pic1Doubled)
R has an image
function that allows you to control the way a raster is rendered on the screen. The plot command in R has a base setting for the number of pixels that it will plot (100,000 pixels). The image command thus might be better for rendering larger rasters.
image(pic1)
Specify the range of values that you want to plot Just plot pixels between 50 and 125 in elevation
image(pic1, zlim=c(50,125))
We can specify the colors too
col <- terrain.colors(6)
image(pic1, zlim=c(50,125), main="Digital Elevation Model (DEM)", col=col)
And the max. amount of pixels
image(pic1, zlim=c(50,125), main="Digital Elevation Model (DEM)", maxpixels=200)
# trainStack <- stack(list)
# changed this 14.07.2016 at 23:06 in order to try out something new
trainStack <- brick(list[3])
Plot picture in RGB color mode
?plotRGB
plotRGB(trainStack, 1,2,3)
Crop pictures by drawing a box by clicking upper left and lower right corner in the plot (code is commented out since it requires user’s interaction and thus will create an error while uploading as HTML)
#border <- drawExtent()
#cropped <- crop(trainStack, border)
#plotRGB(cropped, 1,2,3)
The classification itself will be done on a corresponding dataframe:
df <- as.data.frame(trainStack)
Check for any NA’s
table(is.na(df))
##
## FALSE
## 30603
k = 5
clust <- kmeans(df, k, iter.max=1000)
str(clust)
## List of 9
## $ cluster : int [1:10201] 3 3 3 3 3 3 3 3 3 3 ...
## $ centers : num [1:5, 1:3] 99.1 129.6 59.3 155.3 191.7 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:5] "1" "2" "3" "4" ...
## .. ..$ : chr [1:3] "image100.1" "image100.2" "image100.3"
## $ totss : num 29192305
## $ withinss : num [1:5] 692753 240733 634400 519805 675821
## $ tot.withinss: num 2763512
## $ betweenss : num 26428793
## $ size : int [1:5] 3493 1359 1403 3257 689
## $ iter : int 2
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
Extract clusters
dfClusters <- clust$cluster
Create an empty raster with same extent than trainStack
clusteredRaster <- raster(trainStack)
clusteredRaster <- setValues(clusteredRaster, dfClusters)
clusteredRaster
## class : RasterLayer
## dimensions : 101, 101, 10201 (nrow, ncol, ncell)
## resolution : 1, 1 (x, y)
## extent : 0, 101, 0, 101 (xmin, xmax, ymin, ymax)
## coord. ref. : NA
## data source : in memory
## names : layer
## values : 1, 5 (min, max)
plot(clusteredRaster)
Plot using image
image(clusteredRaster, axes = FALSE, col=rainbow(k))