require(rgdal)
require(raster)
require(RStoolbox)
require(ggplot2)
require(reshape2)
require(knitr)
require(kableExtra)
imadir2 <- "./pcatest2"
1. PCA with R
Input image
b <- brick(file.path(imadir2,"mini1.tif"))
ggRGB(b,5,3,1,stretch="lin")

Run PCA with RStoolbox and extract eigenmatrix.
(read again for time benchmarking)
t0 <- Sys.time()
b <- brick(file.path(imadir2,"mini1.tif"))
bpca <- rasterPCA(b,spca=FALSE)
trpca <- Sys.time() -t0
trpca
Time difference of 5.875336 secs
eigenvR <- loadings(bpca$model)[]
PCA image (composite of PC1 to PC3)
ggRGB(bpca$map,1,2,3,stretch="lin")

2. PCA with OTB
For OTB the current (v7.1) implementation of otbcli_DimensionalityReduction implies an awkard sequence to cope with nodata: (otbPCAtest_mini1.scr)
C:\ALOBO\OTB-7.1.0-Win64\otbenv.bat
cd D:\OTBtests\OTBpca\pcatest2
otbcli_ManageNoData.bat -in ..\mini1.tif -out mini1MASK.tif uint8 -mode.buildmask.inv 1 -mode.buildmask.outv 0
otbcli_ManageNoData.bat -in mini1.tif -out mini1Mask.tif uint8 -mode.buildmask.inv 1 -mode.buildmask.outv 0
#without whitening
otbcli_DimensionalityReduction.bat -in mini1.tif -bv 0 -out mini1PCAotb.tif -method pca -method.pca.whiten false -outmatrix mini1PCAotbnowhit.eigmat.csv
otbcli_ManageNoData.bat -in mini1PCAotb.tif -out mini1PCAotbM.tif -mode apply -mode.apply.mask mini1Mask.tif -mode.apply.ndval 0
3. Compare eigenvectors
eigenvOTB <- t(read.csv(file.path(imadir2,"mini1PCAotbnowhit.eigmat.csv"),header=FALSE,sep=""))
eigenvOTB <- data.frame(eigenvOTB)
eigenvR <- read.csv(file.path(imadir2,"Rmini1eigenmat.csv"),stringsAsFactors = FALSE)[,-1]
colnames(eigenvR) <- colnames(eigenvOTB) <- paste0("PC",1:5)
rownames(eigenvR) <- rownames(eigenvOTB) <- paste0("Band_",1:5)
Eigenvectors from R:
kable(eigenvR) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| |
PC1 |
PC2 |
PC3 |
PC4 |
PC5 |
| Band_1 |
0.0217211 |
0.2969163 |
0.2623071 |
0.2694614 |
0.8774705 |
| Band_2 |
0.1326099 |
0.4015052 |
0.0335164 |
-0.8967462 |
0.1262185 |
| Band_3 |
0.0331460 |
0.6279545 |
0.5775238 |
0.2428258 |
-0.4605176 |
| Band_4 |
0.3865421 |
0.5127257 |
-0.7221213 |
0.2534016 |
-0.0450125 |
| Band_5 |
0.9118275 |
-0.3056467 |
0.2740052 |
0.0077485 |
-0.0034370 |
Eigenvectors from OTB:
kable(eigenvOTB) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| |
PC1 |
PC2 |
PC3 |
PC4 |
PC5 |
| Band_1 |
0.0225902 |
-0.2931714 |
-0.2664791 |
0.1835331 |
0.8993579 |
| Band_2 |
0.1346418 |
-0.3977168 |
-0.0786326 |
-0.9037259 |
0.0280966 |
| Band_3 |
0.0361656 |
-0.6199247 |
-0.5704303 |
0.3142812 |
-0.4361444 |
| Band_4 |
0.3874017 |
-0.5230245 |
0.7251642 |
0.2244495 |
-0.0111637 |
| Band_5 |
0.9110287 |
0.3130665 |
-0.2674920 |
0.0210914 |
-0.0043922 |
Difference: most values < 0.01, but note larger difference values in some elements of the eigenvectors
kable(abs(eigenvR) - abs(eigenvOTB)) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| |
PC1 |
PC2 |
PC3 |
PC4 |
PC5 |
| Band_1 |
-0.0008691 |
0.0037449 |
-0.0041720 |
0.0859282 |
-0.0218874 |
| Band_2 |
-0.0020319 |
0.0037884 |
-0.0451162 |
-0.0069797 |
0.0981219 |
| Band_3 |
-0.0030196 |
0.0080297 |
0.0070935 |
-0.0714554 |
0.0243732 |
| Band_4 |
-0.0008596 |
-0.0102988 |
-0.0030430 |
0.0289522 |
0.0338488 |
| Band_5 |
0.0007988 |
-0.0074198 |
0.0065132 |
-0.0133429 |
-0.0009552 |
4. Plot PC values
rversion <- bpca$map
otbversion <- brick(file.path(imadir2,"mini1PCAotbM.tif")) #from otbPCAtest_mini1.scr
names(rversion) <- names(otbversion) <- paste0("PC",1:5)
Extract random values and tidy up for ggplot in long format
set.seed(121)
pdata <- sampleRandom(stack(rversion,otbversion), size=1000, na.rm=TRUE,
rowcol=TRUE, xy=TRUE, sp=FALSE)
pdata1 <- pdata[,1:9]
pdata2 <- pdata[,c(1:4,10:14)]
colnames(pdata1)[5:9] <- colnames(pdata2)[5:9] <- paste0("PC",1:5)
pdata1lf <- melt(data.frame(pdata1),id.vars=1:4)
names(pdata1lf)[6] <- "Rversion"
pdata2lf <- melt(data.frame(pdata2),id.vars=1:4)
names(pdata2lf)[6] <- "OTBversion"
pdatalf <- data.frame(cbind(pdata1lf,OTBversion=pdata2lf$OTBversion))
#kable(head(pdatalf)) %>%
# kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
Note dispersion increases for higher PCs. I’ve found this is consistent across images
ggplot(data=pdatalf) +
geom_point(aes(x=Rversion,y=OTBversion)) +
theme(aspect.ratio = 1) +
facet_wrap(~variable,scale="free",ncol=3)

5. Execution times
I often need a pca output in the form of a linearly stretched PCs image in uint8 and leaving 0 for nodata, thus I include this step in the benchmarking too. The stretching is done from the 2% and 98% quantiles to the 1:255 range. For R, this linear stretching can done by combining raster::clamp() and RStoolbox::RescalImage
writeRaster(rescaleImage(clamp(bpca$map,lower=q[,1], upper=q[,2]),
xmin=q[,1], xmax=q[,2],ymin=1, ymax=255),
file.path(imadir2,"mini1PCARresc"),
datatype="INT1U", NAflag=0,
format="GTiff",overwrite=TRUE)
Stretching with the current OTB v7.1 implies fiddling again to cope with nodata:
otbcli_DynamicConvert -in mini1PCAotbM.tif type linear -mask mini1Mask.tif -out mini1PCAotbMresc.tif uint8 -outmin 1 -outmax 255
otbcli_ManageNoData.bat -in mini1PCAotbMresc.tif -out mini1PCAotbMrescM.tif -mode apply -mode.apply.mask mini1Mask.tif -mode.apply.ndval 0
5.1 Small test image
Test with mini1.tif (small test image 85 rows x 116 cols x 5 bands x 32 bit).
Runing on a very low-profile, consumer grade old laptop:
- Lenovo X220 with Intel Core i5-2520M CPU @2.50 GHz
- SSD hard-drive
- 8 Gb of RAM
Values in the table are in seconds
extimes <- read.csv("TimeBenchmark_PCA.csv",header=FALSE,stringsAsFactors = FALSE)
extimes1 <- extimes[2:4,1:3]
names(extimes1) <- extimes1[1,]
rownames(extimes1) <- extimes1[,1]
kable(extimes1[-1,-1]) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| |
R |
OTB |
| PCA |
2.05 |
1.7 |
| stretching |
0.15 |
1.04 |
5.2 Large-size image
Execution times with a real image (BertMICA20190531v2.tif) (8282 rows x 11329 cols x 5 bands x 32 bit).
For images with a real-life size, an R alternative is using gdalUtilities::gdal_transform(), but the output cannot be clamped to the 1:255 range using gdalUtilities, thus the image has to read back into R for clamp(), which significantly increases the execution time:
gdal_translate(src_dataset=file.path(rasdirpca,"BertMICA20190531v2PCARfloat.tif"),
dst_dataset=file.path(rasdirpca,"BertMICA20190531v2PCARresc.tif"),
scale=mscal,dryrun=FALSE)
BertMICA20190531v2PCARresc <- brick(file.path(rasdirpca,"BertMICA20190531v2PCARresc.tif"))
a1 <- clamp(BertMICA20190531v2PCARresc,lower=1,upper=255, useValues=TRUE,
format="GTiff",datatype="INT1U",NAflag=0,
file=file.path(rasdirpca,"BertMICA20190531v2PCARrescG.tif"),
overwrite=TRUE)
Note the advantage of OTB: despite the fact so much reading and writing in OTB severely increases execution time, the OTB run is performed in a reasonable time while waiting for R is unpractical. Values in the table are in minutes.
extimes2 <- extimes[7:10,1:4]
names(extimes2) <- extimes2[1,]
rownames(extimes2) <- extimes2[,1]
kable(extimes2[-1,-1]) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| |
R |
gdal+R |
OTB |
| PCA |
33.73 |
|
3.36 |
| PCA with resampling |
6.9 |
|
|
| stretching |
6.38 |
4.43 |
1.66 |
---
title: " Comparing PCA results from R and from OTB"
output:
  html_notebook:
   code_folding: hide
editor_options: 
  chunk_output_type: inline
---

* Agustin.Lobo@ictja.csic.es
* 20200508

```{r}
require(rgdal)
require(raster)
require(RStoolbox)
require(ggplot2)
require(reshape2)
require(knitr)
require(kableExtra)
imadir2 <- "./pcatest2"
```

## 1. PCA with R
Input image
```{r fig.height=4, fig.width=6}
b <- brick(file.path(imadir2,"mini1.tif"))
ggRGB(b,5,3,1,stretch="lin")
```
Run PCA with RStoolbox and extract eigenmatrix.  
(read again for time benchmarking)
```{r}
t0 <- Sys.time()
b <- brick(file.path(imadir2,"mini1.tif"))
bpca <- rasterPCA(b,spca=FALSE)
trpca <- Sys.time() -t0
trpca
eigenvR <- loadings(bpca$model)[]
```
PCA image (composite of PC1 to PC3)
```{r fig.height=4, fig.width=6}
ggRGB(bpca$map,1,2,3,stretch="lin")
```

## 2. PCA with OTB
For OTB the current (v7.1) implementation of otbcli_DimensionalityReduction implies an awkard sequence to cope with nodata:
(otbPCAtest_mini1.scr)

```
C:\ALOBO\OTB-7.1.0-Win64\otbenv.bat
cd D:\OTBtests\OTBpca\pcatest2
otbcli_ManageNoData.bat -in ..\mini1.tif -out mini1MASK.tif uint8 -mode.buildmask.inv 1 -mode.buildmask.outv 0 
otbcli_ManageNoData.bat -in mini1.tif -out mini1Mask.tif uint8 -mode.buildmask.inv 1 -mode.buildmask.outv 0 
#without whitening
otbcli_DimensionalityReduction.bat -in mini1.tif -bv 0 -out mini1PCAotb.tif -method pca -method.pca.whiten false -outmatrix mini1PCAotbnowhit.eigmat.csv
otbcli_ManageNoData.bat -in mini1PCAotb.tif -out mini1PCAotbM.tif -mode apply -mode.apply.mask mini1Mask.tif -mode.apply.ndval 0
```
## 3. Compare eigenvectors
```{r}
eigenvOTB <- t(read.csv(file.path(imadir2,"mini1PCAotbnowhit.eigmat.csv"),header=FALSE,sep=""))
eigenvOTB <- data.frame(eigenvOTB)
eigenvR <- read.csv(file.path(imadir2,"Rmini1eigenmat.csv"),stringsAsFactors = FALSE)[,-1]
colnames(eigenvR) <- colnames(eigenvOTB) <- paste0("PC",1:5)
rownames(eigenvR) <- rownames(eigenvOTB) <- paste0("Band_",1:5)
```
Eigenvectors from R:
```{r class.source = 'fold-hide'}
kable(eigenvR)  %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
```
Eigenvectors from OTB:
```{r class.source = 'fold-hide'}
kable(eigenvOTB)  %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
```
Difference: most values < 0.01, but note larger difference values in some elements of the eigenvectors
```{r class.source = 'fold-hide'}
kable(abs(eigenvR) - abs(eigenvOTB)) %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
```
## 4. Plot PC values
```{r}
rversion <- bpca$map
otbversion <- brick(file.path(imadir2,"mini1PCAotbM.tif")) #from otbPCAtest_mini1.scr
names(rversion) <- names(otbversion) <- paste0("PC",1:5)
```

Extract random values and tidy up for ggplot in long format
```{r}
set.seed(121)
pdata <- sampleRandom(stack(rversion,otbversion), size=1000, na.rm=TRUE,
                      rowcol=TRUE, xy=TRUE, sp=FALSE)
pdata1 <- pdata[,1:9]
pdata2 <- pdata[,c(1:4,10:14)]
colnames(pdata1)[5:9] <- colnames(pdata2)[5:9] <- paste0("PC",1:5)
pdata1lf <- melt(data.frame(pdata1),id.vars=1:4)
names(pdata1lf)[6] <- "Rversion"
pdata2lf <- melt(data.frame(pdata2),id.vars=1:4)
names(pdata2lf)[6] <- "OTBversion"
pdatalf <- data.frame(cbind(pdata1lf,OTBversion=pdata2lf$OTBversion))
#kable(head(pdatalf))  %>%
#  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
```

Note dispersion increases for higher PCs. I've found this is consistent across images

```{r}
ggplot(data=pdatalf) +
    geom_point(aes(x=Rversion,y=OTBversion)) +
    theme(aspect.ratio = 1) +
    facet_wrap(~variable,scale="free",ncol=3)
```
## 5. Execution times
I often need a pca output in the form of a linearly stretched PCs image in uint8 and leaving 0 for nodata, thus I include this step in the benchmarking too. The stretching is done from the 2% and 98% quantiles to the 1:255 range.
For R, this linear stretching can done by combining raster::clamp() and RStoolbox::RescalImage
```{r, eval = FALSE}
writeRaster(rescaleImage(clamp(bpca$map,lower=q[,1], upper=q[,2]), 
            xmin=q[,1], xmax=q[,2],ymin=1, ymax=255),
            file.path(imadir2,"mini1PCARresc"),
            datatype="INT1U", NAflag=0,
            format="GTiff",overwrite=TRUE)
```

Stretching with the current OTB v7.1 implies fiddling again to cope with nodata:
```
otbcli_DynamicConvert -in mini1PCAotbM.tif type linear -mask mini1Mask.tif -out mini1PCAotbMresc.tif uint8 -outmin 1 -outmax 255
otbcli_ManageNoData.bat -in mini1PCAotbMresc.tif -out mini1PCAotbMrescM.tif -mode apply -mode.apply.mask mini1Mask.tif -mode.apply.ndval 0

```
### 5.1 Small test image
Test with mini1.tif (small test image 85 rows x 116 cols x 5 bands x 32 bit).  
Runing on a very low-profile, consumer grade old laptop:

* Lenovo X220 with Intel Core i5-2520M CPU @2.50 GHz
* SSD hard-drive
* 8 Gb of RAM

Values in the table are in seconds
```{r}
extimes <- read.csv("TimeBenchmark_PCA.csv",header=FALSE,stringsAsFactors = FALSE)
extimes1 <- extimes[2:4,1:3]
names(extimes1) <- extimes1[1,]
rownames(extimes1) <- extimes1[,1]
kable(extimes1[-1,-1])  %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
```
### 5.2 Large-size image
Execution times with a real image (BertMICA20190531v2.tif) (8282 rows x 11329 cols x 5 bands x 32 bit).  
For images with a real-life size, an R alternative is using gdalUtilities::gdal_transform(), but the output cannot be clamped to the 1:255 range using gdalUtilities, thus the image has to read back into R for clamp(), which significantly increases the execution time:
```{r, eval = FALSE}
gdal_translate(src_dataset=file.path(rasdirpca,"BertMICA20190531v2PCARfloat.tif"),
               dst_dataset=file.path(rasdirpca,"BertMICA20190531v2PCARresc.tif"),
               scale=mscal,dryrun=FALSE)
BertMICA20190531v2PCARresc <- brick(file.path(rasdirpca,"BertMICA20190531v2PCARresc.tif"))
a1 <- clamp(BertMICA20190531v2PCARresc,lower=1,upper=255, useValues=TRUE,
            format="GTiff",datatype="INT1U",NAflag=0,
            file=file.path(rasdirpca,"BertMICA20190531v2PCARrescG.tif"),
            overwrite=TRUE)
```

Note the advantage of OTB: despite the fact so much reading and writing in OTB severely increases execution time, the OTB run is performed in a reasonable time while waiting for R is unpractical.
Values in the table are in minutes.


```{r}
extimes2 <- extimes[7:10,1:4]
names(extimes2) <- extimes2[1,]
rownames(extimes2) <- extimes2[,1]
kable(extimes2[-1,-1]) %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
```
