The goal is to calculate NDVI index for Warsaw city for each cell separately and save each observation into a data frame, along with the cell’s coordinates.

Step 1 - downloading Sentinel-2 data

Log in on the Scihub copernicus website and mark your area of interest (in my case, Warsaw city):

Use Advanced Search to filter observations with 0% of cloud cover:

Click “search” and download observations you’re interested in (in my case, one observation for lower-left tile, one for upper left tile and one for upper-right tile. Put together, they cover the whole Warsaw’s area)

Open downloaded folder, and then GRANULE->(really long and senseless name of the folder)->IMG_DATA. There you will find some files with extensions .jp2. Copy those files to your working directory.

Step 2 - reading data into R and extracting the image of Warsaw from a tile (in my case, UDC tile was the first one)

library(raster)
library(dplyr)
library(ggplot2)
library(rgdal)
library(sp)
library(sf)
library(tabularaster)
library(ggplot2)
setwd("C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat")

# Reading Sentinel-2 data into a raster object
satellite=stack("T34UDC_20220906T093601_B02_10m.jp2"," T34UDC_20220906T093601_B03_10m.jp2","T34UDC_20220906T093601_B04_10m.jp2","T34UDC_20220906T093601_B08_10m.jp2") 

# Reading & extracting Warsaw area
setwd("C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat")
pow.sf <- st_read("powiaty.shp")
## Reading layer `powiaty' from data source 
##   `C:\Users\ewado\OneDrive\Dokumenty\Studia\R-mat\powiaty.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 380 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 171677.6 ymin: 133223.7 xmax: 861895.7 ymax: 774923.7
## Projected CRS: ETRS89 / Poland CS92
pow.sf <- st_transform(pow.sf, crs = "+proj=longlat +datum=NAD83")
waw.pow.sf<-pow.sf[pow.sf$jpt_nazwa_=='powiat Warszawa',]

waw.pow.sf2<-st_transform(waw.pow.sf, crs(satellite)@projargs)
waw.cropped<-crop(satellite, extent(waw.pow.sf2))
waw.masked<-mask(waw.cropped, mask=waw.pow.sf2)
ndvi<-(waw.masked[[4]]-waw.masked[[3]])/(waw.masked[[4]]+waw.masked[[3]])

plot(waw.masked[[1]]) 

plot(ndvi) 

Step 3 - creating a data frame

Create a spatial object using xyFromCell function (the function that returns coordinates of the center of the raster cell). I used xyFromCell(name_of_the_raster, spatial=TRUE, all_cells) and change it’s projection. Then, save coordinates of this spatial object using a function coordinates():

ndvi.crds<-spTransform(xyFromCell(ndvi, spatial=TRUE, 1:length(as.vector(ndvi))), CRS("+proj=longlat +datum=NAD83"))
crds<-coordinates(ndvi.crds)

class(crds)
## [1] "matrix" "array"
dim(crds)
## [1] 5591900       2
head(crds)
##             x        y
## [1,] 20.85120 52.35033
## [2,] 20.85135 52.35033
## [3,] 20.85150 52.35033
## [4,] 20.85164 52.35034
## [5,] 20.85179 52.35034
## [6,] 20.85194 52.35034

It looks nice, but it’s a matrix, and we want a data frame:

crds.df<-data.frame(ID=c(1:length(as.vector(ndvi))), X=crds[,1], Y=crds[,2])
class(crds.df)
## [1] "data.frame"
dim(crds.df)
## [1] 5591900       3
head(crds.df)
##   ID        X        Y
## 1  1 20.85120 52.35033
## 2  2 20.85135 52.35033
## 3  3 20.85150 52.35033
## 4  4 20.85164 52.35034
## 5  5 20.85179 52.35034
## 6  6 20.85194 52.35034

The data frame of the coordinates is ready, now we need to add corresponding values of NDVI index:

ndvi.values<-as.data.frame(ndvi)
crds.df$NDVI<-ndvi.values[,1]
head(crds.df)
##   ID        X        Y NDVI
## 1  1 20.85120 52.35033   NA
## 2  2 20.85135 52.35033   NA
## 3  3 20.85150 52.35033   NA
## 4  4 20.85164 52.35034   NA
## 5  5 20.85179 52.35034   NA
## 6  6 20.85194 52.35034   NA
summary(crds.df)
##        ID                X               Y              NDVI        
##  Min.   :      1   Min.   :20.85   Min.   :52.10   Min.   :-0.7     
##  1st Qu.:1397976   1st Qu.:20.92   1st Qu.:52.16   1st Qu.: 0.2     
##  Median :2795950   Median :21.00   Median :52.22   Median : 0.3     
##  Mean   :2795950   Mean   :21.00   Mean   :52.22   Mean   : 0.3     
##  3rd Qu.:4193925   3rd Qu.:21.07   3rd Qu.:52.29   3rd Qu.: 0.5     
##  Max.   :5591900   Max.   :21.14   Max.   :52.35   Max.   : 0.8     
##                                                    NA's   :1567343

As you can wee, it worked :)

crds.df[100000:100010,]
##            ID        X        Y      NDVI
## 100000 100000 20.92447 52.34591 0.5134719
## 100001 100001 20.92462 52.34591 0.4689709
## 100002 100002 20.92477 52.34591 0.4433845
## 100003 100003 20.92491 52.34591 0.3818013
## 100004 100004 20.92506 52.34591 0.2718563
## 100005 100005 20.92521 52.34591 0.2231986
## 100006 100006 20.92535 52.34591 0.3133380
## 100007 100007 20.92550 52.34591 0.4540721
## 100008 100008 20.92565 52.34591 0.4982524
## 100009 100009 20.92579 52.34591 0.5396066
## 100010 100010 20.92594 52.34591 0.5218509
ggplot(crds.df, aes(x=X, y=Y, col=NDVI))+
 geom_point()+
 theme_minimal()

You can also remove observations with NA as ndvi index:

dim(crds.df)
## [1] 5591900       4
crds.df<-crds.df[!is.na(crds.df$NDVI),]
dim(crds.df)
## [1] 4024557       4
head(crds.df)
##      ID        X       Y          NDVI
## 439 439 20.91551 52.3504 -0.0008123477
## 440 440 20.91566 52.3504 -0.0064935065
## 441 441 20.91580 52.3504 -0.0076458753
## 442 442 20.91595 52.3504 -0.0012053033
## 443 443 20.91610 52.3504 -0.0020251114
## 444 444 20.91624 52.3504 -0.0106295993
ggplot(crds.df, aes(x=X, y=Y, col=NDVI))+
  geom_point()+
  theme_minimal()

Step 4 - repeat steps 2&3 for the rest of the tiles and join data frames.

UEC tile
satellite2=stack("C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UEC_20220827T093601_B02_10m.jp2",
         "C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UEC_20220827T093601_B03_10m.jp2",
         "C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UEC_20220827T093601_B04_10m.jp2",
         "C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UEC_20220827T093601_B08_10m.jp2")

waw.pow.sf2<-st_transform(waw.pow.sf, crs(satellite2)@projargs)
waw.cropped<-crop(satellite2, extent(waw.pow.sf2))
waw.masked<-mask(waw.cropped, mask=waw.pow.sf2)
ndvi2<-(waw.masked[[4]]-waw.masked[[3]])/(waw.masked[[4]]+waw.masked[[3]])

plot(waw.masked[[1]])

plot(ndvi2)

ndvi2.crds<-spTransform(xyFromCell(ndvi2, spatial=TRUE, 1:length(as.vector(ndvi2))), CRS("+proj=longlat +datum=NAD83"))
crds2<-coordinates(ndvi2.crds)
crds2.df<-data.frame(ID=c(1:length(as.vector(ndvi2))), X=crds2[,1], Y=crds2[,2])
ndvi2.values<-as.data.frame(ndvi2)
crds2.df$NDVI<-ndvi2.values[,1]
head(crds2.df)
##   ID        X        Y      NDVI
## 1  1 20.99978 52.35043 0.4125428
## 2  2 20.99993 52.35043 0.4184139
## 3  3 21.00007 52.35043 0.3951057
## 4  4 21.00022 52.35043 0.3944778
## 5  5 21.00037 52.35043 0.4010343
## 6  6 21.00051 52.35043 0.4037090
crds2.df<-crds2.df[!is.na(crds2.df$NDVI),]

ggplot(crds2.df, aes(x=X, y=Y, col=NDVI))+
  geom_point()+
  theme_minimal()

UDD tile
satellite3=stack("C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UDD_20220701T095041_B02_10m.jp2",
         "C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UDD_20220701T095041_B03_10m.jp2",
         "C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UDD_20220701T095041_B04_10m.jp2",
         "C:/Users/ewado/OneDrive/Dokumenty/Studia/R-mat/T34UDD_20220701T095041_B08_10m.jp2")

waw.pow.sf2<-st_transform(waw.pow.sf, crs(satellite3)@projargs)
waw.cropped<-crop(satellite3, extent(waw.pow.sf2))
waw.masked<-mask(waw.cropped, mask=waw.pow.sf2)
ndvi3<-(waw.masked[[4]]-waw.masked[[3]])/(waw.masked[[4]]+waw.masked[[3]])

plot(waw.masked[[1]])

plot(ndvi3)

ndvi3.crds<-spTransform(xyFromCell(ndvi3, spatial=TRUE, 1:length(as.vector(ndvi3))), CRS("+proj=longlat +datum=NAD83"))
crds3<-coordinates(ndvi3.crds)
crds3.df<-data.frame(ID=c(1:length(as.vector(ndvi3))), X=crds3[,1], Y=crds3[,2])
ndvi3.values<-as.data.frame(ndvi3)
crds3.df$NDVI<-ndvi3.values[,1]
head(crds3.df)
##   ID        X        Y NDVI
## 1  1 20.85114 52.36805   NA
## 2  2 20.85129 52.36805   NA
## 3  3 20.85144 52.36805   NA
## 4  4 20.85158 52.36805   NA
## 5  5 20.85173 52.36805   NA
## 6  6 20.85188 52.36805   NA
crds3.df<-crds3.df[!is.na(crds3.df$NDVI),]

ggplot(crds3.df, aes(x=X, y=Y, col=NDVI))+
  geom_point()+
  theme_minimal()

Merging data frames:

df.ca<-rbind(crds.df, crds2.df, crds3.df)

dim(df.ca)
## [1] 8823978       4
head(df.ca)
##      ID        X       Y          NDVI
## 439 439 20.91551 52.3504 -0.0008123477
## 440 440 20.91566 52.3504 -0.0064935065
## 441 441 20.91580 52.3504 -0.0076458753
## 442 442 20.91595 52.3504 -0.0012053033
## 443 443 20.91610 52.3504 -0.0020251114
## 444 444 20.91624 52.3504 -0.0106295993
ggplot(df.ca, aes(x=X, y=Y, col=NDVI))+
  geom_point()+
  theme_minimal()

You can save the data:

#write.csv(df.ca, "ndvi_2023.csv")