For complete script go to https://github.com/zarrarkhan/rSpatialAggMethods
This summary does not show all code
Testing for three methods of spatial aggregation
Conclusion: * For volume (km3/yr) where need to sum use m2 which uses the grid cell weighted area * For depth (mm/yr) where need to take mean use m3 which takes the grid cell value and weights it by the polygon area
Read in polygon and raster data. In this example we use spatial boundaries from the GADM dataset for the region Argentina and Tethys water demand gridded outputs. All raw files available in the github repository.
tm<-system.time({ # system time
df1<-df
# Convert to Spatial Point Data Frames
df1 = SpatialPointsDataFrame(
SpatialPoints(coords=(cbind(df1$lon,df1$lat))),
data=df1)
proj4string(df1)<-projX
dfp<-raster::intersect(df1,shpa1); # Crop to Layer shpa
gridded(dfp)<-TRUE # Create Gridded Raster Data
}) # close system time
## SUM
tmS<-system.time({ # system time
dxp<-as.data.frame(dfp@data)
dxp<- dxp %>% subset(select=c("X2005","NAME_1")) %>%
group_by(NAME_1) %>%
summarise_all(funs(round(sum(.,na.rm=T),2))) %>% as.data.frame
sum_m1<-dxp}) # close system time
## MEAN
tmM<-system.time({ # system time
dxp<-as.data.frame(dfp@data)
dxp<- dxp %>% subset(select=c("X2005","NAME_1")) %>%
group_by(NAME_1) %>%
summarise_all(funs(round(mean(.,na.rm=T),2))) %>% as.data.frame
mean_m1<-dxp}) # close system time
##
names(sum_m1)[names(sum_m1)=="X2005"]<-"X2005m1"
names(mean_m1)[names(mean_m1)=="X2005"]<-"X2005m1"
sum_m1$timeSecm1<-tm[[3]]+tmS[[3]]
mean_m1$timeSecm1<-tm[[3]]+tmM[[3]]
Method 2 uses the Weighted area of the raster grid cells. This method is appropriate for volume measurements where the sum is being taken.
tm<-system.time({ # system time
w <- raster::extract(r,shpa1, method="simple",weights=T, normalizeWeights=F)
dfx<-data.frame()
for (i in seq(w)){
x<-as.data.frame(w[[i]])
x$ID<-shpa1@data$NAME_1[[i]]
x$WeightedValue<-x$value*x$weight
dfx<-rbind.data.frame(dfx,x)
}
names(dfx)[names(dfx)=="ID"]<-"NAME_1"
names(dfx)[names(dfx)=="WeightedValue"]<-"X2005m2"
## SUM
dxp<- dfx %>% subset(select=c("X2005m2","NAME_1")) %>%
group_by(NAME_1) %>%
summarise_all(funs(round(sum(.,na.rm=T),2))) %>% as.data.frame
sum_m2<-dxp
}) # close system time
sum_m2$timeSecm2<-tm[[3]];
Method 3 converts rasters to polygons and then uses the Weighted area of the cropped raster polygons. This method is appropriate for depth measurements where the mean is being taken.
tm<-system.time({ # system time
x<-raster::intersect(shpa1,rcropP)
x@data<-x@data%>%dplyr::select(NAME_1,X2005)
x@data$area<-area(x);
s1<-shpa1
s1$subRegAreaSum<-area(shpa1);head(s1)
s1<-s1@data%>%dplyr::select(NAME_1,subRegAreaSum);
x@data<-join(x@data,s1,by="NAME_1");
x@data$areaPrcnt<-x@data$area/x@data$subRegAreaSum;
x@data$X2005m3<-x@data$X2005*x@data$areaPrcnt;
## MEAN
dxp<- x@data %>% subset(select=c("X2005m3","NAME_1")) %>%
group_by(NAME_1) %>%
summarise_all(funs(round(sum(.,na.rm=T),2))) %>% as.data.frame
mean_m3<-dxp
}) # close system time
mean_m3$timeSecm3<-tm[[3]]
Summary of Sums Method 1 and Method 2
## NAME_1 X2005m1 X2005m2 diffm1m2 diffPrcntm1m2
## 1 Buenos Aires 496.85 503.52 6.7 1.3
## 2 Catamarca 113.26 126.51 13.2 11.7
## 3 Chaco 25.02 23.69 -1.3 -5.3
## 4 Chubut 46.61 45.22 -1.4 -3.0
## 5 Córdoba 231.62 235.53 3.9 1.7
## 6 Corrientes 377.49 340.77 -36.7 -9.7
## 7 Entre RÃos 320.28 320.38 0.1 0.0
## 8 Formosa 32.33 26.24 -6.1 -18.8
## 9 Jujuy 373.88 358.53 -15.4 -4.1
## 10 La Pampa 17.99 15.28 -2.7 -15.1
## 11 La Rioja 61.01 66.37 5.4 8.8
## 12 Mendoza 895.90 894.43 -1.5 -0.2
## 13 Misiones 1.47 11.95 10.5 712.9
## 14 Neuquén 75.09 61.32 -13.8 -18.3
## 15 RÃo Negro 292.85 296.69 3.8 1.3
## 16 Salta 430.07 451.85 21.8 5.1
## 17 San Juan 190.57 191.09 0.5 0.3
## 18 San Luis 46.23 43.38 -2.8 -6.2
## 19 Santa Cruz 7.68 9.59 1.9 24.9
## 20 Santa Fe 131.33 122.33 -9.0 -6.9
## 21 Santiago del Estero 389.14 402.06 12.9 3.3
## 22 Tierra del Fuego 0.00 0.00 0.0 NaN
## 23 Tucumán 223.62 184.72 -38.9 -17.4
Summary of Means Method 1 and Method 3
## NAME_1 X2005m1 X2005m3 diffm1m3 diffPrcntm1m3
## 1 Buenos Aires 3.97 4.03 0.1 1.5
## 2 Catamarca 2.98 3.37 0.4 13.1
## 3 Chaco 0.76 0.65 -0.1 -14.5
## 4 Chubut 0.47 0.45 0.0 -4.3
## 5 Córdoba 3.74 3.75 0.0 0.3
## 6 Corrientes 11.80 10.38 -1.4 -12.0
## 7 Entre RÃos 11.04 10.81 -0.2 -2.1
## 8 Formosa 1.11 0.97 -0.1 -12.6
## 9 Jujuy 20.77 18.90 -1.9 -9.0
## 10 La Pampa 0.33 0.27 -0.1 -18.2
## 11 La Rioja 1.97 1.97 0.0 0.0
## 12 Mendoza 14.45 15.48 1.0 7.1
## 13 Misiones 0.13 1.06 0.9 715.4
## 14 Neuquén 1.88 1.53 -0.3 -18.6
## 15 RÃo Negro 3.45 3.52 0.1 2.0
## 16 Salta 7.82 8.18 0.4 4.6
## 17 San Juan 5.44 5.69 0.2 4.6
## 18 San Luis 1.54 1.47 -0.1 -4.5
## 19 Santa Cruz 0.06 0.08 0.0 33.3
## 20 Santa Fe 2.43 2.39 0.0 -1.6
## 21 Santiago del Estero 7.21 8.02 0.8 11.2
## 22 Tierra del Fuego 0.00 0.00 0.0 NaN
## 23 Tucumán 24.85 22.45 -2.4 -9.7
In this section we zoom into the area with the largest percentage difference (Misiones) and examine the differences closer.
Summary of Sums Method 1 and Method 2
## NAME_1 X2005m1 X2005m2 diffm1m2 diffPrcntm1m2
## 1 Corrientes 14.82 37.00 22.2 149.7
## 2 Misiones 1.47 11.95 10.5 712.9
Summary of Means Method 1 and Method 3
## NAME_1 X2005m1 X2005m3 diffm1m3 diffPrcntm1m3
## 1 Corrientes 3.70 5.77 2.1 55.9
## 2 Misiones 0.13 1.06 0.9 715.4