Load libraries and read in the tracking data

library(sf)
library(rgdal)
library(mapview)
library(leaflet.extras)
library(lubridate)
library(dplyr)
library(raster)
library(ggplot2)
library(concaveman)

mapviewOptions(basemaps=c("Esri.WorldImagery", "OpenTopoMap", "OpenStreetMap.BlackAndWhite", "OpenStreetMap.HOT"))

pnts<-read.csv("/home/chris/Behav_clean_reproj.csv")
pnts<-pnts[!is.na(pnts$x_UTM),]
pnts$Date<-dmy(pnts$Date)
pnts$Date_time<-dmy_hm(pnts$Date_time)
pnts$hr<-hour(pnts$Date_time)
coordinates(pnts) = ~x_UTM+y_UTM ## Set coordinates
pnts<-st_as_sf(pnts) ## Make sf object
st_crs(pnts) <- 32647 ## Set CRS (UTM zone 47N)
pnts_3857<-st_transform(pnts,3857) ## The rasters are in Google Mercator, which is useful for visualisation and reasonably accurate for measurements near the equator, although not perfect. This could be corrected by reprojecting to UTM; we'll use 3857 for now.

Cloudy days

load("data_loggers.rda")
d$Date<-ymd(d$Date)
d %>% group_by(Date) %>% summarise(max=max(LUX))  -> cloudy
g0<-ggplot(cloudy, aes(x=Date,y=max)) + geom_line()
g0

hist(cloudy$max,breaks=100)

It is not clear which criterion to use, but a cutoff could be set at a daily maximum of 5000 lux.

sunny<-cloudy$Date[cloudy$max>5000]
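
Before committing to that value, a quick check of how sensitive the sunny/cloudy split is to the cutoff may help. The candidate cutoffs below are arbitrary illustrations, not values from the original analysis.

## How many days would count as sunny under a range of cutoffs?
cutoffs<-c(2000,5000,10000,20000)
sapply(cutoffs, function(k) sum(cloudy$max>k))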

Use only sunny days

pnts_3857 %>% filter(Date %in% sunny) -> pnts_3857
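
A quick sanity check, not part of the original workflow, is to see how much data survives the filter.

nrow(pnts_3857) ## Fixes remaining on sunny days
length(unique(pnts_3857$Date)) ## Number of sunny days represented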

Load the raster

If we set up a variable for the hour, the whole analysis can be rerun for a different time of day just by changing this line.

h<-12
raster_name<- sprintf("insol_tiffs/sol%s.tif",h)
insol<-raster(raster_name)
insol
## class       : RasterLayer 
## dimensions  : 1909, 2905, 5545645  (nrow, ncol, ncell)
## resolution  : 1, 1  (x, y)
## extent      : 10916095, 10919000, 440389.1, 442298.1  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +no_defs 
## data source : /home/rstudio/shiny/data_loggers/data_loggers2/insol_tiffs/sol12.tif 
## names       : sol12 
## values      : 0, 374.9321  (min, max)
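
If the Mercator distortion mentioned above ever becomes a concern, the raster could be reprojected to match the UTM points instead of transforming the points to 3857. A minimal sketch (not run here) using projectRaster, which resamples the values and so will smooth them slightly:

## Sketch only, not run: reproject the insolation raster to UTM zone 47N
## (EPSG:32647) rather than working in Google Mercator.
# insol_utm<-projectRaster(insol, crs="+init=epsg:32647", method="bilinear")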

Crop the raster layer to a concave hull of the points extended by 50m

chull<-concaveman(pnts_3857) ## Concave hull of the points
chull<-st_buffer(chull,50) ## Extend by 50m
st_crs(chull) <- 3857
hull<-as(chull, "Spatial") ## Convert to sp for use with raster and rgeos
crs(insol) <- crs(hull)
insol<-raster::crop(insol,hull)
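
A quick static check that the cropped raster and the buffered hull line up (the interactive map at the end shows the same thing):

plot(insol) ## Cropped insolation raster
plot(hull, add=TRUE) ## Outline of the buffered concave hull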

Extract values

There are a lot of choices that can be made here. We need to extract some statistic from the raster in an area around the presence points and contrast the data from the places where the siamangs chose to be with data from the places they could have been.

One very simple approach is to contrast any stats calculated for the presence points with the stats calculated for an equal number of points selected at random within the area that the siamangs could have been.

The questions to be answered are

  1. What is the most appropriate distance around the points?
  2. Which statistic best describes the environment (mean, median, variability or some measure of the number of “hot pixels” in the region)?
  3. What do we mean by the area that the siamangs could have chosen?
  4. Once the stats for the buffer area are extracted what is the best summary of them?

So one very simple approach would be to choose a buffer distance of 20m, take the mean insolation within this area and compare it to random points taken within the concave hull, excluding this area.

pnts_3857 %>% filter(hr==h) -> points
library(rgeos)
bdist<- 20 ## Choose buffer distance in meters (1 pixel = 1m)


points_sp<-as(points, "Spatial") ## Change to sp
b_points<- buffer(points_sp, bdist) ## Buffer points
hull2<-gDifference(hull,b_points) ## Take the difference between the hull and the buffered points (i.e. exclude the area around the siamangs)


n<-length(points_sp) ## Number of presence points

random_points<-spsample(hull2, n=n, type='random') ## Equal number of random points within the available area
b_p<-raster::extract(insol,points_sp,buffer=bdist,fun=mean) ## Mean insolation within the buffer around each presence point
b_r<-raster::extract(insol,random_points,buffer=bdist,fun=mean) ## Same for the random points

Summarise presence points

summary(b_p)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.1648  22.5445  37.4876  42.6828  58.0191 164.5450

Summarise random points

summary(b_r)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   15.60   34.94   42.95   62.51  175.49
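
Question 2 above asked which statistic best describes the environment. The same extraction can return other summaries of the buffered area; here is a sketch for the presence points, in which the threshold of 100 used to define a “hot pixel” is an arbitrary illustration rather than a value from the analysis.

## Alternative summaries of the buffers around the presence points.
b_p_med<-raster::extract(insol,points_sp,buffer=bdist,fun=median)
b_p_sd<-raster::extract(insol,points_sp,buffer=bdist,fun=sd)
b_p_hot<-raster::extract(insol,points_sp,buffer=bdist,fun=function(x,...) mean(x>100)) ## Proportion of "hot" pixels (arbitrary threshold)
summary(b_p_med)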

A simple significance test

One very simple significance test would be to use a Wilcoxon rank sum test.

wilcox.test(b_p,b_r)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  b_p and b_r
## W = 45255, p-value = 0.4001
## alternative hypothesis: true location shift is not equal to 0

A better test

The problem with just using the rank sum test is that it doesn’t produce confidence intervals for the difference. Also, if a different set of random points were chosen the result might differ. One possible solution would be to replicate the analysis at least 100 times and summarise the results. Clearly this would take more computational time, so it is not worth doing until all decisions regarding the analysis have been made. The code below would produce a vector of values if it were run.

#res<-replicate(100,mean(raster::extract(insol,spsample(hull2, n=n, type='random'),buffer=bdist,fun=mean)))
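
As a sketch of what that could look like once the analysis is settled, the contrast itself could be replicated and an interval taken from the quantiles of the replicated differences. The 100 replicates and the 2.5%/97.5% quantiles are illustrative choices, not part of the original analysis.

## Sketch only, not run: replicate the random-point contrast and take a
## quantile interval for the difference in mean insolation.
# diffs<-replicate(100, {
#   rnd<-spsample(hull2, n=n, type='random')
#   b_r_i<-raster::extract(insol, rnd, buffer=bdist, fun=mean)
#   mean(b_p, na.rm=TRUE) - mean(b_r_i, na.rm=TRUE)
# })
# quantile(diffs, c(0.025, 0.5, 0.975))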

Mapview

mp<-mapview(points) + mapview(insol) + mapview(hull2)
mp@map %>% addFullscreenControl()
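
Repeating the analysis for other hours

Because the hour was set up as a variable at the start, the presence/random contrast can be wrapped in a function of h and repeated for other times of day. This is a minimal sketch that reuses the hull and the sunny-day points created above and assumes a raster exists at insol_tiffs/sol<h>.tif for each hour requested; the example hours in the commented call are placeholders.

run_hour<-function(h, bdist=20){
  insol_h<-raster(sprintf("insol_tiffs/sol%s.tif",h)) ## Raster for this hour
  crs(insol_h)<-crs(hull)
  insol_h<-raster::crop(insol_h,hull)
  pts_sp<-as(filter(pnts_3857, hr==h), "Spatial") ## Presence points for this hour
  avail<-gDifference(hull, buffer(pts_sp, bdist)) ## Available area excluding the buffers
  rnd<-spsample(avail, n=length(pts_sp), type='random')
  pres<-raster::extract(insol_h, pts_sp, buffer=bdist, fun=mean)
  rand<-raster::extract(insol_h, rnd, buffer=bdist, fun=mean)
  data.frame(hour=h,
             mean_presence=mean(pres, na.rm=TRUE),
             mean_random=mean(rand, na.rm=TRUE),
             p_value=wilcox.test(pres, rand)$p.value)
}

# do.call(rbind, lapply(c(9,12,15), run_hour))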