QEOP Sensor Audit

Libraries required:

library(ggplot2)
library(dplyr)
library(here)
library(reader)
library(knitr)
library(anytime)
library(scales)

Reading in the data from the CE Tools Server:

#dump<-read.table("qeop_dump_all.csv", fill=T, sep=",")

dump<-read.table("qeop_dump_2019_02_23.csv", fill=T, sep=",")

##adding in a column to identify each row with a sensor

dump$sensor<-NA
for (i in 1:15){
  start_id<-min(which(grepl( paste("qeop_batstore_",i, sep=""),dump[,1])))
  end_id<-max(which(grepl( paste("qeop_batstore_",i, sep=""),dump[,1])))
  dump[start_id:end_id,]$sensor<-paste("Sensor_",i,sep="")
  #print(i)
}

table(dump$sensor)

Removing redundant rows and the classification data as it is not relevant here:

dump<-dump[,c(1,2,4,5,6,13,ncol(dump))]
dump<-dump[grepl("\t",dump[,1]),]
dump[,1]<-as.numeric(gsub("[\t(]","",dump[,1]))
dump[,3]<-as.numeric(gsub("[^0-9.]", "",  dump[,3]))
dump[,4]<-as.numeric(gsub("[^0-9.]", "",  dump[,4]))
dump[,5]<-gsub("[[:punct:]]", "", dump[,5])
dump[,5]<-substring(dump[,5],2)
dump[,6]<-as.numeric(gsub("[^0-9.]", "",  dump[,6]))

dump$sensor_no<-as.numeric(gsub("[^0-9.]", "",  dump$sensor))

colnames(dump)<-c("ID", "datetime", "prob_bat","prob_sp","species_class","call_time","sensor", "sensor_no")

dump$datetime<-as.POSIXct(dump$datetime,format="%Y-%m-%d %H:%M:%S")

dump$date<- as.Date(dump$datetime)

dump$species_class<-substring(dump$species_class, 2)

dump$time<- format(dump$datetime, "%H:%M:%S")

Calculating the number of detections per day for each sensor:

library(dplyr)

days_active<-dump %>%
  dplyr::select(date, sensor_no)%>%
  group_by(date,sensor_no)%>%
  mutate(count = n())%>%
  distinct()%>%
  ungroup()

days_active$sensor_no<-as.factor(days_active$sensor_no)
days_active$server<-"CE Tools"

Most of the daily data are saved in individual folders, data between 24th October 2017 and 9th May 2018 are grouped together into 6 folders. This code creates new folders for each date between 24th Oct and 9th May

lf<-list.files(paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more", sep=""))

#subsetting folders with more than 10 chars
lf_gr<-lf[nchar(lf)>10]

#create dirs for all the missing dates - need to check which days are missing within this
fol_ds<-as.Date(unlist(strsplit(lf_gr, "_")))
mdays<-seq(min(fol_ds), max(fol_ds),by="days")

dir_maker<-function(mday){
  
  if (!dir.exists(paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/",mday, sep=""))){
    dir.create(path = paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/",mday, sep=""))
  }
    }
lapply(mdays, dir_maker)

This code breaks up the .csv files by date and sensor, saves a file for each sensor within each date folder

fold_split<-function(fol_name){
  
  fp<-paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/", fol_name, sep="" )
  fpl<-list.files(fp)
  lapply(fpl, file_split, fp = fp)
}

#here the anytime and anydate functions convert unix date formats to regular calendar dates and times

file_split<-function(file_name, fp){
  
  sensor_no<-as.numeric(gsub("[^0-9.]", "",  file_name))
  spl_csv<-read.csv(paste(fp,"/",file_name, sep=""))
  spl_csv$datetime<-anytime(spl_csv$timestamp/1000)#have to divide by 1000 as the numbers are stored to capture milliseconds
  spl_csv$date<-anydate(spl_csv$timestamp/1000)#have to divide by 1000 as the numbers are stored to capture milliseconds
  sp_df<-split(spl_csv, spl_csv$date)
  lapply(names(sp_df),function(x){write.csv(sp_df[[x]],file = paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/", sp_df[[x]]$date[1],"/","bat",sensor_no, ".csv", sep="" ),row.names = FALSE)})
  
}

lapply(lf_gr, fold_split)

Reading in the OpenSensors data, counting the number of rows in each file. Each row is a detection:

lf<-lf[nchar(lf) <=10]

sensor_count<-function(file, dir_id){
  
  sensor_no<-as.numeric(gsub("[^0-9.]", "",  file))
  count<-file.nrow(paste(here::here(), "/data_archive/opensensors_Daniyar/qeoplogs_more/", dir_id,"/", file,sep="")) #gives warning
  #count<-length(readr::count_fields(paste(here(), "/data_archive/opensensors_Daniyar/qeoplogs_more/", dir_id,"/", file,sep=""), tokenizer = readr::tokenizer_csv()))

  #remove file header if the file is a csv
  if (grepl("*csv", file)){
    count<-count - 1
  }

  out<-data.frame(dir_id,sensor_no, count)
  return(out)
  }

dir_count<-function(dir_id){
  files<-list.files(paste(here::here(), "/data_archive/opensensors_Daniyar/qeoplogs_more/", dir_id, sep=""))
  dir_out<-lapply(files, sensor_count, dir_id = dir_id)
  do.call("rbind", dir_out)
}

opensensors<-lapply(lf, dir_count)

open_df<-do.call("rbind", opensensors)
colnames(open_df)[1]<-"date"

open_df$server<-"Opensensors"

open_df$date<-as.Date(open_df$date)
open_df$sensor_no<-as.numeric(open_df$sensor_no)

Combining data from CE Tools and OpenSensors:

all_sensors<-rbind(open_df, days_active)

all_sensors$sensor_no<-as.numeric(all_sensors$sensor_no)
#all_sensors<-all_sensors[complete.cases(all_sensors),]

min_date<-min(all_sensors$date)
max_date<-max(all_sensors$date)

all_dates<-seq(min_date, max_date, by="day")
all_df<-data.frame(rep(all_dates,15),rep(1:15, each = length(all_dates))) 
colnames(all_df)<-c("date", "sensor_no")


all_sensors_merge<-merge(all_df, all_sensors,by = c("date", "sensor_no" ) ,all=T)
#all_sensors_merge<-all_sensors_merge[complete.cases(all_sensors_merge),]

all_sensors_merge$sensor_no<-as.numeric(all_sensors_merge$sensor_no)

Additional information about the sensors from the documentation

sensor_info<-read.csv("sensor_info.csv")

all_sensors_merge<-merge(all_sensors_merge, sensor_info, by="sensor_no")

Sensor 4 moved on 14th May 2018 so renaming data at this point onwards as 4_2

# all_sensors_merge<-all_sensors_merge %>%
#  # filter(sensor_no == 4 & date >= as.Date("2018-05-14"))
#   mutate(sensor_no=replace(sensor_no, sensor_no == 4 & date >= as.Date("2018-05-14"), 4.5))

all_sensors_merge %>%
  filter(server == "Opensensors")%>%
  summarize(max_date = max(date))

#last date opensensors was working

all_sensors_merge<-all_sensors_merge %>%
  mutate(server = replace(server, date <= as.Date("2018-06-18"), "Opensensors" ), server = replace(server, date > as.Date("2018-06-18"), "CETools"))


#save(all_sensors_merge, file = "all_sensor_data_2017-07-12_2019-2019_01_14")
#save(all_sensors_merge, file = "all_sensor_data_2017_07_12-2019_02_23b")

#load("all_sensor_data_2017-07-12_2019-2019_01_14")
#load("all_sensor_data_2017_07_12-2019_02_23")
load("all_sensor_data_2017_07_12-2019_02_23b")

all_sensors_merge$deployment<-1


#changing the deployment of all sensor four records after 14th May 2018 to deploment 2
#Update all deployments here
#When did 5 move to Duncan's garden??
#6 might've moved?
#8 has or is due to be removed due to building work - is likely to be in the bin.

all_sensors_merge[all_sensors_merge$date >= as.Date("2018-05-14") & all_sensors_merge$sensor_no == 4,]<-all_sensors_merge %>%
  filter(date >= as.Date("2018-05-14") & sensor_no == 4)%>%
  mutate(deployment=replace(deployment, deployment==1, 2), Lat = replace(Lat, Lat==51.5367, 51.536553), Lon = replace(Lon, Lon==-0.0128, -0.013169))

##sensor 5 moved to muswelll hill on the 7th Aug 2018

all_sensors_merge[all_sensors_merge$date >= as.Date("2018-08-07") & all_sensors_merge$sensor_no == 5,]<-all_sensors_merge %>%
  filter(date >= as.Date("2018-08-07") & sensor_no == 5)%>%
  mutate(deployment=replace(deployment, deployment==1, 2), Lat = replace(Lat, Lat==51.5362, 51.590616), Lon = replace(Lon, Lon==-0.0127, -0.0139243))


all_sensors_merge$deployment_id<-paste(all_sensors_merge$sensor_no, all_sensors_merge$deployment, sep = ".")

Figure 1: Plot of each sensor from 2017-07-12 to 2019-02-23, the height of each column relates to the number of detections in each day:

Creating a heuristic to identify when a sensor is down rather than there being no detections.

Figure 2: Red line shows where there have been no detections for seven continuous days

library(OpenStreetMap)

library(ggmap)
library(sp)

min_lat =  51.15 
max_lat = 51.70
min_lon = -0.123 
max_lon = 0.095

map <- openmap(c(max_lat,min_lon), c(min_lat,max_lon), zoom = NULL,type = "osm",mergeTiles = TRUE)

 all_active_df<-all_sensors_df %>%
  filter(deployment_id != "5.2")%>%
  group_by(deployment_id)%>%
  mutate(total_days = n_distinct(date),  inactive_days = sum(!is.na(inactive_week)),active_days = total_days - inactive_days, active_days_count = sum(na.omit(count)), average_daily_active_counts = active_days_count/active_days) %>%
  filter(is.na(inactive_week))%>%
  select(date,deployment_id, inactive_week, average_daily_active_counts, Lat, Lon, Habitat)%>%
  distinct()


# saveRDS(all_active_df, "bat_activity_qeop.RDS")

all_active_df<-readRDS("bat_activity_qeop.RDS")

library(ggmap)
library(sp)

xy<-cbind(all_active_df$Lon, all_active_df$Lat)
S<-SpatialPoints(xy)
lnd<-bbox(S)
buff<-0.0015

lnd[1]<-lnd[1]-buff
lnd[2]<-lnd[2]-buff
lnd[3]<-lnd[3]+buff
lnd[4]<-lnd[4]+buff

lnd.b1 <- ggmap(get_map(location = lnd))

qeop<-lnd.b1 +  geom_point(aes(size = average_daily_active_counts, x = Lon, y = Lat, fill = Habitat), colour = "black",pch=21,data = all_active_df) + 
  scale_size_continuous(range=c(5, 15)) +
  labs(size = "Daily Average\nSensor Detections") + 
  theme_bw()+
  guides(fill=guide_legend(override.aes=list(size=5)))+
  xlab("Longitude")+
  ylab("Latitude")


png("QEOP_Map.png", width = 8, height = 8, units = 'in', res = 750)
qeop# Make plot
dev.off()

## png 
##   2

Figure 3: Red line shows where there have been no detections for fourteen continuous days

Figure 4: Red line shows where there have been no detections for thirty continuous days

Adding a month column and formatting data for plotting record data over the year

library(chron)
library(scales)
library(lubridate)
library(ggplot2)

ce_sens_df<-read.csv("all_sensors_ce_records.csv")
os_sens_df<-read.csv("all_sensors_os_records_May_2019.csv")

ce_sens_df$call_time<-as.numeric(as.character(ce_sens_df$call_time))

all_sens_df<-rbind(ce_sens_df, os_sens_df)

sensor_info<-read.csv("sensor_info.csv")


all_sens_info<-merge(all_sens_df, sensor_info, by.x = "sensor_id", by.y = "sensor_no")

Adding deployment information column

all_sens_info_night<-all_sens_info[all_sens_info$time < as.POSIXct(paste(Sys.Date()+1, " 06:00:00 GMT", sep="")),]

all_sens_info_night <-all_sens_info_night %>%
  group_by(sensor_id, month) %>%
  mutate(count = n())%>%
  ungroup

give.n <- function(x){
   #return(c(y = as.POSIXct(paste(Sys.Date(), "18:00:00", sep=" "), format = "%Y-%m-%d %H:%M:%S"), label = length(x)))
  return(c(y = min(x), label = length(x)))
}

ggplot(all_sens_info_night, aes(x = month, y = time))+
  geom_boxplot(aes(fill=Habitat))+
  facet_wrap(.~as.numeric(deployment_id))+
  labs(x = "Month", y = "Time")+
  #stat_summary(fun.data = give.n, geom = "text")+
  scale_x_discrete(labels=month.abb[c(1,4,7,10)], breaks = c("January", "April", "July", "October")) +
  theme_bw()

# all_sensors_df$month<-months.Date(as.Date(all_sensors_df$date))
# all_sensors_df$month<-factor(all_sensors_df$month, levels = month.name)

all_sens_info$date_ch<-as.character(all_sens_info$date)
all_sens_info$c1<-as.character(all_sens_info$c1)
all_sens_info$month_ch<-all_sens_info$month

all_sens_info<-all_sens_info %>%
  group_by(date_ch)%>%
  mutate(count_date = n())%>%
  ungroup()

Figure 5: Distribution of total daily calls for each month

ggplot(all_sens_info, aes(x = all_sens_info$month, y = all_sens_info$count_date))+
  geom_boxplot()+
  scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
  labs(x = "Month", y = "Daily Detected Calls")+
  scale_x_discrete(labels=month.abb) +
  theme_bw()+
  theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
        axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))

Distribution of daily calls over the year by species - some of the species names are spelt multiple ways, some wrong, some right.

all_sens_info<-all_sens_info %>%
  group_by(sensor_id,c1, date)%>%
  mutate(count_sp = n())%>%
  ungroup()

# all_sens_info<-all_sens_info %>%
#   mutate(c1 = replace(c1, c1 == "Nytalus leisleri", "Nyctalus leisleri"))%>%
#   mutate(c1 = replace(c1, c1 == "Nytalus notula", "Nyctalus noctula"))%>%
#   mutate(c1 = replace(c1, c1 == "Pleotus auritus", "Plecotus auritus"))%>%
#   mutate(c1 = replace(c1, c1 == "Pleotus austriaus", "Plecotus austriacus"))

Figure 6: Distribution of total daily calls for each month, by species.

ggplot(all_sens_info, aes(x = month_ch, y = count_sp))+
  scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
  labs(x = "Month", y = "Daily Detected Calls")+
  geom_boxplot()+
  facet_wrap(.~c1)+
  scale_x_discrete(labels=month.abb) +
  theme_bw()+
  theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
        axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))

Figure 7: Distribution of total daily calls for each month, by genus.

all_sens_info$g1<-vapply(strsplit(as.character(all_sens_info$c1)," "), `[`, 1, FUN.VALUE=character(1))

all_sens_info<-all_sens_info %>%
  group_by(sensor_id,g1, date)%>%
  mutate(count_gen = n())%>%
  ungroup()



ggplot(all_sens_info, aes(x = month_ch, y = count_gen))+
    scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
  labs(x = "Month", y = "Daily Detected Calls")+
  geom_boxplot()+
  facet_wrap(.~g1)+
  scale_x_discrete(labels=month.abb) +
  theme_bw()+
  theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
        axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20))

all_sensors_active<-merge(all_active_df[, c("date", "deployment_id", "inactive_week")], all_sens_info, by = c("date", "deployment_id"))


all_sensors_active %>%
  filter(is.na(inactive_week))%>%
ggplot(aes(x = month_ch, y = count_gen))+
  geom_boxplot()+
   scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
  labs(x = "Month", y = "Daily Detected Calls")+
  facet_wrap(.~as.factor(g1))+
  scale_x_discrete(labels=month.abb) +
  theme_bw()+
  theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
        axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20))

ggplot(all_sens_info, aes(p1, group = c1))+
  geom_histogram()+
  #scale_y_continuous(trans = "log1p")+
  #ylim(0,5000)+
  facet_wrap(~c1)

ggplot(all_sens_info, aes(p1, group = c1))+
  geom_histogram()+
  #scale_y_log10()+
  scale_y_continuous(breaks=c(1,10,100,1000,10000,100000, 1000000),trans = "log1p")+
  #ylim(0,5000)+
  ylab("Count (log1p)")+
  xlab("Classification Probability")+
  facet_wrap(~c1)

####Figure 8: Distribution of total daily calls for each month, by habitat type.

all_sens_info<-all_sens_info %>%
  group_by(sensor_id,Habitat, date)%>%
  mutate(count_hab = n())%>%
  ungroup()

ggplot(all_sens_info, aes(x = month_ch, y = log10(count_hab)))+
  geom_boxplot()+
  facet_wrap(.~Habitat)+
  labs(x = "Month", y = "Log10 Daily Calls")+
  scale_x_discrete(labels=month.abb) +
  theme_bw()+
  theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
        axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))

Figure 9: Distribution of total daily calls for each month, by deployment.

all_sens_info<-all_sens_info %>%
  group_by(deployment_id, date)%>%
  mutate(count_sens = n())%>%
  ungroup()

ggplot(all_sens_info, aes(x = month_ch, y = log10(count_sens)))+
  geom_boxplot()+
  facet_wrap(.~as.numeric(deployment_id))+
  labs(x = "Month", y = "Log10 Daily Calls")+
  scale_x_discrete(labels=month.abb) +
  theme_bw()+
  theme(axis.text.y = element_text(lineheight = 0.5 , size = 12),
        axis.text.x = element_text(size = 12, angle= 45, hjust = 1),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))