Libraries required:
library(ggplot2)
library(dplyr)
library(here)
library(reader)
library(knitr)
library(anytime)
library(scales)
Reading in the data from the CE Tools Server:
#dump<-read.table("qeop_dump_all.csv", fill=T, sep=",")
dump<-read.table("qeop_dump_2019_02_23.csv", fill=T, sep=",")
##adding in a column to identify each row with a sensor
dump$sensor<-NA
for (i in 1:15){
start_id<-min(which(grepl( paste("qeop_batstore_",i, sep=""),dump[,1])))
end_id<-max(which(grepl( paste("qeop_batstore_",i, sep=""),dump[,1])))
dump[start_id:end_id,]$sensor<-paste("Sensor_",i,sep="")
#print(i)
}
table(dump$sensor)
Removing redundant rows and the classification data as it is not relevant here:
dump<-dump[,c(1,2,4,5,6,13,ncol(dump))]
dump<-dump[grepl("\t",dump[,1]),]
dump[,1]<-as.numeric(gsub("[\t(]","",dump[,1]))
dump[,3]<-as.numeric(gsub("[^0-9.]", "", dump[,3]))
dump[,4]<-as.numeric(gsub("[^0-9.]", "", dump[,4]))
dump[,5]<-gsub("[[:punct:]]", "", dump[,5])
dump[,5]<-substring(dump[,5],2)
dump[,6]<-as.numeric(gsub("[^0-9.]", "", dump[,6]))
dump$sensor_no<-as.numeric(gsub("[^0-9.]", "", dump$sensor))
colnames(dump)<-c("ID", "datetime", "prob_bat","prob_sp","species_class","call_time","sensor", "sensor_no")
dump$datetime<-as.POSIXct(dump$datetime,format="%Y-%m-%d %H:%M:%S")
dump$date<- as.Date(dump$datetime)
dump$species_class<-substring(dump$species_class, 2)
dump$time<- format(dump$datetime, "%H:%M:%S")
Calculating the number of detections per day for each sensor:
library(dplyr)
days_active<-dump %>%
dplyr::select(date, sensor_no)%>%
group_by(date,sensor_no)%>%
mutate(count = n())%>%
distinct()%>%
ungroup()
days_active$sensor_no<-as.factor(days_active$sensor_no)
days_active$server<-"CE Tools"
Most of the daily data are saved in individual folders, data between 24th October 2017 and 9th May 2018 are grouped together into 6 folders. This code creates new folders for each date between 24th Oct and 9th May
lf<-list.files(paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more", sep=""))
#subsetting folders with more than 10 chars
lf_gr<-lf[nchar(lf)>10]
#create dirs for all the missing dates - need to check which days are missing within this
fol_ds<-as.Date(unlist(strsplit(lf_gr, "_")))
mdays<-seq(min(fol_ds), max(fol_ds),by="days")
dir_maker<-function(mday){
if (!dir.exists(paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/",mday, sep=""))){
dir.create(path = paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/",mday, sep=""))
}
}
lapply(mdays, dir_maker)
This code breaks up the .csv files by date and sensor, saves a file for each sensor within each date folder
fold_split<-function(fol_name){
fp<-paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/", fol_name, sep="" )
fpl<-list.files(fp)
lapply(fpl, file_split, fp = fp)
}
#here the anytime and anydate functions convert unix date formats to regular calendar dates and times
file_split<-function(file_name, fp){
sensor_no<-as.numeric(gsub("[^0-9.]", "", file_name))
spl_csv<-read.csv(paste(fp,"/",file_name, sep=""))
spl_csv$datetime<-anytime(spl_csv$timestamp/1000)#have to divide by 1000 as the numbers are stored to capture milliseconds
spl_csv$date<-anydate(spl_csv$timestamp/1000)#have to divide by 1000 as the numbers are stored to capture milliseconds
sp_df<-split(spl_csv, spl_csv$date)
lapply(names(sp_df),function(x){write.csv(sp_df[[x]],file = paste(here::here(),"/data_archive/opensensors_Daniyar/qeoplogs_more/", sp_df[[x]]$date[1],"/","bat",sensor_no, ".csv", sep="" ),row.names = FALSE)})
}
lapply(lf_gr, fold_split)
Reading in the OpenSensors data, counting the number of rows in each file. Each row is a detection:
lf<-lf[nchar(lf) <=10]
sensor_count<-function(file, dir_id){
sensor_no<-as.numeric(gsub("[^0-9.]", "", file))
count<-file.nrow(paste(here::here(), "/data_archive/opensensors_Daniyar/qeoplogs_more/", dir_id,"/", file,sep="")) #gives warning
#count<-length(readr::count_fields(paste(here(), "/data_archive/opensensors_Daniyar/qeoplogs_more/", dir_id,"/", file,sep=""), tokenizer = readr::tokenizer_csv()))
#remove file header if the file is a csv
if (grepl("*csv", file)){
count<-count - 1
}
out<-data.frame(dir_id,sensor_no, count)
return(out)
}
dir_count<-function(dir_id){
files<-list.files(paste(here::here(), "/data_archive/opensensors_Daniyar/qeoplogs_more/", dir_id, sep=""))
dir_out<-lapply(files, sensor_count, dir_id = dir_id)
do.call("rbind", dir_out)
}
opensensors<-lapply(lf, dir_count)
open_df<-do.call("rbind", opensensors)
colnames(open_df)[1]<-"date"
open_df$server<-"Opensensors"
open_df$date<-as.Date(open_df$date)
open_df$sensor_no<-as.numeric(open_df$sensor_no)
Combining data from CE Tools and OpenSensors:
all_sensors<-rbind(open_df, days_active)
all_sensors$sensor_no<-as.numeric(all_sensors$sensor_no)
#all_sensors<-all_sensors[complete.cases(all_sensors),]
min_date<-min(all_sensors$date)
max_date<-max(all_sensors$date)
all_dates<-seq(min_date, max_date, by="day")
all_df<-data.frame(rep(all_dates,15),rep(1:15, each = length(all_dates)))
colnames(all_df)<-c("date", "sensor_no")
all_sensors_merge<-merge(all_df, all_sensors,by = c("date", "sensor_no" ) ,all=T)
#all_sensors_merge<-all_sensors_merge[complete.cases(all_sensors_merge),]
all_sensors_merge$sensor_no<-as.numeric(all_sensors_merge$sensor_no)
Additional information about the sensors from the documentation
sensor_info<-read.csv("sensor_info.csv")
all_sensors_merge<-merge(all_sensors_merge, sensor_info, by="sensor_no")
Sensor 4 moved on 14th May 2018 so renaming data at this point onwards as 4_2
# all_sensors_merge<-all_sensors_merge %>%
# # filter(sensor_no == 4 & date >= as.Date("2018-05-14"))
# mutate(sensor_no=replace(sensor_no, sensor_no == 4 & date >= as.Date("2018-05-14"), 4.5))
all_sensors_merge %>%
filter(server == "Opensensors")%>%
summarize(max_date = max(date))
#last date opensensors was working
all_sensors_merge<-all_sensors_merge %>%
mutate(server = replace(server, date <= as.Date("2018-06-18"), "Opensensors" ), server = replace(server, date > as.Date("2018-06-18"), "CETools"))
#save(all_sensors_merge, file = "all_sensor_data_2017-07-12_2019-2019_01_14")
#save(all_sensors_merge, file = "all_sensor_data_2017_07_12-2019_02_23b")
#load("all_sensor_data_2017-07-12_2019-2019_01_14")
#load("all_sensor_data_2017_07_12-2019_02_23")
load("all_sensor_data_2017_07_12-2019_02_23b")
all_sensors_merge$deployment<-1
#changing the deployment of all sensor four records after 14th May 2018 to deploment 2
#Update all deployments here
#When did 5 move to Duncan's garden??
#6 might've moved?
#8 has or is due to be removed due to building work - is likely to be in the bin.
all_sensors_merge[all_sensors_merge$date >= as.Date("2018-05-14") & all_sensors_merge$sensor_no == 4,]<-all_sensors_merge %>%
filter(date >= as.Date("2018-05-14") & sensor_no == 4)%>%
mutate(deployment=replace(deployment, deployment==1, 2), Lat = replace(Lat, Lat==51.5367, 51.536553), Lon = replace(Lon, Lon==-0.0128, -0.013169))
##sensor 5 moved to muswelll hill on the 7th Aug 2018
all_sensors_merge[all_sensors_merge$date >= as.Date("2018-08-07") & all_sensors_merge$sensor_no == 5,]<-all_sensors_merge %>%
filter(date >= as.Date("2018-08-07") & sensor_no == 5)%>%
mutate(deployment=replace(deployment, deployment==1, 2), Lat = replace(Lat, Lat==51.5362, 51.590616), Lon = replace(Lon, Lon==-0.0127, -0.0139243))
all_sensors_merge$deployment_id<-paste(all_sensors_merge$sensor_no, all_sensors_merge$deployment, sep = ".")
Creating a heuristic to identify when a sensor is down rather than there being no detections.
library(OpenStreetMap)
library(ggmap)
library(sp)
min_lat = 51.15
max_lat = 51.70
min_lon = -0.123
max_lon = 0.095
map <- openmap(c(max_lat,min_lon), c(min_lat,max_lon), zoom = NULL,type = "osm",mergeTiles = TRUE)
all_active_df<-all_sensors_df %>%
filter(deployment_id != "5.2")%>%
group_by(deployment_id)%>%
mutate(total_days = n_distinct(date), inactive_days = sum(!is.na(inactive_week)),active_days = total_days - inactive_days, active_days_count = sum(na.omit(count)), average_daily_active_counts = active_days_count/active_days) %>%
filter(is.na(inactive_week))%>%
select(date,deployment_id, inactive_week, average_daily_active_counts, Lat, Lon, Habitat)%>%
distinct()
# saveRDS(all_active_df, "bat_activity_qeop.RDS")
all_active_df<-readRDS("bat_activity_qeop.RDS")
library(ggmap)
library(sp)
xy<-cbind(all_active_df$Lon, all_active_df$Lat)
S<-SpatialPoints(xy)
lnd<-bbox(S)
buff<-0.0015
lnd[1]<-lnd[1]-buff
lnd[2]<-lnd[2]-buff
lnd[3]<-lnd[3]+buff
lnd[4]<-lnd[4]+buff
lnd.b1 <- ggmap(get_map(location = lnd))
qeop<-lnd.b1 + geom_point(aes(size = average_daily_active_counts, x = Lon, y = Lat, fill = Habitat), colour = "black",pch=21,data = all_active_df) +
scale_size_continuous(range=c(5, 15)) +
labs(size = "Daily Average\nSensor Detections") +
theme_bw()+
guides(fill=guide_legend(override.aes=list(size=5)))+
xlab("Longitude")+
ylab("Latitude")
png("QEOP_Map.png", width = 8, height = 8, units = 'in', res = 750)
qeop# Make plot
dev.off()
## png
## 2
Adding a month column and formatting data for plotting record data over the year
library(chron)
library(scales)
library(lubridate)
library(ggplot2)
ce_sens_df<-read.csv("all_sensors_ce_records.csv")
os_sens_df<-read.csv("all_sensors_os_records_May_2019.csv")
ce_sens_df$call_time<-as.numeric(as.character(ce_sens_df$call_time))
all_sens_df<-rbind(ce_sens_df, os_sens_df)
sensor_info<-read.csv("sensor_info.csv")
all_sens_info<-merge(all_sens_df, sensor_info, by.x = "sensor_id", by.y = "sensor_no")
Adding deployment information column
all_sens_info_night<-all_sens_info[all_sens_info$time < as.POSIXct(paste(Sys.Date()+1, " 06:00:00 GMT", sep="")),]
all_sens_info_night <-all_sens_info_night %>%
group_by(sensor_id, month) %>%
mutate(count = n())%>%
ungroup
give.n <- function(x){
#return(c(y = as.POSIXct(paste(Sys.Date(), "18:00:00", sep=" "), format = "%Y-%m-%d %H:%M:%S"), label = length(x)))
return(c(y = min(x), label = length(x)))
}
ggplot(all_sens_info_night, aes(x = month, y = time))+
geom_boxplot(aes(fill=Habitat))+
facet_wrap(.~as.numeric(deployment_id))+
labs(x = "Month", y = "Time")+
#stat_summary(fun.data = give.n, geom = "text")+
scale_x_discrete(labels=month.abb[c(1,4,7,10)], breaks = c("January", "April", "July", "October")) +
theme_bw()
# all_sensors_df$month<-months.Date(as.Date(all_sensors_df$date))
# all_sensors_df$month<-factor(all_sensors_df$month, levels = month.name)
all_sens_info$date_ch<-as.character(all_sens_info$date)
all_sens_info$c1<-as.character(all_sens_info$c1)
all_sens_info$month_ch<-all_sens_info$month
all_sens_info<-all_sens_info %>%
group_by(date_ch)%>%
mutate(count_date = n())%>%
ungroup()
ggplot(all_sens_info, aes(x = all_sens_info$month, y = all_sens_info$count_date))+
geom_boxplot()+
scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
labs(x = "Month", y = "Daily Detected Calls")+
scale_x_discrete(labels=month.abb) +
theme_bw()+
theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20))
Distribution of daily calls over the year by species - some of the species names are spelt multiple ways, some wrong, some right.
all_sens_info<-all_sens_info %>%
group_by(sensor_id,c1, date)%>%
mutate(count_sp = n())%>%
ungroup()
# all_sens_info<-all_sens_info %>%
# mutate(c1 = replace(c1, c1 == "Nytalus leisleri", "Nyctalus leisleri"))%>%
# mutate(c1 = replace(c1, c1 == "Nytalus notula", "Nyctalus noctula"))%>%
# mutate(c1 = replace(c1, c1 == "Pleotus auritus", "Plecotus auritus"))%>%
# mutate(c1 = replace(c1, c1 == "Pleotus austriaus", "Plecotus austriacus"))
ggplot(all_sens_info, aes(x = month_ch, y = count_sp))+
scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
labs(x = "Month", y = "Daily Detected Calls")+
geom_boxplot()+
facet_wrap(.~c1)+
scale_x_discrete(labels=month.abb) +
theme_bw()+
theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20))
all_sens_info$g1<-vapply(strsplit(as.character(all_sens_info$c1)," "), `[`, 1, FUN.VALUE=character(1))
all_sens_info<-all_sens_info %>%
group_by(sensor_id,g1, date)%>%
mutate(count_gen = n())%>%
ungroup()
ggplot(all_sens_info, aes(x = month_ch, y = count_gen))+
scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
labs(x = "Month", y = "Daily Detected Calls")+
geom_boxplot()+
facet_wrap(.~g1)+
scale_x_discrete(labels=month.abb) +
theme_bw()+
theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
strip.text.x = element_text(size = 20))
all_sensors_active<-merge(all_active_df[, c("date", "deployment_id", "inactive_week")], all_sens_info, by = c("date", "deployment_id"))
all_sensors_active %>%
filter(is.na(inactive_week))%>%
ggplot(aes(x = month_ch, y = count_gen))+
geom_boxplot()+
scale_y_continuous(trans = log10_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))+
labs(x = "Month", y = "Daily Detected Calls")+
facet_wrap(.~as.factor(g1))+
scale_x_discrete(labels=month.abb) +
theme_bw()+
theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
strip.text.x = element_text(size = 20))
ggplot(all_sens_info, aes(p1, group = c1))+
geom_histogram()+
#scale_y_continuous(trans = "log1p")+
#ylim(0,5000)+
facet_wrap(~c1)
ggplot(all_sens_info, aes(p1, group = c1))+
geom_histogram()+
#scale_y_log10()+
scale_y_continuous(breaks=c(1,10,100,1000,10000,100000, 1000000),trans = "log1p")+
#ylim(0,5000)+
ylab("Count (log1p)")+
xlab("Classification Probability")+
facet_wrap(~c1)
####Figure 8: Distribution of total daily calls for each month, by habitat type.
all_sens_info<-all_sens_info %>%
group_by(sensor_id,Habitat, date)%>%
mutate(count_hab = n())%>%
ungroup()
ggplot(all_sens_info, aes(x = month_ch, y = log10(count_hab)))+
geom_boxplot()+
facet_wrap(.~Habitat)+
labs(x = "Month", y = "Log10 Daily Calls")+
scale_x_discrete(labels=month.abb) +
theme_bw()+
theme(axis.text.y = element_text(lineheight = 0.5 , size = 18),
axis.text.x = element_text(size = 14, angle= 45, hjust = 1),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20))
all_sens_info<-all_sens_info %>%
group_by(deployment_id, date)%>%
mutate(count_sens = n())%>%
ungroup()
ggplot(all_sens_info, aes(x = month_ch, y = log10(count_sens)))+
geom_boxplot()+
facet_wrap(.~as.numeric(deployment_id))+
labs(x = "Month", y = "Log10 Daily Calls")+
scale_x_discrete(labels=month.abb) +
theme_bw()+
theme(axis.text.y = element_text(lineheight = 0.5 , size = 12),
axis.text.x = element_text(size = 12, angle= 45, hjust = 1),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20))