QEOP Sensor Tests

library(dplyr)
library(ggplot2)
library(knitr)
library(lubridate)

source("D:/Fiona/useful_functions.R")

# Collect the tagged bat recordings and pull their rows out of dates_files.
#
# list.files() applies `pattern` to the file name only, and the regex is
# escaped and anchored, so only names that really end in ".wav" are kept.
# (The earlier grepl(".wav", ...) treated "." as "any character" and matched
# anywhere in the full path, directories included.)
all_bat_files <- list.files(
  "Q:/data_tagged/bats/all/",
  pattern = "\\.wav$",
  full.names = TRUE,
  recursive = TRUE
)

# Split every path on "_" and hand the pieces to get_last().
# NOTE(review): get_last() is not defined in this section (presumably it comes
# from useful_functions.R and returns the final piece) - confirm. Splitting on
# "_" rather than "/" assumes the tagged bat files carry a "<prefix>_" in
# front of the original file name - confirm.
path_split <- strsplit(all_bat_files, "_")
path_out <- lapply(path_split, get_last)
unl_path_out_bat <- unlist(path_out)

# NOTE(review): dates_files is not created in this section; it must already
# exist with an all_files_out column.
bats_dates <- dates_files[dates_files$all_files_out %in% unl_path_out_bat, ]

# Label these rows by the folder they were found in.
bats_dates$classification <- "Bat"


# Collect the sampled "no bat" recordings and pull their rows out of
# dates_files.
#
# list.files() applies `pattern` to the file name only, and the regex is
# escaped and anchored, so only names that really end in ".wav" are kept.
# (The earlier grepl(".wav", ...) treated "." as "any character" and matched
# anywhere in the full path, directories included.)
all_no_bat_files <- list.files(
  "Q:/data_tagged/Sample_nobats/All/",
  pattern = "\\.wav$",
  full.names = TRUE,
  recursive = TRUE
)

# Split every path on "/" and hand the pieces to get_last().
# NOTE(review): get_last() is not defined in this section (presumably it comes
# from useful_functions.R and returns the final piece, i.e. the file name) -
# confirm.
path_split <- strsplit(all_no_bat_files, "/")
path_out <- lapply(path_split, get_last)
unl_path_out_no_bat <- unlist(path_out)

# NOTE(review): dates_files is not created in this section; it must already
# exist with an all_files_out column.
no_bats_dates <- dates_files[dates_files$all_files_out %in% unl_path_out_no_bat, ]

# Label these rows by the folder they were found in.
no_bats_dates$classification <- "No bat"


# Manual verification metadata for the two tagged samples.
fp <- read.csv("Q:/data_tagged/bats/output/meta.csv")
fn <- read.csv("Q:/data_tagged/Sample_nobats/output/meta.csv")

# Reduce the IN.FILE values of the bat sample to whatever get_last() returns
# for the "_"-separated pieces, so that they can be matched against
# all_files_out below. The "no bat" sample's IN.FILE is used unchanged.
# NOTE(review): get_last() is defined outside this section - confirm it
# returns the final piece.
path_split <- strsplit(as.character(fp$IN.FILE), "_")
path_out <- lapply(path_split, get_last)
fp$IN.FILE <- unlist(path_out)

# One table of file name + manual ID across both samples.
fpfn <- rbind(fp, fn)
verification <- fpfn %>%
  dplyr::select(IN.FILE, MANUAL.ID)

# Stack the folder-based classifications and attach the manual IDs.
# merge() keeps only files present in both tables.
all_samp <- rbind(bats_dates, no_bats_dates)
all_df <- merge(
  x = all_samp,
  y = verification,
  by.x = "all_files_out",
  by.y = "IN.FILE"
)


# Flag disagreements between the folder-based classification and the manual ID.
# FP: classified "Bat" but manually identified as "No bat" (1/0).
all_df$FP <- with(
  all_df,
  ifelse(classification == "Bat" & MANUAL.ID == "No bat", 1, 0)
)

# FN: classified "No bat" but manually identified as "Bat" (1/0).
all_df$FN <- with(
  all_df,
  ifelse(classification == "No bat" & MANUAL.ID == "Bat", 1, 0)
)

# TRUE where the classification and the manual ID are the same label.
all_df$correct_class <- with(all_df, classification == MANUAL.ID)

# Jittered points of recording date by classification, coloured by whether
# the classification agrees with the manual ID.
ggplot(all_df) +
  geom_jitter(
    aes(x = classification, y = dates, colour = correct_class),
    alpha = 0.4
  )

# Alternative view with the manual ID on the x axis (kept disabled):
# ggplot(all_df, aes(x = MANUAL.ID, y = dates, colour = correct_class))+
#   geom_jitter(alpha = 0.4)


# Derive a sensor number for every entry of all_files.
# NOTE(review): all_files and get_second_last() are defined outside this
# section; presumably get_second_last() returns the folder that contains each
# file - confirm.
path_split <- strsplit(all_files, "/")
path_out <- lapply(path_split, get_second_last)
sensor_out <- unlist(path_out)

# Drop every non-digit character and read what is left as a number.
sensor_no <- sensor_out %>%
  gsub(pattern = "[^0-9]", replacement = "", x = .) %>%
  as.numeric()

# NOTE(review): this assignment relies on all_files having one entry per row
# of dates_files, in the same order - confirm.
dates_files$sensor_no <- sensor_no

# Attach the sensor number to the verified sample.
all_df <- merge(
  x = all_df,
  y = dates_files,
  by = c("all_files_out", "all_files", "dates")
)

# Jittered points of sensor number by manual ID.
ggplot(all_df) +
  geom_jitter(aes(x = MANUAL.ID, y = sensor_no), alpha = 0.4)

# Agreement rates between the folder-based classification and the manual ID.
#
# tp_rate: percentage of rows classified "Bat" whose manual ID is also "Bat".
# tn_rate: percentage of rows classified "No bat" whose manual ID is also
#          "No bat".
#
# Only rows where correct_class is TRUE are counted as agreements. The earlier
# form, nrow(df[df$correct_class == TRUE, ]), also counted rows whose
# correct_class was NA, because logical row-indexing with NA returns an
# all-NA row. NA rows still count towards the denominator.

bat <- all_df %>%
  filter(classification == "Bat")

#table(bat$correct_class)

tp_rate <- sum(bat$correct_class, na.rm = TRUE) / nrow(bat) * 100

# wrong_bat<-all_df %>%
#   filter(correct_class == FALSE & classification == "Bat")

no_bat <- all_df %>%
  filter(classification == "No bat")

#table(no_bat$correct_class)

tn_rate <- sum(no_bat$correct_class, na.rm = TRUE) / nrow(no_bat) * 100

## [1] "The true positive rate is 90.848%"

## [1] "The false positive rate is 9.152%"

## [1] "The true negative rate is 99.794%"

## [1] "The false negative rate is 0.206%"

Looking at the true positive rate across sensors: Sensor 6 performs very poorly, with a true positive rate of 35.0% (14 of 40 detections; see the table below). This sensor is in quite a “busy” location: it is near two roads, a roundabout, a canal, some footpaths and the ArcelorMittal Orbit. The sensor seems to continually mistake machine noise for bats.

Sensors 1 and 11 also do not perform well, with true positive rates of 61.5% and 65.8% respectively. There are very few detections at Sensor 1 (13), but 38.5% of these are misdetected machine noise. Sensor 1 is near both the Olympic Stadium and the ArcelorMittal Orbit. Sensor 11 also seems to experience a lot of machine noise but is not near an obviously “busy” site.

# Per-sensor summary of the rows classified "Bat".
#   true_count     - rows whose manual ID agrees with the classification
#   total_count    - all rows classified "Bat" at that sensor
#   false_pos_prop - 1 - true_count / total_count
#
# summarise() returns one row per sensor directly, replacing the earlier
# mutate() + select() + distinct() sequence. NA values of correct_class are
# not counted as agreements but still count towards total_count. The result
# is explicitly ungrouped so later verbs do not run per sensor by accident.
bat_sens <- all_df %>%
  filter(classification == "Bat") %>%
  group_by(sensor_no) %>%
  summarise(
    true_count = sum(correct_class, na.rm = TRUE),
    total_count = n(),
    false_pos_prop = 1 - (true_count / total_count)
  ) %>%
  ungroup() %>%
  arrange(sensor_no)


kable(bat_sens)

sensor_no	true_count	total_count	false_pos_prop
1	8	13	0.3846154
2	8	10	0.2000000
6	14	40	0.6500000
7	1257	1350	0.0688889
8	22	24	0.0833333
9	14	14	0.0000000
10	16	20	0.2000000
11	48	73	0.3424658
12	203	213	0.0469484
13	3	3	0.0000000
14	174	185	0.0594595

# Per-sensor summary of the rows classified "No bat".
#   true_count     - rows whose manual ID agrees with the classification
#   total_count    - all rows classified "No bat" at that sensor
#   false_neg_prop - 1 - true_count / total_count
#
# summarise() returns one row per sensor directly, replacing the earlier
# mutate() + select() + distinct() sequence. NA values of correct_class are
# not counted as agreements but still count towards total_count. The result
# is explicitly ungrouped so later verbs do not run per sensor by accident.
no_bat_sens <- all_df %>%
  filter(classification == "No bat") %>%
  group_by(sensor_no) %>%
  summarise(
    true_count = sum(correct_class, na.rm = TRUE),
    total_count = n(),
    false_neg_prop = 1 - (true_count / total_count)
  ) %>%
  ungroup() %>%
  arrange(sensor_no)


knitr::kable(no_bat_sens)

sensor_no	true_count	total_count	false_neg_prop
1	210	210	0.0000000
2	158	158	0.0000000
6	182	183	0.0054645
7	174	175	0.0057143
8	198	198	0.0000000
9	189	189	0.0000000
10	146	147	0.0068027
11	171	171	0.0000000
12	147	147	0.0000000
13	192	192	0.0000000
14	174	175	0.0057143

# Export the verified sample with its FP/FN flags and sensor number.
# NOTE(review): this rebinds all_files, which earlier in the script is passed
# to strsplit(); re-running that earlier step in the same session will see
# this data frame instead.

#all_files<-merge(dates_files, all_df, by = c("all_files_out"))

all_files <- all_df[
  ,
  c(
    "all_files_out", "all_files", "dates",
    "classification", "MANUAL.ID",
    "FP", "FN", "correct_class", "sensor_no"
  )
]
write.csv(x = all_files, file = "all_sensor_fn_fp.csv", row.names = FALSE)

QEOP Sensor Tests

Fiona Spooner

April 26, 2019