library(dplyr)
library(ggplot2)
library(knitr)
library(lubridate)

source("D:/Fiona/useful_functions.R")
# ---- Sample of recordings stored under the "bats" folder ----
# Recursively list everything under the tagged-bat folder, then keep only
# paths that END in ".wav". The dot is escaped and the pattern anchored so
# that "." is not treated as a regex wildcard and so that a match elsewhere
# in the path (e.g. a directory name, or "x.wav.bak") is not accepted.
all_bat_files <- list.files(
  "Q:/data_tagged/bats/all/",
  full.names = TRUE,
  recursive = TRUE
)
all_bat_files <- all_bat_files[grepl("\\.wav$", all_bat_files)]

# Split each path on "_" and keep the last piece.
# NOTE(review): get_last() is not defined in this script; presumably it comes
# from the sourced useful_functions.R and returns the final element -- confirm.
path_split <- strsplit(all_bat_files, "_")
path_out <- lapply(path_split, get_last)
unl_path_out_bat <- unlist(path_out)

# Keep the rows of dates_files whose all_files_out value is one of the
# extracted names.
# NOTE(review): dates_files is not created in this script; it must already
# exist in the session (or be created by the sourced file) -- confirm.
bats_dates <- dates_files[dates_files$all_files_out %in% unl_path_out_bat, ]

# Label every row in this sample as "Bat".
bats_dates$classification <- "Bat"


# ---- Sample of recordings stored under the "Sample_nobats" folder ----
# Recursively list everything under the no-bat sample folder, then keep only
# paths that END in ".wav". The dot is escaped and the pattern anchored so
# that "." is not treated as a regex wildcard and a match elsewhere in the
# path is not accepted.
all_no_bat_files <- list.files(
  "Q:/data_tagged/Sample_nobats/All/",
  full.names = TRUE,
  recursive = TRUE
)
all_no_bat_files <- all_no_bat_files[grepl("\\.wav$", all_no_bat_files)]

# Split each path on "/" and keep the last piece (the file name).
# NOTE(review): get_last() is not defined in this script; presumably it comes
# from the sourced useful_functions.R and returns the final element -- confirm.
path_split <- strsplit(all_no_bat_files, "/")
path_out <- lapply(path_split, get_last)
unl_path_out_no_bat <- unlist(path_out)

# Keep the rows of dates_files whose all_files_out value is one of the
# extracted file names.
# NOTE(review): dates_files is not created in this script; it must already
# exist in the session -- confirm.
no_bats_dates <- dates_files[
  dates_files$all_files_out %in% unl_path_out_no_bat,
]

# Label every row in this sample as "No bat".
no_bats_dates$classification <- "No bat"


# ---- Combine both samples and attach the MANUAL.ID labels ----
# Stack the "Bat" and "No bat" samples into a single table.
all_samp <- rbind(bats_dates, no_bats_dates)

# Metadata tables written alongside each sample.
# NOTE(review): MANUAL.ID is presumably the human-verified label -- confirm.
fp <- read.csv(file = "Q:/data_tagged/bats/output/meta.csv")
fn <- read.csv(file = "Q:/data_tagged/Sample_nobats/output/meta.csv")

# For the bat sample only, reduce IN.FILE to the piece after the last "_"
# so that it can be matched against all_files_out below.
path_split <- strsplit(as.character(fp$IN.FILE), split = "_")
path_out <- lapply(path_split, get_last)
fp$IN.FILE <- unlist(path_out)

# Stack both metadata tables and keep only the file name and manual label.
fpfn <- rbind(fp, fn)
verification <- fpfn %>%
  dplyr::select(IN.FILE, MANUAL.ID)

# Join the manual labels onto the sampled recordings by file name.
all_df <- merge(
  x = all_samp,
  y = verification,
  by.x = "all_files_out",
  by.y = "IN.FILE"
)

# FP: stored under "Bat" but manually labelled "No bat".
all_df$FP <- with(
  all_df,
  ifelse(classification == "Bat" & MANUAL.ID == "No bat", 1, 0)
)

# FN: stored under "No bat" but manually labelled "Bat".
all_df$FN <- with(
  all_df,
  ifelse(classification == "No bat" & MANUAL.ID == "Bat", 1, 0)
)

# TRUE where the folder label agrees with the manual label.
all_df$correct_class <- all_df$classification == all_df$MANUAL.ID

# Jittered scatter of recording date by classification, coloured by agreement.
ggplot(
  data = all_df,
  mapping = aes(x = classification, y = dates, colour = correct_class)
) +
  geom_jitter(alpha = 0.4)

# 
# ggplot(all_df, aes(x = MANUAL.ID, y = dates, colour = correct_class))+
#   geom_jitter(alpha = 0.4)


# ---- Derive a sensor number for every file and attach it to all_df ----
# Split each path in all_files on "/" and take the second-to-last piece.
# NOTE(review): all_files and get_second_last() are not defined in this
# script; presumably all_files is the vector of full paths behind dates_files
# and the helper returns the parent folder name -- confirm.
path_split <- strsplit(all_files, split = "/")
path_out <- lapply(path_split, get_second_last)
sensor_out <- unlist(path_out)

# Strip every non-digit character and convert what is left to a number.
sensor_no <- as.numeric(
  gsub(pattern = "[^0-9]", replacement = "", x = sensor_out)
)

# NOTE(review): this assumes dates_files has one row per element of all_files,
# in the same order -- confirm.
dates_files$sensor_no <- sensor_no

# Bring sensor_no into all_df by matching on the three shared columns.
all_df <- merge(
  x = all_df,
  y = dates_files,
  by = c("all_files_out", "all_files", "dates")
)

# Jittered scatter of sensor number by manual label.
ggplot(data = all_df, mapping = aes(x = MANUAL.ID, y = sensor_no)) +
  geom_jitter(alpha = 0.4)

# ---- Overall agreement rates (percentages) ----
# Rows classified as "Bat".
bat <- all_df %>%
  filter(classification == "Bat")

#table(bat$correct_class)

# Percentage of "Bat" rows whose classification matches MANUAL.ID.
# Counting with sum(..., na.rm = TRUE) rather than subsetting rows: indexing a
# data frame with an NA logical yields an all-NA row, so rows with a missing
# correct_class would otherwise be counted as correct.
tp_rate <- (sum(bat$correct_class, na.rm = TRUE) / nrow(bat)) * 100

# wrong_bat<-all_df %>%
#   filter(correct_class == FALSE & classification == "Bat")

# Rows classified as "No bat".
no_bat <- all_df %>%
  filter(classification == "No bat")

#table(no_bat$correct_class)

# Percentage of "No bat" rows whose classification matches MANUAL.ID
# (same NA-safe counting as above).
tn_rate <- (sum(no_bat$correct_class, na.rm = TRUE) / nrow(no_bat)) * 100
## [1] "The true positive rate is 90.848%"
## [1] "The false positive rate is 9.152%"
## [1] "The true negative rate is 99.794%"
## [1] "The false negative rate is 0.206%"

Looking at the true positive rate across sensors: Sensor 6 performs very poorly, with a true positive rate of 35.0% (14 of 40 detections). This sensor is in quite a “busy” location: it is near two roads, a roundabout, a canal, some footpaths and the ArcelorMittal structure. The sensor seems to continually mistake machine noise for bats.

Sensors 1 and 11 also do not perform well, with true positive rates (TPR) of 61.5% and 65.8% respectively. There are very few detections at Sensor 1 (13), but 38.5% of these are misdetected machine noise. Sensor 1 is near both the Olympic Stadium and the ArcelorMittal Orbit. Sensor 11 also seems to experience a lot of machine noise but is not near an obviously “busy” site.

# ---- Per-sensor agreement for rows classified as "Bat" ----
# One row per sensor:
#   true_count     - rows where classification matches MANUAL.ID
#   total_count    - all "Bat" rows for that sensor
#   false_pos_prop - share of "Bat" rows that do not match MANUAL.ID
# summarise() collapses each group directly (instead of mutate + distinct) and
# returns an ungrouped table; na.rm = TRUE keeps a single missing
# correct_class from turning a sensor's count into NA.
bat_sens <- all_df %>%
  filter(classification == "Bat") %>%
  group_by(sensor_no) %>%
  summarise(
    true_count = sum(correct_class, na.rm = TRUE),
    total_count = n(),
    false_pos_prop = 1 - (true_count / total_count)
  ) %>%
  arrange(sensor_no)


kable(bat_sens)
sensor_no true_count total_count false_pos_prop
1 8 13 0.3846154
2 8 10 0.2000000
6 14 40 0.6500000
7 1257 1350 0.0688889
8 22 24 0.0833333
9 14 14 0.0000000
10 16 20 0.2000000
11 48 73 0.3424658
12 203 213 0.0469484
13 3 3 0.0000000
14 174 185 0.0594595
# ---- Per-sensor agreement for rows classified as "No bat" ----
# One row per sensor:
#   true_count     - rows where classification matches MANUAL.ID
#   total_count    - all "No bat" rows for that sensor
#   false_neg_prop - share of "No bat" rows that do not match MANUAL.ID
# summarise() collapses each group directly (instead of mutate + distinct) and
# returns an ungrouped table; na.rm = TRUE keeps a single missing
# correct_class from turning a sensor's count into NA.
no_bat_sens <- all_df %>%
  filter(classification == "No bat") %>%
  group_by(sensor_no) %>%
  summarise(
    true_count = sum(correct_class, na.rm = TRUE),
    total_count = n(),
    false_neg_prop = 1 - (true_count / total_count)
  ) %>%
  arrange(sensor_no)


knitr::kable(no_bat_sens)
sensor_no true_count total_count false_neg_prop
1 210 210 0.0000000
2 158 158 0.0000000
6 182 183 0.0054645
7 174 175 0.0057143
8 198 198 0.0000000
9 189 189 0.0000000
10 146 147 0.0068027
11 171 171 0.0000000
12 147 147 0.0000000
13 192 192 0.0000000
14 174 175 0.0057143
#all_files<-merge(dates_files, all_df, by = c("all_files_out"))

# ---- Export the per-file results ----
# Keep only the columns needed downstream, in this fixed order.
# NOTE(review): this reassigns all_files, which earlier in the script is passed
# to strsplit() as the vector of file paths; re-running that earlier section
# afterwards would operate on this data frame instead.
all_files <- all_df[c(
  "all_files_out", "all_files", "dates", "classification", "MANUAL.ID",
  "FP", "FN", "correct_class", "sensor_no"
)]

# Written to the current working directory, without row names.
write.csv(x = all_files, file = "all_sensor_fn_fp.csv", row.names = FALSE)