library(dplyr)
library(ggplot2)
library(knitr)
library(lubridate)
source("D:/Fiona/useful_functions.R")
# Collect every tagged bat recording and pull its matching rows from
# `dates_files`, labelling them "Bat".
# NOTE(review): `dates_files` and `get_last()` are not defined in this script;
# presumably they come from useful_functions.R or the workspace -- confirm.
all_bat_files <- list.files(
  "Q:/data_tagged/bats/all/",
  full.names = TRUE,
  recursive = TRUE
)
# Keep only paths that end in a literal ".wav". The previous pattern ".wav"
# left the dot unescaped and unanchored, so it matched any character followed
# by "wav" anywhere in the path.
all_bat_files <- all_bat_files[grepl("\\.wav$", all_bat_files)]
# Split each path on "_" and reduce the pieces with get_last().
# NOTE(review): the no-bat paths further down are split on "/" instead;
# presumably the tagged bat files carry an underscore-separated prefix in
# front of the original file name -- confirm.
path_split <- strsplit(all_bat_files, "_")
path_out <- lapply(path_split, get_last)
unl_path_out_bat <- unlist(path_out)
# Rows of dates_files whose all_files_out value appears among the bat files.
bats_dates <- dates_files[dates_files$all_files_out %in% unl_path_out_bat, ]
bats_dates$classification <- "Bat"
# Collect every recording in the no-bat sample and pull its matching rows
# from `dates_files`, labelling them "No bat".
# NOTE(review): `dates_files` and `get_last()` are not defined in this script;
# presumably they come from useful_functions.R or the workspace -- confirm.
all_no_bat_files <- list.files(
  "Q:/data_tagged/Sample_nobats/All/",
  full.names = TRUE,
  recursive = TRUE
)
# Keep only paths that end in a literal ".wav". The previous pattern ".wav"
# left the dot unescaped and unanchored, so it matched any character followed
# by "wav" anywhere in the path.
all_no_bat_files <- all_no_bat_files[grepl("\\.wav$", all_no_bat_files)]
# Split each path on "/" and reduce the pieces with get_last().
path_split <- strsplit(all_no_bat_files, "/")
path_out <- lapply(path_split, get_last)
unl_path_out_no_bat <- unlist(path_out)
# Rows of dates_files whose all_files_out value appears among the no-bat files.
no_bats_dates <- dates_files[dates_files$all_files_out %in% unl_path_out_no_bat, ]
no_bats_dates$classification <- "No bat"
# Stack the "Bat" and "No bat" samples into one table.
all_samp <- rbind(bats_dates, no_bats_dates)

# Manual verification metadata exported for each sample.
fp <- read.csv("Q:/data_tagged/bats/output/meta.csv")
fn <- read.csv("Q:/data_tagged/Sample_nobats/output/meta.csv")

# Only the bat metadata has its IN.FILE values split on "_" and reduced with
# get_last(); the no-bat metadata is used as read.
fp$IN.FILE <- unlist(
  lapply(strsplit(as.character(fp$IN.FILE), "_"), get_last)
)

fpfn <- rbind(fp, fn)
verification <- dplyr::select(fpfn, c("IN.FILE", "MANUAL.ID"))

# Attach the manual ID to every sampled file, then flag disagreements:
#   FP            - 1 when classified "Bat" but manually identified "No bat"
#   FN            - 1 when classified "No bat" but manually identified "Bat"
#   correct_class - TRUE when classification and manual ID are equal
all_df <- merge(
  all_samp,
  verification,
  by.x = "all_files_out",
  by.y = "IN.FILE"
) %>%
  mutate(
    FP = ifelse(classification == "Bat" & MANUAL.ID == "No bat", 1, 0),
    FN = ifelse(classification == "No bat" & MANUAL.ID == "Bat", 1, 0),
    correct_class = classification == MANUAL.ID
  )
# Jittered scatter of `dates` for each classification, coloured by whether
# the classification equals the manual ID.
ggplot(
  data = all_df,
  mapping = aes(x = classification, y = dates, colour = correct_class)
) +
  geom_jitter(alpha = 0.4)
# Alternative view with the manual ID on the x axis (disabled):
# ggplot(all_df, aes(x = MANUAL.ID, y = dates, colour = correct_class))+
# geom_jitter(alpha = 0.4)
# Derive a sensor label for every path in `all_files`: split on "/" and
# reduce the pieces with get_second_last().
# NOTE(review): `all_files` and `get_second_last()` are defined outside this
# script section; presumably the reduced piece is the sensor folder -- confirm.
sensor_out <- all_files %>%
  strsplit("/") %>%
  lapply(get_second_last) %>%
  unlist()

# Strip every non-digit character and convert what is left to a number.
sensor_no <- as.numeric(gsub("[^0-9]", "", sensor_out))

# Assigned positionally, so this relies on `all_files` lining up row-for-row
# with `dates_files` -- TODO confirm.
dates_files$sensor_no <- sensor_no

# Bring the sensor number onto the verified sample.
all_df <- merge(
  all_df,
  dates_files,
  by = c("all_files_out", "all_files", "dates")
)

# Jittered scatter of sensor number against the manual ID.
ggplot(data = all_df, mapping = aes(x = MANUAL.ID, y = sensor_no)) +
  geom_jitter(alpha = 0.4)
# Percentage of files in each classification whose manual ID agrees.
# NOTE(review): the denominators are the number of files *classified* as
# "Bat" / "No bat", so these are agreement rates within each classified group
# (i.e. precision-style figures if `classification` is the detector output)
# rather than TP / (TP + FN) -- confirm the intended metric and wording.
bat <- all_df %>%
  filter(classification == "Bat")
# table(bat$correct_class)
# Count agreeing rows with sum() instead of nrow(bat[cond, ]): indexing a data
# frame with an NA logical returns an all-NA row, so rows with a missing
# correct_class used to be counted as correct. They now count as not correct.
tp_rate <- sum(bat$correct_class, na.rm = TRUE) / nrow(bat) * 100
# wrong_bat<-all_df %>%
# filter(correct_class == FALSE & classification == "Bat")
no_bat <- all_df %>%
  filter(classification == "No bat")
# table(no_bat$correct_class)
tn_rate <- sum(no_bat$correct_class, na.rm = TRUE) / nrow(no_bat) * 100
## [1] "The true positive rate is 90.848%"
## [1] "The false positive rate is 9.152%"
## [1] "The true negative rate is 99.794%"
## [1] "The false negative rate is 0.206%"
Looking at the true positive rate across sensors, Sensor 6 performs very poorly, with a true positive rate of 35.0%. This sensor is in quite a “busy” location: it is near two roads, a roundabout, a canal, some footpaths and the ArcelorMittal Orbit. The sensor seems to continually mistake machine noise for bats.
Sensors 1 and 11 also do not perform well, with true positive rates of 61.5% and 65.8% respectively. There are very few detections at Sensor 1 (13), but 38.5% of these are misdetected machine noise. Sensor 1 is near both the Olympic Stadium and the ArcelorMittal Orbit. Sensor 11 also seems to experience a lot of machine noise but is not near an obviously “busy” site.
# Per-sensor summary of the files classified "Bat":
#   true_count     - files whose manual ID agrees with the classification
#   total_count    - all files classified "Bat" at that sensor
#   false_pos_prop - 1 - true_count / total_count
# summarise() yields one row per sensor directly, replacing the earlier
# mutate() + select() + distinct() chain. na.rm = TRUE keeps a missing
# correct_class from turning the whole sensor's counts into NA (such rows
# count as not correct), and ungroup() stops the grouping leaking downstream.
bat_sens <- all_df %>%
  filter(classification == "Bat") %>%
  group_by(sensor_no) %>%
  summarise(
    true_count = sum(correct_class, na.rm = TRUE),
    total_count = n(),
    false_pos_prop = 1 - (true_count / total_count)
  ) %>%
  ungroup() %>%
  arrange(sensor_no)
kable(bat_sens)
| sensor_no | true_count | total_count | false_pos_prop |
|---|---|---|---|
| 1 | 8 | 13 | 0.3846154 |
| 2 | 8 | 10 | 0.2000000 |
| 6 | 14 | 40 | 0.6500000 |
| 7 | 1257 | 1350 | 0.0688889 |
| 8 | 22 | 24 | 0.0833333 |
| 9 | 14 | 14 | 0.0000000 |
| 10 | 16 | 20 | 0.2000000 |
| 11 | 48 | 73 | 0.3424658 |
| 12 | 203 | 213 | 0.0469484 |
| 13 | 3 | 3 | 0.0000000 |
| 14 | 174 | 185 | 0.0594595 |
# Per-sensor summary of the files classified "No bat":
#   true_count     - files whose manual ID agrees with the classification
#   total_count    - all files classified "No bat" at that sensor
#   false_neg_prop - 1 - true_count / total_count
# summarise() yields one row per sensor directly, replacing the earlier
# mutate() + select() + distinct() chain. na.rm = TRUE keeps a missing
# correct_class from turning the whole sensor's counts into NA (such rows
# count as not correct), and ungroup() stops the grouping leaking downstream.
no_bat_sens <- all_df %>%
  filter(classification == "No bat") %>%
  group_by(sensor_no) %>%
  summarise(
    true_count = sum(correct_class, na.rm = TRUE),
    total_count = n(),
    false_neg_prop = 1 - (true_count / total_count)
  ) %>%
  ungroup() %>%
  arrange(sensor_no)
knitr::kable(no_bat_sens)
| sensor_no | true_count | total_count | false_neg_prop |
|---|---|---|---|
| 1 | 210 | 210 | 0.0000000 |
| 2 | 158 | 158 | 0.0000000 |
| 6 | 182 | 183 | 0.0054645 |
| 7 | 174 | 175 | 0.0057143 |
| 8 | 198 | 198 | 0.0000000 |
| 9 | 189 | 189 | 0.0000000 |
| 10 | 146 | 147 | 0.0068027 |
| 11 | 171 | 171 | 0.0000000 |
| 12 | 147 | 147 | 0.0000000 |
| 13 | 192 | 192 | 0.0000000 |
| 14 | 174 | 175 | 0.0057143 |
# Earlier approach, kept for reference:
# all_files<-merge(dates_files, all_df, by = c("all_files_out"))
# Keep the columns to export and write them to CSV in the working directory.
# NOTE(review): this reuses the name `all_files`, which earlier lines pass to
# strsplit() as the input paths; from here on it holds this data frame.
all_files <- all_df[
  c(
    "all_files_out", "all_files", "dates",
    "classification", "MANUAL.ID",
    "FP", "FN", "correct_class", "sensor_no"
  )
]
write.csv(x = all_files, file = "all_sensor_fn_fp.csv", row.names = FALSE)