library(binom)
# Directory that holds the shared helper scripts (note the trailing slash,
# which the paste0() call below relies on).
source_path <- "~/Desktop/git_repositories/SBB-dispersal/avbernat/Rsrc/"

# Helper scripts to load, in the order they are sourced.
script_names <- c(
  "center_flight_data.R",      # Re-centers data
  "clean_flight_data.R",       # Loads and cleans data
  "unique_flight_data.R",      # 1 function: create_delta_data()
  "clean_flight_data-Fall.R",  # 1 function: clean_flight_data.Fall()
  "unique_flight_data-Fall.R"  # 1 function: create_delta_data.Fall()
)

# Source every helper so its functions are available to the code below.
for (script in script_names) {
  path <- paste0(source_path, script)
  source(path)
}
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## Attaching package: 'chron'
## The following objects are masked from 'package:lubridate':
## 
##     days, hours, minutes, seconds, years

Season

# Read the Winter 2020 flight data through the sourced read_flight_data()
# helper. The `funs()` deprecation warning printed below is emitted during
# this call.
# NOTE(review): the path is relative, so this presumably depends on the
# working directory -- confirm.
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
# The helper's result is unpacked by position: element 1 -> data_all,
# element 2 -> data_tested (presumably only the bugs that were actually
# tested -- confirm in the helper script).
data_all <- data[[1]]
data_tested <- data[[2]] 

# Build the winter delta data from the tested set, then center it.
# `winter` is the data set combined with the fall data further down.
d_all <- create_delta_data(data_tested) # winter data
winter <- center_data(d_all, is_not_unique_data = FALSE)

# Fall 2019 data: build the CSV location from `dir` (defined outside this
# section), clean it with the Fall-specific helper, then derive the delta
# data set used in the seasonal comparison.
data_path <- paste0(
  dir,
  "/Dispersal/Winter_2020/stats/data/full_data-Fall2019.csv"
)
d <- clean_flight_data.Fall(data_path)
fall <- create_delta_data.Fall(d)

# Tag each data set with its season so rows stay identifiable once the two
# sets are stacked together.
fall$season <- "fall"
winter$season <- "winter"

# Count how many of the first `n_trials` entries of one bug's flight record
# equal `value` (the code below uses 1 for "flew" and 0 for "did not fly").
# head() is used instead of `[1:2]`: indexing past the end of a record that
# has fewer than two entries pads it with NA, which would turn both counts
# for that bug into NA and propagate into every downstream sum.
count_first_trials <- function(outcomes, value, n_trials = 2) {
  sum(head(outcomes, n_trials) == value)
}

# just extract the first two trials
# vapply() walks the records directly, so an empty `fall` is handled without
# the out-of-range indices that `1:length(x)` produces when length is 0.
fall$num_flew <- vapply(
  fall$flew_b, count_first_trials, numeric(1),
  value = 1, USE.NAMES = FALSE
)
fall$num_notflew <- vapply(
  fall$flew_b, count_first_trials, numeric(1),
  value = 0, USE.NAMES = FALSE
)
# Columns shared by the fall and winter data sets that are needed for the
# seasonal comparison.
keep <- c(
  "ID", "sex", "population", "site", "host_plant", "sym_dist",
  "sex_c", "w_morph_c", "num_notflew", "num_flew", "season"
)

# Stack the two seasons into one table (one row per bug) and compute each
# bug's flight probability as flights / total trials.
# Note: the name `sd` is kept as in the original script even though it
# shadows stats::sd as a variable.
sd <- rbind(fall[, keep], winter[, keep]) # unique
sd$f_prob <- sd$num_flew / (sd$num_flew + sd$num_notflew)

#### season vs. flight prob
# Mean per-bug flight probability for each season.
data_temp <- aggregate(f_prob ~ season, data = sd, FUN = mean)

# Per-season totals, looked up by season label so they line up with the rows
# of `data_temp` no matter how aggregate() orders (or drops) seasons, instead
# of relying on a hard-coded c(fall, winter) order.
flew_by_season <- tapply(sd$num_flew, sd$season, sum)
notflew_by_season <- tapply(sd$num_notflew, sd$season, sum)
season_labels <- as.character(data_temp$season)

data_temp$trials <- as.vector(
  flew_by_season[season_labels] + notflew_by_season[season_labels]
)

# calculate binomial confidence interval
data_temp$successes <- as.vector(flew_by_season[season_labels])
# binom.confint() returns a data frame; it is stored as a nested column so
# its fields are reached as data_temp$CI$<field>.
data_temp$CI <- binom.confint(
  data_temp$successes, data_temp$trials,
  methods = "exact"
)

There appears to be some difference between seasons in the first two trials of each bug; note, however, that the experimental setup also differed between the two seasons.

The low accuracies could also be due to age, a factor that was not controlled for. It is possible that younger reproductively active females fly more than older reproductively active females, but this remains unclear.