#knitr::purl("flight_predictions.Rmd", output = "flight_predictions.R") # convert Rmd to R script
Using delta_yes_no.Rmd models to predict Fall delta flight response.
Clean the data
source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat/Rsrc/"
script_names = c("center_flight_data.R", # Re-centers data
"clean_flight_data-Fall.R", # Loads and cleans data
"unique_flight_data-Fall.R",
"clean_flight_data.R",
"unique_flight_data.R")
for (script in script_names) {
path = paste0(source_path, script)
source(path)
}
morph_d = read.csv("data/old_data/bug_morphology_flight-trials-Autumn2019.csv", header=TRUE, sep=",",
stringsAsFactors=TRUE)
data = clean_flight_data.Fall("data/full_data-Fall2019.csv")
morph_d[morph_d$ID == 146,]$sex = data[data$ID == 146,]$sex # this bug broke apart before morph measurements taken so using flight sex identification
Keeping only the tests that were similar to Winter experiment design
data_mass = data %>%
filter(!is.na(mass))
# 60 min trial
data60 = data_mass %>%
filter(set_number < 53)
# 90 min trial
data90 = data_mass %>%
filter(set_number < 72 & set_number > 52)
# ongoing trial
ongoing_data = data_mass %>%
filter(set_number > 71)
Create delta data
d = create_delta_data.Fall(ongoing_data)
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Calculating individual predicted probabilities
This is the format:
Here are the equations:
[1] “Where F = 1”
[1] “log(pi_-1 / pi_1) = 5.81 + 0.05 Mass Percent Change + 4.93 Sex Flew in T1, rather than T2”
[2] “log(pi_2 / pi_-1) = 1.14 + -0.02 Mass Percent Change + -0.21 Sex Flew in both, rather than T1”
[3] “log(pi_2 / pi_1) = 6.94 + 0.03 Mass Percent Change + 4.72 Sex Flew in both, rather than T2”
[4] “log(pi_-1 / pi_0) = -1.02 + 0.043 Mass Percent Change - 0.69 Sex Flew in T1, rather than none”
[5] “log(pi_1 / pi_0) = -6.82 - 0.009 Mass Percent Change - 5.63 Sex Flew in both, rather than none”
[6] “log(pi_2 / pi_0) = 0.12 + 0.019 Mass Percent Change - 0.90 Sex Flew in both, rather than none”
Codifying individual predicted probability equations
d <- d[with(d, order(mass_per)),]
neither = c()
T1_rather_than_none = c()
T2_rather_than_none = c()
both_rather_than_none = c()
for (i in 1:nrow(d)) {
mp = d$mass_per[[i]]
s = d$sex_c[[i]]
top0 = exp(0) # just equals 1
top1 = exp(-1.02 + 0.043*mp - 0.69*s)
top2 = exp(-6.82 - 0.009*mp - 5.63*s)
top3 = exp(0.12 + 0.019*mp- 0.90*s)
bottom = top0 + top1 + top2 + top3
neither = c(neither, top0/bottom)
T1_rather_than_none = c(T1_rather_than_none, top1/bottom)
T2_rather_than_none = c(T2_rather_than_none, top2/bottom)
both_rather_than_none = c(both_rather_than_none, top3/bottom)
}
Plotting predicted probabilities
d$index = 1:nrow(d)
females = d %>%
filter(sex=="F")
males = d %>%
filter(sex=="M")
Frows = females$index
Mrows = males$index
Notice that the percent change masses are much narrower than the scale we had in the Winter. This suggests that there are a lot of males and few females.
Accuracy
probs = round(cbind(neither, T1_rather_than_none, T2_rather_than_none, both_rather_than_none),2)
summary_probs = cbind(as.character(d$flight_case), as.character(d$sex), probs)
colnames(summary_probs) = c("event", "sex", "none", "T1", "T2", "both")
dataframe = as.data.frame(summary_probs)
nrow(dataframe)
## [1] 45
calculate_accuracy = function(data, cfirst, clast) {
probs = data[cfirst:clast]
most_likely_events = colnames(probs)[apply(probs,1,which.max)]
actual_events = c()
for (i in 1:nrow(data)) {
if (data[i,1] == "0") {
actual_event = "none"
}
if (data[i,1] == "-1") {
actual_event = "T1"
}
if (data[i,1] == "1") {
actual_event = "T2"
}
if (data[i,1] == "2") {
actual_event = "both"
}
actual_events = c(actual_events, actual_event)
}
events = actual_events == most_likely_events
accurate_events = sum(events)
total_events = length(events)
accuracy = accurate_events / total_events
return(accuracy)
}
Overall and Grouped Accuracies
# overall
acc = calculate_accuracy(dataframe,3,6)
paste("Overall prediction accuracy, ", round(acc,2))
## [1] "Overall prediction accuracy, 0.62"
# by sex
femdata = dataframe %>%
filter(sex=="F")
maledata = dataframe %>%
filter(sex=="M")
accF = calculate_accuracy(femdata,3,6)
paste("Female prediction accuracy, ", round(accF,2))
## [1] "Female prediction accuracy, 0.46"
accM = calculate_accuracy(maledata,3,6)
paste("Male prediction accuracy, ", round(accM,2))
## [1] "Male prediction accuracy, 0.69"
It seems like for males, this model will lead you to overestimating its flight probability. Males will not always fly twice, but they will be most likely to fly twice. Additionally, predicting female flight is less accurate probably because of egg-laying events. So, let’s use the mass + eggs laid model to predict female flight case probabilities.
Confusion Matrix
For details see: https://medium.com/analytics-vidhya/calculating-accuracy-of-an-ml-model-8ae7894802e
and
4 X 4 Matrix
acc_table = get_confusion_matrix(dataframe,3,6)
acc_table
## # A tibble: 1 x 16
## `Overall Accuracy` `Balanced Accuracy` F1 Sensitivity Specificity
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.622 0.555 NaN 0.312 0.798
## # … with 11 more variables: Pos Pred Value <dbl>, Neg Pred Value <dbl>,
## # Kappa <dbl>, MCC <dbl>, Detection Rate <dbl>, Detection Prevalence <dbl>,
## # Prevalence <dbl>, Predictions <list>, Confusion Matrix <list>,
## # Class Level Results <list>, Process <list>
confusion_matrix <- acc_table$'Confusion Matrix'[[1]]
confusion_matrix
## # A tibble: 16 x 3
## Prediction Target N
## <chr> <chr> <int>
## 1 both both 22
## 2 none both 6
## 3 T1 both 0
## 4 T2 both 0
## 5 both none 7
## 6 none none 6
## 7 T1 none 0
## 8 T2 none 0
## 9 both T1 3
## 10 none T1 0
## 11 T1 T1 0
## 12 T2 T1 0
## 13 both T2 0
## 14 none T2 1
## 15 T1 T2 0
## 16 T2 T2 0
plot_confusion_matrix(confusion_matrix, add_sums=TRUE)
For more on confusion maxtrices see: https://cran.r-project.org/web/packages/cvms/vignettes/Creating_a_confusion_matrix.html
wing2body
Calculating individual predicted probabilities
Here are the equations:
Edits 03/01/2021 - need to center the wing2body data I have -1 -0.935 0.041 -0.571 23.739 1 -8.177 -0.005 -6.954 -6.595 2 0.201 0.018 -0.760 28.094
d$wing2body = 0
for (i in 1:nrow(d)) {
d$wing2body[i] = d$wing[[i]][1] / d$body[[i]][1]
}
d$wing2body_c = 0
d$wing2body_c = d$wing2body - mean(d$wing2body)
neither = c()
T1_rather_than_none = c()
T2_rather_than_none = c()
both_rather_than_none = c()
for (i in 1:nrow(d)) {
m = d$mass_per[[i]]
s = d$sex_c[[i]]
#w = d$wing[[i]][1] / d$body[[i]][1]
w = d$wing2body_c[i]
top0 = exp(0) # just equals 1
#top1 = exp(-17.862 + 0.041*m - 0.571*s + 23.558*w )
#top2 = exp(-4.395 - 0.005*m - 9.580*s - 8.937*w)
#top3 = exp(-19.931 + 0.018*m - 0.760*s + 28.019*w)
top1 = exp(-0.935 + 0.041*m - 0.571*s + 23.739*w)
top2 = exp(-8.177 - 0.005*m - 6.954*s - 6.595*w)
top3 = exp(0.201 + 0.018*m - 0.760*s + 28.094*w)
bottom = top0 + top1 + top2 + top3
neither = c(neither, top0/bottom)
T1_rather_than_none = c(T1_rather_than_none, top1/bottom)
T2_rather_than_none = c(T2_rather_than_none, top2/bottom)
both_rather_than_none = c(both_rather_than_none, top3/bottom)
}
Accuracy
probs = round(cbind(neither, T1_rather_than_none, T2_rather_than_none, both_rather_than_none),2)
summary_probs = cbind(as.character(d$flight_case), as.character(d$sex), probs)
colnames(summary_probs) = c("event", "sex", "none", "T1", "T2", "both")
dataframe = as.data.frame(summary_probs)
nrow(dataframe)
## [1] 45
# overall
acc = calculate_accuracy(dataframe,3,6)
paste("Overall prediction accuracy, ", round(acc,2))
## [1] "Overall prediction accuracy, 0.6"
# by sex
femdata = dataframe %>%
filter(sex=="F")
maledata = dataframe %>%
filter(sex=="M")
accF = calculate_accuracy(femdata,3,6)
paste("Female prediction accuracy, ", round(accF,2))
## [1] "Female prediction accuracy, 0.38"
accM = calculate_accuracy(maledata,3,6)
paste("Male prediction accuracy, ", round(accM,2))
## [1] "Male prediction accuracy, 0.69"
acc_table = get_confusion_matrix(dataframe,3,6)
acc_table
## # A tibble: 1 x 16
## `Overall Accuracy` `Balanced Accuracy` F1 Sensitivity Specificity
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.6 0.538 NaN 0.293 0.784
## # … with 11 more variables: Pos Pred Value <dbl>, Neg Pred Value <dbl>,
## # Kappa <dbl>, MCC <dbl>, Detection Rate <dbl>, Detection Prevalence <dbl>,
## # Prevalence <dbl>, Predictions <list>, Confusion Matrix <list>,
## # Class Level Results <list>, Process <list>
confusion_matrix <- acc_table$'Confusion Matrix'[[1]]
confusion_matrix
## # A tibble: 16 x 3
## Prediction Target N
## <chr> <chr> <int>
## 1 both both 22
## 2 none both 6
## 3 T1 both 0
## 4 T2 both 0
## 5 both none 8
## 6 none none 5
## 7 T1 none 0
## 8 T2 none 0
## 9 both T1 3
## 10 none T1 0
## 11 T1 T1 0
## 12 T2 T1 0
## 13 both T2 0
## 14 none T2 1
## 15 T1 T2 0
## 16 T2 T2 0
plot_confusion_matrix(confusion_matrix, add_sums=TRUE)
Females Only
d = d %>%
filter(sex=="F")
Problem with these models is that there is no case in which females only fly in T2 so that’s a possible underestimation in our model.
d <- d[with(d, order(mass_diff)),]
neither = c()
T1_rather_than_none = c()
both_rather_than_none = c()
for (i in 1:nrow(d)) {
md = d$mass_diff[[i]]
ed = d$egg_diff[[i]]
top0 = exp(0) # just equals 1
top1 = exp(-0.88 - 0.53*ed + 57.43*md )
top2 = exp(-0.53 - 1.09*ed + 18.67*md)
bottom = top0 + top1 + top2
neither = c(neither, top0/bottom)
T1_rather_than_none = c(T1_rather_than_none, top1/bottom)
both_rather_than_none = c(both_rather_than_none, top2/bottom)
}
Accuracy
probs = round(cbind(neither, T1_rather_than_none, both_rather_than_none),2)
summary_probs = cbind(as.character(d$flight_case), as.character(d$egg_diff), probs)
colnames(summary_probs) = c("event", "egg_diff", "none", "T1", "both")
egg2 = c(1,2,3,5,6,7,9,10,11,13)
noegg = c(4,8,12)
dataframe = as.data.frame(summary_probs)
dataframe$egg_cat = c(2,2,2,0,2,2,2,0,2,2,2,0,2)
Plotting predicted probabilities
Calculate Accuracy
accF_eggs = calculate_accuracy(dataframe,3,5)
paste("Female prediction accuracy for mass diff and egg model, ", round(accF_eggs,2))
## [1] "Female prediction accuracy for mass diff and egg model, 0.46"
Female flight was underestimated, and, considering that the prediction for females is so low, this leads me to believe that season is an important factor in predicting flight case probability.
Confusion Matrix
acc_table = get_confusion_matrix(dataframe,3,5)
acc_table
## # A tibble: 1 x 16
## `Overall Accuracy` `Balanced Accuracy` F1 Sensitivity Specificity
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.462 0.5 NaN 0.333 0.667
## # … with 11 more variables: Pos Pred Value <dbl>, Neg Pred Value <dbl>,
## # Kappa <dbl>, MCC <dbl>, Detection Rate <dbl>, Detection Prevalence <dbl>,
## # Prevalence <dbl>, Predictions <list>, Confusion Matrix <list>,
## # Class Level Results <list>, Process <list>
confusion_matrix <- acc_table$'Confusion Matrix'[[1]]
confusion_matrix
## # A tibble: 9 x 3
## Prediction Target N
## <chr> <chr> <int>
## 1 both both 0
## 2 none both 6
## 3 T2 both 0
## 4 both none 0
## 5 none none 6
## 6 T2 none 0
## 7 both T2 0
## 8 none T2 1
## 9 T2 T2 0
plot_confusion_matrix(confusion_matrix, add_sums=TRUE)