#Questions:
#How to treat intruder variable? Just a few intruders used, not equal distribution. Mostly used WW and JJ but used others when they were not available options.
NOV<- Behavioral.Trials.NOV.COMPLETE <- read.csv("C:/Users/jessi/OneDrive - College of Charleston/Jesi CofC/Thesis/Behavioral Design/Jesi Thesis Behavior Data/Pilot/Formatted Datasheets/Behavioral Trials NOV COMPLETE.csv")
#Add new composite DV's of interest
NOV$body <- NOV$bodyback + NOV$bodyfront
NOV$allfront <- NOV$headfront +NOV$bodyfront
NOV$bold <- NOV$headfront + NOV$body
NOV$inburrow <- NOV$notvisible + NOV$clawback + NOV$clawfront
#inspect distributions of DV’s - heavy kurtosis and skewness
# Log+1 transformations for right-skewed variables with zeros
NOV$body_log <- log(NOV$body + 1)
NOV$allfront_log <- log(NOV$allfront + 1)
NOV$bold_log <- log(NOV$bold + 1)
#Overall: log+1 consistently outperforms square root for these variables, giving better approximations to normality and likely better-behaved residuals in models.
#transform notvisible and inburrow - left skew, so reflect then transform:
# Reflect notvisible
reflect_const_notvisible <- max(NOV$notvisible, na.rm=TRUE) + 1
NOV$notvisible_reflected <- reflect_const_notvisible - NOV$notvisible
# Log+1 transform reflected notvisible
NOV$notvisible_log <- log(NOV$notvisible_reflected + 1)
# Reflect inburrow
reflect_const_inburrow <- max(NOV$inburrow, na.rm=TRUE) + 1
NOV$inburrow_reflected <- reflect_const_inburrow - NOV$inburrow
# Log+1 transform reflected inburrow
NOV$inburrow_log <- log(NOV$inburrow_reflected + 1)
# Compare to original data
dv_names <- c("notvisible", "notvisible_log", "inburrow", "inburrow_log")
psych::describe(NOV[dv_names])
## vars n mean sd median trimmed mad min max range
## notvisible 1 35 470.78 151.85 538.84 488.52 90.63 153.55 600.02 446.47
## notvisible_log 2 35 3.33 2.25 4.15 3.32 2.62 0.69 6.11 5.41
## inburrow 3 35 558.42 84.50 598.50 576.24 2.25 205.61 600.02 394.41
## inburrow_log 4 35 2.16 1.76 1.26 1.97 0.82 0.69 5.98 5.29
## skew kurtosis se
## notvisible -0.73 -0.99 25.67
## notvisible_log -0.11 -1.83 0.38
## inburrow -2.49 6.50 14.28
## inburrow_log 0.81 -0.92 0.30
#notvisible_log looks excellent: skew almost gone (−0.11), tails light — ready for modeling.
#inburrow_log also improved dramatically: from extreme skew (−2.49) to mild positive skew (+0.81) with kurtosis ~normal.
#Use above log+1 transformations for all DV’s.
library(dplyr)
dv_data <- dplyr::select(NOV, body_log, allfront_log, bold_log, notvisible_log, inburrow_log)
cor_matrix <- cor(dv_data, use = "pairwise.complete.obs")
print(round(cor_matrix, 2))
## body_log allfront_log bold_log notvisible_log inburrow_log
## body_log 1.00 0.80 0.95 0.60 0.90
## allfront_log 0.80 1.00 0.92 0.60 0.83
## bold_log 0.95 0.92 1.00 0.66 0.93
## notvisible_log 0.60 0.60 0.66 1.00 0.80
## inburrow_log 0.90 0.83 0.93 0.80 1.00
#ran correlation of all factors - DV’s very correlated. Run PCA instead.
pca_result <- prcomp(dv_data, center=TRUE, scale.=TRUE)
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 2.0535 0.7105 0.45799 0.22889 0.12708
## Proportion of Variance 0.8434 0.1009 0.04195 0.01048 0.00323
## Cumulative Proportion 0.8434 0.9443 0.98629 0.99677 1.00000
#one significant PC: PC1 includes significant loadings from all DV’s
#PCA found that molt and length have no bearing on the data
#Making results more interpretable - flipping sign of loadings.
# Flip the first principal component's scores
NOV$PC1 <- -1 * pca_result$x[,1]
# Flip the first principal component's loadings
pca_result$rotation[,1] <- -1 * pca_result$rotation[,1]
# Now higher PC1 scores = higher original DV values = more exposure/more time out of burrow
#Loadings
print(round(pca_result$rotation, 2))
## PC1 PC2 PC3 PC4 PC5
## body_log 0.45 -0.29 -0.60 -0.44 -0.39
## allfront_log 0.44 -0.30 0.77 -0.10 -0.33
## bold_log 0.48 -0.25 -0.02 -0.06 0.84
## notvisible_log 0.38 0.87 0.09 -0.31 0.02
## inburrow_log 0.47 0.11 -0.19 0.83 -0.18
model_snaplat <- lmer(PC1 ~ snaplat + (1 | subject), data=NOV)
model_hot <- lmer(PC1 ~ hot + (1 | subject), data=NOV)
model_hotsnaplat <- lmer(PC1 ~ snaplat + hot + (1 | subject), data=NOV)
model_interact <- lmer(PC1 ~ snaplat * hot + (1 | subject), data=NOV)
anova(model_snaplat, model_hot, model_hotsnaplat, model_interact)
## Data: NOV
## Models:
## model_snaplat: PC1 ~ snaplat + (1 | subject)
## model_hot: PC1 ~ hot + (1 | subject)
## model_hotsnaplat: PC1 ~ snaplat + hot + (1 | subject)
## model_interact: PC1 ~ snaplat * hot + (1 | subject)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_snaplat 4 146.50 152.72 -69.251 138.50
## model_hot 4 149.09 155.31 -70.547 141.09 0.0000 0
## model_hotsnaplat 5 148.00 155.78 -69.002 138.00 3.0904 1 0.07876 .
## model_interact 6 149.91 159.24 -68.953 137.91 0.0976 1 0.75474
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#hot + snaplat is best fit model (or maybe just snaplat)
#Also Ran full model - confirmed no significant effect of other variables (sex, day, tank, molt, length) on PC1.
#Best fit model
summary(model_hotsnaplat)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PC1 ~ snaplat + hot + (1 | subject)
## Data: NOV
##
## REML criterion at convergence: 142.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5654 -0.4131 -0.1673 0.3060 1.7584
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 2.149 1.466
## Residual 1.778 1.334
## Number of obs: 35, groups: subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.27678 0.53567 23.45534 -0.517 0.610
## snaplat 0.06139 0.03550 15.94228 1.729 0.103
## hoty -0.31193 0.45455 16.64482 -0.686 0.502
##
## Correlation of Fixed Effects:
## (Intr) snaplt
## snaplat -0.456
## hoty -0.448 0.011
#No significant effect of either. Same estimate for snaplat if hot is removed from model.
#Check residuals
plot(model_hotsnaplat, which = 1) # Residuals vs fitted
qqnorm(resid(model_hotsnaplat))
qqline(resid(model_hotsnaplat))
#residuals okay
#——————————————————————————–
#I also tested all DV’s separately. It was a mess and not really relevant so I spared you the details.
#Across all five DVs, hot treatment shows no statistically significant effect (all p-values for hot >0.23, most >0.4).
#Point estimates suggest a possible trend toward lower “bold” or “front” behavior in hot treatment (negative estimates for allfront, bold, notvisible, inburrow), but these trends are very weak and not significant.
#Intercepts for each DV are significant - confirms transformed DVs have stable baseline levels, but treatment alone doesn’t explain differences
#Weirdly, there is a trend toward and association between higher latency to snap with more bold behaviors here. Not significant.
#Ultimate conclusion: boldness measures (i.e., variably out of burrow) are not altered by treatment. Or cannot be statistically said to be with this small sample size (18 subjects, 35 observations).
#Likely, this experiment did not properly test boldness. Some individuals were out of burrows tentatively exploring, while others were out building barriers or frantically performing escape behavior. It was not possible to easily put them into scientific behavioral categories.
#other limitations - experiments occurred during the day, not during exploratory time. Lived with many other shrimp, maybe one more a bit closer was not really novel, small small sample (had to throw some out).
library(tidyr)
library(dplyr)
# Gather all transformed DVs into a long format dataframe
NOV_long <- NOV %>%
select(subject, snaplat, body_log, allfront_log, bold_log, notvisible_log, inburrow_log) %>%
pivot_longer(cols = ends_with("_log"), names_to = "DV", values_to = "DV_value")
library(ggplot2)
ggplot(NOV_long, aes(x = snaplat, y = DV_value, color = DV)) +
geom_point(alpha = 0.4, size = 1) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
labs(title = "Snaplat vs All DVs",
x = "Snaplat (snappiness)",
y = "Dependent Variable (log-transformed)",
color = "Dependent Variable") +
theme_minimal(base_size = 14) +
theme(legend.position = "right")
#not_visible and inburrow were reflected before transformation. Positive values=less time visible. These trends aren’t significant and really represent one thing - in or out of the burrow. Probably not a trend, just random.
ggplot(NOV, aes(x = subject, y = rock, color = hot)) +
geom_jitter(position = position_jitter(width = 0.2), size = 3) +
labs(title = "",
x = "Subject",
y = "# times shrimp moved a rock") +
theme_minimal()+
scale_color_manual(values = c("y" = "red", "n" = "grey"))
#Number of times shrimp touched intruder tube - not enough observations to draw conclusions. One subject in hot group touched once, two subjects in control group touched 4 and 5 times.