#Questions:

#How to treat intruder variable? Just a few intruders used, not equal distribution. Mostly used WW and JJ but used others when they were not available options.

NOV<- Behavioral.Trials.NOV.COMPLETE <- read.csv("C:/Users/jessi/OneDrive - College of Charleston/Jesi CofC/Thesis/Behavioral Design/Jesi Thesis Behavior Data/Pilot/Formatted Datasheets/Behavioral Trials NOV COMPLETE.csv")
#Add new composite DV's of interest
NOV$body <- NOV$bodyback + NOV$bodyfront
NOV$allfront <- NOV$headfront +NOV$bodyfront
NOV$bold <- NOV$headfront + NOV$body
NOV$inburrow <- NOV$notvisible + NOV$clawback + NOV$clawfront

#inspect distributions of DV’s - heavy kurtosis and skewness

# Log+1 transformations for right-skewed variables with zeros
NOV$body_log <- log(NOV$body + 1)
NOV$allfront_log <- log(NOV$allfront + 1)
NOV$bold_log <- log(NOV$bold + 1)

#Overall: log+1 consistently outperforms square root for these variables, giving better approximations to normality and likely better-behaved residuals in models.

#transform notvisible and inburrow - left skew, so reflect then transform: 

# Reflect notvisible
reflect_const_notvisible <- max(NOV$notvisible, na.rm=TRUE) + 1
NOV$notvisible_reflected <- reflect_const_notvisible - NOV$notvisible

# Log+1 transform reflected notvisible
NOV$notvisible_log <- log(NOV$notvisible_reflected + 1)

# Reflect inburrow
reflect_const_inburrow <- max(NOV$inburrow, na.rm=TRUE) + 1
NOV$inburrow_reflected <- reflect_const_inburrow - NOV$inburrow

# Log+1 transform reflected inburrow
NOV$inburrow_log <- log(NOV$inburrow_reflected + 1)

# Compare to original data
dv_names <- c("notvisible", "notvisible_log", "inburrow", "inburrow_log")
psych::describe(NOV[dv_names])
##                vars  n   mean     sd median trimmed   mad    min    max  range
## notvisible        1 35 470.78 151.85 538.84  488.52 90.63 153.55 600.02 446.47
## notvisible_log    2 35   3.33   2.25   4.15    3.32  2.62   0.69   6.11   5.41
## inburrow          3 35 558.42  84.50 598.50  576.24  2.25 205.61 600.02 394.41
## inburrow_log      4 35   2.16   1.76   1.26    1.97  0.82   0.69   5.98   5.29
##                 skew kurtosis    se
## notvisible     -0.73    -0.99 25.67
## notvisible_log -0.11    -1.83  0.38
## inburrow       -2.49     6.50 14.28
## inburrow_log    0.81    -0.92  0.30

#notvisible_log looks excellent: skew almost gone (−0.11), tails light — ready for modeling.

#inburrow_log also improved dramatically: from extreme skew (−2.49) to mild positive skew (+0.81) with kurtosis ~normal.

#Use above log+1 transformations for all DV’s.

library(dplyr)
dv_data <- dplyr::select(NOV, body_log, allfront_log, bold_log, notvisible_log, inburrow_log)

cor_matrix <- cor(dv_data, use = "pairwise.complete.obs")
print(round(cor_matrix, 2))
##                body_log allfront_log bold_log notvisible_log inburrow_log
## body_log           1.00         0.80     0.95           0.60         0.90
## allfront_log       0.80         1.00     0.92           0.60         0.83
## bold_log           0.95         0.92     1.00           0.66         0.93
## notvisible_log     0.60         0.60     0.66           1.00         0.80
## inburrow_log       0.90         0.83     0.93           0.80         1.00

#ran correlation of all factors - DV’s very correlated. Run PCA instead.

pca_result <- prcomp(dv_data, center=TRUE, scale.=TRUE)
summary(pca_result)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5
## Standard deviation     2.0535 0.7105 0.45799 0.22889 0.12708
## Proportion of Variance 0.8434 0.1009 0.04195 0.01048 0.00323
## Cumulative Proportion  0.8434 0.9443 0.98629 0.99677 1.00000

Run PCA, test for component significance (not included in knit)

#one significant PC: PC1 includes significant loadings from all DV’s

#PCA found that molt and length have no bearing on the data

#Making results more interpretable - flipping sign of loadings. 

# Flip the first principal component's scores
NOV$PC1 <- -1 * pca_result$x[,1]

# Flip the first principal component's loadings
pca_result$rotation[,1] <- -1 * pca_result$rotation[,1]

# Now higher PC1 scores = higher original DV values = more exposure/more time out of burrow
#Loadings
print(round(pca_result$rotation, 2))
##                 PC1   PC2   PC3   PC4   PC5
## body_log       0.45 -0.29 -0.60 -0.44 -0.39
## allfront_log   0.44 -0.30  0.77 -0.10 -0.33
## bold_log       0.48 -0.25 -0.02 -0.06  0.84
## notvisible_log 0.38  0.87  0.09 -0.31  0.02
## inburrow_log   0.47  0.11 -0.19  0.83 -0.18
model_snaplat <- lmer(PC1 ~ snaplat + (1 | subject), data=NOV)
model_hot <- lmer(PC1 ~ hot + (1 | subject), data=NOV)
model_hotsnaplat <- lmer(PC1 ~ snaplat + hot + (1 | subject), data=NOV)
model_interact <- lmer(PC1 ~ snaplat * hot + (1 | subject), data=NOV)

anova(model_snaplat, model_hot, model_hotsnaplat, model_interact)
## Data: NOV
## Models:
## model_snaplat: PC1 ~ snaplat + (1 | subject)
## model_hot: PC1 ~ hot + (1 | subject)
## model_hotsnaplat: PC1 ~ snaplat + hot + (1 | subject)
## model_interact: PC1 ~ snaplat * hot + (1 | subject)
##                  npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## model_snaplat       4 146.50 152.72 -69.251   138.50                       
## model_hot           4 149.09 155.31 -70.547   141.09 0.0000  0             
## model_hotsnaplat    5 148.00 155.78 -69.002   138.00 3.0904  1    0.07876 .
## model_interact      6 149.91 159.24 -68.953   137.91 0.0976  1    0.75474  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#hot + snaplat is best fit model (or maybe just snaplat)

#Also Ran full model - confirmed no significant effect of other variables (sex, day, tank, molt, length) on PC1.

#Best fit model
summary(model_hotsnaplat)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PC1 ~ snaplat + hot + (1 | subject)
##    Data: NOV
## 
## REML criterion at convergence: 142.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5654 -0.4131 -0.1673  0.3060  1.7584 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 2.149    1.466   
##  Residual             1.778    1.334   
## Number of obs: 35, groups:  subject, 18
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)
## (Intercept) -0.27678    0.53567 23.45534  -0.517    0.610
## snaplat      0.06139    0.03550 15.94228   1.729    0.103
## hoty        -0.31193    0.45455 16.64482  -0.686    0.502
## 
## Correlation of Fixed Effects:
##         (Intr) snaplt
## snaplat -0.456       
## hoty    -0.448  0.011

#No significant effect of either. Same estimate for snaplat if hot is removed from model.

#Check residuals
plot(model_hotsnaplat, which = 1)  # Residuals vs fitted

qqnorm(resid(model_hotsnaplat))
qqline(resid(model_hotsnaplat))

#residuals okay

#——————————————————————————–

#I also tested all DV’s separately. It was a mess and not really relevant so I spared you the details.

Conclusions

#Across all five DVs, hot treatment shows no statistically significant effect (all p-values for hot >0.23, most >0.4).

#Point estimates suggest a possible trend toward lower “bold” or “front” behavior in hot treatment (negative estimates for allfront, bold, notvisible, inburrow), but these trends are very weak and not significant.

#Intercepts for each DV are significant - confirms transformed DVs have stable baseline levels, but treatment alone doesn’t explain differences

#Weirdly, there is a trend toward and association between higher latency to snap with more bold behaviors here. Not significant.

#Ultimate conclusion: boldness measures (i.e., variably out of burrow) are not altered by treatment. Or cannot be statistically said to be with this small sample size (18 subjects, 35 observations).

#Likely, this experiment did not properly test boldness. Some individuals were out of burrows tentatively exploring, while others were out building barriers or frantically performing escape behavior. It was not possible to easily put them into scientific behavioral categories.

#other limitations - experiments occurred during the day, not during exploratory time. Lived with many other shrimp, maybe one more a bit closer was not really novel, small small sample (had to throw some out).

Snaplat vs DV’s plots

library(tidyr)
library(dplyr)

# Gather all transformed DVs into a long format dataframe
NOV_long <- NOV %>%
  select(subject, snaplat, body_log, allfront_log, bold_log, notvisible_log, inburrow_log) %>%
  pivot_longer(cols = ends_with("_log"), names_to = "DV", values_to = "DV_value")
library(ggplot2)

ggplot(NOV_long, aes(x = snaplat, y = DV_value, color = DV)) +
  geom_point(alpha = 0.4, size = 1) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
  labs(title = "Snaplat vs All DVs",
       x = "Snaplat (snappiness)",
       y = "Dependent Variable (log-transformed)",
       color = "Dependent Variable") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "right")

#not_visible and inburrow were reflected before transformation. Positive values=less time visible. These trends aren’t significant and really represent one thing - in or out of the burrow. Probably not a trend, just random.

Extras ———————————————————————————-

Number of times shrimp moved a rock - not enough observations to draw conclusions

ggplot(NOV, aes(x = subject, y = rock, color = hot)) +
  geom_jitter(position = position_jitter(width = 0.2), size = 3) +
  labs(title = "",
       x = "Subject",
       y = "# times shrimp moved a rock") +
  theme_minimal()+
  scale_color_manual(values = c("y" = "red", "n" = "grey"))

#Number of times shrimp touched intruder tube - not enough observations to draw conclusions. One subject in hot group touched once, two subjects in control group touched 4 and 5 times.