Code-part 1

Insert and read data

setwd("C:/Users/faith/OneDrive/Documents/biologie 2024-2025/advanced biostatistics/GROEPSWERK")
data <- read.table("Data1.txt", header = TRUE)
data
##    exp pop  cd    mt
## 1    R   1   0  84.0
## 2    R   1   0  88.6
## 3    R   1   0 102.5
## 4    R   1  10  97.2
## 5    R   1  10 111.7
## 6    R   1  10  91.6
## 7    R   1  20 130.0
## 8    R   1  20 135.0
## 9    R   1  20  97.2
## 10   R   1  50 133.7
## 11   R   1  50 101.8
## 12   R   1  50 123.4
## 13   R   1 100 171.4
## 14   R   1 100 141.8
## 15   R   1 100 155.2
## 16   R   2   0  96.6
## 17   R   2   0 119.4
## 18   R   2   0  98.8
## 19   R   2  10  78.0
## 20   R   2  10 107.1
## 21   R   2  10 107.2
## 22   R   2  20 101.9
## 23   R   2  20  85.2
## 24   R   2  20 103.3
## 25   R   2  50  81.0
## 26   R   2  50  98.3
## 27   R   2  50  82.9
## 28   R   2 100  41.2
## 29   R   2 100  81.5
## 30   R   2 100  80.3
## 31   E   3   0 101.1
## 32   E   3   0 107.3
## 33   E   3   0 114.4
## 34   E   3  10 106.0
## 35   E   3  10 101.4
## 36   E   3  10 148.6
## 37   E   3  20 108.4
## 38   E   3  20 136.3
## 39   E   3  20 166.4
## 40   E   3  50 189.5
## 41   E   3  50 176.0
## 42   E   3  50 203.3
## 43   E   3 100 259.0
## 44   E   3 100 280.8
## 45   E   3 100 225.5
## 46   E   4   0 120.2
## 47   E   4   0  99.8
## 48   E   4   0 104.6
## 49   E   4  10  91.8
## 50   E   4  10 128.4
## 51   E   4  10 134.8
## 52   E   4  20 123.3
## 53   E   4  20 148.5
## 54   E   4  20 116.5
## 55   E   4  50 176.8
## 56   E   4  50 181.5
## 57   E   4  50 196.1
## 58   E   4 100 231.2
## 59   E   4 100 245.3
## 60   E   4 100 222.3
data$exp <- factor(data$exp)
data$pop <- factor(data$pop)

Data exploration

Boxplot R vs E

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
ggplot(data, aes(x = exp, y = mt, fill = exp)) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "MT Expression by Exposure Group", x = "Exposure Group", y = "MT Expression")

Scatterplot with regression line R vs E

ggplot(data, aes(x = cd, y = mt, color = exp)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  labs(title = "Relationship Between Cadmium and MT Expression",
       x = "Cadmium Concentration (µg/g)", y = "MT Expression")
## `geom_smooth()` using formula = 'y ~ x'

Scatterplot with regression line of each population

ggplot(data, aes(x = cd, y = mt, color = pop)) +
  geom_point() +  
  geom_smooth(method = "lm", se = FALSE) +  
  labs(
    title = "Relationship Between Cadmium Exposure and MT Expression",
    x = "Cadmium Concentration (µg/g)",
    y = "MT Expression Level",
    color = "Population"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Model construction

library(lme4)
## Loading required package: Matrix
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 4.2.3
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
model1 <- lmer(mt~cd+exp+cd:exp+(1|pop), data= data)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mt ~ cd + exp + cd:exp + (1 | pop)
##    Data: data
## 
## REML criterion at convergence: 528.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.89300 -0.75941  0.00417  0.77246  2.35987 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  pop      (Intercept) 163.9    12.80   
##  Residual             417.0    20.42   
## Number of obs: 60, groups:  pop, 4
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept) 107.6997    10.4716   2.6161  10.285  0.00347 ** 
## cd            1.4020     0.1032  54.0000  13.579  < 2e-16 ***
## expR         -7.6491    14.8091   2.6161  -0.517  0.64594    
## cd:expR      -1.2850     0.1460  54.0000  -8.801 5.13e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) cd     expR  
## cd      -0.355              
## expR    -0.707  0.251       
## cd:expR  0.251 -0.707 -0.355
anova(model1, ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF  DenDF  F value    Pr(>F)    
## cd      45125   45125     1 54.000 108.2188 1.659e-14 ***
## exp       111     111     1  2.616   0.2668    0.6459    
## cd:exp  32299   32299     1 54.000  77.4603 5.135e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

checking assumptions

plot(model1)

qqnorm(resid(model1))
qqline(resid(model1))

shapiro.test(resid(model1))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model1)
## W = 0.98244, p-value = 0.5401

Post-hoc analyses

em_means1 <- emmeans(model1, pairwise ~ exp|cd, adjust = "bonferroni")
summary(em_means1)
## $emmeans
## cd = 36:
##  exp emmean   SE df lower.CL upper.CL
##  E      158 9.79  2    116.0      200
##  R      104 9.79  2     62.1      146
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## cd = 36:
##  contrast estimate   SE df t.ratio p.value
##  E - R        53.9 13.8  2   3.894  0.0601
## 
## Degrees-of-freedom method: kenward-roger