Insert and read data
setwd("C:/Users/faith/OneDrive/Documents/biologie 2024-2025/advanced biostatistics/GROEPSWERK")
data <- read.table("Data1.txt", header = TRUE)
data
## exp pop cd mt
## 1 R 1 0 84.0
## 2 R 1 0 88.6
## 3 R 1 0 102.5
## 4 R 1 10 97.2
## 5 R 1 10 111.7
## 6 R 1 10 91.6
## 7 R 1 20 130.0
## 8 R 1 20 135.0
## 9 R 1 20 97.2
## 10 R 1 50 133.7
## 11 R 1 50 101.8
## 12 R 1 50 123.4
## 13 R 1 100 171.4
## 14 R 1 100 141.8
## 15 R 1 100 155.2
## 16 R 2 0 96.6
## 17 R 2 0 119.4
## 18 R 2 0 98.8
## 19 R 2 10 78.0
## 20 R 2 10 107.1
## 21 R 2 10 107.2
## 22 R 2 20 101.9
## 23 R 2 20 85.2
## 24 R 2 20 103.3
## 25 R 2 50 81.0
## 26 R 2 50 98.3
## 27 R 2 50 82.9
## 28 R 2 100 41.2
## 29 R 2 100 81.5
## 30 R 2 100 80.3
## 31 E 3 0 101.1
## 32 E 3 0 107.3
## 33 E 3 0 114.4
## 34 E 3 10 106.0
## 35 E 3 10 101.4
## 36 E 3 10 148.6
## 37 E 3 20 108.4
## 38 E 3 20 136.3
## 39 E 3 20 166.4
## 40 E 3 50 189.5
## 41 E 3 50 176.0
## 42 E 3 50 203.3
## 43 E 3 100 259.0
## 44 E 3 100 280.8
## 45 E 3 100 225.5
## 46 E 4 0 120.2
## 47 E 4 0 99.8
## 48 E 4 0 104.6
## 49 E 4 10 91.8
## 50 E 4 10 128.4
## 51 E 4 10 134.8
## 52 E 4 20 123.3
## 53 E 4 20 148.5
## 54 E 4 20 116.5
## 55 E 4 50 176.8
## 56 E 4 50 181.5
## 57 E 4 50 196.1
## 58 E 4 100 231.2
## 59 E 4 100 245.3
## 60 E 4 100 222.3
data$exp <- factor(data$exp)
data$pop <- factor(data$pop)
Data exploration
Boxplot R vs E
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
ggplot(data, aes(x = exp, y = mt, fill = exp)) +
geom_boxplot() +
theme_minimal() +
labs(title = "MT Expression by Exposure Group", x = "Exposure Group", y = "MT Expression")

Scatterplot with regression line R vs E
ggplot(data, aes(x = cd, y = mt, color = exp)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
theme_minimal() +
labs(title = "Relationship Between Cadmium and MT Expression",
x = "Cadmium Concentration (µg/g)", y = "MT Expression")
## `geom_smooth()` using formula = 'y ~ x'

Scatterplot with regression line of each population
ggplot(data, aes(x = cd, y = mt, color = pop)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = "Relationship Between Cadmium Exposure and MT Expression",
x = "Cadmium Concentration (µg/g)",
y = "MT Expression Level",
color = "Population"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Model construction
library(lme4)
## Loading required package: Matrix
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 4.2.3
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
model1 <- lmer(mt~cd+exp+cd:exp+(1|pop), data= data)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mt ~ cd + exp + cd:exp + (1 | pop)
## Data: data
##
## REML criterion at convergence: 528.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.89300 -0.75941 0.00417 0.77246 2.35987
##
## Random effects:
## Groups Name Variance Std.Dev.
## pop (Intercept) 163.9 12.80
## Residual 417.0 20.42
## Number of obs: 60, groups: pop, 4
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 107.6997 10.4716 2.6161 10.285 0.00347 **
## cd 1.4020 0.1032 54.0000 13.579 < 2e-16 ***
## expR -7.6491 14.8091 2.6161 -0.517 0.64594
## cd:expR -1.2850 0.1460 54.0000 -8.801 5.13e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cd expR
## cd -0.355
## expR -0.707 0.251
## cd:expR 0.251 -0.707 -0.355
anova(model1, ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## cd 45125 45125 1 54.000 108.2188 1.659e-14 ***
## exp 111 111 1 2.616 0.2668 0.6459
## cd:exp 32299 32299 1 54.000 77.4603 5.135e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
checking assumptions
plot(model1)

qqnorm(resid(model1))
qqline(resid(model1))

shapiro.test(resid(model1))
##
## Shapiro-Wilk normality test
##
## data: resid(model1)
## W = 0.98244, p-value = 0.5401
Post-hoc analyses
em_means1 <- emmeans(model1, pairwise ~ exp|cd, adjust = "bonferroni")
summary(em_means1)
## $emmeans
## cd = 36:
## exp emmean SE df lower.CL upper.CL
## E 158 9.79 2 116.0 200
## R 104 9.79 2 62.1 146
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## cd = 36:
## contrast estimate SE df t.ratio p.value
## E - R 53.9 13.8 2 3.894 0.0601
##
## Degrees-of-freedom method: kenward-roger