Required Packages

limma
dplyr

Step 1: Read Expression Data

# Read expression data
df = read.delim("raw_expression_data.txt", row.names = 1, header = TRUE)
# Read sample information
target = read.delim("sample_info.txt")

Step 2: Match Expression Data with Sample Information

# Ensure expression data and target are in the same order
df = df[, target$ID]

Step 4: Data Quality Check

# Check for any NA, 0, or negative values
max(df)
min(df)

# Convert NA values to 0
df[is.na(df)] = 0

Step 5: Normalization

df = normalizeBetweenArrays(df, method = "quantile")

Step 6: Log Transformation

# Convert to log scale
exp = log2(df)

Step 7: Create Design Matrix

# Create design matrix based on the group
conditions = paste(target$Group)
conditions = factor(conditions, levels=unique(conditions))
design = model.matrix(~0 + conditions)
colnames(design) = levels(conditions)
unique(conditions)

Step 8: Fit Linear Model

# Fit the design to the expression data
fit = lmFit(exp, design)
# Make contrast matrix
contrast.matrix <- makeContrasts(deg1 = Group2 - Group1, deg2 = Group3 - Group1, levels = design)

Step 9: Apply Contrasts

# Fit contrast matrix
fit.cont = contrasts.fit(fit, contrast.matrix)

Step 10: Empirical Bayes Moderation

# Fit eBayes
fit.cont = eBayes(fit.cont)

Step 11: Extract Differentially Expressed Genes (DEGs)

# Generate DEGs by comparison
res1 = topTable(fit.cont, coef = "degs1", adjust.method = "BH", number = nrow(exp), sort.by = 'none')
res2 = topTable(fit.cont, coef = "degs2", adjust.method = "BH", number = nrow(exp), sort.by = 'none')

This limma workflow allows for the identification of differentially expressed genes across different conditions, accounting for multiple testing and ensuring robust statistical inference.