read files - GRM matrix
GRM <- read.table("/Users/xiaohanj/Documents/rproject/genetics/Genotypes/GRM", header = TRUE, row.names = 1, sep = "")
GRM <- as.matrix(GRM)
colnames(GRM) <- gsub("X", "", colnames(GRM))
read phenotype file
health_data <- read.csv("/Users/xiaohanj/Documents/rproject/genetics/genetics/health_data.csv")
ID <- unique(health_data$ID)
check file information
1) how many animals cannot be mapped between phenotype and grm?
sum(!(ID %in% rownames(GRM)))
## [1] 7
2) distribution of diagnoal values of GRM
hist(diag(GRM),
main = "Distribution of diagnoal values of GRM",
xlab = "Diagnoal values of GRM",
ylab = "Frequency",
col = "lightblue",
border = "black")

summarize diagnoal values of GRM
summary(diag(GRM))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5597 0.6340 0.6548 0.6551 0.6747 0.7467
4) compare GRM inverse with giv
GRM_inverse<-solve(GRM)
GI.giv <- read.table("/Users/xiaohanj/Documents/rproject/genetics/Genotypes/GI.giv", header = FALSE, sep = "")
count IDs that are in giv file but not in GRM file
# GRM_inverse and giv comparision
ids <- unique(c(GI.giv$V1, GI.giv$V2))
GRM_giv <- matrix(0, nrow = length(ids), ncol = length(ids))
rownames(GRM_giv) <- ids
colnames(GRM_giv ) <- ids
for(i in 1:nrow(GI.giv)) {
id1 <- GI.giv[i, "V1"]
id2 <- GI.giv[i, "V2"]
value <- GI.giv[i, "V3"]
GRM_giv[as.character(id1), as.character(id2)] <- value
GRM_giv[as.character(id2), as.character(id1)] <- value
}
# count number of IDs that are in giv file but not in GRM file
sum(!(rownames(GRM_giv) %in% rownames(GRM_inverse)))
## [1] 4
compare upper triangle values of GRM inverse and giv
# Find the IDs that are in GRM_giv but not in GRM_inverse
diff_ids <- rownames(GRM_giv)[!(rownames(GRM_giv) %in% rownames(GRM_inverse))]
GRM_giv <- GRM_giv[!(rownames(GRM_giv) %in% diff_ids), !(colnames(GRM_giv) %in% diff_ids)]
# reorder ID
GRM_giv <- GRM_giv[rownames(GRM_inverse), colnames(GRM_inverse)]
# draw point graph to compare
plot(GRM_inverse[upper.tri(GRM_inverse)], GRM_giv[upper.tri(GRM_giv)],
xlab = "GRM inverse - Upper Triangular Values",
ylab = "giv - Upper Triangular Values",
main = "Comparison of Upper Triangular Values between GRM inverse and giv",
pch = 16,
cex = 0.5)
abline(0, 1, col = "red")

compare diagnoal values of GRM inverse and giv
plot(diag(GRM_inverse),diag(GRM_giv),
xlab = "GRM inverse - Diagonal Values",
ylab = "giv - Diagonal Values",
main = "Comparison of Diagonal Values between GRM inverse and giv",
pch = 16,
cex = 0.5)

5) compare GRM with A matrix
load("Am_reduced.RData")
# Identify common IDs
common_ids <- intersect(rownames(Am_reduced), rownames(GRM))
# Subset both matrices by common IDs
Am_reduced_match <- Am_reduced[common_ids, common_ids]
GRM_match <- GRM[common_ids, common_ids]
# obtain upper triangle values
Am_values_upper <- Am_reduced_match[upper.tri(Am_reduced_match, diag = TRUE)]
GRM_values_upper <- GRM_match[upper.tri(GRM_match, diag = TRUE)]
Am_GRM_upper <- data.frame(Am_value = Am_values_upper, GRM_value = GRM_values_upper)
# make violin graph
library(ggplot2)
ggplot(Am_GRM_upper, aes(x = factor(Am_value), y = GRM_value, fill = factor(Am_value))) +
geom_violin(trim = FALSE) +
stat_summary(fun = median, geom = "point", size = 2, color = "black") +
labs(x = "A matrix values",
y = "GRM values",
fill = "A value",
title = "Violin Plot of GRM and A matrix") +
theme_minimal()
