read files - GRM matrix

GRM <- read.table("/Users/xiaohanj/Documents/rproject/genetics/Genotypes/GRM", header = TRUE, row.names = 1, sep = "")
GRM <- as.matrix(GRM)
colnames(GRM) <- gsub("X", "", colnames(GRM))

read phenotype file

health_data <- read.csv("/Users/xiaohanj/Documents/rproject/genetics/genetics/health_data.csv")
ID <- unique(health_data$ID)

check file information

1) how many animals cannot be mapped between phenotype and grm?

sum(!(ID %in% rownames(GRM)))
## [1] 7

2) distribution of diagnoal values of GRM

hist(diag(GRM), 
     main = "Distribution of diagnoal values of GRM", 
     xlab = "Diagnoal values of GRM", 
     ylab = "Frequency", 
     col = "lightblue", 
     border = "black")

summarize diagnoal values of GRM
summary(diag(GRM))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5597  0.6340  0.6548  0.6551  0.6747  0.7467

4) compare GRM inverse with giv

GRM_inverse<-solve(GRM)
GI.giv <- read.table("/Users/xiaohanj/Documents/rproject/genetics/Genotypes/GI.giv", header = FALSE, sep = "")
count IDs that are in giv file but not in GRM file
# GRM_inverse and giv comparision 
ids <- unique(c(GI.giv$V1, GI.giv$V2))
GRM_giv <- matrix(0, nrow = length(ids), ncol = length(ids))

rownames(GRM_giv) <- ids
colnames(GRM_giv ) <- ids

for(i in 1:nrow(GI.giv)) {
  id1 <- GI.giv[i, "V1"]
  id2 <- GI.giv[i, "V2"]
  value <- GI.giv[i, "V3"]
  GRM_giv[as.character(id1), as.character(id2)] <- value
  GRM_giv[as.character(id2), as.character(id1)] <- value  
}

# count number of IDs that are in giv file but not in GRM file
sum(!(rownames(GRM_giv) %in% rownames(GRM_inverse)))
## [1] 4
compare upper triangle values of GRM inverse and giv
# Find the IDs that are in GRM_giv but not in GRM_inverse
diff_ids <- rownames(GRM_giv)[!(rownames(GRM_giv) %in% rownames(GRM_inverse))]
GRM_giv <- GRM_giv[!(rownames(GRM_giv) %in% diff_ids), !(colnames(GRM_giv) %in% diff_ids)]

# reorder ID
GRM_giv <- GRM_giv[rownames(GRM_inverse), colnames(GRM_inverse)]

# draw point graph to compare
plot(GRM_inverse[upper.tri(GRM_inverse)], GRM_giv[upper.tri(GRM_giv)],
     xlab = "GRM inverse - Upper Triangular Values",
     ylab = "giv - Upper Triangular Values",
     main = "Comparison of Upper Triangular Values between GRM inverse and giv",
     pch = 16,    
     cex = 0.5)   
abline(0, 1, col = "red")  

compare diagnoal values of GRM inverse and giv
plot(diag(GRM_inverse),diag(GRM_giv),
     xlab = "GRM inverse - Diagonal Values",
     ylab = "giv - Diagonal Values",
     main = "Comparison of Diagonal Values between GRM inverse and giv",
     pch = 16,    
     cex = 0.5)   

5) compare GRM with A matrix

load("Am_reduced.RData")
# Identify common IDs
common_ids <- intersect(rownames(Am_reduced), rownames(GRM))

# Subset both matrices by common IDs
Am_reduced_match <- Am_reduced[common_ids, common_ids]
GRM_match <- GRM[common_ids, common_ids]

# obtain upper triangle values
Am_values_upper <- Am_reduced_match[upper.tri(Am_reduced_match, diag = TRUE)]
GRM_values_upper <- GRM_match[upper.tri(GRM_match, diag = TRUE)]
Am_GRM_upper <- data.frame(Am_value = Am_values_upper, GRM_value = GRM_values_upper)


# make violin graph
library(ggplot2)
ggplot(Am_GRM_upper, aes(x = factor(Am_value), y = GRM_value, fill = factor(Am_value))) +
  geom_violin(trim = FALSE) +
  stat_summary(fun = median, geom = "point", size = 2, color = "black") +
  labs(x = "A matrix values", 
       y = "GRM values", 
       fill = "A value",
       title = "Violin Plot of GRM and A matrix") +
  theme_minimal()