#Topics to review:
  #What is genetic linage?
  #Parametric linkage analysis
   # LOD score method
   # Factors influencing linage analysis
     #1. Phenotypes and penetrance
     #2. heterogeneity 
  #Linkage analysis using sibling pairs
   # Affected sib pairs
   # Quantitative trait analysis

Linkage Analysis

sibpair <- read.csv("HW2_sibpairs.csv")

head(sibpair)
##   famid id1 id2 ibd trait1 trait2  resid1  resid2 disease1 disease2
## 1     1   1   2   1   30.7   33.0  0.2285  0.8570        1        1
## 2     2   3   4   1   29.0   31.0 -0.2430 -0.0543        2        2
## 3     3   5   6   0   31.0   28.6  0.3557 -0.4258        2        2
## 4     4   7   8   0   28.1   29.7 -0.6286 -0.1067        2        1
## 5     5   9  10   2   29.1   27.7 -0.3164 -0.6969        2        1
## 6     6  11  12   1   31.1   29.8  0.3804 -0.0122        1        1
n <- table(sibpair $ibd) #identity by descent 

print(n) 
## 
##   0   1   2 
## 275 600 325
print(n/sum(n)) 
## 
##         0         1         2 
## 0.2291667 0.5000000 0.2708333

Affected sib-Pairs

#need to restrict to affected sib pairs: table to check affection status

table(sibpair$disease1,sibpair$disease2) 
##    
##       1   2
##   1 229 296
##   2 301 374
asp <- sibpair[sibpair$disease1==2 & sibpair$disease2==2,]
print(asp[1:10,])
##    famid id1 id2 ibd trait1 trait2  resid1  resid2 disease1 disease2
## 2      2   3   4   1   29.0   31.0 -0.2430 -0.0543        2        2
## 3      3   5   6   0   31.0   28.6  0.3557 -0.4258        2        2
## 12    12  23  24   1   29.2   29.7 -0.2459 -0.0897        2        2
## 15    15  29  30   2   30.7   31.4  0.2875  0.3908        2        2
## 20    20  39  40   2   31.3   30.8  0.2817  0.2261        2        2
## 23    23  45  46   2   30.0   28.8  0.0028 -0.4146        2        2
## 24    24  47  48   2   31.6   30.4  0.5209  0.2066        2        2
## 29    29  57  58   1   31.0   30.5  0.2815  0.1879        2        2
## 30    30  59  60   1   28.9   30.5 -0.3377  0.1781        2        2
## 32    32  63  64   0   32.3   29.9  0.7601 -0.1831        2        2
ibd.count <- table(asp$ibd)
print(ibd.count)
## 
##   0   1   2 
##  72 176 126
print(ibd.count/sum(ibd.count))
## 
##         0         1         2 
## 0.1925134 0.4705882 0.3368984
#one-sided test- increased sharing
goodness.test <- chisq.test(ibd.count,p=c(0.25,0.5,0.25))
print(goodness.test) 
## 
##  Chi-squared test for given probabilities
## 
## data:  ibd.count
## X-squared = 16.888, df = 2, p-value = 0.0002152
print(goodness.test$p.value/2) 
## [1] 0.0001076099
#p-value is sig, evidence of linkage between marker and disease status

Mean Sharing Test

N <- nrow(asp)
n2 <- sum(asp$ibd==2)
n1 <- sum(asp$ibd==1)
T2 <- sqrt(2*N)*(n1/N + 2*n2/N - 1)
print(T2)
## [1] 3.948871
pval2 <- pt(T2,N-1,lower=F) 
print(pval2) 
## [1] 4.693788e-05

Quantitative Trait Analysis -Originial Haseman_Eltson Approach

##################################################################

sibpair$Yd <- (sibpair$resid1 - sibpair$resid2)^2

sibpair$pi <- sibpair$ibd/2

print(sibpair[1:10,])
##    famid id1 id2 ibd trait1 trait2  resid1  resid2 disease1 disease2
## 1      1   1   2   1   30.7   33.0  0.2285  0.8570        1        1
## 2      2   3   4   1   29.0   31.0 -0.2430 -0.0543        2        2
## 3      3   5   6   0   31.0   28.6  0.3557 -0.4258        2        2
## 4      4   7   8   0   28.1   29.7 -0.6286 -0.1067        2        1
## 5      5   9  10   2   29.1   27.7 -0.3164 -0.6969        2        1
## 6      6  11  12   1   31.1   29.8  0.3804 -0.0122        1        1
## 7      7  13  14   1   31.4   29.4  0.4067 -0.1985        2        1
## 8      8  15  16   2   30.1   28.9  0.0110 -0.3645        1        2
## 9      9  17  18   1   28.7   29.5 -0.4094 -0.1296        2        1
## 10    10  19  20   1   32.5   29.7  0.8240 -0.1536        1        2
##            Yd  pi
## 1  0.39501225 0.5
## 2  0.03560769 0.5
## 3  0.61074225 0.0
## 4  0.27237961 0.0
## 5  0.14478025 1.0
## 6  0.15413476 0.5
## 7  0.36626704 0.5
## 8  0.14100025 1.0
## 9  0.07828804 0.5
## 10 0.95570176 0.5
original.he <- lm(Yd ~ pi,data=sibpair)

#as expected, slope is negative, p-value indictes high significance
#marker is linked to the quantitative trait locus in sib

x<- summary(original.he)
print(summary(original.he))
## 
## Call:
## lm(formula = Yd ~ pi, data = sibpair)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2070 -0.1543 -0.1043  0.0230  6.7799 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20716    0.02081   9.956   <2e-16 ***
## pi          -0.07402    0.03307  -2.238   0.0254 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4043 on 1198 degrees of freedom
## Multiple R-squared:  0.004164,   Adjusted R-squared:  0.003333 
## F-statistic:  5.01 on 1 and 1198 DF,  p-value: 0.02539
tvalue<- summary(original.he)$coef[2,3] 
tvalue
## [1] -2.238273
onesidedp <- pt(tvalue,original.he$df) 
#left tail (since t #value is neg)
print(onesidedp) 
## [1] 0.0126932
lod <- tvalue^2/(2*log(10)) #log in R is the narual log
print(lod) #HE stats
## [1] 1.087878

Quantitative Trait Analysis - Haseman_Eltson Approach

##################################################################

#product of centered phenotype and the slope is expected to be positive 
popmean <- mean(c(sibpair$resid1,sibpair$resid2)) #vector

sibpair$Yp <- (sibpair$resid1 - popmean)*(sibpair$resid2-popmean)

revised.he <- lm(Yp ~ pi,data=sibpair)

print(summary(revised.he))
## 
## Call:
## lm(formula = Yp ~ pi, data = sibpair)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98541 -0.04267 -0.01135  0.03541  0.62567 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.001077   0.005402  -0.199    0.842    
## pi           0.043593   0.008586   5.077 4.44e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.105 on 1198 degrees of freedom
## Multiple R-squared:  0.02107,    Adjusted R-squared:  0.02025 
## F-statistic: 25.78 on 1 and 1198 DF,  p-value: 4.435e-07
revised.tvalue <- summary(revised.he)$coef[2,3] #positive t values
revised.onesidedp <- pt(revised.tvalue,revised.he$df,lower=F)

print(revised.onesidedp)
## [1] 2.217511e-07
revised.lod <- revised.tvalue^2/(2*log(10))
print(revised.lod)
## [1] 5.597882