#-Key terms: heritability, coefficient of relationship (additive coefficient), IBD, kinship coefficient, ICC.

#Topics reivew:
#-Defining trait for genetic analysis
#-Determining the genetic component ("heritability") of a trait     
#     - Estimating heritability for a quantitative trait
#     - Estimating of heritability for dichotomous traits 

Read in Data & Data Structure, hdl dataset

Summary

setwd('C:/Users/miche/Desktop/BS 858 Stats Genetics')

famdata <- read.csv("hdl.csv")
  dim(famdata) 
## [1] 226   9
  length(unique(famdata$famid))
## [1] 226
  head(famdata,10)
##    famid kid1id kid2id momid dadid kid1hdl kid2hdl momhdl dadhdl
## 1      1      3      4     2     1      49      54     64     46
## 2      2      7      8     6     5      51      46     48     38
## 3      3     11     12    10     9      36      54     46     41
## 4      4     15     16    14    13      51      67     48     44
## 5      5     19     20    18    17      41      47     24     74
## 6      6     23     24    22    21      39      36     41     48
## 7      7     27     28    26    25      60      76     72     55
## 8      8     31     32    30    29      41      68     67     35
## 9      9     35     36    34    33      42      26     66     46
## 10    10     39     40    38    37      37      57     69     47
  tail(famdata,10)
##     famid kid1id kid2id momid dadid kid1hdl kid2hdl momhdl dadhdl
## 217   217    867    868   866   865      52      38     47     52
## 218   218    871    872   870   869      37      72     71     45
## 219   219    875    876   874   873      32      17     48     28
## 220   220    879    880   878   877      47      54     72     42
## 221   221    883    884   882   881      51      41     44     63
## 222   222    887    888   886   885      69      60     52     34
## 223   223    891    892   890   889      42      38     56     32
## 224   224    895    896   894   893      36      58     47     42
## 225   225    899    900   898   897      47      52     43     37
## 226   226    903    904   902   901      40      58     48     44
  names(famdata)
## [1] "famid"   "kid1id"  "kid2id"  "momid"   "dadid"   "kid1hdl" "kid2hdl"
## [8] "momhdl"  "dadhdl"
summary(famdata)
##      famid            kid1id        kid2id        momid         dadid    
##  Min.   :  1.00   Min.   :  3   Min.   :  4   Min.   :  2   Min.   :  1  
##  1st Qu.: 57.25   1st Qu.:228   1st Qu.:229   1st Qu.:227   1st Qu.:226  
##  Median :113.50   Median :453   Median :454   Median :452   Median :451  
##  Mean   :113.50   Mean   :453   Mean   :454   Mean   :452   Mean   :451  
##  3rd Qu.:169.75   3rd Qu.:678   3rd Qu.:679   3rd Qu.:677   3rd Qu.:676  
##  Max.   :226.00   Max.   :903   Max.   :904   Max.   :902   Max.   :901  
##     kid1hdl          kid2hdl          momhdl           dadhdl     
##  Min.   : 14.00   Min.   :17.00   Min.   : 18.00   Min.   :14.00  
##  1st Qu.: 39.00   1st Qu.:45.25   1st Qu.: 46.00   1st Qu.:35.00  
##  Median : 46.00   Median :54.00   Median : 54.00   Median :42.00  
##  Mean   : 48.49   Mean   :53.98   Mean   : 55.87   Mean   :44.05  
##  3rd Qu.: 56.00   3rd Qu.:60.75   3rd Qu.: 65.00   3rd Qu.:50.00  
##  Max.   :115.00   Max.   :90.00   Max.   :124.00   Max.   :97.00

Using Parent Offspring Pair

#Heritability based on parent offspring

  #prarental average hdl#
famdata$pavehdl = (famdata$momhdl + famdata$dadhdl)/2

print(lm(kid1hdl ~ momhdl,data=famdata)) #use the first child, regress on mom
## 
## Call:
## lm(formula = kid1hdl ~ momhdl, data = famdata)
## 
## Coefficients:
## (Intercept)       momhdl  
##     39.6145       0.1589
  #beta of 0.1589 
  # if x2 = 31.78%
print(lm(kid1hdl ~ dadhdl,data=famdata)) #use the first child, regress on dad
## 
## Call:
## lm(formula = kid1hdl ~ dadhdl, data = famdata)
## 
## Coefficients:
## (Intercept)       dadhdl  
##      39.547        0.203
  #beta of 0.203
  #if x2 40.6% 
print(lm(kid1hdl ~ pavehdl,data=famdata)) #use the first child, regress on average
## 
## Call:
## lm(formula = kid1hdl ~ pavehdl, data = famdata)
## 
## Coefficients:
## (Intercept)      pavehdl  
##     32.4435       0.3212
  #beta of 32.12%

  #using summary (instead of only coefficients)
print(summary(lm(kid2hdl ~ momhdl,data=famdata))) #use the second child, regress on mom
## 
## Call:
## lm(formula = kid2hdl ~ momhdl, data = famdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.814  -9.143  -0.542   7.139  37.780 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 45.68606    2.92353   15.63  < 2e-16 ***
## momhdl       0.14850    0.05034    2.95  0.00352 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.99 on 224 degrees of freedom
## Multiple R-squared:  0.03739,    Adjusted R-squared:  0.03309 
## F-statistic:   8.7 on 1 and 224 DF,  p-value: 0.003519
  #beta of 0.1485 
  #if x2 = 29.7%
print(summary(lm(kid2hdl ~ dadhdl,data=famdata))) #use of the second child, regress on dad
## 
## Call:
## lm(formula = kid2hdl ~ dadhdl, data = famdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.333  -8.653  -0.304   6.924  36.191 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 46.71190    2.81058  16.620   <2e-16 ***
## dadhdl       0.16504    0.06116   2.698   0.0075 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.03 on 224 degrees of freedom
## Multiple R-squared:  0.03148,    Adjusted R-squared:  0.02716 
## F-statistic: 7.281 on 1 and 224 DF,  p-value: 0.007498
  #beta of 0.1650
  #if x2 = 33.0%
print(summary(lm(kid2hdl ~ pavehdl,data=famdata))) #use of the second child, regress on average 
## 
## Call:
## lm(formula = kid2hdl ~ pavehdl, data = famdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.609  -8.897  -0.148   7.032  37.840 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.89323    3.73295  10.687  < 2e-16 ***
## pavehdl      0.28201    0.07304   3.861 0.000148 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.83 on 224 degrees of freedom
## Multiple R-squared:  0.0624, Adjusted R-squared:  0.05821 
## F-statistic: 14.91 on 1 and 224 DF,  p-value: 0.0001477
  #p value *** 
  #beta of 0.07

Using Sib Pair

#Heritability based on sibling pair (ICC calculation - formula depends on): 
kidhdl = c(famdata$kid1hdl,famdata$kid2hdl) # combine values into a vector
    meankidhdl = mean(kidhdl,na.rm=T) 
    sdkidhdl = sd(kidhdl,na.rm=T)
    N = nrow(famdata)

  adjkid1 = famdata$kid1hdl - meankidhdl
  adjkid2 = famdata$kid2hdl - meankidhdl
icc = sum(adjkid1*adjkid2)/sdkidhdl^2/(N-1)

h2 = 2*icc
h2
## [1] 0.463714
#Heritability based on siblings (ANOVA - formula depends on )
  #each child to be its own obs
kid1 = famdata[,c("famid","kid1id","kid1hdl")]
kid2 = famdata[,c("famid","kid2id","kid2hdl")]
names(kid1) <- c("famid","kidid","hdl")
names(kid2) <- c("famid","kidid","hdl")
kids <- rbind(kid1,kid2)
  dim(kids) #452 3
## [1] 452   3
print(summary(aov(hdl ~ as.factor(famid),data=kids))) 
##                   Df Sum Sq Mean Sq F value   Pr(>F)    
## as.factor(famid) 225  48187   214.2   1.609 0.000192 ***
## Residuals        226  30080   133.1                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  #obtain msb abd msw #each family is a cluster
  #k=2 
  #2*ICC
2*(214.2-133.1) /(214.2+(2-1)*133.1)
## [1] 0.4670314