#-Key terms: heritability, coefficient of relationship (additive coefficient), IBD, kinship coefficient, ICC.
#Topics reivew:
#-Defining trait for genetic analysis
#-Determining the genetic component ("heritability") of a trait
# - Estimating heritability for a quantitative trait
# - Estimating of heritability for dichotomous traits
setwd('C:/Users/miche/Desktop/BS 858 Stats Genetics')
famdata <- read.csv("hdl.csv")
dim(famdata)
## [1] 226 9
length(unique(famdata$famid))
## [1] 226
head(famdata,10)
## famid kid1id kid2id momid dadid kid1hdl kid2hdl momhdl dadhdl
## 1 1 3 4 2 1 49 54 64 46
## 2 2 7 8 6 5 51 46 48 38
## 3 3 11 12 10 9 36 54 46 41
## 4 4 15 16 14 13 51 67 48 44
## 5 5 19 20 18 17 41 47 24 74
## 6 6 23 24 22 21 39 36 41 48
## 7 7 27 28 26 25 60 76 72 55
## 8 8 31 32 30 29 41 68 67 35
## 9 9 35 36 34 33 42 26 66 46
## 10 10 39 40 38 37 37 57 69 47
tail(famdata,10)
## famid kid1id kid2id momid dadid kid1hdl kid2hdl momhdl dadhdl
## 217 217 867 868 866 865 52 38 47 52
## 218 218 871 872 870 869 37 72 71 45
## 219 219 875 876 874 873 32 17 48 28
## 220 220 879 880 878 877 47 54 72 42
## 221 221 883 884 882 881 51 41 44 63
## 222 222 887 888 886 885 69 60 52 34
## 223 223 891 892 890 889 42 38 56 32
## 224 224 895 896 894 893 36 58 47 42
## 225 225 899 900 898 897 47 52 43 37
## 226 226 903 904 902 901 40 58 48 44
names(famdata)
## [1] "famid" "kid1id" "kid2id" "momid" "dadid" "kid1hdl" "kid2hdl"
## [8] "momhdl" "dadhdl"
summary(famdata)
## famid kid1id kid2id momid dadid
## Min. : 1.00 Min. : 3 Min. : 4 Min. : 2 Min. : 1
## 1st Qu.: 57.25 1st Qu.:228 1st Qu.:229 1st Qu.:227 1st Qu.:226
## Median :113.50 Median :453 Median :454 Median :452 Median :451
## Mean :113.50 Mean :453 Mean :454 Mean :452 Mean :451
## 3rd Qu.:169.75 3rd Qu.:678 3rd Qu.:679 3rd Qu.:677 3rd Qu.:676
## Max. :226.00 Max. :903 Max. :904 Max. :902 Max. :901
## kid1hdl kid2hdl momhdl dadhdl
## Min. : 14.00 Min. :17.00 Min. : 18.00 Min. :14.00
## 1st Qu.: 39.00 1st Qu.:45.25 1st Qu.: 46.00 1st Qu.:35.00
## Median : 46.00 Median :54.00 Median : 54.00 Median :42.00
## Mean : 48.49 Mean :53.98 Mean : 55.87 Mean :44.05
## 3rd Qu.: 56.00 3rd Qu.:60.75 3rd Qu.: 65.00 3rd Qu.:50.00
## Max. :115.00 Max. :90.00 Max. :124.00 Max. :97.00
#Heritability based on parent offspring
#prarental average hdl#
famdata$pavehdl = (famdata$momhdl + famdata$dadhdl)/2
print(lm(kid1hdl ~ momhdl,data=famdata)) #use the first child, regress on mom
##
## Call:
## lm(formula = kid1hdl ~ momhdl, data = famdata)
##
## Coefficients:
## (Intercept) momhdl
## 39.6145 0.1589
#beta of 0.1589
# if x2 = 31.78%
print(lm(kid1hdl ~ dadhdl,data=famdata)) #use the first child, regress on dad
##
## Call:
## lm(formula = kid1hdl ~ dadhdl, data = famdata)
##
## Coefficients:
## (Intercept) dadhdl
## 39.547 0.203
#beta of 0.203
#if x2 40.6%
print(lm(kid1hdl ~ pavehdl,data=famdata)) #use the first child, regress on average
##
## Call:
## lm(formula = kid1hdl ~ pavehdl, data = famdata)
##
## Coefficients:
## (Intercept) pavehdl
## 32.4435 0.3212
#beta of 32.12%
#using summary (instead of only coefficients)
print(summary(lm(kid2hdl ~ momhdl,data=famdata))) #use the second child, regress on mom
##
## Call:
## lm(formula = kid2hdl ~ momhdl, data = famdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.814 -9.143 -0.542 7.139 37.780
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.68606 2.92353 15.63 < 2e-16 ***
## momhdl 0.14850 0.05034 2.95 0.00352 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.99 on 224 degrees of freedom
## Multiple R-squared: 0.03739, Adjusted R-squared: 0.03309
## F-statistic: 8.7 on 1 and 224 DF, p-value: 0.003519
#beta of 0.1485
#if x2 = 29.7%
print(summary(lm(kid2hdl ~ dadhdl,data=famdata))) #use of the second child, regress on dad
##
## Call:
## lm(formula = kid2hdl ~ dadhdl, data = famdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.333 -8.653 -0.304 6.924 36.191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.71190 2.81058 16.620 <2e-16 ***
## dadhdl 0.16504 0.06116 2.698 0.0075 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.03 on 224 degrees of freedom
## Multiple R-squared: 0.03148, Adjusted R-squared: 0.02716
## F-statistic: 7.281 on 1 and 224 DF, p-value: 0.007498
#beta of 0.1650
#if x2 = 33.0%
print(summary(lm(kid2hdl ~ pavehdl,data=famdata))) #use of the second child, regress on average
##
## Call:
## lm(formula = kid2hdl ~ pavehdl, data = famdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.609 -8.897 -0.148 7.032 37.840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.89323 3.73295 10.687 < 2e-16 ***
## pavehdl 0.28201 0.07304 3.861 0.000148 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.83 on 224 degrees of freedom
## Multiple R-squared: 0.0624, Adjusted R-squared: 0.05821
## F-statistic: 14.91 on 1 and 224 DF, p-value: 0.0001477
#p value ***
#beta of 0.07
#Heritability based on sibling pair (ICC calculation - formula depends on):
kidhdl = c(famdata$kid1hdl,famdata$kid2hdl) # combine values into a vector
meankidhdl = mean(kidhdl,na.rm=T)
sdkidhdl = sd(kidhdl,na.rm=T)
N = nrow(famdata)
adjkid1 = famdata$kid1hdl - meankidhdl
adjkid2 = famdata$kid2hdl - meankidhdl
icc = sum(adjkid1*adjkid2)/sdkidhdl^2/(N-1)
h2 = 2*icc
h2
## [1] 0.463714
#Heritability based on siblings (ANOVA - formula depends on )
#each child to be its own obs
kid1 = famdata[,c("famid","kid1id","kid1hdl")]
kid2 = famdata[,c("famid","kid2id","kid2hdl")]
names(kid1) <- c("famid","kidid","hdl")
names(kid2) <- c("famid","kidid","hdl")
kids <- rbind(kid1,kid2)
dim(kids) #452 3
## [1] 452 3
print(summary(aov(hdl ~ as.factor(famid),data=kids)))
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(famid) 225 48187 214.2 1.609 0.000192 ***
## Residuals 226 30080 133.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#obtain msb abd msw #each family is a cluster
#k=2
#2*ICC
2*(214.2-133.1) /(214.2+(2-1)*133.1)
## [1] 0.4670314