virco <- read.csv(file="Virco_data.csv", header=T, sep=",")
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
attach(virco)
Trait <- NFV.Fold - IDV.Fold
VircoGeno <- data.frame(virco[,substr(names(virco),1,1)=="P"]!="-")
Trait.c <- Trait[!is.na(Trait)]
VircoGeno.c <- VircoGeno[!is.na(Trait),]
RegRF <- randomForest(VircoGeno.c, Trait.c, importance=TRUE)
RegRF
##
## Call:
## randomForest(x = VircoGeno.c, y = Trait.c, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 33
##
## Mean of squared residuals: 5676.754
## % Var explained: 13.94
varImpPlot(RegRF)

#RF with missing SNP data|single imputation
fmsURL <- "http://www.stat-gen.org/book.e1/data/FMS_data.txt"
fms <- read.delim(file=fmsURL, header=T, sep="\t")
attach(fms)
Trait <- NDRM.CH[Race=="Caucasian" & !is.na(Race) & !is.na(NDRM.CH)]
NamesAkt1Snps <- names(fms)[substr(names(fms), 1, 4)=="akt1"]
FMSgeno <- fms[,is.element(names(fms), NamesAkt1Snps)][
Race=="Caucasian" & !is.na(Race) & !is.na(NDRM.CH),]
dim(FMSgeno)
## [1] 777 24
round(apply(is.na(FMSgeno), 2, sum)/dim(FMSgeno)[1],3)
## akt1_t22932c akt1_g15129a akt1_g14803t
## 0.069 0.067 0.021
## akt1_c10744t_c12886t akt1_t10726c_t12868c akt1_t10598a_t12740a
## 0.071 0.075 0.071
## akt1_c9756a_c11898t akt1_t8407g akt1_a7699g
## 0.066 0.069 0.067
## akt1_c6148t_c8290t akt1_c6024t_c8166t akt1_c5854t_c7996t
## 0.021 0.066 0.021
## akt1_c832g_c3359g akt1_g288c akt1_g1780a_g363a
## 0.021 0.021 0.021
## akt1_g2347t_g205t akt1_g2375a_g233a akt1_g4362c
## 0.066 0.066 0.069
## akt1_c15676t akt1_a15756t akt1_g20703a
## 0.021 0.021 0.021
## akt1_g22187a akt1_a22889g akt1_g23477a
## 0.071 0.021 0.021
library(randomForest)
FMSgenoRough <- na.roughfix(FMSgeno)
table(FMSgeno$"akt1_t22932c")
##
## CC TC TT
## 3 55 665
round(apply(is.na(FMSgenoRough), 2, sum)/dim(FMSgeno)[1],3)
## akt1_t22932c akt1_g15129a akt1_g14803t
## 0 0 0
## akt1_c10744t_c12886t akt1_t10726c_t12868c akt1_t10598a_t12740a
## 0 0 0
## akt1_c9756a_c11898t akt1_t8407g akt1_a7699g
## 0 0 0
## akt1_c6148t_c8290t akt1_c6024t_c8166t akt1_c5854t_c7996t
## 0 0 0
## akt1_c832g_c3359g akt1_g288c akt1_g1780a_g363a
## 0 0 0
## akt1_g2347t_g205t akt1_g2375a_g233a akt1_g4362c
## 0 0 0
## akt1_c15676t akt1_a15756t akt1_g20703a
## 0 0 0
## akt1_g22187a akt1_a22889g akt1_g23477a
## 0 0 0
RandForRough <- randomForest(FMSgenoRough, Trait, importance=TRUE)
RandForRough$"importance"[order(RandForRough$"importance"[,1],
decreasing=TRUE),]
## %IncMSE IncNodePurity
## akt1_t10726c_t12868c 229.298998 26883.175
## akt1_t8407g 213.128158 17441.659
## akt1_g14803t 125.715662 19648.549
## akt1_g288c 114.945974 23556.695
## akt1_g15129a 110.494154 14908.236
## akt1_c6148t_c8290t 105.427665 16280.954
## akt1_a15756t 98.248661 24600.903
## akt1_c832g_c3359g 90.335295 10677.514
## akt1_g2347t_g205t 85.429095 17414.339
## akt1_t10598a_t12740a 80.734216 17823.259
## akt1_a22889g 80.026485 26946.885
## akt1_g22187a 68.671443 21745.846
## akt1_a7699g 65.293270 8014.179
## akt1_c6024t_c8166t 49.315844 8928.358
## akt1_c5854t_c7996t 48.862941 18699.723
## akt1_c9756a_c11898t 44.649145 8389.002
## akt1_g2375a_g233a 43.779970 16335.809
## akt1_g23477a 43.671012 20599.598
## akt1_c15676t 38.299891 20898.335
## akt1_g4362c 23.736501 7635.428
## akt1_g1780a_g363a 5.767889 2910.996
## akt1_c10744t_c12886t 4.196829 4025.974
## akt1_g20703a 1.655829 29279.506
## akt1_t22932c -5.887293 16978.598
#RF with missing SNP data|multiple imputation
attach(fms)
## The following objects are masked from fms (pos = 3):
##
## acdc_rs1501299, ace_id, actn3_1671064, actn3_r577x,
## actn3_rs1815739, actn3_rs540874, adrb2_1042713, adrb2_1042714,
## adrb2_rs1042718, adrb3_4994, Age, agrp_5030980, akt1_a15756t,
## akt1_a22889g, akt1_a7699g, akt1_c10744t_c12886t, akt1_c15676t,
## akt1_c5854t_c7996t, akt1_c6024t_c8166t, akt1_c6148t_c8290t,
## akt1_c832g_c3359g, akt1_c9756a_c11898t, akt1_g14803t,
## akt1_g15129a, akt1_g1780a_g363a, akt1_g20703a, akt1_g22187a,
## akt1_g23477a, akt1_g2347t_g205t, akt1_g2375a_g233a,
## akt1_g288c, akt1_g4362c, akt1_t10598a_t12740a,
## akt1_t10726c_t12868c, akt1_t22932c, akt1_t8407g, akt2_2304186,
## akt2_7254617, akt2_969531, akt2_rs892118, ampd1_c34t,
## ankrd6_a545t, ankrd6_a550t, ankrd6_g197805a, ankrd6_i128l,
## ankrd6_k710x, ankrd6_m485l, ankrd6_p636l, ankrd6_q122e,
## ankrd6_t233m, apoe_c472t, ardb1_1801253, b2b, bcl6_1056932,
## bcl6_17797517, bcl6_3774298, bcl6_4686467, bmp2_235768,
## bmp2_rs15705, c8orf68_rs6983944, Calc.post.BMI,
## carp.exon3_snp3, carp_3utr, carp_a8470g, carp_c105t,
## carp_exon3_snp1, carp_exon3_snp2, carp_exon5_snp1,
## carp_exon5_snp2, carp_exon6_ag, carp_exon8_ct, cast_rs754615,
## cast_rs7724759, cav2_q130e, Center, CHOL, CHOL_HDL_C,
## CK_day_0, CK_day_2, CK_day_4, cntf_g6a, cox5b_11904110,
## cox5b_17022045, CRP, ctsf_572846, D_Post_B_VOL, D_Post_BM_VOL,
## D_Post_F_VOL, D_Post_M_VOL, D_Post_WA_VOL, D_Post_WM_VOL,
## D_Pre_B_VOL, D_Pre_BM_VOL, D_Pre_F_VOL, D_Pre_M_VOL,
## D_Pre_WA_VOL, D_Pre_WM_VOL, D23.CH, D23_DIFF, DBP,
## ddit_rs1053227, Dom.Arm, DRM.CH, DRM_DIFF, esr1_rs1042717,
## esr1_rs1801132, esr1_rs2077647, esr1_rs2228480,
## esr1_rs2234693, esr1_rs9340799, esr2_1256112, esr2_3020450,
## fatso, fbox32_rs2891770, fbox32_rs3739287, fbox32_rs4871385,
## fbox32_rs6690663, fbox32_rs6983944, FINS, FLGU, fst_1423560,
## fst_722910, fstl1_rs9631455, galntl1_rs8005903, gapd_7307229,
## gapd_7971637, gdf8_1225t, gdf8_e164k, gdf8_k153r, gdf8_p198a,
## gdf8_rs1805085, gdf8_rs1805086, gdf8_t55a, Gender,
## gnb3_rs5443, gs_s287nga, HDL_C, HOMA, hsd11b1_rs3753519,
## hsd11b1_rs4844880, hsd11b1_rs846907, hsd11b1_rs846909,
## hsd11b1_rs846910, hsd11b1_rs846911, hsd11b1_rs860185, id,
## igf1_2288377, igf1_pro, igf1_t1245c, igf2_2230949,
## igf2_3213216, igf2_3213220, igf2_3213221, igf2_rs680,
## igfbp3_2132570, igfbp3_6670, il15_1057972, il15_1589241,
## il15_rs2296135, il15ra_2228059, il15ra_3136618, il6_c174g,
## il6_g572c, insig2_rs7566605, irs1_g972r, kchj11_rs5219, LDL_C,
## lep_2167270, lepr_1137100, lepr_1137101, lepr_1805096,
## lepr_8179183, lrp5_snp2, lrp5_snp3, lrp5_snp4, Mean_BP,
## Met_syn, mgst3_4147542, mgst3_7549530, mlck_c37885a,
## mlck_c49t, mlck_g91689t, mulk_3757470, mylk_c37885a,
## mylk_g91689t, myod1_rs2249104, ND_Post_B_VOL, ND_Post_BM_VOL,
## ND_Post_F_VOL, ND_Post_M_VOL, ND_Post_WA_VOL, ND_Post_WM_VOL,
## ND_Pre_B_VOL, ND_Pre_BM_VOL, ND_Pre_F_VOL, ND_Pre_M_VOL,
## ND_Pre_WA_VOL, ND_Pre_WM_VOL, ND23.CH, ND23_DIFF, NDRM.CH,
## NDRM_DIFF, nnmt_g5082t, nos3_rs1799983, nr3c1_rs10482614,
## nr3c1_rs10482616, nr3c1_rs4582314, nr3c1_rs4634384,
## nr3c1_rs6195, opg_2073618, opg_3102735, p2rx2_6560891,
## p2rx2_7966242, p2ry2_2511241, p2ry2_rs1783596, pai1_4g5g,
## pcr13_snp1, pcr15_snp1, pcr15_snp2, pcr15_snp3, pcr15_snp4,
## pcr5_snp1, pcr5_snp2, pcr8_snp1, pgc1_g76039a, pgm1_2284998,
## pik3_rs3173908, pik3cg_4727666, Post.Height, Post.weight,
## Post_D_avg, Post_DRM_Max, Post_LBi_Avg, Post_LTri_Avg,
## Post_ND_avg, Post_NDRM_Max, Post_RBi_Avg, Post_RTri_Avg,
## Post1_D_AVG, Post1_D1, Post1_D2, Post1_D3, Post1_ND_AVG,
## Post1_ND1, Post1_ND2, Post1_ND3, Post2_D_AVG, Post2_D1,
## Post2_D2, Post2_D3, Post2_ND_AVG, Post2_ND1, Post2_ND2,
## Post2_ND3, ppar_gp12a, ppara_135539, ppara_1800206,
## ppara_4252778, pparg_c1a_rs8192678, prdx6_4382766, pre.BMI,
## Pre.height, Pre.HR, Pre.weight, Pre_DRM_Max, Pre_LBi_Avg,
## Pre_LTri_Avg, Pre_NDRM_Max, Pre_RBi_Avg, Pre_RTri_Avg,
## pygm_620006, Race, rankl_17458177, rankl_17536280,
## rankl_17639305, rankl_4531631, resistin_a537c, resistin_c180g,
## resistin_c30t, resistin_c398t, resistin_c980g, resistin_g540a,
## rnf28_c16242t, rs11630261, rs302964, rs849409, runx_12526462,
## SBP, slc35f1_rs10484290, slc35f1_rs12110370, Status,
## syngr2_c886t, tcfl72_12255372, tcfl72_7903146,
## tcfl72_rs12255372, tcfl72_rs7903146, tdp52l1_9321028, Term,
## TG, tnfa_g308a, tnni1_2799672, tnni1_3738287, tpd52l1_3778458,
## tpd52l1_3799736, tpd52l1_4896782, tpd52l1_514096, ucp2_ct,
## V1_D_AVG, V1_D1, V1_D2, V1_D3, V1_ND_AVG, V1_ND1, V1_ND2,
## V1_ND3, V123_D_AVG, V123_ND_AVG, V2_D_AVG, V2_D1, V2_D2,
## V2_D3, V2_ND_AVG, V2_ND1, V2_ND2, V2_ND3, V23_D_AVG,
## V23_ND_AVG, V3_D_AVG, V3_D1, V3_D2, V3_D3, V3_ND_AVG, V3_ND1,
## V3_ND2, V3_ND3, vdr_bsm1, vdr_fok1, vdr_rs731236, vdr_taq1,
## vim_6602186, visfatin_10487820, visfatin_10953502,
## visfatin_2041681, visfatin_3801272, visfatin_6947766,
## visfatin_929604, VLDL_TG
Trait <- NDRM.CH[Race=="Caucasian" & !is.na(Race) & !is.na(NDRM.CH)]
NamesAkt1Snps <- names(fms)[substr(names(fms),1,4)=="akt1"]
FMSgeno <- fms[,is.element(names(fms), NamesAkt1Snps)][Race=="Caucasian" & !is.na(Race) & !is.na(NDRM.CH),]
library(randomForest)
FMSgenoMI <- rfImpute(FMSgeno,Trait)
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 300 | 1250 110.17 |
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 300 | 1254 110.52 |
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 300 | 1253 110.39 |
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 300 | 1251 110.22 |
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 300 | 1253 110.41 |
RandForFinal <- randomForest(FMSgenoMI[,-1], Trait, importance=TRUE)
RandForFinal$"importance"[order(RandForFinal$"importance"[,1], decreasing=TRUE),]
## %IncMSE IncNodePurity
## akt1_t10726c_t12868c 301.776691 26842.288
## akt1_t8407g 250.147820 20386.788
## akt1_g14803t 128.325573 18676.539
## akt1_c6148t_c8290t 113.426625 14019.186
## akt1_g288c 112.116453 22984.734
## akt1_a15756t 110.014610 24530.691
## akt1_c832g_c3359g 103.159673 11671.457
## akt1_g15129a 99.730780 14291.792
## akt1_t10598a_t12740a 89.475808 17621.946
## akt1_a22889g 84.360180 26242.372
## akt1_g2347t_g205t 78.246132 17623.846
## akt1_a7699g 70.591696 10006.024
## akt1_g22187a 63.783009 22982.268
## akt1_c6024t_c8166t 57.223566 9224.043
## akt1_c5854t_c7996t 56.203687 19831.074
## akt1_g2375a_g233a 44.617024 20191.306
## akt1_g23477a 43.400386 21439.565
## akt1_c9756a_c11898t 42.125874 8432.800
## akt1_g4362c 38.967608 10921.861
## akt1_c15676t 31.268554 20817.269
## akt1_c10744t_c12886t 10.639388 8121.024
## akt1_g1780a_g363a 9.071464 3768.972
## akt1_t22932c 1.397213 18646.102
## akt1_g20703a -3.796620 27899.226
table(FMSgenoMI$akt1_t10726c_t12868c, FMSgenoRough$akt1_t10726c_t12868c)
##
## CC TC TT
## CC 597 0 0
## TC 5 139 0
## TT 27 0 9
#Application of logic regression
attach(virco)
## The following objects are masked from virco (pos = 5):
##
## APV.Fold, APV.FoldMatch, ATV.Fold, ATV.FoldMatch, CompMutList,
## IDV.Fold, IDV.FoldMatch, IsolateName, LPV.Fold, LPV.FoldMatch,
## MajorMutList, Method, MinorMutList, NFV.Fold, NFV.FoldMatch,
## P1, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P2, P20,
## P21, P22, P23, P24, P25, P26, P27, P28, P29, P3, P30, P31,
## P32, P33, P34, P35, P36, P37, P38, P39, P4, P40, P41, P42,
## P43, P44, P45, P46, P47, P48, P49, P5, P50, P51, P52, P53,
## P54, P55, P56, P57, P58, P59, P6, P60, P61, P62, P63, P64,
## P65, P66, P67, P68, P69, P7, P70, P71, P72, P73, P74, P75,
## P76, P77, P78, P79, P8, P80, P81, P82, P83, P84, P85, P86,
## P87, P88, P89, P9, P90, P91, P92, P93, P94, P95, P96, P97,
## P98, P99, RefID, RTV.Fold, RTV.FoldMatch, SeqID, SQV.Fold,
## SQV.FoldMatch, SubType, TPV.Fold, TPV.FoldMatch, Type
Trait <- NFV.Fold - IDV.Fold
VircoGeno <- data.frame(virco[,substr(names(virco),1,1)=="P"]!="-")
Trait.c <- Trait[!is.na(Trait)]
VircoGeno.c <- VircoGeno[!is.na(Trait),]
library(LogicReg)
## Loading required package: survival
VircoLogicReg <- logreg(resp=Trait.c, bin=VircoGeno.c, select=1)
plot(VircoLogicReg)

VircoLogicReg
## score 78.337
## 3.59 +679 * ((not P45) and (((not P29) or P4) and (P91 and P25)))
VircoLogicRegMult <- logreg(resp=Trait.c, bin=VircoGeno.c, select=2,
ntrees=2, nleaves=8)
##
## The number of trees in these models is 2
##
## The model size is 8
## The best model with 2 trees of size 8 has a score of 32.4689
plot(VircoLogicRegMult)



VircoLogicRegMult
## 2 trees with 8 leaves: score is 32.469
## 1580 +682 * (P25 and P95) -1580 * ((P36 or (P72 or (not P35))) or ((P47 or (not P73)) or P53))
#Monte Carlo logic regression
library(LogicReg)
attach(virco)
## The following objects are masked from virco (pos = 5):
##
## APV.Fold, APV.FoldMatch, ATV.Fold, ATV.FoldMatch, CompMutList,
## IDV.Fold, IDV.FoldMatch, IsolateName, LPV.Fold, LPV.FoldMatch,
## MajorMutList, Method, MinorMutList, NFV.Fold, NFV.FoldMatch,
## P1, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P2, P20,
## P21, P22, P23, P24, P25, P26, P27, P28, P29, P3, P30, P31,
## P32, P33, P34, P35, P36, P37, P38, P39, P4, P40, P41, P42,
## P43, P44, P45, P46, P47, P48, P49, P5, P50, P51, P52, P53,
## P54, P55, P56, P57, P58, P59, P6, P60, P61, P62, P63, P64,
## P65, P66, P67, P68, P69, P7, P70, P71, P72, P73, P74, P75,
## P76, P77, P78, P79, P8, P80, P81, P82, P83, P84, P85, P86,
## P87, P88, P89, P9, P90, P91, P92, P93, P94, P95, P96, P97,
## P98, P99, RefID, RTV.Fold, RTV.FoldMatch, SeqID, SQV.Fold,
## SQV.FoldMatch, SubType, TPV.Fold, TPV.FoldMatch, Type
## The following objects are masked from virco (pos = 8):
##
## APV.Fold, APV.FoldMatch, ATV.Fold, ATV.FoldMatch, CompMutList,
## IDV.Fold, IDV.FoldMatch, IsolateName, LPV.Fold, LPV.FoldMatch,
## MajorMutList, Method, MinorMutList, NFV.Fold, NFV.FoldMatch,
## P1, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P2, P20,
## P21, P22, P23, P24, P25, P26, P27, P28, P29, P3, P30, P31,
## P32, P33, P34, P35, P36, P37, P38, P39, P4, P40, P41, P42,
## P43, P44, P45, P46, P47, P48, P49, P5, P50, P51, P52, P53,
## P54, P55, P56, P57, P58, P59, P6, P60, P61, P62, P63, P64,
## P65, P66, P67, P68, P69, P7, P70, P71, P72, P73, P74, P75,
## P76, P77, P78, P79, P8, P80, P81, P82, P83, P84, P85, P86,
## P87, P88, P89, P9, P90, P91, P92, P93, P94, P95, P96, P97,
## P98, P99, RefID, RTV.Fold, RTV.FoldMatch, SeqID, SQV.Fold,
## SQV.FoldMatch, SubType, TPV.Fold, TPV.FoldMatch, Type
Trait <- SQV.Fold
VircoGeno <- data.frame(virco[,substr(names(virco),1,1)=="P"]!="-")
Trait.c <- Trait[!is.na(Trait)]
VircoGeno.c <- VircoGeno[!is.na(Trait),]
VircoLogicRegMCMC <- logreg(resp=Trait.c, bin=VircoGeno.c, select=7)
plot(sort(VircoLogicRegMCMC$single), xlab="Sorted SNPs",
ylab="Number of selected models")

names(VircoGeno)[order(VircoLogicRegMCMC$single)]
## [1] "P51" "P27" "P57" "P2" "P40" "P59" "P88" "P11" "P81" "P62" "P12"
## [12] "P21" "P42" "P75" "P39" "P49" "P94" "P8" "P4" "P3" "P90" "P29"
## [23] "P74" "P56" "P14" "P58" "P86" "P52" "P78" "P35" "P28" "P98" "P85"
## [34] "P16" "P83" "P46" "P64" "P80" "P87" "P32" "P38" "P68" "P9" "P5"
## [45] "P1" "P25" "P65" "P43" "P76" "P77" "P26" "P67" "P31" "P72" "P34"
## [56] "P6" "P15" "P33" "P19" "P53" "P95" "P70" "P96" "P79" "P82" "P63"
## [67] "P41" "P91" "P61" "P66" "P23" "P97" "P60" "P37" "P36" "P50" "P45"
## [78] "P93" "P17" "P44" "P92" "P71" "P30" "P55" "P22" "P20" "P47" "P89"
## [89] "P24" "P10" "P69" "P48" "P7" "P73" "P99" "P13" "P18" "P84" "P54"
#MARS
attach(virco)
## The following objects are masked from virco (pos = 3):
##
## APV.Fold, APV.FoldMatch, ATV.Fold, ATV.FoldMatch, CompMutList,
## IDV.Fold, IDV.FoldMatch, IsolateName, LPV.Fold, LPV.FoldMatch,
## MajorMutList, Method, MinorMutList, NFV.Fold, NFV.FoldMatch,
## P1, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P2, P20,
## P21, P22, P23, P24, P25, P26, P27, P28, P29, P3, P30, P31,
## P32, P33, P34, P35, P36, P37, P38, P39, P4, P40, P41, P42,
## P43, P44, P45, P46, P47, P48, P49, P5, P50, P51, P52, P53,
## P54, P55, P56, P57, P58, P59, P6, P60, P61, P62, P63, P64,
## P65, P66, P67, P68, P69, P7, P70, P71, P72, P73, P74, P75,
## P76, P77, P78, P79, P8, P80, P81, P82, P83, P84, P85, P86,
## P87, P88, P89, P9, P90, P91, P92, P93, P94, P95, P96, P97,
## P98, P99, RefID, RTV.Fold, RTV.FoldMatch, SeqID, SQV.Fold,
## SQV.FoldMatch, SubType, TPV.Fold, TPV.FoldMatch, Type
## The following objects are masked from virco (pos = 6):
##
## APV.Fold, APV.FoldMatch, ATV.Fold, ATV.FoldMatch, CompMutList,
## IDV.Fold, IDV.FoldMatch, IsolateName, LPV.Fold, LPV.FoldMatch,
## MajorMutList, Method, MinorMutList, NFV.Fold, NFV.FoldMatch,
## P1, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P2, P20,
## P21, P22, P23, P24, P25, P26, P27, P28, P29, P3, P30, P31,
## P32, P33, P34, P35, P36, P37, P38, P39, P4, P40, P41, P42,
## P43, P44, P45, P46, P47, P48, P49, P5, P50, P51, P52, P53,
## P54, P55, P56, P57, P58, P59, P6, P60, P61, P62, P63, P64,
## P65, P66, P67, P68, P69, P7, P70, P71, P72, P73, P74, P75,
## P76, P77, P78, P79, P8, P80, P81, P82, P83, P84, P85, P86,
## P87, P88, P89, P9, P90, P91, P92, P93, P94, P95, P96, P97,
## P98, P99, RefID, RTV.Fold, RTV.FoldMatch, SeqID, SQV.Fold,
## SQV.FoldMatch, SubType, TPV.Fold, TPV.FoldMatch, Type
## The following objects are masked from virco (pos = 9):
##
## APV.Fold, APV.FoldMatch, ATV.Fold, ATV.FoldMatch, CompMutList,
## IDV.Fold, IDV.FoldMatch, IsolateName, LPV.Fold, LPV.FoldMatch,
## MajorMutList, Method, MinorMutList, NFV.Fold, NFV.FoldMatch,
## P1, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P2, P20,
## P21, P22, P23, P24, P25, P26, P27, P28, P29, P3, P30, P31,
## P32, P33, P34, P35, P36, P37, P38, P39, P4, P40, P41, P42,
## P43, P44, P45, P46, P47, P48, P49, P5, P50, P51, P52, P53,
## P54, P55, P56, P57, P58, P59, P6, P60, P61, P62, P63, P64,
## P65, P66, P67, P68, P69, P7, P70, P71, P72, P73, P74, P75,
## P76, P77, P78, P79, P8, P80, P81, P82, P83, P84, P85, P86,
## P87, P88, P89, P9, P90, P91, P92, P93, P94, P95, P96, P97,
## P98, P99, RefID, RTV.Fold, RTV.FoldMatch, SeqID, SQV.Fold,
## SQV.FoldMatch, SubType, TPV.Fold, TPV.FoldMatch, Type
Trait <- NFV.Fold - IDV.Fold
VircoGeno <- data.frame(virco[,substr(names(virco),1,1)=="P"]!="-")
Trait.c <- Trait[!is.na(Trait)]
VircoGeno.c <- VircoGeno[!is.na(Trait),]
library(earth)
## Loading required package: plotmo
## Loading required package: plotrix
## Loading required package: TeachingDemos
VircoMARS <- earth(Trait.c~., data=VircoGeno.c, degree=2)
summary(VircoMARS)
## Call: earth(formula=Trait.c~., data=VircoGeno.c, degree=2)
##
## coefficients
## (Intercept) -1.49386
## P35TRUE 36.98821
## P76TRUE -34.95785
## P1TRUE * P73TRUE -30.79950
## P10TRUE * P35TRUE 29.81243
## P10TRUE * P73TRUE 65.50646
## P15TRUE * P25TRUE 751.24589
## P15TRUE * P35TRUE -34.54019
## P15TRUE * P54TRUE 32.95728
## P15TRUE * P73TRUE -58.53545
## P20TRUE * P35TRUE 47.11367
## P20TRUE * P54TRUE -41.71048
## P20TRUE * P73TRUE 77.58072
## P30TRUE * P70TRUE 158.97600
## P30TRUE * P77TRUE 42.81780
## P35TRUE * P36TRUE -42.06393
## P35TRUE * P54TRUE -33.73524
## P35TRUE * P73TRUE 78.73042
## P35TRUE * P82TRUE -31.25249
## P35TRUE * P84TRUE -59.43351
## P35TRUE * P93TRUE 23.76439
## P35TRUE * P95TRUE -60.69940
## P36TRUE * P54TRUE 30.17810
## P36TRUE * P73TRUE -113.98578
## P48TRUE * P54TRUE -20.80249
## P54TRUE * P72TRUE 24.06139
## P54TRUE * P73TRUE -63.96128
## P54TRUE * P84TRUE 34.96787
## P54TRUE * P93TRUE -18.74152
## P54TRUE * P94TRUE 207.51818
## P63TRUE * P73TRUE 67.33288
## P70TRUE * P73TRUE -103.04692
## P72TRUE * P73TRUE -69.71491
## P73TRUE * P74TRUE -54.83226
## P73TRUE * P76TRUE 101.72366
## P73TRUE * P77TRUE -54.40373
## P73TRUE * P84TRUE -65.68984
## P73TRUE * P93TRUE 49.44217
##
## Selected 38 of 100 terms, and 22 of 99 predictors
## Termination condition: Reached nk 199
## Importance: P15TRUE, P25TRUE, P35TRUE, P36TRUE, P73TRUE, P54TRUE, ...
## Number of terms at each degree of interaction: 1 2 35
## GCV 5155.408 RSS 4113795 GRSq 0.2200334 RSq 0.3610069
evimp(VircoMARS)
## nsubsets gcv rss
## P15TRUE 37 100.0 100.0
## P25TRUE 37 100.0 100.0
## P35TRUE 35 80.7 87.3
## P36TRUE 35 80.7 87.3
## P73TRUE 35 80.7 87.3
## P54TRUE 33 71.4 80.8
## P94TRUE 33 71.4 80.8
## P10TRUE 33 70.8 80.3
## P77TRUE 33 70.8 80.3
## P84TRUE 32 69.1 78.7
## P72TRUE 30 60.9 72.9
## P93TRUE 29 57.5 70.4
## P20TRUE 28 53.7 67.8
## P82TRUE 25 39.9 59.2
## P30TRUE 23 33.7 55.0
## P1TRUE 21 30.0 51.7
## P70TRUE 21 29.9 51.7
## P74TRUE 15 21.6 42.3
## P48TRUE 13 21.2 40.0
## P95TRUE 13 20.3 39.3
## P85TRUE-unused 11 18.8 36.6
## P89TRUE-unused 10 17.1 34.7
## P63TRUE 7 16.1 29.1
## P65TRUE-unused 7 13.3 28.7
## P76TRUE 5 14.8 24.8
## P45TRUE-unused 1 -1.8 10.0