数据整理和mixOmics的使用

文章:Rui, J. , Chunming, Z. , Binbin, G. , Jiawen, X. , Wei, S. , & Peng, S. . (2018). Mixomics analysis of breast cancer: long non-coding rna linc01561 acts as cerna involved in the progression of breast cancer. International Journal of Biochemistry & Cell Biology, 102, S1357272518301377-. 复现Fig1.A.The SPLSDA sample plots:miRNA

library(data.table)
library(dplyr)
BrCa_miR_Origin<-data.table::fread(file="1-s2.0-S1357272518301377-mmc5.csv",stringsAsFactors = F)#数据来自文章supplement tables
BrCaID<-colnames(BrCa_miR_Origin)
TumorID<-grep(pattern = '-01',x = BrCaID,value = T)
NormalID<-grep(pattern = '-11',x = BrCaID,value = T)
as.data.frame(t(BrCa_miR_Origin %>% select(V1,TumorID)))->BrCa_miR_Tumor0
colnames(BrCa_miR_Tumor0)<-BrCa_miR_Tumor0[1,]
BrCa_miR_Tumor0[-1,]->BrCa_miR_Tumor1
BrCa_miR_Tumor1 %>% mutate(SampleID=row.names(BrCa_miR_Tumor1),Tissue="Tumor") ->BrCa_miR_Tumor2

as.data.frame(t(BrCa_miR_Origin %>% select(V1,NormalID)))->BrCa_miR_Normal0
colnames(BrCa_miR_Normal0)<-BrCa_miR_Normal0[1,]
BrCa_miR_Normal0[-1,]->BrCa_miR_Normal1
BrCa_miR_Normal1 %>% mutate(SampleID=row.names(BrCa_miR_Normal1),Tissue="Normal") ->BrCa_miR_Normal2

dplyr::bind_rows(BrCa_miR_Tumor2, BrCa_miR_Normal2) -> BrCa_miR_Table
head(BrCa_miR_Table)
BrCa_miR_Table %>% select(-c(SampleID, Tissue))->M1
as.matrix(M1)->M2
GeneInfo <- matrix(as.numeric(M2),nrow=nrow(M2), dimnames = list(rownames(M2),colnames(M2)))

BrCa_miR_Table %>% select(Tissue) ->T1
TissueInfo<-as.factor(T1$Tissue)
class(GeneInfo)
[1] "matrix" "array" 
class(TissueInfo)
[1] "factor"
library(mixOmics)
#载入mixOmics

BrCamiRNA.plsda <- plsda(GeneInfo, TissueInfo, ncomp = 10)
set.seed(2523) # for reproducibility, only when the `cpus' argument is not used
perf.plsda.BrCamiRNA <- perf(BrCamiRNA.plsda, validation = "Mfold", folds = 5, 
                  progressBar = FALSE, auc = TRUE, nrepeat = 20) 
plot(perf.plsda.BrCamiRNA, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")
#作出的图可以看到ncom=6之后曲线变化变小了,后面就用ncomp=6进行


list.keepX <- c(10,15)  #非常不明白这个keepX的参数是怎么取的
#做出来的结果不一样可能也是因为这个keepX的参数不一样
#以下tuning sPLS-DA分析的参数
tune.splsda.BrCamiRNA <- tune.splsda(GeneInfo, TissueInfo, ncomp = 6, validation = 'Mfold', folds = 5, progressBar = F, dist = 'max.dist', measure = "BER", test.keepX = list.keepX, nrepeat = 10, cpus = 4)

error <- tune.splsda.BrCamiRNA$error.rate
ncomp <- tune.splsda.BrCamiRNA$choice.ncomp$ncomp
select.keepX <- tune.splsda.BrCamiRNA$choice.keepX[1:ncomp] 
#进行sPLS-DA分析
splsda.BrCamiRNA <- splsda(GeneInfo, TissueInfo, ncomp = ncomp, keepX = select.keepX)
#绘制The SPLSDA sample plots
plotIndiv(splsda.BrCamiRNA, comp = c(1,2),
          group = TissueInfo, ind.names = FALSE, 
          ellipse = TRUE, legend = TRUE,
          title = 'Block:miRNA')

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIyMg5pWw5o2u5pW055CG5ZKMbWl4T21pY3PnmoTkvb/nlKgNCuaWh+eroO+8mlJ1aSwgSi4gLCBDaHVubWluZywgWi4gLCBCaW5iaW4sIEcuICwgSmlhd2VuLCBYLiAsIFdlaSwgUy4gLCAmIFBlbmcsIFMuIC4gKDIwMTgpLiBNaXhvbWljcyBhbmFseXNpcyBvZiBicmVhc3QgY2FuY2VyOiBsb25nIG5vbi1jb2Rpbmcgcm5hIGxpbmMwMTU2MSBhY3RzIGFzIGNlcm5hIGludm9sdmVkIGluIHRoZSBwcm9ncmVzc2lvbiBvZiBicmVhc3QgY2FuY2VyLiBJbnRlcm5hdGlvbmFsIEpvdXJuYWwgb2YgQmlvY2hlbWlzdHJ5ICYgQ2VsbCBCaW9sb2d5LCAxMDIsIFMxMzU3MjcyNTE4MzAxMzc3LS4NCuWkjeeOsEZpZzEuQS5UaGUgU1BMU0RBIHNhbXBsZSBwbG90czptaVJOQQ0KDQpgYGB7cn0NCmxpYnJhcnkoZGF0YS50YWJsZSkNCmxpYnJhcnkoZHBseXIpDQpCckNhX21pUl9PcmlnaW48LWRhdGEudGFibGU6OmZyZWFkKGZpbGU9IjEtczIuMC1TMTM1NzI3MjUxODMwMTM3Ny1tbWM1LmNzdiIsc3RyaW5nc0FzRmFjdG9ycyA9IEYpI+aVsOaNruadpeiHquaWh+eroHN1cHBsZW1lbnQgdGFibGVzDQpCckNhSUQ8LWNvbG5hbWVzKEJyQ2FfbWlSX09yaWdpbikNClR1bW9ySUQ8LWdyZXAocGF0dGVybiA9ICctMDEnLHggPSBCckNhSUQsdmFsdWUgPSBUKQ0KTm9ybWFsSUQ8LWdyZXAocGF0dGVybiA9ICctMTEnLHggPSBCckNhSUQsdmFsdWUgPSBUKQ0KYXMuZGF0YS5mcmFtZSh0KEJyQ2FfbWlSX09yaWdpbiAlPiUgc2VsZWN0KFYxLFR1bW9ySUQpKSktPkJyQ2FfbWlSX1R1bW9yMA0KY29sbmFtZXMoQnJDYV9taVJfVHVtb3IwKTwtQnJDYV9taVJfVHVtb3IwWzEsXQ0KQnJDYV9taVJfVHVtb3IwWy0xLF0tPkJyQ2FfbWlSX1R1bW9yMQ0KQnJDYV9taVJfVHVtb3IxICU+JSBtdXRhdGUoU2FtcGxlSUQ9cm93Lm5hbWVzKEJyQ2FfbWlSX1R1bW9yMSksVGlzc3VlPSJUdW1vciIpIC0+QnJDYV9taVJfVHVtb3IyDQoNCmFzLmRhdGEuZnJhbWUodChCckNhX21pUl9PcmlnaW4gJT4lIHNlbGVjdChWMSxOb3JtYWxJRCkpKS0+QnJDYV9taVJfTm9ybWFsMA0KY29sbmFtZXMoQnJDYV9taVJfTm9ybWFsMCk8LUJyQ2FfbWlSX05vcm1hbDBbMSxdDQpCckNhX21pUl9Ob3JtYWwwWy0xLF0tPkJyQ2FfbWlSX05vcm1hbDENCkJyQ2FfbWlSX05vcm1hbDEgJT4lIG11dGF0ZShTYW1wbGVJRD1yb3cubmFtZXMoQnJDYV9taVJfTm9ybWFsMSksVGlzc3VlPSJOb3JtYWwiKSAtPkJyQ2FfbWlSX05vcm1hbDINCg0KZHBseXI6OmJpbmRfcm93cyhCckNhX21pUl9UdW1vcjIsIEJyQ2FfbWlSX05vcm1hbDIpIC0+IEJyQ2FfbWlSX1RhYmxlDQpoZWFkKEJyQ2FfbWlSX1RhYmxlKQ0KQnJDYV9taVJfVGFibGUgJT4lIHNlbGVjdCgtYyhTYW1wbGVJRCwgVGlzc3VlKSktPk0xDQphcy5tYXRyaXgoTTEpLT5NMg0KR2VuZUluZm8gPC0gbWF0cml4KGFzLm51bWVyaWMoTTIpLG5yb3c9bnJvdyhNMiksIGRpbW5hbWVzID0gbGlzdChyb3duYW1lcyhNMiksY29sbmFtZXMoTTIpKSkNCg0KQnJDYV9taVJfVGFibGUgJT4lIHNlbGVjdChUaXNzdWUpIC0+VDENClRpc3N1ZUluZm88LWFzLmZhY3RvcihUMSRUaXNzdWUpDQpjbGFzcyhHZW5lSW5mbykNCmNsYXNzKFRpc3N1ZUluZm8pDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KG1peE9taWNzKQ0KI+i9veWFpW1peE9taWNzDQoNCkJyQ2FtaVJOQS5wbHNkYSA8LSBwbHNkYShHZW5lSW5mbywgVGlzc3VlSW5mbywgbmNvbXAgPSAxMCkNCnNldC5zZWVkKDI1MjMpICMgZm9yIHJlcHJvZHVjaWJpbGl0eSwgb25seSB3aGVuIHRoZSBgY3B1cycgYXJndW1lbnQgaXMgbm90IHVzZWQNCnBlcmYucGxzZGEuQnJDYW1pUk5BIDwtIHBlcmYoQnJDYW1pUk5BLnBsc2RhLCB2YWxpZGF0aW9uID0gIk1mb2xkIiwgZm9sZHMgPSA1LCANCiAgICAgICAgICAgICAgICAgIHByb2dyZXNzQmFyID0gRkFMU0UsIGF1YyA9IFRSVUUsIG5yZXBlYXQgPSAyMCkgDQpwbG90KHBlcmYucGxzZGEuQnJDYW1pUk5BLCBjb2wgPSBjb2xvci5taXhvKDU6NyksIHNkID0gVFJVRSwgbGVnZW5kLnBvc2l0aW9uID0gImhvcml6b250YWwiKQ0KI+S9nOWHuueahOWbvuWPr+S7peeci+WIsG5jb209NuS5i+WQjuabsue6v+WPmOWMluWPmOWwj+S6hu+8jOWQjumdouWwseeUqG5jb21wPTbov5vooYwNCg0KDQpsaXN0LmtlZXBYIDwtIGMoMTAsMTUpICAj6Z2e5bi45LiN5piO55m96L+Z5Liqa2VlcFjnmoTlj4LmlbDmmK/mgI7kuYjlj5bnmoQNCiPlgZrlh7rmnaXnmoTnu5PmnpzkuI3kuIDmoLflj6/og73kuZ/mmK/lm6DkuLrov5nkuKprZWVwWOeahOWPguaVsOS4jeS4gOagtw0KI+S7peS4i3R1bmluZyBzUExTLURB5YiG5p6Q55qE5Y+C5pWwDQp0dW5lLnNwbHNkYS5CckNhbWlSTkEgPC0gdHVuZS5zcGxzZGEoR2VuZUluZm8sIFRpc3N1ZUluZm8sIG5jb21wID0gNiwgdmFsaWRhdGlvbiA9ICdNZm9sZCcsIGZvbGRzID0gNSwgcHJvZ3Jlc3NCYXIgPSBGLCBkaXN0ID0gJ21heC5kaXN0JywgbWVhc3VyZSA9ICJCRVIiLCB0ZXN0LmtlZXBYID0gbGlzdC5rZWVwWCwgbnJlcGVhdCA9IDEwLCBjcHVzID0gNCkNCg0KZXJyb3IgPC0gdHVuZS5zcGxzZGEuQnJDYW1pUk5BJGVycm9yLnJhdGUNCm5jb21wIDwtIHR1bmUuc3Bsc2RhLkJyQ2FtaVJOQSRjaG9pY2UubmNvbXAkbmNvbXANCnNlbGVjdC5rZWVwWCA8LSB0dW5lLnNwbHNkYS5CckNhbWlSTkEkY2hvaWNlLmtlZXBYWzE6bmNvbXBdIA0KDQpgYGANCg0KYGBge3J9DQoj6L+b6KGMc1BMUy1EQeWIhuaekA0Kc3Bsc2RhLkJyQ2FtaVJOQSA8LSBzcGxzZGEoR2VuZUluZm8sIFRpc3N1ZUluZm8sIG5jb21wID0gbmNvbXAsIGtlZXBYID0gc2VsZWN0LmtlZXBYKQ0KI+e7mOWItlRoZSBTUExTREEgc2FtcGxlIHBsb3RzDQpwbG90SW5kaXYoc3Bsc2RhLkJyQ2FtaVJOQSwgY29tcCA9IGMoMSwyKSwNCiAgICAgICAgICBncm91cCA9IFRpc3N1ZUluZm8sIGluZC5uYW1lcyA9IEZBTFNFLCANCiAgICAgICAgICBlbGxpcHNlID0gVFJVRSwgbGVnZW5kID0gVFJVRSwNCiAgICAgICAgICB0aXRsZSA9ICdCbG9jazptaVJOQScpDQpgYGANCg0K