Preparation of potential features for G
#
#
require(GA)
GA_r Loading required package: GA
GA_r Package 'GA' version 2.2
GA_r Type 'citation("GA")' for citing this R package in publications.
require(doParallel)
GA_r Loading required package: doParallel
require(plyr)
#
#
area=function(v,nv) {
y=data.frame(x=as.numeric(nv),y=v)
xz = diff(as.numeric(y[,1]),1)
yz = as.numeric(y[,2])
z = sum(xz*rollmean(yz,2))
return(z)
}
fitnessG = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-10000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-10000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas[[cnjts[1]]]$mat),ncol=(nrow(par)+1)))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas[[cnjts[bi]]]$mat[,idxi] / (bandas[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
X[,ncol(X)]= YPT
colnames(X)[ncol(X)]="T"
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
fitnessG10 = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-10000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-10000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas10[[cnjts[1]]]$mat),ncol=(nrow(par)+1)))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas10[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas10[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas10[[cnjts[bi]]]$mat[,idxi] / (bandas10[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
X[,ncol(X)]= YPT
colnames(X)[ncol(X)]="T"
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
fitnessG50 = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-10000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-10000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas50[[cnjts[1]]]$mat),ncol=(nrow(par)+1)))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas50[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas50[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas50[[cnjts[bi]]]$mat[,idxi] / (bandas50[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
X[,ncol(X)]= YPT
colnames(X)[ncol(X)]="T"
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
#
SelFeatures=function(x,sets=cnjts,dat=bandas,fitness.=fitnessG,pp=FALSE){
pos=which.max(as.numeric(unlist(ldply(x@bestSol,fitness.,.parallel=pp))))
sol=x@bestSol[[pos]]
par = decode(sol)
res = list()
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
res[[i]]=list(set1=sets[bi],pos1=idxi,set2=sets[bj],pos2=idxj,par=par[i,],
x=dat[[sets[bi]]]$mat[,idxi],
x0=dat[[sets[bj]]]$mat[,idxj],
name=dat[[sets[bi]]]$nam[idxi],
name0=dat[[sets[bj]]]$nam[idxj])
}
return(res)
}
#
FeaturesExt=function(x,num,sets=cnjts,dat=bandas,fitness.=fitnessG,pp=FALSE){
vals=as.numeric(unlist(ldply(x@bestSol,fitness.,.parallel=pp)))
fiveval=summary(vals)
uv = unique(sort(vals,decreasing=TRUE))
uvals = uv[1:min(num,length(uv))]
isols=rep(0,length(uvals))
for (i in 1:length(uvals)) {
isols[i] = which(vals==uvals[i])[1]
}
res=NA
if ( length(isols) > 0) {
par=decode(x@bestSol[[1]])
res = as.data.frame(matrix(NA,nrow=nrow(par),ncol=(2*length(isols))))
nam = as.data.frame(matrix(NA,nrow=nrow(par),ncol=(2*length(isols))))
for (j in 1:length(isols)) {
sol=x@bestSol[[isols[j]]]
par = decode(sol)
res[,(2*(j-1)+1):(2*j)]=par
names(res)[(2*(j-1)+1):(2*j)] = c(paste(j,":s",sep=""),paste(j,":c",sep=""))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
nam[i,(2*(j-1)+1)]=dat[[sets[bi]]]$nam[idxi]
nam[i,(2*j)]=dat[[sets[bj]]]$nam[idxj]
}
names(nam)[(2*(j-1)+1):(2*j)] = c(paste(j,":s",sep=""),paste(j,":c",sep=""))
}
}
return(list(par=res,name=nam,pos=isols,vpos=vals[isols],fivevals=fiveval))
}
#
decode = function(string) {
string = gray2binary(string)
n = binary2decimal(string[1:NBITS])
c = binary2decimal(string[(NBITS + 1):(2*NBITS)])
res = data.frame(p1=n%%MAXV,p2=c%%MAXV)
ini = 2 * NBITS
if ( NG > 1) {
for (i in 2:NG) {
n = binary2decimal(string[(ini+1):(ini+NBITS)])
c = binary2decimal(string[(ini + NBITS + 1):(ini + 2*NBITS)])
res = rbind(res, c(n%%MAXV,c%%MAXV))
ini = ini + 2*NBITS
}
}
return(res)
}
#
encode = function(res,nbits=NBITS) {
if (! is.data.frame(res)) {
warning(paste("ENCODE: First parameter is not data.frame:",str(res),sep=" "))
return(NULL)
}
cad=rep(0,ncol(res)*nrow(res)*nbits)
for (i in 1:nrow(res)) {
for (j in 1:ncol(res)) {
k=((i-1)*ncol(res) + (j - 1))*nbits+1
cad[k:(k+nbits-1)]=decimal2binary(res[i,j],nbits)
}
}
return(binary2gray(cad))
}
#
#
NITER=9
if ( file.exists("~/git/M_prep_IPAC/GA_NG11F2-2013_data.RData")) {
j=1
load("~/git/M_prep_IPAC/GA_NG11F2-2013_data.RData")
tt50=list()
GAs50 = list()
solsG50=list()
tt10=list()
GAs10 = list()
solsG10=list()
tt=list()
GAs=list()
solsG=list()
while(j <= NITER) {
cat(paste("Reading Iteration:",j,"<br>",sep=""))
flush.console()
load(paste("~/git/M_prep_IPAC/GA_2013_G_",sprintf("%02d",j),"_50_f2_data.RData",sep=""))
tt50[[j]]= t_tot
GAs50[[j]]=GA1
solsG50[[j]]=FeaturesExt(GA1,12)
load(paste("~/git/M_prep_IPAC/GA_2013_G_",sprintf("%02d",j),"_10_f2_data.RData",sep=""))
tt10[[j]]= t_tot
GAs10[[j]]=GA1
solsG10[[j]]=FeaturesExt(GA1,12)
load(paste("~/git/M_prep_IPAC/GA_2013_G_",sprintf("%02d",j),"_f2_data.RData",sep=""))
tt[[j]]= t_tot
GAs[[j]]=GA1
solsG[[j]]=FeaturesExt(GA1,12)
j = j+1
}
} else {
# cnjts = c("s=1","s=6","s=11","s=16","s=21","s=26")
cnjts = c("s=1","s=6")
bpy=do.call(rbind,lapply(bp_clean,function(x){return(x$data[[1]][,2])}))
YPT=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[1])}))
YPG=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[2])}))
YPM=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[3])}))
vf=as.character(bp_clean[[1]]$data[[1]][,1])
colnames(bpy)=vf
bfy=do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
#
siz=10
bandas=list()
bfndas=list()
bandas10=list()
bandas50=list()
for (i in seq(1,10,5)) { # LSB asked for steps of 5 pixels
idx=paste("s=",i,sep="")
lvf=vf
if ( i > 1) {
lvf=vf[-c(1:(i-1))]
}
ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
st=split(lvf,ff)
for (j in 1:length(st)) {
if (length(st[[j]]) < siz)
st[[j]] = NULL
}
lst = length(st)
bandas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy)))
bfndas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bfy)))
bandas10[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy10)))
bandas50[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy50)))
for (j in 1:lst) {
bandas[[idx]]$mat[,j]=apply(bpy[,st[[j]]],1,area,st[[j]])
bandas10[[idx]]$mat[,j]=apply(bpy10[,st[[j]]],1,area,st[[j]])
bandas50[[idx]]$mat[,j]=apply(bpy50[,st[[j]]],1,area,st[[j]])
}
for (j in 1:lst) {
bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
}
}
#
# BE CAREFUL !!! y should be in retionship with the variable being predicted
y=as.numeric(unlist(YPG))
cnjts = c("s=1","s=6")
NG=10 # Número de Features
NBLK = length(cnjts)
NBITS= ceiling(log(NBLK*ncol(bandas[[cnjts[1]]]$mat),2))
MAXV = ncol(bandas[[cnjts[1]]]$mat)
#
save(bpy,bpy10,bpy50,bfy,YPT,YPG,YPM,vf,siz,bandas,bfndas, bandas10,bandas50,
cnjts,st,NG,NBLK,NBITS,MAXV,y,fitnessG,fitnessG10,fitnessG50, encode,decode,
SelFeatures,file="~/git/M_prep_IPAC/GA_NG11F2-2013_data.RData")
#
tt=list()
GAs = list()
solsG=list()
j=1
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt[[j]]= system.time({GA1 = ga(type="binary", fitness = fitnessG, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt[[j]]
GAs[[j]]=GA1
save(fitnessG,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IPAC/GA_2013_G_",sprintf("%02d",j),"_f2_data.RData",sep=""))
solsG[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsG,function(x){return(x$vpos[1])})),type="html")
#
j=1
tt10=list()
GAs10 = list()
solsG10=list()
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt10[[j]]= system.time({GA1 = ga("binary", fitness = fitnessG10, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt10[[j]]
GAs10[[j]]=GA1
save(fitnessG10,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IPAC/GA_2013_G_",sprintf("%02d",j),"_10_f2_data.RData",sep=""))
solsG10[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsG10,function(x){return(x$vpos[1])})),type="html")
#
j=1
tt50=list()
GAs50 = list()
solsG50=list()
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt50[[j]]= system.time({GA1 = ga("binary", fitness = fitnessG50, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt50[[j]]
GAs50[[j]]=GA1
save(fitnessG50,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IPAC/GA_2013_G_",sprintf("%02d",j),"_50_f2_data.RData",sep=""))
solsG50[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsG50,function(x){return(x$vpos[1])})),type="html")
}
Reading Iteration:1
Reading Iteration:2
Reading Iteration:3
Reading Iteration:4
Reading Iteration:5
Reading Iteration:6
Reading Iteration:7
Reading Iteration:8
Reading Iteration:9
#
print(xtable(ldply(solsG,function(x){return(x$vpos[1])})),type="html")
|
|
V1
|
|
1
|
-786.23
|
|
2
|
-714.96
|
|
3
|
-770.06
|
|
4
|
-680.51
|
|
5
|
-703.80
|
|
6
|
-840.59
|
|
7
|
-789.10
|
|
8
|
-711.47
|
|
9
|
-780.31
|
# ISOL holds the best solution !
isol = which.max(as.numeric(ldply(solsG,function(x){return(x$vpos[1])})[,1]))
isol
[1] 4
print(xtable(ldply(solsG10,function(x){return(x$vpos[1])})),type="html")
|
|
V1
|
|
1
|
-958.95
|
|
2
|
-961.26
|
|
3
|
-970.42
|
|
4
|
-923.48
|
|
5
|
-941.87
|
|
6
|
-969.82
|
|
7
|
-956.75
|
|
8
|
-914.09
|
|
9
|
-924.01
|
# ISOL holds the best solution !
isol10 = which.max(as.numeric(ldply(solsG10,function(x){return(x$vpos[1])})[,1]))
isol10
[1] 8
#
print(xtable(ldply(solsG50,function(x){return(x$vpos[1])})),type="html")
|
|
V1
|
|
1
|
-795.82
|
|
2
|
-818.83
|
|
3
|
-724.48
|
|
4
|
-738.79
|
|
5
|
-852.97
|
|
6
|
-820.19
|
|
7
|
-803.29
|
|
8
|
-760.46
|
|
9
|
-746.79
|
# ISOL holds the best solution !
isol50 = which.max(as.numeric(ldply(solsG50,function(x){return(x$vpos[1])})[,1]))
isol50
[1] 3
#
#
clase_luminosidad = function(x) {
# we look over Spectral SubClass
# for roman numbers V=>dwarft; III => giants I => Supergiants (II => I)
cl=c("V","IV","III","II","I")
i=1
res=NA
if ( ! is.na(x)) {
while(i <= length(cl) & is.na(res)) {
j=regexpr(cl[i],x)
if (length(j) > 0 ) {
if ( j[1] > 0 ) {
res=cl[i]
} else {
i=i+1
}
}
}
}
return(res)
}
clase_spt = function(x) {
# we look over Spectral SubType
# for roman numbers M0 ... M9
cl=c("M0","M1.5","M1","M2.5","M2","M3.5","M3","M4.5","M4","M5.5","M5",
"M6.5","M6","M7.5","M7","M8.5","M8","M9.5","M9")
i=1
res=NA
if ( ! is.na(x)) {
while(i <= length(cl) & is.na(res)) {
j=regexpr(cl[i],x)
if (length(j) > 0 ) {
if ( j[1] > 0 ) {
res=cl[i]
} else {
i=i+1
}
}
}
}
return(res)
}
#
prep_datosG=function(res,T){
X=matrix(NA,nrow=length(res[[1]]$x),ncol=(length(res)+1))
for (j in 1:length(res)) {
wini=-eval(parse(text=res[[j]]$name))
winj=-eval(parse(text=res[[j]]$name0))
X[,j]=wini-res[[j]]$x/(res[[j]]$x0/winj)
}
X[,(length(res)+1)] = T
colnames(X)=c(paste("C",1:length(res),sep=""),"T")
return(X)
}
#
# Function that returns Root Mean Squared Error
rmse <- function(error) {
sqrt(mean(error^2,na.rm=TRUE))
}
# Function that returns Mean Absolute Error
mae <- function(error) {
mean(abs(error),na.rm=TRUE)
}
#
#
modelado=function(X,Y) {
#
trn=1:nrow(X)
folds=5
repeats=1
#
mseeds <- vector(mode = "list", length = (folds+1))
for(i in 1:folds) mseeds[[i]] <- sample.int(nrow(X), 100)
mseeds[[(folds+1)]] <- sample.int(1000, 1)
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,seeds=mseeds)
#Train some models
model_1.1 = "train(X[trn,], Y[trn], method='gbm',
trControl=myControl,
tuneGrid=expand.grid(n.trees=seq(100,500,50) , n.minobsinnode=c(2,5),
interaction.depth=seq(3,15,2),
shrinkage = seq(0.01,0.1,0.02)),verbose=FALSE)"
model_1.2 = "train(X[trn,], Y[trn], method='blackboost',
trControl=myControl, tuneGrid=expand.grid(.mstop=10^(2:4),
.maxdepth=seq(5,15,3)))"
model_1.3 = "train(X[trn,], Y[trn], method='rf',
trControl=myControl,
tuneGrid=expand.grid(.mtry=(2:ncol(X))),
proximity=TRUE)"
model_1.4 = "train(X[trn,], Y[trn], method='knn',
trControl=myControl, trace=FALSE,
tuneGrid=expand.grid(k=(2:min(9,ncol(X)))))"
model_1.5 = "train(X[trn,], Y[trn], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=10))"
model_1.6 = "train(X[trn,], Y[trn], method='earth',
trControl=myControl,tuneGrid=expand.grid(.degree = 3,
.nprune = (1:10) * 2), metric = 'RMSE',
maximize = FALSE)"
model_1.7 = "train(X[trn,], Y[trn], method='glm',
trControl=myControl)"
model_1.8 = "train(X[trn,], Y[trn], method='svmRadial',
trControl=myControl,tuneGrid=expand.grid(.C=10^(-3:3),
.sigma=c(10^(-4:3))))"
model_1.9 = "train(X[trn,], Y[trn], method='cubist',
trControl=myControl,commiittees=10)"
model_1.10 = "train(X[trn,], Y[trn], method='kernelpls',
trControl=myControl,tuneGrid=expand.grid(.ncomp=seq(2,ncol(X))))"
model_1.11 = "train(X[trn,], Y[trn], method='nnet',
trControl=myControl,tuneGrid=expand.grid(.size=seq(2,round(2*ncol(X)),3),
.decay=c(1.e-5,1.e-2)),
linout=TRUE, rang = 300, trace=FALSE,
maxit=10000, reltol=1.0e-11, abstol=1.0e-6)"
model_1.12 = "train(X[trn,], Y[trn], method='bagEarth',
trControl=myControl,tuneGrid=expand.grid(.nprune=(1:10)*3,.degree=1:5))"
model_1.13 = "train(X[trn,], Y[trn], method='cubist', trControl=myControl,
tuneGrid=expand.grid(.committees=3:50, .neighbors=1:7))"
#
#Make a list of all the models
lmodels=c("model_1.1", "model_1.2", "model_1.3", "model_1.4",
"model_1.5", "model_1.6", "model_1.7", "model_1.8",
"model_1.9", "model_1.10","model_1.11","model_1.12",
"model_1.13")
#
all.models_5 =list()
for (i in lmodels) {
all.models_5[[length(all.models_5)+1]] = eval(parse(text=gsub('\n','',get(i))))
}
names(all.models_5) = sapply(all.models_5, function(x) x$method)
#
ensam_5 <- caretList(X[trn,],Y[trn],methodList=c('svmRadial',
'gbm','blackboost','rf','earth','bagEarth'))
ens <- caretEnsemble(ensam_5)
#Make a linear regression ensemble
if (exists("ens")){
all.models_5[[length(all.models_5)+1]] = get("ens")
names(all.models_5)[length(all.models_5)]="ENS"
}
return(all.models_5)
}
#
# Loading the predicted temperature
load(file="~/git/M_prep_IPAC/GA_predict_Trv_11F2.RData")
if ( file.exists("~/git/M_prep_IPAC/GA_IPACrv_data_MG-2013_GF2_Models.RData")) {
load("~/git/M_prep_IPAC/GA_IPACrv_data_MG-2013_GF2_Models.RData")
} else {
#
# Objects YTpknn00,YTpknn11,YTpknn55 are read from the file
# They were determined by the script prep_GA_case01_NT11F2_v1.Rmd
#
res00=SelFeatures(GAs[[isol]],cnjts,bandas)
res10=SelFeatures(GAs10[[isol10]],cnjts,bandas10)
res50=SelFeatures(GAs50[[isol50]],cnjts,bandas50)
#
X00 = prep_datosG(res00,as.numeric(YPT[,1]))
md5 = modelado(X00,as.numeric(YPG[,1]))
X10 = prep_datosG(res10,as.numeric(YPT[,1]))
md51= modelado(X10,as.numeric(YPG[,1]))
X50 = prep_datosG(res50,as.numeric(YPT[,1]))
md55= modelado(X50,as.numeric(YPG[,1]))
#
#
# Consideramos ahora los espectros IPAC corregidos por doppler (A Bello 16/7/2015)
load("../M_redshift/M_prep_clean_IPAC_corrected.RData")
bfy=do.call(rbind,lapply(bq_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
for (i in seq(1,siz,5)) { # LSB asked for steps of 5 pixels
idx=paste("s=",i,sep="")
lvf=vf
if ( i > 1) {
lvf=vf[-c(1:(i-1))]
}
ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
st=split(lvf,ff)
for (j in 1:length(st)) {
if (length(st[[j]]) < siz)
st[[j]] = NULL
}
lst = length(st)
for (j in 1:lst) {
bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
}
}
# Continuamos ahrora
xf0 = prep_datosG(SelFeatures(GAs[[isol]],cnjts,bfndas),YTpknn00)
xf1 = prep_datosG(SelFeatures(GAs10[[isol10]],cnjts,bfndas),YTpknn11)
xf5 = prep_datosG(SelFeatures(GAs50[[isol50]],cnjts,bfndas),YTpknn55)
#
YGn00=predict(md5[[which(names(md5)=="rf")]],xf0)
YGn01=predict(md51[[which(names(md51)=="rf")]],xf0)
YGn05=predict(md55[[which(names(md55)=="rf")]],xf0)
YGn10=predict(md5[[which(names(md5)=="rf")]],xf1)
YGn11=predict(md51[[which(names(md51)=="rf")]],xf1)
YGn15=predict(md55[[which(names(md55)=="rf")]],xf1)
YGn50=predict(md5[[which(names(md5)=="rf")]],xf5)
YGn51=predict(md51[[which(names(md51)=="rf")]],xf5)
YGn55=predict(md55[[which(names(md55)=="rf")]],xf5)
# Correciones a las estimaciones de la tabla de Cesetti et al ya aplicadas
# ddj=ddj[! ddj$Name %in% toberemoved,]
# for ( i in 1:nrow(tobeupdated)) {
# ddj[ddj$Name %in% tobeupdated[i,"name"],"Teff"]=tobeupdated[i,"Teff"]
# }
ref_tot0i=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,SpTorg=ref$SpT,chi2d_oo=ref$chi2_oo,
ICA_10=ref$ICA_LG_10,ICA_50=ref$ICA_LG_50,ICA_oo=ref$ICA_LG_00,
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad),
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt),
LG_ICA_10=G_coef3,LG_ICA_50=G_coef4,LG_rf_00=YGn00,LG_rf_11=YGn11,LG_rf_55=YGn55,
LogG=ref$LG_teo,Teff=ref$T_teo)
ref_tot2i=ref_tot0i
#
save(bpy,bfy,YPT,YPG,YPM,vf,siz,bandas,bfndas, bandas10,bandas50,cnjts,ref,
NG,NBLK,NBITS,MAXV,md5,md51,md55,res00,res10,res50,
X00,X10,X50,YGn00,YGn01,YGn05,YGn10,YGn11,YGn15,YGn50,YGn51,YGn55,xf0,xf1,xf5,
ref_tot0i,ref_tot2i,file="~/git/M_prep_IPAC/GA_IPACrv_data_MG-2013_GF2_Models.RData")
}
#
res00=SelFeatures(GAs[[isol]],cnjts,bandas)
res10=SelFeatures(GAs10[[isol10]],cnjts,bandas10)
res50=SelFeatures(GAs50[[isol50]],cnjts,bandas50)
ref_tot0=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,SpTorg=ref$SpT,
ICA_10=ref$ICA_LG_10,ICA_50=ref$ICA_LG_50,ICA_oo=ref$ICA_LG_00,
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad),
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt),
LG_ICA_10=G_coef3,LG_ICA_50=G_coef4,LG_rf_00=YGn00,LG_rf_11=YGn11,LG_rf_55=YGn55,
LogG=ref$LG_teo,Teff=ref$T_teo)
ref_tot2=ref_tot0
#
print(xtable(ldply(res00,function(x){return(c(x$name,x$name0))})),type="html")
|
|
V1
|
V2
|
|
1
|
7134-7166.4
|
7044-7076.4
|
|
2
|
6954-6986.4
|
7152-7184.4
|
|
3
|
7512-7544.4
|
7890-7922.4
|
|
4
|
7062-7094.4
|
7224-7256.4
|
|
5
|
6936-6968.4
|
7854-7886.4
|
|
6
|
6900-6932.4
|
7746-7778.4
|
|
7
|
6918-6950.4
|
7800-7832.4
|
|
8
|
7008-7040.4
|
7134-7166.4
|
|
9
|
7872-7904.4
|
7008-7040.4
|
|
10
|
7962-7994.4
|
7980-8012.4
|
print(xtable(ldply(res10,function(x){return(c(x$name,x$name0))})),type="html")
|
|
V1
|
V2
|
|
1
|
6990-7022.4
|
6918-6950.4
|
|
2
|
6900-6932.4
|
7278-7310.4
|
|
3
|
7062-7094.4
|
7242-7274.4
|
|
4
|
7692-7724.4
|
7008-7040.4
|
|
5
|
7656-7688.4
|
7998-8030.4
|
|
6
|
6936-6968.4
|
7836-7868.4
|
|
7
|
7206-7238.4
|
7062-7094.4
|
|
8
|
7512-7544.4
|
7926-7958.4
|
|
9
|
7764-7796.4
|
7710-7742.4
|
|
10
|
7404-7436.4
|
7548-7580.4
|
print(xtable(ldply(res50,function(x){return(c(x$name,x$name0))})),type="html")
|
|
V1
|
V2
|
|
1
|
6918-6950.4
|
6936-6968.4
|
|
2
|
6936-6968.4
|
7836-7868.4
|
|
3
|
7656-7688.4
|
7890-7922.4
|
|
4
|
6900-6932.4
|
7872-7904.4
|
|
5
|
7008-7040.4
|
7044-7076.4
|
|
6
|
7512-7544.4
|
7656-7688.4
|
|
7
|
7440-7472.4
|
7332-7364.4
|
|
8
|
7800-7832.4
|
7692-7724.4
|
|
9
|
7404-7436.4
|
7548-7580.4
|
|
10
|
7080-7112.4
|
7152-7184.4
|
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=chi2d_10,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") + ylab("Chi2 10 [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +

# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=chi2d_50,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") + ylab("Chi2 50 [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +

# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=LG_rf_00,shape=LC),size=3) +
xlab("Theoretical Log(G) [K]") + ylab("ML predicted Msnr=oo Fsnr=oo [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +

# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=LG_rf_11,shape=LC),size=3) +
xlab("Theoretical Log(G) [K]") + ylab("ML predicted Msnr=10 Fsnr=10 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +

# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=LG_rf_55,shape=LC),size=3) +
xlab("Theoretical Log(G) [K]") + ylab("ML predicted Msnr=50 Fsnr=50 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +

# xlim(2000,4200) + ylim(2000,4200) +
#
#
lmod=c("rf","gbm","svmRadial","nnet","knn","bagEarth","kernelpls","cubist")
nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
dataG = array(0,c(3,length(nmod),nrow(xf0))) # Dim 1 SNR, dim2 Star and Dim3 Model
nomG = unlist(lapply(bf_clean,function(x){nm=gsub('.fits','',x$name);pp=strsplit(nm,'_'); return(pp[[1]][2])}))
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad)
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt)
for (i in 1:length(lmod)) {
YGm00=predict(md5[[which(names(md5)==lmod[i])[1]]],xf0)
YGm11=predict(md51[[which(names(md51)==lmod[i])[1]]],xf1)
YGm55=predict(md55[[which(names(md55)==lmod[i])[1]]],xf5)
dataG[1,i,] = YGm00
dataG[2,i,] = YGm11
dataG[3,i,] = YGm55
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,chi2d_oo=ref$chi2_oo,
ICA_10=ref$ICA_LG_10,ICA_50=ref$ICA_LG_50,ICA_oo=ref$ICA_LG_00,
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad),
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt),
LG_ICA_10=ref$ICA_LG_10,LG_ICA_50=ref$ICA_LG_50,
YGm00,YGm11,YGm55,Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55,
LogG=ref$LG_teo,Teff=ref$T_teo)
ref_tot2m=ref_tot0m
diffs=apply(ref_tot2m[,! colnames(ref_tot2m) %in% c("Name","SpT","Teff","LC","Met",
"LogG","Tknn0","Tknn1","Tknn5")],2,FUN="-",ref_tot2m[,"LogG"])
rownames(diffs)=ref_tot2m[,"Name"]
#
cat(paste("Log(G) Modelling with ",lmod[i],". Error analysis follows.",sep=""))
errors=data.frame(drmse = apply(diffs,2,rmse),dmae=apply(diffs,2,mae))
print(xtable(errors),type="html")
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=oo",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=LogG,y=YGm00,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") +
ylab(paste(nmod[i]," predicted SNR=oo [dex]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2)))
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=10",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=LogG,y=YGm11,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") +
ylab(paste(nmod[i]," predicted SNR=10 [dex]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2)))
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=50",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=LogG,y=YGm55,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") +
ylab(paste(nmod[i]," predicted SNR=50 [dex]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2)))
)
}
GA_r3 Loading required package: randomForest
GA_r3 randomForest 4.6-12
GA_r3 Type rfNews() to see new features/changes/bug fixes.
Log(G) Modelling with rf. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
1.41
|
1.13
|
|
YGm11
|
1.35
|
1.08
|
|
YGm55
|
1.62
|
1.33
|
Log(G) Modelling with: RF. SNR=oo
Log(G) Modelling with: RF. SNR=10
Log(G) Modelling with: RF. SNR=50
GA_r3 Loading required package: gbm
GA_r3 Loading required package: survival
GA_r3
GA_r3 Attaching package: 'survival'
GA_r3
GA_r3 The following object is masked from 'package:caret':
GA_r3
GA_r3 cluster
GA_r3
GA_r3 Loading required package: splines
GA_r3 Loaded gbm 2.1.1

Log(G) Modelling with gbm. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
1.66
|
1.35
|
|
YGm11
|
1.59
|
1.33
|
|
YGm55
|
1.69
|
1.43
|
Log(G) Modelling with: GB. SNR=oo

Log(G) Modelling with: GB. SNR=10

Log(G) Modelling with: GB. SNR=50

Log(G) Modelling with svmRadial. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
2.28
|
1.88
|
|
YGm11
|
1.98
|
1.72
|
|
YGm55
|
2.13
|
1.89
|
Log(G) Modelling with: SVR. SNR=oo

Log(G) Modelling with: SVR. SNR=10

Log(G) Modelling with: SVR. SNR=50

Log(G) Modelling with nnet. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
3.22
|
2.87
|
|
YGm11
|
2.03
|
1.78
|
|
YGm55
|
2.25
|
2.04
|
Log(G) Modelling with: NNR. SNR=oo

Log(G) Modelling with: NNR. SNR=10

Log(G) Modelling with: NNR. SNR=50

Log(G) Modelling with knn. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
1.71
|
1.40
|
|
YGm11
|
2.05
|
1.68
|
|
YGm55
|
2.18
|
1.83
|
Log(G) Modelling with: KNN. SNR=oo
Log(G) Modelling with: KNN. SNR=10
Log(G) Modelling with: KNN. SNR=50
GA_r3 Loading required package: earth
GA_r3 Loading required package: plotmo
GA_r3 Loading required package: plotrix
GA_r3 Loading required package: TeachingDemos

Log(G) Modelling with bagEarth. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
2.03
|
1.65
|
|
YGm11
|
1.85
|
1.60
|
|
YGm55
|
2.03
|
1.77
|
Log(G) Modelling with: MARS. SNR=oo

Log(G) Modelling with: MARS. SNR=10

Log(G) Modelling with: MARS. SNR=50

Log(G) Modelling with kernelpls. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
2.75
|
2.44
|
|
YGm11
|
1.85
|
1.60
|
|
YGm55
|
2.01
|
1.79
|
Log(G) Modelling with: PLS. SNR=oo

Log(G) Modelling with: PLS. SNR=10

Log(G) Modelling with: PLS. SNR=50

Log(G) Modelling with cubist. Error analysis follows.
|
|
drmse
|
dmae
|
|
chi2d_10
|
2.20
|
1.80
|
|
chi2d_50
|
2.19
|
1.75
|
|
chi2d_oo
|
2.24
|
1.81
|
|
ICA_10
|
2.14
|
1.80
|
|
ICA_50
|
1.82
|
1.60
|
|
ICA_oo
|
4.31
|
4.12
|
|
LG_ICA_10
|
2.14
|
1.80
|
|
LG_ICA_50
|
1.82
|
1.60
|
|
YGm00
|
3.73
|
3.33
|
|
YGm11
|
2.01
|
1.75
|
|
YGm55
|
2.09
|
1.84
|
Log(G) Modelling with: Rule-Regression. SNR=oo
Log(G) Modelling with: Rule-Regression. SNR=10
Log(G) Modelling with: Rule-Regression. SNR=50
#
save(nomG,LC,SpT,dataG,file="LGrv_GA_models_IPAC.RData")
That’s all.
Check from the main sequence structure
Now, it’s time for having a look at the
#
ref_tot0=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,SpTorg=ref$SpT,chi2d_oo=ref$chi2_oo,
ICA_10=ref$ICA_LG_10,ICA_50=ref$ICA_LG_50,ICA_oo=ref$ICA_LG_00,
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad),
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt),
LG_ICA_10=G_coef3,LG_ICA_50=G_coef4,LG_rf_00=YGn00,LG_rf_11=YGn11,LG_rf_55=YGn55,
LogG=ref$LG_teo,Teff=ref$T_teo,Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55)
ref_tot2=ref_tot0
#
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=chi2d_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - Chi2-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=chi2d_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - Chi2-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=LG_ICA_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - ICA-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=LG_ICA_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - ICA-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

# T snr=10
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=chi2d_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - Chi2-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=chi2d_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - Chi2-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=LG_ICA_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - ICA-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=LG_ICA_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - ICA-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

# T snr=50
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=chi2d_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - Chi2-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=chi2d_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - Chi2-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=LG_ICA_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - ICA-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=LG_ICA_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - ICA-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Teff,10),y=LogG,shape=LC),size=3) +
xlab("Log(T) [dex] - RojasAyala") +
ylab("Log(G) [dex] - RojasAyala") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)

#
#
lmod=c("rf","gbm","svmRadial","nnet","knn","bagEarth","kernelpls","cubist")
nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
for (i in 1:length(lmod)) {
YGm00=predict(md5[[which(names(md5)==lmod[i])[1]]],xf0)
YGm11=predict(md51[[which(names(md51)==lmod[i])[1]]],xf1)
YGm55=predict(md55[[which(names(md55)==lmod[i])[1]]],xf5)
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,chi2d_oo=ref$chi2_oo,
ICA_10=ref$ICA_LG_10,ICA_50=ref$ICA_LG_50,ICA_oo=ref$ICA_LG_00,
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad),
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt),
YGm00,YGm11,YGm55,Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55)
ref_tot2m=ref_tot0m
#
cat(paste("Log(G) Modelling with ",lmod[i],". Plot main sequence follows.",sep=""))
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=oo",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=log(Tknn0,10),y=YGm00,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab(paste("Log(G) ",nmod[i]," predicted SNR=oo [dex]")) +
theme_bw() + scale_y_reverse() + scale_x_reverse() + theme(axis.title=
element_text(size=14,face="bold"),
legend.title=element_text(size=14,face="bold"),
legend.text=element_text(size=13))
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=10",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=log(Tknn1,10),y=YGm11,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab(paste("Log(G) ",nmod[i]," predicted SNR=10 [dex]")) +
theme_bw() + scale_y_reverse() + scale_x_reverse() + theme(axis.title=
element_text(size=14,face="bold"),
legend.title=element_text(size=14,face="bold"),
legend.text=element_text(size=13))
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=50",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=log(Tknn5,10),y=YGm55,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab(paste("Log(G) ",nmod[i]," predicted SNR=50 [dex]")) +
theme_bw() + scale_y_reverse() + scale_x_reverse() + theme(axis.title=
element_text(size=14,face="bold"),
legend.title=element_text(size=14,face="bold"),
legend.text=element_text(size=13))
)
}
Log(G) Modelling with rf. Plot main sequence follows.Log(G) Modelling with: RF. SNR=oo
Log(G) Modelling with: RF. SNR=10
Log(G) Modelling with: RF. SNR=50
Log(G) Modelling with gbm. Plot main sequence follows.Log(G) Modelling with: GB. SNR=oo
Log(G) Modelling with: GB. SNR=10
Log(G) Modelling with: GB. SNR=50
Log(G) Modelling with svmRadial. Plot main sequence follows.Log(G) Modelling with: SVR. SNR=oo
Log(G) Modelling with: SVR. SNR=10
Log(G) Modelling with: SVR. SNR=50
Log(G) Modelling with nnet. Plot main sequence follows.Log(G) Modelling with: NNR. SNR=oo
Log(G) Modelling with: NNR. SNR=10
Log(G) Modelling with: NNR. SNR=50
Log(G) Modelling with knn. Plot main sequence follows.Log(G) Modelling with: KNN. SNR=oo
Log(G) Modelling with: KNN. SNR=10
Log(G) Modelling with: KNN. SNR=50
Log(G) Modelling with bagEarth. Plot main sequence follows.Log(G) Modelling with: MARS. SNR=oo
Log(G) Modelling with: MARS. SNR=10
Log(G) Modelling with: MARS. SNR=50
Log(G) Modelling with kernelpls. Plot main sequence follows.Log(G) Modelling with: PLS. SNR=oo
Log(G) Modelling with: PLS. SNR=10
Log(G) Modelling with: PLS. SNR=50
Log(G) Modelling with cubist. Plot main sequence follows.Log(G) Modelling with: Rule-Regression. SNR=oo
Log(G) Modelling with: Rule-Regression. SNR=10
Log(G) Modelling with: Rule-Regression. SNR=50
#
# Function that returns Root Mean Squared Error
rmse <- function(error) {
sqrt(mean(error^2,na.rm=TRUE))
}
# Function that returns Mean Absolute Error
mae <- function(error) {
mean(abs(error),na.rm=TRUE)
}
#
rmdse <- function(error) {
sqrt(median(error^2,na.rm=TRUE))
}
# Function that returns Mean Absolute Error
made <- function(error) {
median(abs(error),na.rm=TRUE)
}
#
# Luis wants to get original cesetti as background and over the SVRoo
# i=3 # i=3 => SVR
dats=list()
for (i in 1:length(lmod)) {
dats[[i]]=list()
YGm00=predict(md5[[which(names(md5)==lmod[i])[1]]],xf0)
YGm11=predict(md51[[which(names(md51)==lmod[i])[1]]],xf1)
YGm55=predict(md55[[which(names(md55)==lmod[i])[1]]],xf5)
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,chi2d_oo=ref$chi2_oo,
ICA_10=ref$ICA_LG_10,ICA_50=ref$ICA_LG_50,ICA_oo=ref$ICA_LG_00,
LC=apply(as.data.frame(ref[,"SpT"]),1,clase_luminosidad),
SpT=apply(as.data.frame(ref[,"SpT"]),1,clase_spt),
YGm00,YGm11,YGm55,Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55)
ref_tot2m=ref_tot0m
data=ref[substr(trim(ref$SpT),1,1) %in% c('F','G','K','L','M'),]
data$ST=apply(as.matrix(data[,"SpT"]),1,clase_luminosidad)
data=data[! is.na(data$ST),]
data$col=1
ddat= data[,c("Name","T_teo","LG_teo","ST","col")]
ddt2i= ref_tot2m[,c("Name","Tknn0","YGm00","LC","chi2d_10","chi2d_50","chi2d_oo",
"ICA_10","ICA_50","ICA_oo","YGm11","YGm55")]
ddt2e=merge(ddt2i,ddat,by="Name")
colnames(ddat)=c("Name","Teff","Log_G","ST","col")
ddt2= ref_tot2m[,c("Name","Tknn0","YGm00","LC")]
ddt2$col=2
colnames(ddt2)=c("Name","Teff","Log_G","ST","col")
ddt =rbind(ddat,ddt2)
ddt$col=as.factor(ddt$col)
dats[[i]][[1]]=ddt
#
diffs=apply(ddt2e[,! colnames(ddt2e) %in% c("Name","ST","Tknn0","LC",
"LC","T_teo","col","LG_teo")],2,FUN="-",ddt2e[,"LG_teo"])
rownames(diffs)=ddt2e[,"Name"]
#
cat(paste("T Modelling with ",lmod[i],". Error analysis follows.",sep=""))
errors=data.frame(drmse = apply(diffs,2,rmse),dmae=apply(diffs,2,mae),
drmdse=apply(diffs,2,rmdse),dmade=apply(diffs,2,made))
print(xtable(errors),type="html")
dats[[i]][["errors"]]=errors
#
cat(paste("Log(G).Modelling with: ",nmod[i],". SNR=oo & T by KNN with SNR=oo",sep=""))
print(ggplot() +
geom_point(data=ddt,aes(x=log(Teff,10),y=Log_G,shape=ST,colour=col),size=3) +
xlab("Log(T) [dex]") +
ylab("Log(G) [dex]") + scale_colour_manual(name="Source",
labels=c("RojasAyala","M forecast"),
values=c('black','blue')) + theme_bw() +
scale_y_reverse() + scale_x_reverse() + theme(axis.title=
element_text(size=14,face="bold"),
legend.title=element_text(size=14,face="bold"),
legend.text=element_text(size=13)) # +
# geom_text(data=ddt,aes(label=ifelse(as.numeric(col)>1,Name,''),
# x=log(Teff,10),y=Log_G),hjust=1, vjust=0)
)
#
ddt2= ref_tot2m[,c("Name","Tknn0","YGm11","LC")]
ddt2$col=2
colnames(ddt2)=c("Name","Teff","Log_G","ST","col")
ddt =rbind(ddat,ddt2)
ddt$col=as.factor(ddt$col)
dats[[i]][[2]]=ddt
cat(paste("Log(G).Modelling with: ",nmod[i],". SNR=10 & T by KNN with SNR=oo",sep=""))
print(ggplot() +
geom_point(data=ddt, aes(x=log(Teff,10),y=Log_G,shape=ST,colour=col),size=3) +
xlab("Log(T) [dex]") +
ylab("Log(G) [dex]") + scale_colour_manual(name="Source",
labels=c("RojasAyala","M forecast"),
values=c('black','blue')) + theme_bw() +
scale_y_reverse() + scale_x_reverse() + theme(axis.title=
element_text(size=14,face="bold"),
legend.title=element_text(size=14,face="bold"),
legend.text=element_text(size=13))
)
#
ddt2= ref_tot2m[,c("Name","Tknn0","YGm55","LC")]
ddt2$col=2
colnames(ddt2)=c("Name","Teff","Log_G","ST","col")
ddt =rbind(ddat,ddt2)
ddt$col=as.factor(ddt$col)
dats[[i]][[3]]=ddt
cat(paste("Log(G).Modelling with: ",nmod[i],". SNR=50 & T by KNN with SNR=oo",sep=""))
print(ggplot() +
geom_point(data=ddt, aes(x=log(Teff,10),y=Log_G,shape=ST,colour=col),size=3) +
xlab("Log(T) [dex]") +
ylab("Log(G) [dex]") + scale_colour_manual(name="Source",
labels=c("RojasAyala","M forecast"),
values=c('black','blue')) + theme_bw() +
scale_y_reverse() + scale_x_reverse() + theme(axis.title=
element_text(size=14,face="bold"),
legend.title=element_text(size=14,face="bold"),
legend.text=element_text(size=13))
)
}
T Modelling with rf. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
1.41
|
1.13
|
0.93
|
0.93
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
1.35
|
1.08
|
0.97
|
0.97
|
|
YGm55
|
1.62
|
1.33
|
1.12
|
1.12
|
Log(G).Modelling with: RF. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: RF. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: RF. SNR=50 & T by KNN with SNR=oo

T Modelling with gbm. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
1.66
|
1.35
|
1.17
|
1.17
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
1.59
|
1.33
|
1.15
|
1.15
|
|
YGm55
|
1.69
|
1.43
|
1.37
|
1.37
|
Log(G).Modelling with: GB. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: GB. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: GB. SNR=50 & T by KNN with SNR=oo

T Modelling with svmRadial. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
2.28
|
1.88
|
1.58
|
1.58
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
1.98
|
1.72
|
1.81
|
1.81
|
|
YGm55
|
2.13
|
1.89
|
1.88
|
1.88
|
Log(G).Modelling with: SVR. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: SVR. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: SVR. SNR=50 & T by KNN with SNR=oo

T Modelling with nnet. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
3.22
|
2.87
|
2.78
|
2.78
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
2.03
|
1.78
|
1.78
|
1.78
|
|
YGm55
|
2.25
|
2.04
|
1.95
|
1.95
|
Log(G).Modelling with: NNR. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: NNR. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: NNR. SNR=50 & T by KNN with SNR=oo

T Modelling with knn. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
1.71
|
1.40
|
1.19
|
1.19
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
2.05
|
1.68
|
1.54
|
1.54
|
|
YGm55
|
2.18
|
1.83
|
1.73
|
1.73
|
Log(G).Modelling with: KNN. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: KNN. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: KNN. SNR=50 & T by KNN with SNR=oo

T Modelling with bagEarth. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
2.03
|
1.65
|
1.50
|
1.50
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
1.85
|
1.60
|
1.55
|
1.55
|
|
YGm55
|
2.03
|
1.77
|
1.73
|
1.73
|
Log(G).Modelling with: MARS. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: MARS. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: MARS. SNR=50 & T by KNN with SNR=oo

T Modelling with kernelpls. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
2.75
|
2.44
|
2.31
|
2.31
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
1.85
|
1.60
|
1.44
|
1.44
|
|
YGm55
|
2.01
|
1.79
|
1.72
|
1.72
|
Log(G).Modelling with: PLS. SNR=oo & T by KNN with SNR=oo

Log(G).Modelling with: PLS. SNR=10 & T by KNN with SNR=oo

Log(G).Modelling with: PLS. SNR=50 & T by KNN with SNR=oo

T Modelling with cubist. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
YGm00
|
3.73
|
3.33
|
3.22
|
3.22
|
|
chi2d_10
|
2.20
|
1.80
|
1.62
|
1.62
|
|
chi2d_50
|
2.19
|
1.75
|
1.49
|
1.49
|
|
chi2d_oo
|
2.24
|
1.81
|
1.56
|
1.56
|
|
ICA_10
|
2.14
|
1.80
|
1.78
|
1.78
|
|
ICA_50
|
1.82
|
1.60
|
1.71
|
1.71
|
|
ICA_oo
|
4.31
|
4.12
|
4.18
|
4.18
|
|
YGm11
|
2.01
|
1.75
|
1.76
|
1.76
|
|
YGm55
|
2.09
|
1.84
|
1.80
|
1.80
|
Log(G).Modelling with: Rule-Regression. SNR=oo & T by KNN with SNR=oo
Log(G).Modelling with: Rule-Regression. SNR=10 & T by KNN with SNR=oo
Log(G).Modelling with: Rule-Regression. SNR=50 & T by KNN with SNR=oo
save(md5,dats,lmod,nmod,xf0,xf1,xf5,file="G_values_plot.RData")
#