BT-Settl preparation for IPAC (T) based on class of features 1-Fs/Fc. Doppler compensated. V2 means to use new theoretical T brought by LSB instead of the initial ones also from LSB. ========================================================
Introduction
After the pre-processing analysis carried out for M stars within the IPAC dataset, we will proceed with a similar methodology to the one ran for IRTF.
Target T will be between 2000 K and 4200 K
Prediction for Temperature
We will use the I-Band according to GA models noised by SNR=50 and SNR=10 as well as the features proposed by Cestti et al. also noised by SNR=50 and SNR=10.
Recovering already built models
# Procesado del genético de Tª
range_elem<-function(z,l,bi){
look=function(x,l,y){
j<-which(y>= as.numeric(x[l]))[1];
k<-(which(y>as.numeric(x[(l+1)]))[1])-1;
return(c(j,k))
}
y=z$data[[1]][,1]
lm<-t(apply(bi,1,look,l,y))
rownames(lm)<-bi[,1]
colnames(lm)<-c("From","To")
return(lm)
}
int_spec<-function(x,idx,norm=0){
y<-x$data[[1]][eval(parse(text=idx)),]
xz<-diff(as.numeric(y[,1]),1)
yz<-as.numeric(y[,2])
if ( norm > 0 ){
z<-sum(xz)
} else {
z<-sum(xz*rollmean(yz,2))
}
return(z)
}
#
feature_extr<-function(sn,bp){
sig<-sn[1]
noi<-sn[2]
nof<-sn[3] # Allowing to consider 2 bands as the paper
Fcont<-unlist(lapply(bp,int_spec,noi,0))/
unlist(lapply(bp,int_spec,noi,1))
if (! is.na(nof)){ # In case of two bands => average
Fcont2=unlist(lapply(bp,int_spec,nof,0))/
unlist(lapply(bp,int_spec,nof,1))
Fcont = (Fcont + Fcont2) /2.
}
fea<-unlist(lapply(bp,int_spec,sig,1))-
unlist(lapply(bp,int_spec,sig,0))/Fcont
return(fea)
}
#
sacar=function(x,pos=2) {
if ( length(x) < pos ) {
return(NA);
}
return(x[pos]);
}
#
######################
#
# Loading the object BT-Settl
if (file.exists("~/git/M_prep_IPAC/BT-Settl-IPAC-2011-2013-Noise.RData")) {
load( file="~/git/M_prep_IPAC/BT-Settl-IPAC-2011-2013-Noise.RData")
} else {
cat("ERROR: ~/git/M_prep_IPAC/BT-Settl-IPAC-2011-2013-Noise.RData NOT FOUND !!!")
exit(2)
}
#
Now, we will read the M stars from the IPAC dataset in fits format.
Reading the IPAC dataset
#
scaling=function(x,xlim=NULL,xp=NULL) {
y=x
if (! is.null(y$factors)) {
y$data[,2]=y$data[,2]*y$factors
}
if ( is.null(xlim)){
ixp=rep(TRUE,nrow(y$data))
xlim=range(y$data[,1])
}
if ( ! is.null(xp)) {
xp = xp[(xp > xlim[1]) & (xp < xlim[2])]
xli0 = y$data[rev(which(y$data[,1] < xlim[1]))[1],1] - 0.001 # Rounding ...
xli1 = y$data[which(y$data[,1]>xlim[2])[1],1] + 0.001 # Rounding ...
ixp = (y$data[,1] >= xli0 & y$data[,1] <= xli1)
} else {
ixp = (y$data[,1]>= xlim[1] & y$data[,1] <= xlim[2])
}
z=y$data[ixp,]
jp=rep(0,nrow(z)+1)
# jp[is.finite(z[,2])]=1 # LSB has requested to interpolate the holes 30/7/2014
jp[nrow(z)+1]=1
zp=diff(jp)
ip=which(zp != 0)
ini=1
j=0
y$data=list()
for (i in ip) {
if (zp[i] != -1 ) {
j=j+1
y$data[[j]]=z[ini:i,]
} else {
ini=i+1
}
}
y$nch=j
y$factors=rep(NA,y$nch)
for (i in 1:y$nch) {
if ( is.na(y$data[[i]][nrow(y$data[[i]]),2])) {
kk = rev(which(! is.na(y$data[[i]][,2])))[1]
y$data[[i]][nrow(y$data[[i]]),2]=y$data[[i]][kk,2]
}
if ( is.na(y$data[[i]][1,2])) {
kk = which(! is.na(y$data[[i]][,2]))[1]
y$data[[i]][1,2]=y$data[[i]][kk,2]
}
if ( ! is.null(xp)) {
yp=(approx(y$data[[i]][,1],y$data[[i]][,2],xout=xp))$y
y$data[[i]]=data.frame(x=xp,y=yp)
} else {
yp=(approx(y$data[[i]][,1],y$data[[i]][,2],xout=y$data[[i]][,1]))$y
y$data[[i]]=data.frame(x=y$data[[i]][,1],y=yp)
}
y$factors[i]=sum(rollmean(y$data[[i]][,2],2)*diff(y$data[[i]][,1]))
}
ss=sum(y$factors)
for (i in 1:y$nch) {
y$data[[i]][,2]=y$data[[i]][,2]/ss
}
return(y)
}
#
pinta=function(x,cortes=NULL,borders=FALSE, boundary=FALSE, close=FALSE,
xlim=NULL, ylim=NULL,plt=NULL,width=12,height=8,...){
if (is.null(x$nch)){
nch=1
} else {
nch=x$nch
}
if ( length(xlim) < 2 ) {
xmin=min(x$data[[1]][,1])
xmax=max(x$data[[nch]][,1])
xl=c(xmin,xmax)
} else {
xl=xlim
}
if ( length(ylim) < 2 ) {
ymin=min(x$data[[1]][,2])
ymax=max(x$data[[1]][,2])
if ( nch > 1 ) {
for (i in 2:nch){
ymin=min(ymin,x$data[[i]][,2])
ymax=max(ymax,x$data[[i]][,2])
}
}
yl=c(ymin,ymax)
} else {
yl = ylim
}
if (! is.null(plt) ) {
pdf(file=plt,width,height)
}
if ( boundary) {
plot(0,xlim=xl,ylim=yl,...)
}
for (i in 1:nch) {
lines(x$data[[i]],...)
}
for (i in cortes) {
abline(v=i[1],col=2)
abline(v=i[2],col=2)
}
if ( borders) {
for (i in nch) {
abline(v=min(x$data[[i]][,1]),col=3)
abline(v=max(x$data[[i]][,1]),col=3)
}
}
if ( close ) {
dev.off()
}
}
setwd('~/git/M_prep_IPAC')
#
A number of 595 have been readed from the individual spectra dataset.
Preparation of potential features for T
#
if ( file.exists("~/git/M_prep_IPAC/Features_BT-settl_v3.RData")) {
load("~/git/M_prep_IPAC/Features_BT-settl_v3.RData")
} else {
bpy=do.call(rbind,lapply(bp_clean,function(x){return(x$data[[1]][,2])}))
bpy10=do.call(rbind,lapply(bp_10,function(x){return(x$data[[1]][,2])}))
bpy50=do.call(rbind,lapply(bp_50,function(x){return(x$data[[1]][,2])}))
YPT=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[1])}))
YPG=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[2])}))
YPM=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[3])}))
vf=as.character(bp_clean[[1]]$data[[1]][,1])
colnames(bpy)=vf
colnames(bpy10)=vf
colnames(bpy50)=vf
bfy=do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
#
area=function(v,nv) {
y=data.frame(x=as.numeric(nv),y=v)
xz = diff(as.numeric(y[,1]),1)
yz = as.numeric(y[,2])
z = sum(xz*rollmean(yz,2))
return(z)
}
#
######################################################################
#
siz=10
bandas=list()
bfndas=list()
bandas10=list()
bandas50=list()
for (i in seq(1,siz,5)) { # LSB asked for steps of 5 pixels
idx=paste("s=",i,sep="")
lvf=vf
if ( i > 1) {
lvf=vf[-c(1:(i-1))]
}
ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
st=split(lvf,ff)
for (j in 1:length(st)) {
if (length(st[[j]]) < siz)
st[[j]] = NULL
}
lst = length(st)
bandas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy)))
bfndas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bfy)))
bandas10[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy10)))
bandas50[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy50)))
for (j in 1:lst) {
bandas[[idx]]$mat[,j]=apply(bpy[,st[[j]]],1,area,st[[j]])
bandas10[[idx]]$mat[,j]=apply(bpy10[,st[[j]]],1,area,st[[j]])
bandas50[[idx]]$mat[,j]=apply(bpy50[,st[[j]]],1,area,st[[j]])
}
for (j in 1:lst) {
bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
}
}
#
save(bpy,bfy,bandas,bfndas,vf, bpy10,bpy50,st,siz,
bandas10,bandas50,YPT,YPG,YPM,
file="~/git/M_prep_IPAC/Features_BT-settl_v3.RData")
}
#
Otras features de genéticos que evolucionan construyendo las features (Población 8000 individuos, 1000 evoluciones)
#
#
require(GA)
require(doParallel)
require(plyr)
#
#
area=function(v,nv) {
y=data.frame(x=as.numeric(nv),y=v)
xz = diff(as.numeric(y[,1]),1)
yz = as.numeric(y[,2])
z = sum(xz*rollmean(yz,2))
return(z)
}
fitness = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-1000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-1000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas[[cnjts[1]]]$mat),ncol=(nrow(par))))
colnames(X)[1] ="Intercept"
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas[[cnjts[bi]]]$mat[,idxi] / (bandas[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
fitness10 = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-1000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-1000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas10[[cnjts[1]]]$mat),ncol=(nrow(par))))
colnames(X)[1] ="Intercept"
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas10[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas10[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas10[[cnjts[bi]]]$mat[,idxi] / (bandas10[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
fitness50 = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-1000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-1000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas50[[cnjts[1]]]$mat),ncol=(nrow(par))))
colnames(X)[1] ="Intercept"
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas50[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas50[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas50[[cnjts[bi]]]$mat[,idxi] / (bandas50[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
#
SelFeatures=function(x,sets=cnjts,dat=bandas,fitness.=fitness,pp=FALSE){
pos=which.max(as.numeric(unlist(ldply(x@bestSol,fitness.,.parallel=pp))))
sol=x@bestSol[[pos]]
par = decode(sol)
res = list()
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
res[[i]]=list(set1=sets[bi],pos1=idxi,set2=sets[bj],pos2=idxj,par=par[i,],
x=dat[[sets[bi]]]$mat[,idxi],
x0=dat[[sets[bj]]]$mat[,idxj],
name=dat[[sets[bi]]]$nam[idxi],
name0=dat[[sets[bj]]]$nam[idxj])
}
return(res)
}
#
FeaturesExt=function(x,num,sets=cnjts,dat=bandas,fitness.=fitness,pp=FALSE){
vals=as.numeric(unlist(ldply(x@bestSol,fitness.,.parallel=pp)))
fiveval=summary(vals)
uv = unique(sort(vals,decreasing=TRUE))
uvals = uv[1:min(num,length(uv))]
isols=rep(0,length(uvals))
for (i in 1:length(uvals)) {
isols[i] = which(vals==uvals[i])[1]
}
res=NA
if ( length(isols) > 0) {
par=decode(x@bestSol[[1]])
res = as.data.frame(matrix(NA,nrow=nrow(par),ncol=(2*length(isols))))
nam = as.data.frame(matrix(NA,nrow=nrow(par),ncol=(2*length(isols))))
for (j in 1:length(isols)) {
sol=x@bestSol[[isols[j]]]
par = decode(sol)
res[,(2*(j-1)+1):(2*j)]=par
names(res)[(2*(j-1)+1):(2*j)] = c(paste(j,":s",sep=""),paste(j,":c",sep=""))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
nam[i,(2*(j-1)+1)]=dat[[sets[bi]]]$nam[idxi]
nam[i,(2*j)]=dat[[sets[bj]]]$nam[idxj]
}
names(nam)[(2*(j-1)+1):(2*j)] = c(paste(j,":s",sep=""),paste(j,":c",sep=""))
}
}
return(list(par=res,name=nam,pos=isols,vpos=vals[isols],fivevals=fiveval))
}
#
SelFeaturesFixed=function(x,sets=cnjts,dat=bandas){
nr = nrow(x)
res = list()
for ( i in 1:nr) {
j=1
bi=0
while (j <= length(sets)) {
if (length(which(dat[[j]]$names ==x[i,1]))) {
bi = j
idxi = which(dat[[j]]$names ==x[i,1])
}
j = j +1
}
if ( bi==0) {
stop(paste("feature ",x[i,1], " does not match much !",sep=""))
}
j=1
bj=0
while (j <= length(sets)) {
if (length(which(dat[[j]]$names ==x[i,2]))) {
bj = j
idxj = which(dat[[j]]$names ==x[i,2])
}
j = j +1
}
if ( bj==0) {
stop(paste("feature ",x[i,1], " does not match much !",sep=""))
}
res[[i]]=list(set1=sets[bi],pos1=idxi,set2=sets[bj],pos2=idxj,par=NA,
x=dat[[sets[bi]]]$mat[,idxi],
x0=dat[[sets[bj]]]$mat[,idxj],
name=dat[[sets[bi]]]$nam[idxi],
name0=dat[[sets[bj]]]$nam[idxj])
}
return(res)
}
#
#
decode = function(string) {
string = gray2binary(string)
n = binary2decimal(string[1:NBITS])
c = binary2decimal(string[(NBITS + 1):(2*NBITS)])
res = data.frame(p1=n%%MAXV,p2=c%%MAXV)
ini = 2 * NBITS
if ( NG > 1) {
for (i in 2:NG) {
n = binary2decimal(string[(ini+1):(ini+NBITS)])
c = binary2decimal(string[(ini + NBITS + 1):(ini + 2*NBITS)])
res = rbind(res, c(n%%MAXV,c%%MAXV))
ini = ini + 2*NBITS
}
}
return(res)
}
#
encode = function(res,nbits=NBITS) {
if (! is.data.frame(res)) {
warning(paste("ENCODE: First parameter is not data.frame:",str(res),sep=" "))
return(NULL)
}
cad=rep(0,ncol(res)*nrow(res)*nbits)
for (i in 1:nrow(res)) {
for (j in 1:ncol(res)) {
k=((i-1)*ncol(res) + (j - 1))*nbits+1
cad[k:(k+nbits-1)]=decimal2binary(res[i,j],nbits)
}
}
return(binary2gray(cad))
}
#
#
NITER=9
if ( file.exists("~/git/M_prep_IPAC/GA_NT11F2_IPAC-v2.RData")) {
j=1
load("~/git/M_prep_IPAC/GA_NT11F2_IPAC-v2.RData")
tt50=list()
GAs50 = list()
solsT50=list()
tt10=list()
GAs10 = list()
solsT10=list()
tt=list()
GAs=list()
solsT=list()
while(j <= NITER) {
cat(paste("Reading Iteration:",j,"<br>",sep=""))
flush.console()
load(paste("~/git/M_prep_IPAC/GA_T_",sprintf("%02d",j),"_50_f2_IPAC-v2.RData",sep=""))
tt50[[j]]= t_tot
GAs50[[j]]=GA1
solsT50[[j]]=FeaturesExt(GA1,12)
load(paste("~/git/M_prep_IPAC/GA_T_",sprintf("%02d",j),"_10_f2_IPAC-v2.RData",sep=""))
tt10[[j]]= t_tot
GAs10[[j]]=GA1
solsT10[[j]]=FeaturesExt(GA1,12)
load(paste("~/git/M_prep_IPAC/GA_T_",sprintf("%02d",j),"_f2_IPAC-v2.RData",sep=""))
tt[[j]]= t_tot
GAs[[j]]=GA1
solsT[[j]]=FeaturesExt(GA1,12)
j = j+1
}
} else {
# cnjts = c("s=1","s=6","s=11","s=16","s=21","s=26")
cnjts = c("s=1","s=6")
bpy=do.call(rbind,lapply(bp_clean,function(x){return(x$data[[1]][,2])}))
YPT=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[1])}))
YPG=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[2])}))
YPM=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[3])}))
vf=as.character(bp_clean[[1]]$data[[1]][,1])
colnames(bpy)=vf
bfy=do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
#
siz=10
#
# bands* are already defined in ~/git/M_prep_IPAC/Features_BT-settl_v3-interp2.RData
#
# bandas=list()
# bfndas=list()
# bandas10=list()
# bandas50=list()
# for (i in seq(1,10,5)) { # LSB asked for steps of 5 pixels
# idx=paste("s=",i,sep="")
# lvf=vf
# if ( i > 1) {
# lvf=vf[-c(1:(i-1))]
# }
# ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
# st=split(lvf,ff)
# for (j in 1:length(st)) {
# if (length(st[[j]]) < siz)
# st[[j]] = NULL
# }
# lst = length(st)
# bandas[[idx]]=list(names= as.vector(unlist(lapply(st,
# function(x){return(paste(range(x),collapse="-"))}))),
# mat=matrix(NA,ncol=lst,nrow=nrow(bpy)))
# bfndas[[idx]]=list(names= as.vector(unlist(lapply(st,
# function(x){return(paste(range(x),collapse="-"))}))),
# mat=matrix(NA,ncol=lst,nrow=nrow(bfy)))
# bandas10[[idx]]=list(names= as.vector(unlist(lapply(st,
# function(x){return(paste(range(x),collapse="-"))}))),
# mat=matrix(NA,ncol=lst,nrow=nrow(bpy10)))
# bandas50[[idx]]=list(names= as.vector(unlist(lapply(st,
# function(x){return(paste(range(x),collapse="-"))}))),
# mat=matrix(NA,ncol=lst,nrow=nrow(bpy50)))
# for (j in 1:lst) {
# bandas[[idx]]$mat[,j]=apply(bpy[,st[[j]]],1,area,st[[j]])
# bandas10[[idx]]$mat[,j]=apply(bpy10[,st[[j]]],1,area,st[[j]])
# bandas50[[idx]]$mat[,j]=apply(bpy50[,st[[j]]],1,area,st[[j]])
# }
# for (j in 1:lst) {
# bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
# }
# }
# #
#
y=as.numeric(unlist(YPT))
cnjts = c("s=1","s=6")
NG=10 # Número de Features
NBLK = length(cnjts)
NBITS= ceiling(log(NBLK*ncol(bandas[[cnjts[1]]]$mat),2))
MAXV = ncol(bandas[[cnjts[1]]]$mat)
#
save(bpy,bpy10,bpy50,bfy,YPT,YPG,YPM,vf,siz,bandas,bfndas, bandas10,bandas50,
cnjts,st,NG,NBLK,NBITS,MAXV,y,fitness,encode,decode,
SelFeatures,file="~/git/M_prep_IPAC/GA_NT11F2_IPAC-v2.RData")
#
tt=list()
GAs = list()
solsT=list()
j=1
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt[[j]]= system.time({GA1 = ga("binary", fitness = fitness, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt[[j]]
GAs[[j]]=GA1
save(fitness,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IPAC/GA_T_",sprintf("%02d",j),"_f2_IPAC-v2.RData",sep=""))
solsT[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsT,function(x){return(x$vpos[1])})),type="html")
#
j=1
tt10=list()
GAs10 = list()
solsT10=list()
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt10[[j]]= system.time({GA1 = ga("binary", fitness = fitness10, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt10[[j]]
GAs10[[j]]=GA1
save(fitness10,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IPAC/GA_T_",sprintf("%02d",j),"_10_f2_IPAC-v2.RData",sep=""))
solsT10[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsT10,function(x){return(x$vpos[1])})),type="html")
#
j=1
tt50=list()
GAs50 = list()
solsT50=list()
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt50[[j]]= system.time({GA1 = ga("binary", fitness = fitness50, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt50[[j]]
GAs50[[j]]=GA1
save(fitness50,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IPAC/GA_T_",sprintf("%02d",j),"_50_f2_IPAC-v2.RData",sep=""))
solsT50[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsT50,function(x){return(x$vpos[1])})),type="html")
}
Reading Iteration:1
Reading Iteration:2
Reading Iteration:3
Reading Iteration:4
Reading Iteration:5
Reading Iteration:6
Reading Iteration:7
Reading Iteration:8
Reading Iteration:9
#
load("~/git/M_prep_IPAC/Features_BT-settl_v3.RData")
#
# print(xtable(ldply(solsT,function(x){return(x$vpos[1])})),type="html")
# ISOL holds the best solution !
# isol = which.max(as.numeric(ldply(solsT,function(x){return(x$vpos[1])})[,1]))
# isol
# print(xtable(ldply(solsT10,function(x){return(x$vpos[1])})),type="html")
# ISOL holds the best solution !
# isol10 = which.max(as.numeric(ldply(solsT10,function(x){return(x$vpos[1])})[,1]))
# isol10
#
# print(xtable(ldply(solsT50,function(x){return(x$vpos[1])})),type="html")
# ISOL holds the best solution !
# isol50 = which.max(as.numeric(ldply(solsT50,function(x){return(x$vpos[1])})[,1]))
# isol50
#
ICA analysis
SNR=10
# We prepare the coefs
if ( file.exists("~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp10.RData")) {
load("~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp10.RData")
} else {
dd2=as.data.frame(do.call(rbind,lapply(bp_10,function(x){return(x$data[[1]][,2])})))
jd2=JADE(dd2,10)
ss2=as.data.frame(t(jd2$W %*% t(dd2-jd2$Xmu)))
ss2[,(ncol(ss2)+1)]=unlist(lapply(bp_10,function(x){return(x$stellarp[1])}))
colnames(ss2)[ncol(ss2)]="T"
#
folds=5
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss2[,ncol(ss2)],
k=folds, times=repeats))
model2=train(ss2[,-ncol(ss2)], ss2[,ncol(ss2)], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=2:(ncol(ss2)-1)))
d2=as.data.frame(do.call(rbind,lapply(bf_clean,
function(x){return(ldply(x$data,
function(x){return(x)})[,2])})))
si2=as.data.frame(t(jd2$W %*% t(d2-jd2$Xmu)))
G_coef2=predict(model2,si2)
save(G_coef2,si2,d2,model2,myControl,folds,repeats,ss2,jd2,dd2,
file="~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp10.RData")
}
#
SNR=50
# We prepare the coefs
if ( file.exists("~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp50.RData")) {
load("~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp50.RData")
} else {
dd=as.data.frame(do.call(rbind,lapply(bp_50,function(x){return(x$data[[1]][,2])})))
jd=JADE(dd,10)
ss=as.data.frame(t(jd$W %*% t(dd-jd$Xmu)))
ss[,(ncol(ss)+1)]=unlist(lapply(bp_50,function(x){return(x$stellarp[1])}))
colnames(ss)[ncol(ss)]="T"
#
folds=5
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss[,ncol(ss)],
k=folds, times=repeats))
model=train(ss[,-ncol(ss)], ss[,ncol(ss)], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=3:(ncol(ss)-1)))
#
d1=as.data.frame(do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])})))
si=as.data.frame(t(jd$W %*% t(d1-jd$Xmu)))
G_coef=predict(model,si)
save(G_coef,si,dd,model,myControl,folds,repeats,ss,jd,dd,
file="~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp50.RData")
}
#
SNR=Infinity
# We prepare the coefs
if ( file.exists("~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp00.RData")) {
load("~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp00.RData")
} else {
dd0=as.data.frame(do.call(rbind,lapply(bp_clean,function(x){return(x$data[[1]][,2])})))
jd0=JADE(dd0,10)
ss0=as.data.frame(t(jd0$W %*% t(dd0-jd0$Xmu)))
ss0[,(ncol(ss0)+1)]=unlist(lapply(bp_clean,function(x){return(x$stellarp[1])}))
colnames(ss0)[ncol(ss0)]="T"
#
folds=5
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss[,ncol(ss)],
k=folds, times=repeats))
model=train(ss0[,-ncol(ss0)], ss0[,ncol(ss0)], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=3:(ncol(ss0)-1)))
#
d10=as.data.frame(do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])})))
si0=as.data.frame(t(jd0$W %*% t(d10-jd0$Xmu)))
G_coef0=predict(model,si0)
save(G_coef0,si0,dd0,model,myControl,folds,repeats,ss0,jd0,dd0,
file="~/git/M_prep_IPAC/BT-Settl-2013-PPR_bp00.RData")
}
#
T foreseen by Luminosity models
#
minchid=function(x,lib){
chid=function(x,y){
if ( nrow(x$data[[1]]) == nrow(y$data[[1]])) {
return(sum((x$data[[1]][,2]-y$data[[1]][,2])^2)/nrow(x$data[[1]]))
} else {
return (5000) # Enormous distance as it is not valid
}
}
ldist = ldply(lib,chid,x)
idx = which.min(ldist[,1])[1]
res_50 = data.frame(T=lib[[idx]]$stellarp[1],logg=lib[[idx]]$stellarp[2],
met=lib[[idx]]$stellarp[3],name=lib[[idx]]$name,
dist=ldist[idx,1])
return(res_50)
}
# Loading IPAC prediction from Dr Sarro (objeto piac_temp)
load("~/git/M_prep_Noise/Teffs.RData")
load("~/git/M_prep_IPAC/Teffs_150722.RData")
ipac_temp[,7]=teff2 # CHANGE THE ESTIMATED T FROM THE INITIAL VALUES (req Prof LSB)
ipac_temp[,1]=as.character(ipac_temp[,1])
ipac_temp[,2]=as.character(ipac_temp[,2])
ipac_temp[,3]=as.character(ipac_temp[,3])
ipac_temp[,4]=as.character(ipac_temp[,4])
ipac_temp[,5]=as.character(ipac_temp[,5])
ipac_temp[,6]=as.character(ipac_temp[,6])
ipac_temp[ipac_temp=='\xa0']=""
#
lnm<-unlist(lapply(bf_clean,function(x){return(x$name)}))
buscar=function(x,y,l=4){
for ( i in x[1:l]) {
if (nchar(i) > 1) {
stg = gsub('+','p',gsub(' ','_',i),fixed=TRUE)
stg = gsub('\\(','\\\\(',gsub('\\)','\\\\)',stg))
if (nchar(stg) > 0) {
pos=grep(stg,y)
if ( length(pos) == 1 ) {
return (pos)
}
}
}
}
return(-1)
}
#
idx=apply(ipac_temp[,-c(5,7)],1,buscar,lnm,6)
Teff_scs = rep(NA,max(idx))
Tipo_scs = rep(NA,max(idx))
idpos=data.frame(LSB=1:length(idx),BPG=idx)
for ( i in 1:nrow(idpos)) {
if (idpos[i,2]>0) {
Teff_scs[idpos[i,2]]= ipac_temp[idpos[i,1],7]
Tipo_scs[idpos[i,2]]= ipac_temp[idpos[i,1],5]
}
}
#
# Chi2 min distance
chi2d_oo=ldply(bf_clean,minchid,bp_clean)
chi2d_50=ldply(bf_clean,minchid,bp_50)
chi2d_10=ldply(bf_clean,minchid,bp_10)
ref=data.frame(Name=lnm,Chi2_T_10=chi2d_10[,2],Chi2_T_50=chi2d_50[,2],Chi2_T_oo=chi2d_oo[,2],
ICA_T_10=G_coef2,ICA_T_50=G_coef,ICA_T_oo=G_coef0)
Modelado de T con esas Features
#
clase_luminosidad = function(x) {
# we look over Spectral SubClass
# for roman numbers V=>dwarft; III => giants I => Supergiants (II => I)
cl=c("V","III","I")
i=1
res=NA
if ( ! is.na(x) ) {
while(i <= length(cl) & is.na(res)) {
j=regexpr(cl[i],x)
if (j[1] > 0) {
res=cl[i]
} else {
i=i+1
}
}
}
return(res)
}
clase_m = function(x) {
# we look over Spectral SubClass
# for roman numbers V=>dwarft; III => giants I => Supergiants (II => I)
# cl=c("M0.5","M0","M1.5","M1","M2.5","M2","M3.5","M3","M4.5","M4","M5.5","M5","M6.5","M6",
# "M7.5","M7","M8.5","M8","M9.5","M9")
cl=c("M0","M1","M2","M3","M4","M5","M6","M7","M8","M9")
i=1
res=NA
if ( ! is.na(x) ) {
while(i <= length(cl) & is.na(res)) {
j=regexpr(cl[i],x)
if (j[1] > 0) {
res=cl[i]
} else {
i=i+1
}
}
}
return(res)
}
#
#
prep_datos=function(res){
X=matrix(NA,nrow=length(res[[1]]$x),ncol=length(res))
for (j in 1:length(res)) {
X[,j]=res[[j]]$x/res[[j]]$x0
}
colnames(X)=paste("C",1:length(res),sep="")
return(X)
}
#
modelado=function(X,Y) {
#
trn=1:nrow(X)
folds=5
repeats=1
#
mseeds <- vector(mode = "list", length = (folds+1))
for(i in 1:folds) mseeds[[i]] <- sample.int(nrow(X), 100)
mseeds[[(folds+1)]] <- sample.int(1000, 1)
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,seeds=mseeds)
#Train some models
model_1.1 = "train(X[trn,], Y[trn], method='gbm',
trControl=myControl,
tuneGrid=expand.grid(n.trees=seq(200,500,100), n.minobsinnode=c(2,5),
interaction.depth=seq(3,15,2),
shrinkage = seq(0.01,0.1,0.02)),verbose=FALSE)"
model_1.2 = "train(X[trn,], Y[trn], method='blackboost',
trControl=myControl, tuneGrid=expand.grid(.mstop=10^(2:4),
.maxdepth=seq(5,15,3)))"
model_1.3 = "train(X[trn,], Y[trn], method='rf',
trControl=myControl,
tuneGrid=expand.grid(.mtry=(2:ncol(X))),
proximity=TRUE)"
model_1.4 = "train(X[trn,], Y[trn], method='knn',
trControl=myControl, trace=FALSE,
tuneGrid=expand.grid(k=(2:min(9,ncol(X)))))"
model_1.5 = "train(X[trn,], Y[trn], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=10))"
model_1.6 = "train(X[trn,], Y[trn], method='earth',
trControl=myControl,tuneGrid=expand.grid(.degree = 3,
.nprune = (1:10) * 2), metric = 'RMSE',
maximize = FALSE)"
model_1.7 = "train(X[trn,], Y[trn], method='glm',
trControl=myControl)"
model_1.8 = "train(X[trn,], Y[trn], method='svmRadial',
trControl=myControl,tuneGrid=expand.grid(.C=10^(-3:3),
.sigma=c(10^(-4:3))))"
model_1.9 = "train(X[trn,], Y[trn], method='cubist',
trControl=myControl,commiittees=10)"
model_1.10 = "train(X[trn,], Y[trn], method='kernelpls',
trControl=myControl,tuneGrid=expand.grid(.ncomp=seq(2,ncol(X))))"
model_1.11 = "train(X[trn,], Y[trn], method='nnet',
trControl=myControl,tuneGrid=expand.grid(.size=seq(2,round(2*ncol(X)),3),
.decay=c(1.e-5,1.e-2)),
linout=TRUE, rang = 300, trace=FALSE,
maxit=10000, reltol=1.0e-11, abstol=1.0e-6)"
model_1.12 = "train(X[trn,], Y[trn], method='bagEarth',
trControl=myControl,tuneGrid=expand.grid(.nprune=(1:10)*3,.degree=1:5))"
model_1.13 = "train(X[trn,], Y[trn], method='cubist', trControl=myControl,
tuneGrid=expand.grid(.committees=3:50, .neighbors=1:7))"
#
#Make a list of all the models
lmodels=c("model_1.1", "model_1.2", "model_1.3", "model_1.4",
"model_1.5", "model_1.6", "model_1.7", "model_1.8",
"model_1.9", "model_1.10","model_1.11","model_1.12",
"model_1.13")
#
all.models_5 =list()
for (i in lmodels) {
all.models_5[[length(all.models_5)+1]] = eval(parse(text=gsub('\n','',get(i))))
}
names(all.models_5) = sapply(all.models_5, function(x) x$method)
#
ensam_5 <- caretList(X[trn,],Y[trn],methodList=c('svmRadial',
'gbm','blackboost','rf','earth','bagEarth'))
ens <- caretEnsemble(ensam_5)
#Make a linear regression ensemble
if (exists("ens")){
all.models_5[[length(all.models_5)+1]] = get("ens")
names(all.models_5)[length(all.models_5)]="ENS"
}
return(all.models_5)
}
#
# Function that returns Root Mean Squared Error
rmse <- function(error) {
sqrt(mean(error^2,na.rm=TRUE))
}
# Function that returns Mean Absolute Error
mae <- function(error) {
mean(abs(error),na.rm=TRUE)
}
#
rmdse <- function(error) {
sqrt(median(error^2,na.rm=TRUE))
}
# Function that returns Mean Absolute Error
made <- function(error) {
median(abs(error),na.rm=TRUE)
}
#
#
if ( file.exists("~/git/M_prep_IPAC/GA_IPACrv_MT_TF2-interp2_Models.RData")) {
load("~/git/M_prep_IPAC/GA_IPACrv_MT_TF2-interp2_Models.RData")
} else {
y=YPT[,1]
#res00=SelFeatures(GAs[[isol]],cnjts,bandas)
#res10=SelFeatures(GAs10[[isol10]],cnjts,bandas10)
#res50=SelFeatures(GAs50[[isol50]],cnjts,bandas50)
#
# Cargamos la lista de features que queremos probar.
# Se supone que de una generación que no correponde con la proporcionada por
# la generación existente de genéticos GA_2013_?_0x_??_f2_data.RData
#
org = function(lst) {
lsp = strsplit(gsub("[\t, ]","",lst),"&")
c1 = paste(lsp[[1]][1],"-",lsp[[1]][2],sep="")
c2 = paste(lsp[[1]][3],"-",lsp[[1]][4],sep="")
return(c(c1,c2))
}
lfea00=c(
"7062 & 7094.4 & 7314 & 7346.4",
"7116 & 7148.4 & 7782 & 7814.4",
"7134 & 7166.4 & 7872 & 7904.4",
"6900 & 6932.4 & 7764 & 7796.4",
"7170 & 7202.4 & 7890 & 7922.4",
"7080 & 7112.4 & 7926 & 7958.4",
"7188 & 7220.4 & 7548 & 7580.4",
"7800 & 7832.4 & 7962 & 7994.4",
"6990 & 7022.4 & 7008 & 7040.4",
"7026 & 7058.4 & 6990 & 7022.4" )
mfea00=t(apply(as.data.frame(lfea00),1,org))
#
lfea10=c(
"7692 & 7724.4 & 6936 & 6968.4",
"6990 & 7022.4 & 7998 & 8030.4",
"6900 & 6932.4 & 7548 & 7580.4",
"7854 & 7886.4 & 7710 & 7742.4",
"7116 & 7148.4 & 7908 & 7940.4",
"7278 & 7310.4 & 7926 & 7958.4",
"7152 & 7184.4 & 7746 & 7778.4",
"7134 & 7166.4 & 7764 & 7796.4",
"6918 & 6950.4 & 6900 & 6932.4",
"7224 & 7256.4 & 7962 & 7994.4")
mfea10=t(apply(as.data.frame(lfea10),1,org))
#
lfea50=c(
"7062 & 7094.4 & 7296 & 7328.4",
"7026 & 7058.4 & 7044 & 7076.4",
"7080 & 7112.4 & 7926 & 7958.4",
"6900 & 6932.4 & 7548 & 7580.4",
"7134 & 7166.4 & 7836 & 7868.4",
"7296 & 7328.4 & 7962 & 7994.4",
"6936 & 6968.4 & 7728 & 7760.4",
"6972 & 7004.4 & 6900 & 6932.4",
"6990 & 7022.4 & 7944 & 7976.4",
"6918 & 6950.4 & 7782 & 7814.4")
mfea50=t(apply(as.data.frame(lfea50),1,org))
#
y=YPT[,1]
res00=SelFeaturesFixed(mfea00,cnjts,bandas)
res10=SelFeaturesFixed(mfea10,cnjts,bandas10)
res50=SelFeaturesFixed(mfea50,cnjts,bandas50)
#
#
X00 = prep_datos(res00)
md5 = modelado(X00,as.numeric(YPT[,1]))
X10 = prep_datos(res10)
md51= modelado(X10,as.numeric(YPT[,1]))
X50 = prep_datos(res50)
md55= modelado(X50,as.numeric(YPT[,1]))
#
# Consideramos ahora los espectros IPAC corregidos por doppler (A Bello 16/7/2015)
load("../M_redshift/M_prep_clean_IPAC_corrected.RData")
bfy=do.call(rbind,lapply(bq_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
for (i in seq(1,siz,5)) { # LSB asked for steps of 5 pixels
idx=paste("s=",i,sep="")
lvf=vf
if ( i > 1) {
lvf=vf[-c(1:(i-1))]
}
ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
st=split(lvf,ff)
for (j in 1:length(st)) {
if (length(st[[j]]) < siz)
st[[j]] = NULL
}
lst = length(st)
for (j in 1:lst) {
bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
}
}
# Continuamos ahrora
xf0 = prep_datos(SelFeaturesFixed(mfea00,cnjts,bfndas))
xf1 = prep_datos(SelFeaturesFixed(mfea10,cnjts,bfndas))
xf5 = prep_datos(SelFeaturesFixed(mfea50,cnjts,bfndas))
#
YTn00=predict(md5[[which(names(md5)=="rf")]],xf0)
YTn01=predict(md51[[which(names(md51)=="rf")]],xf0)
YTn05=predict(md55[[which(names(md55)=="rf")]],xf0)
YTn10=predict(md5[[which(names(md5)=="rf")]],xf1)
YTn11=predict(md51[[which(names(md51)=="rf")]],xf1)
YTn15=predict(md55[[which(names(md55)=="rf")]],xf1)
YTn50=predict(md5[[which(names(md5)=="rf")]],xf5)
YTn51=predict(md51[[which(names(md51)=="rf")]],xf5)
YTn55=predict(md55[[which(names(md55)=="rf")]],xf5)
#
Clase_lum=apply(as.data.frame(as.character(Tipo_scs),
stringsAsFactors=FALSE),1,clase_luminosidad)
Clase_m =apply(as.data.frame(as.character(Tipo_scs),
stringsAsFactors=FALSE),1,clase_m)
SpT=Clase_m
LC =Clase_lum
ref_tot0=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$Chi2_T_10, chi2d_50=ref$Chi2_T_50,
ICA_10 = ref$ICA_T_10,ICA_50 = ref$ICA_T_50, ICA_oo=ref$ICA_T_oo,
YTn00,YTn01,YTn05,YTn10,YTn11,YTn15,YTn50,
YTn51,YTn55,Teff_LSB=Teff_scs,SpT=SpT,LC=LC)
#
save(bpy,bfy,YPT,YPG,YPM,vf,siz,bandas,bfndas, bandas10,
bandas50,cnjts, NG,NBLK,NBITS,MAXV,md5,md51,
md55,res00,res10,res50, X00,X10,X50,YTn00,YTn01,
YTn05,YTn10,YTn11,YTn15,YTn50,YTn51,YTn55,xf0,xf1,xf5,
ref_tot0,LC,SpT,
file="~/git/M_prep_IPAC/GA_IPACrv_MT_TF2-interp2_Models.RData")
}
#
YTpknn00=predict(md5[[which(names(md5)=="knn")]],xf0)
YTpknn11=predict(md51[[which(names(md51)=="knn")]],xf1)
YTpknn55=predict(md55[[which(names(md55)=="knn")]],xf5)
save(YTpknn00,YTpknn11,YTpknn55,xf0,xf1,xf5,
file="~/git/M_prep_IPAC/GA_predict_Trv_11F2_newT.RData")
#
y=YPT[,1]
ref_tot0=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$Chi2_T_10, chi2d_50=ref$Chi2_T_50,
ICA_10 = ref$ICA_T_10,ICA_50 = ref$ICA_T_50, ICA_oo=ref$ICA_T_oo,
YTn00,YTn01,YTn05,YTn10,YTn11,YTn15,YTn50,
YTn51,YTn55,Teff_LSB=Teff_scs,SpT=SpT,LC=LC)
#
ref_tot2=ref_tot0
#
print(xtable(ldply(res00,function(x){return(c(x$name,x$name0))})),type="html")
|
|
V1
|
V2
|
|
1
|
7062-7094.4
|
7314-7346.4
|
|
2
|
7116-7148.4
|
7782-7814.4
|
|
3
|
7134-7166.4
|
7872-7904.4
|
|
4
|
6900-6932.4
|
7764-7796.4
|
|
5
|
7170-7202.4
|
7890-7922.4
|
|
6
|
7080-7112.4
|
7926-7958.4
|
|
7
|
7188-7220.4
|
7548-7580.4
|
|
8
|
7800-7832.4
|
7962-7994.4
|
|
9
|
6990-7022.4
|
7008-7040.4
|
|
10
|
7026-7058.4
|
6990-7022.4
|
print(xtable(ldply(res10,function(x){return(c(x$name,x$name0))})),type="html")
|
|
V1
|
V2
|
|
1
|
7692-7724.4
|
6936-6968.4
|
|
2
|
6990-7022.4
|
7998-8030.4
|
|
3
|
6900-6932.4
|
7548-7580.4
|
|
4
|
7854-7886.4
|
7710-7742.4
|
|
5
|
7116-7148.4
|
7908-7940.4
|
|
6
|
7278-7310.4
|
7926-7958.4
|
|
7
|
7152-7184.4
|
7746-7778.4
|
|
8
|
7134-7166.4
|
7764-7796.4
|
|
9
|
6918-6950.4
|
6900-6932.4
|
|
10
|
7224-7256.4
|
7962-7994.4
|
print(xtable(ldply(res50,function(x){return(c(x$name,x$name0))})),type="html")
|
|
V1
|
V2
|
|
1
|
7062-7094.4
|
7296-7328.4
|
|
2
|
7026-7058.4
|
7044-7076.4
|
|
3
|
7080-7112.4
|
7926-7958.4
|
|
4
|
6900-6932.4
|
7548-7580.4
|
|
5
|
7134-7166.4
|
7836-7868.4
|
|
6
|
7296-7328.4
|
7962-7994.4
|
|
7
|
6936-6968.4
|
7728-7760.4
|
|
8
|
6972-7004.4
|
6900-6932.4
|
|
9
|
6990-7022.4
|
7944-7976.4
|
|
10
|
6918-6950.4
|
7782-7814.4
|
#
ggplot(data=ref_tot2) +
geom_point(aes(x=Teff_LSB,y=chi2d_10,shape=LC),size=3) +
xlab("Theoretical T [K]") + ylab("Chi2 10 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=Teff_LSB,y=chi2d_50,shape=LC),size=3) +
xlab("Theoretical T [K]") + ylab("Chi2 50 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=Teff_LSB,y=YTn00,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab("ML predicted Snr=oo [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=Teff_LSB,y=YTn11,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab("ML predicted Msnr=10 Fsnr=10 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=Teff_LSB,y=YTn55,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab("ML predicted Msnr=50 Fsnr=50 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
lmod=c("rf","gbm","svmRadial","nnet","knn","bagEarth","kernelpls","cubist")
nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
dataT = array(0,c(3,length(nmod),nrow(xf0))) # Dim 1 SNR, dim2 Star and Dim3 Model
nomT = unlist(lapply(bf_clean,function(x){nm=gsub('.fits','',x$name);pp=strsplit(nm,'_');
return(pp[[1]][2])}))
for(ll in c(1:2)) {
if (ll == 2) {
pdf(file="~/git/M_prep_IPAC/GA_model_Trv-interp2_newT.pdf",width=12,heigh=10)
}
for (i in 1:length(lmod)) {
YTm00=predict(md5[[which(names(md5)==lmod[i])[1]]],xf0)
YTm11=predict(md51[[which(names(md51)==lmod[i])[1]]],xf1)
YTm55=predict(md55[[which(names(md55)==lmod[i])[1]]],xf5)
dataT[1,i,] = YTm00
dataT[2,i,] = YTm11
dataT[3,i,] = YTm55
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$Chi2_T_10, chi2d_50=ref$Chi2_T_50,chi2d_oo=ref$Chi2_T_oo,
ICA_10 = ref$ICA_T_10,ICA_50 = ref$ICA_T_50, ICA_oo=ref$ICA_T_oo,
YTm00,YTm11,YTm55,Teff_LSB=Teff_scs,SpT=SpT,LC=LC)
ref_tot2m=ref_tot0m
diffs=apply(ref_tot2m[,! colnames(ref_tot2m) %in% c("Name","SpT","Teff_LSB","LC")],
2,FUN="-",ref_tot2m[,"Teff_LSB"])
rownames(diffs)=ref_tot2m[,"Name"]
#
cat(paste("T Modelling with ",lmod[i],". Error analysis follows.",sep=""))
errors=data.frame(drmse = apply(diffs,2,rmse),dmae=apply(diffs,2,mae),
drmdse=apply(diffs,2,rmdse),dmade=apply(diffs,2,made))
print(xtable(errors),type="html")
#
cat(paste("T Modelling with",nmod[i],". SNR=oo",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=Teff_LSB,y=YTm00,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab(paste(nmod[i]," predicted SNR=oo [K]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2)))
#
cat(paste("T Modelling with",nmod[i],". SNR=10",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=Teff_LSB,y=YTm11,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab(paste(nmod[i]," predicted SNR=10 [K]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2)))
#
cat(paste("T Modelling with",nmod[i],". SNR=50",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=Teff_LSB,y=YTm55,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab(paste(nmod[i]," predicted SNR=50 [K]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2)))
}
}
GA_r3 Loading required package: randomForest
GA_r3 randomForest 4.6-12
GA_r3 Type rfNews() to see new features/changes/bug fixes.
T Modelling with rf. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
145.18
|
113.42
|
94.12
|
94.12
|
|
YTm11
|
160.04
|
121.10
|
97.20
|
97.19
|
|
YTm55
|
195.72
|
137.85
|
103.00
|
103.00
|
T Modelling withRF. SNR=oo
T Modelling withRF. SNR=10
T Modelling withRF. SNR=50
GA_r3 Loading required package: gbm
GA_r3 Loading required package: survival
GA_r3
GA_r3 Attaching package: 'survival'
GA_r3
GA_r3 The following object is masked _by_ '.GlobalEnv':
GA_r3
GA_r3 colon
GA_r3
GA_r3 The following object is masked from 'package:caret':
GA_r3
GA_r3 cluster
GA_r3
GA_r3 Loading required package: splines
GA_r3 Loaded gbm 2.1.1

T Modelling with gbm. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
184.56
|
130.09
|
94.02
|
94.02
|
|
YTm11
|
175.35
|
130.35
|
104.55
|
104.55
|
|
YTm55
|
225.43
|
148.33
|
99.44
|
99.44
|
T Modelling withGB. SNR=oo

T Modelling withGB. SNR=10

T Modelling withGB. SNR=50

T Modelling with svmRadial. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
367.65
|
242.91
|
153.64
|
153.63
|
|
YTm11
|
203.11
|
143.48
|
112.06
|
112.06
|
|
YTm55
|
284.93
|
181.67
|
106.29
|
106.29
|
T Modelling withSVR. SNR=oo

T Modelling withSVR. SNR=10

T Modelling withSVR. SNR=50

T Modelling with nnet. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
394.81
|
280.14
|
202.17
|
202.17
|
|
YTm11
|
220.97
|
133.78
|
83.91
|
83.91
|
|
YTm55
|
313.43
|
194.54
|
111.41
|
111.41
|
T Modelling withNNR. SNR=oo

T Modelling withNNR. SNR=10

T Modelling withNNR. SNR=50

T Modelling with knn. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
223.81
|
156.17
|
109.66
|
109.66
|
|
YTm11
|
183.20
|
139.63
|
118.86
|
118.76
|
|
YTm55
|
192.93
|
142.26
|
109.04
|
109.04
|
T Modelling withKNN. SNR=oo
T Modelling withKNN. SNR=10
T Modelling withKNN. SNR=50
GA_r3 Loading required package: earth
GA_r3 Loading required package: plotmo
GA_r3 Loading required package: plotrix
GA_r3 Loading required package: TeachingDemos

T Modelling with bagEarth. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
373.72
|
244.34
|
156.74
|
156.73
|
|
YTm11
|
222.45
|
124.09
|
76.11
|
76.11
|
|
YTm55
|
360.74
|
196.89
|
102.74
|
102.74
|
T Modelling withMARS. SNR=oo

T Modelling withMARS. SNR=10

T Modelling withMARS. SNR=50

T Modelling with kernelpls. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
408.55
|
278.35
|
207.89
|
207.89
|
|
YTm11
|
227.00
|
125.72
|
72.27
|
72.27
|
|
YTm55
|
331.27
|
206.84
|
122.78
|
122.78
|
T Modelling withPLS. SNR=oo

T Modelling withPLS. SNR=10

T Modelling withPLS. SNR=50

T Modelling with cubist. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
378.43
|
292.12
|
238.49
|
238.49
|
|
YTm11
|
188.46
|
132.78
|
101.72
|
101.72
|
|
YTm55
|
257.15
|
160.42
|
94.45
|
94.45
|
T Modelling withRule-Regression. SNR=oo

T Modelling withRule-Regression. SNR=10

T Modelling withRule-Regression. SNR=50T Modelling with rf. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
145.18
|
113.42
|
94.12
|
94.12
|
|
YTm11
|
160.04
|
121.10
|
97.20
|
97.19
|
|
YTm55
|
195.72
|
137.85
|
103.00
|
103.00
|
T Modelling withRF. SNR=ooT Modelling withRF. SNR=10T Modelling withRF. SNR=50T Modelling with gbm. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
184.56
|
130.09
|
94.02
|
94.02
|
|
YTm11
|
175.35
|
130.35
|
104.55
|
104.55
|
|
YTm55
|
225.43
|
148.33
|
99.44
|
99.44
|
T Modelling withGB. SNR=ooT Modelling withGB. SNR=10T Modelling withGB. SNR=50T Modelling with svmRadial. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
367.65
|
242.91
|
153.64
|
153.63
|
|
YTm11
|
203.11
|
143.48
|
112.06
|
112.06
|
|
YTm55
|
284.93
|
181.67
|
106.29
|
106.29
|
T Modelling withSVR. SNR=ooT Modelling withSVR. SNR=10T Modelling withSVR. SNR=50T Modelling with nnet. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
394.81
|
280.14
|
202.17
|
202.17
|
|
YTm11
|
220.97
|
133.78
|
83.91
|
83.91
|
|
YTm55
|
313.43
|
194.54
|
111.41
|
111.41
|
T Modelling withNNR. SNR=ooT Modelling withNNR. SNR=10T Modelling withNNR. SNR=50T Modelling with knn. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
223.81
|
156.17
|
109.66
|
109.66
|
|
YTm11
|
183.20
|
139.63
|
118.86
|
118.76
|
|
YTm55
|
192.93
|
142.26
|
109.04
|
109.04
|
T Modelling withKNN. SNR=ooT Modelling withKNN. SNR=10T Modelling withKNN. SNR=50T Modelling with bagEarth. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
373.72
|
244.34
|
156.74
|
156.73
|
|
YTm11
|
222.45
|
124.09
|
76.11
|
76.11
|
|
YTm55
|
360.74
|
196.89
|
102.74
|
102.74
|
T Modelling withMARS. SNR=ooT Modelling withMARS. SNR=10T Modelling withMARS. SNR=50T Modelling with kernelpls. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
408.55
|
278.35
|
207.89
|
207.89
|
|
YTm11
|
227.00
|
125.72
|
72.27
|
72.27
|
|
YTm55
|
331.27
|
206.84
|
122.78
|
122.78
|
T Modelling withPLS. SNR=ooT Modelling withPLS. SNR=10T Modelling withPLS. SNR=50T Modelling with cubist. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
378.43
|
292.12
|
238.49
|
238.49
|
|
YTm11
|
188.46
|
132.78
|
101.72
|
101.72
|
|
YTm55
|
257.15
|
160.42
|
94.45
|
94.45
|
T Modelling withRule-Regression. SNR=ooT Modelling withRule-Regression. SNR=10T Modelling withRule-Regression. SNR=50
dev.off()
png 2
save(nomT,LC,SpT,dataT,file="Trv_GA_models_IPAC-interp2_newT.RData")
#
Comparation about Cesseti features
En lugar de usar Genéticos para encontrar las mejores features, ahora aplicaremos las features de Cesseti et al.
#
fea_x =t(matrix(c(8461,8474,8484,8513,8522,8562,8577,8619,8642,8682,8730,8772,
8802,8811,8850,8890,9000,9030,9080,9100),ncol=10))
fea_x0=t(matrix(c(8474,8484,8474,8484,8474,8484,8563,8577,8619,8642,8700,8725,
8776,8792,8815,8850,8983,8998,9040,9050),ncol=10))
fea_ix=fea_ix0=fea_x
for (i in (1:nrow(fea_x))) {
for (j in (1:ncol(fea_x))) {
fea_ix[i,j] =which.min(abs(as.numeric(vf)-fea_x[i,j]))
fea_ix0[i,j]=which.min(abs(as.numeric(vf)-fea_x0[i,j]))
}
}
#
res00=res10=res50=list()
for (j in 1:nrow(fea_ix)) {
x= apply(bpy[,((fea_ix[j,1]):(fea_ix[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix[j,],collapse=":")))])
x0=apply(bpy[,((fea_ix0[j,1]):(fea_ix0[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix0[j,],collapse=":")))])
name=paste(vf[fea_ix[j,]],collapse="-")
name0=paste(vf[fea_ix0[j,]],collapse="-")
res00[[j]]=list(x=x,x0=x0,name=name,name0=name0)
x= apply(bpy10[,((fea_ix[j,1]):(fea_ix[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix[j,],collapse=":")))])
x0=apply(bpy10[,((fea_ix0[j,1]):(fea_ix0[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix0[j,],collapse=":")))])
res10[[j]]=list(x=x,x0=x0,name=name,name0=name0)
x= apply(bpy50[,((fea_ix[j,1]):(fea_ix[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix[j,],collapse=":")))])
x0=apply(bpy50[,((fea_ix0[j,1]):(fea_ix0[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix0[j,],collapse=":")))])
res50[[j]]=list(x=x,x0=x0,name=name,name0=name0)
}
rm(x,x0,name,name0)
#
X00 = prep_datos(res00)
md50c = modelado(X00,as.numeric(YPT[,1]))
## Loading required package: party
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
##
## Attaching package: 'modeltools'
##
## The following object is masked from 'package:kernlab':
##
## prior
##
## The following object is masked from 'package:plyr':
##
## empty
##
## Loading required package: strucchange
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
##
## The following object is masked from 'package:stringr':
##
## boundary
##
## Loading required package: mboost
## Loading required package: stabs
##
## Attaching package: 'stabs'
##
## The following object is masked from 'package:modeltools':
##
## parameters
##
## This is mboost 2.5-0. See 'package?mboost' and 'news(package = "mboost")'
## for a complete list of changes.
##
##
## Attaching package: 'mboost'
##
## The following object is masked from 'package:ggplot2':
##
## %+%
Iter TrainDeviance ValidDeviance StepSize Improve 1 266636.6652 -nan 0.1000 48822.3173 2 227382.7631 -nan 0.1000 40599.9819 3 192812.3156 -nan 0.1000 33868.6449 4 165245.7054 -nan 0.1000 26505.1536 5 141505.2105 -nan 0.1000 23079.2634 6 122639.6905 -nan 0.1000 19638.5643 7 106429.5706 -nan 0.1000 15388.1168 8 93347.8620 -nan 0.1000 12463.6261 9 81818.3732 -nan 0.1000 10963.9506 10 72469.2281 -nan 0.1000 8789.1220 20 30548.5727 -nan 0.1000 2234.1097 40 15530.3702 -nan 0.1000 101.7234 60 11410.5210 -nan 0.1000 58.0038 80 8918.7435 -nan 0.1000 -32.0592 100 7332.4010 -nan 0.1000 13.7223 120 6233.1116 -nan 0.1000 -10.4959 140 5300.0092 -nan 0.1000 -32.2669 150 4918.2831 -nan 0.1000 23.6246
X10 = prep_datos(res10)
md51c= modelado(X10,as.numeric(YPT[,1]))
Iter TrainDeviance ValidDeviance StepSize Improve 1 277574.3283 -nan 0.1000 37321.5154 2 247351.4265 -nan 0.1000 28582.1356 3 220836.3564 -nan 0.1000 26176.4698 4 197939.5442 -nan 0.1000 21008.5420 5 178780.4137 -nan 0.1000 16038.4548 6 163510.9238 -nan 0.1000 15364.7196 7 149808.8717 -nan 0.1000 11628.4078 8 138807.4310 -nan 0.1000 9665.1916 9 128921.9252 -nan 0.1000 8920.8779 10 121069.2647 -nan 0.1000 6787.7046 20 83186.5492 -nan 0.1000 1140.0261 40 66362.0960 -nan 0.1000 -344.9204 50 62750.4072 -nan 0.1000 5.5254
X50 = prep_datos(res50)
md55c= modelado(X50,as.numeric(YPT[,1]))
Iter TrainDeviance ValidDeviance StepSize Improve 1 268782.0392 -nan 0.1000 42958.4216 2 229715.8580 -nan 0.1000 39180.0522 3 197891.7723 -nan 0.1000 31366.7192 4 170793.0716 -nan 0.1000 26063.2088 5 149475.2182 -nan 0.1000 21664.8045 6 132091.1946 -nan 0.1000 16774.7485 7 116927.9849 -nan 0.1000 14235.5400 8 104284.4288 -nan 0.1000 10246.4116 9 94402.9916 -nan 0.1000 9547.7163 10 85966.6426 -nan 0.1000 7711.2861 20 48184.9573 -nan 0.1000 1305.9999 40 33033.8165 -nan 0.1000 -171.5535 60 28329.3987 -nan 0.1000 114.7601 80 24740.0455 -nan 0.1000 27.9851 100 22401.8075 -nan 0.1000 60.3376 120 20333.1863 -nan 0.1000 -61.9443 140 18749.8603 -nan 0.1000 -116.9200 150 17991.7584 -nan 0.1000 -37.4124
#
rf00 = list()
for (j in 1:nrow(fea_ix)) {
x= apply(bfy[,((fea_ix[j,1]):(fea_ix[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix[j,],collapse=":")))])
x0=apply(bfy[,((fea_ix0[j,1]):(fea_ix0[j,2]))],1,area,vf[eval(parse(text=paste(fea_ix0[j,],collapse=":")))])
name=paste(vf[fea_ix[j,]],collapse="-")
name0=paste(vf[fea_ix0[j,]],collapse="-")
rf00[[j]]=list(x=x,x0=x0,name=name,name0=name0)
}
#
XFF = prep_datos(rf00)
for (i in 1:length(lmod)) {
YTm00=predict(md50c[[which(names(md50c)==lmod[i])[1]]],XFF)
YTm11=predict(md51c[[which(names(md51c)==lmod[i])[1]]],XFF)
YTm55=predict(md55c[[which(names(md55c)==lmod[i])[1]]],XFF)
dataT[1,i,] = YTm00
dataT[2,i,] = YTm11
dataT[3,i,] = YTm55
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$Chi2_T_10, chi2d_50=ref$Chi2_T_50,chi2d_oo=ref$Chi2_T_oo,
ICA_10 = ref$ICA_T_10,ICA_50 = ref$ICA_T_50, ICA_oo=ref$ICA_T_oo,
YTm00,YTm11,YTm55,Teff_LSB=Teff_scs,SpT=SpT,LC=LC)
ref_tot2m=ref_tot0m
diffs=apply(ref_tot2m[,! colnames(ref_tot2m) %in% c("Name","SpT","Teff_LSB","LC")],
2,FUN="-",ref_tot2m[,"Teff_LSB"])
rownames(diffs)=ref_tot2m[,"Name"]
#
cat(paste("T Modelling with ",lmod[i],". Error analysis follows.",sep=""))
errors=data.frame(drmse = apply(diffs,2,rmse),dmae=apply(diffs,2,mae),
drmdse=apply(diffs,2,rmdse),dmade=apply(diffs,2,made))
print(xtable(errors),type="html")
#
cat(paste("T Modelling with",nmod[i],". SNR=oo",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=Teff_LSB,y=YTm00,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab(paste(nmod[i]," predicted SNR=oo [K]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2)))
#
cat(paste("T Modelling with",nmod[i],". SNR=10",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=Teff_LSB,y=YTm11,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab(paste(nmod[i]," predicted SNR=10 [K]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2)))
#
cat(paste("T Modelling with",nmod[i],". SNR=50",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=Teff_LSB,y=YTm55,shape=LC),size=3) +
xlab("Theoretical T [K]") +
ylab(paste(nmod[i]," predicted SNR=50 [K]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") +
xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2)))
}
T Modelling with rf. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
305.72
|
228.43
|
171.58
|
171.58
|
|
YTm11
|
202.86
|
162.78
|
139.65
|
139.64
|
|
YTm55
|
243.13
|
174.66
|
120.82
|
120.82
|
T Modelling withRF. SNR=oo

T Modelling withRF. SNR=10

T Modelling withRF. SNR=50

T Modelling with gbm. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
336.78
|
268.60
|
222.11
|
222.11
|
|
YTm11
|
187.70
|
148.29
|
120.04
|
120.04
|
|
YTm55
|
260.76
|
190.25
|
138.07
|
138.07
|
T Modelling withGB. SNR=oo

T Modelling withGB. SNR=10

T Modelling withGB. SNR=50

T Modelling with svmRadial. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
840.30
|
709.50
|
688.47
|
688.47
|
|
YTm11
|
196.85
|
155.03
|
134.66
|
134.66
|
|
YTm55
|
379.36
|
266.94
|
193.56
|
193.56
|
T Modelling withSVR. SNR=oo

T Modelling withSVR. SNR=10

T Modelling withSVR. SNR=50

T Modelling with nnet. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
719.28
|
576.76
|
489.00
|
488.97
|
|
YTm11
|
206.89
|
165.66
|
135.18
|
135.17
|
|
YTm55
|
513.71
|
393.15
|
295.86
|
295.86
|
T Modelling withNNR. SNR=oo

T Modelling withNNR. SNR=10

T Modelling withNNR. SNR=50

T Modelling with knn. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
313.88
|
237.62
|
175.02
|
175.00
|
|
YTm11
|
235.29
|
191.53
|
158.22
|
158.22
|
|
YTm55
|
246.17
|
180.55
|
136.75
|
136.75
|
T Modelling withKNN. SNR=oo

T Modelling withKNN. SNR=10

T Modelling withKNN. SNR=50

T Modelling with bagEarth. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
3464.16
|
1481.70
|
784.14
|
784.14
|
|
YTm11
|
252.03
|
159.15
|
123.70
|
123.70
|
|
YTm55
|
789.30
|
343.08
|
186.29
|
186.29
|
T Modelling withMARS. SNR=oo

T Modelling withMARS. SNR=10

T Modelling withMARS. SNR=50

T Modelling with kernelpls. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
2246.74
|
1741.28
|
1424.20
|
1424.20
|
|
YTm11
|
250.14
|
210.91
|
200.68
|
200.68
|
|
YTm55
|
741.48
|
522.90
|
361.14
|
361.14
|
T Modelling withPLS. SNR=oo

T Modelling withPLS. SNR=10

T Modelling withPLS. SNR=50

T Modelling with cubist. Error analysis follows.
|
|
drmse
|
dmae
|
drmdse
|
dmade
|
|
chi2d_10
|
147.48
|
108.08
|
79.02
|
79.02
|
|
chi2d_50
|
121.40
|
79.89
|
55.78
|
55.78
|
|
chi2d_oo
|
126.03
|
84.56
|
57.26
|
57.26
|
|
ICA_10
|
188.50
|
149.48
|
125.89
|
125.89
|
|
ICA_50
|
164.24
|
122.07
|
94.71
|
94.71
|
|
ICA_oo
|
190.90
|
150.47
|
130.09
|
130.09
|
|
YTm00
|
828.00
|
760.42
|
774.26
|
774.26
|
|
YTm11
|
210.81
|
160.26
|
128.07
|
128.07
|
|
YTm55
|
400.04
|
305.00
|
238.60
|
238.59
|
T Modelling withRule-Regression. SNR=oo
T Modelling withRule-Regression. SNR=10
T Modelling withRule-Regression. SNR=50