After the analysis carried out for the M stars in the IPAC sample, we want to apply the I-band features discovered there to estimate physical parameters from the IRTF dataset. Target values are taken from Table III of Cesetti et al.
## (Package start-up messages trimmed. All required libraries loaded except the
## following, which were not available on this machine: Peaks, JADE, segmented,
## fftw, ztable, signal, compare, mlbench and analogue. The loading loop also
## returned a list of per-package TRUE/FALSE success flags, omitted here.)
The target Teff lies between 2000 K and 4200 K.
We will use the I band according to the GA models degraded to SNR=50 and SNR=10, as well as the features proposed by Cesetti et al., also degraded to SNR=50 and SNR=10.
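As a reminder, the noise model used throughout this notebook (implemented further down in ensucia() and add_noise()) perturbs every flux value with zero-mean Gaussian noise whose standard deviation is the flux divided by the target SNR. A minimal sketch; the helper name degrade_to_snr and the data frame spec are ours, for illustration only:
#
# Minimal sketch of the SNR degradation (see ensucia()/add_noise() further down);
# 'spec' is a hypothetical data frame with wavelength in column 1 and flux in column 2.
degrade_to_snr <- function(spec, snr) {
  spec[, 2] <- spec[, 2] + rnorm(nrow(spec), mean = 0, sd = spec[, 2] / snr)
  return(spec)
}
# e.g. spec50 <- degrade_to_snr(spec, 50); spec10 <- degrade_to_snr(spec, 10)
#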
# Processing of the genetic-algorithm results for Teff
range_elem<-function(z,l,bi){
look=function(x,l,y){
j<-which(y>= as.numeric(x[l]))[1];
k<-(which(y>as.numeric(x[(l+1)]))[1])-1;
return(c(j,k))
}
y=z$data[[1]][,1]
lm<-t(apply(bi,1,look,l,y))
rownames(lm)<-bi[,1]
colnames(lm)<-c("From","To")
return(lm)
}
int_spec<-function(x,idx,norm=0){
y<-x$data[[1]][eval(parse(text=idx)),]
xz<-diff(as.numeric(y[,1]),1)
yz<-as.numeric(y[,2])
if ( norm > 0 ){
z<-sum(xz)
} else {
z<-sum(xz*rollmean(yz,2))
}
return(z)
}
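For clarity, the sum of diff() times rollmean() used here (and in the area() helpers further down) is just the trapezoidal rule,

$$\int y\,dx \;\approx\; \sum_i \left(x_{i+1}-x_i\right)\frac{y_i+y_{i+1}}{2},$$

while a call with norm > 0 returns only the wavelength span of the selected pixel range.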
#
feature_extr<-function(sn,bp){
sig<-sn[1]
noi<-sn[2]
nof<-sn[3] # Allows a second continuum band, as in the paper (the two are averaged below)
Fcont<-unlist(lapply(bp,int_spec,noi,0))/
unlist(lapply(bp,int_spec,noi,1))
if (! is.na(nof)){ # In case of two bands => average
Fcont2=unlist(lapply(bp,int_spec,nof,0))/
unlist(lapply(bp,int_spec,nof,1))
Fcont = (Fcont + Fcont2) /2.
}
fea<-unlist(lapply(bp,int_spec,sig,1))-
unlist(lapply(bp,int_spec,sig,0))/Fcont
return(fea)
}
#
sacar=function(x,pos=2) {
if ( length(x) < pos ) {
return(NA);
}
return(x[pos]);
}
#
######################
#
# Loading the object BT-Settl
# if (file.exists("~/git/M_prep_IRTF/BT-Settl-2011-1013-IRTF.RData")) {
# load("~/git/M_prep_IRTF/BT-Settl-2011-1013-IRTF.RData")
# } else {
# cat("ERROR: ~/git/M_prep/M_prep_IRTF/BT-Settl-2011-1013-IRTF.RData NOT FOUND !!!")
# exit(2)
# }
#
Now we will read the M stars from the IRTF dataset, stored in FITS format. The dataset was downloaded from http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/
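As a minimal sketch of what the loop below does for each file (readFITS() comes from the FITSio package; the file name here is made up purely for illustration), the wavelength axis is rescaled by a factor of 10000 (microns to Angstrom, assuming the IRTF files store microns):
#
# Read one IRTF FITS spectrum; "irtf/M0V_HD123456.fits" is a hypothetical file name.
one <- readFITS(file = "irtf/M0V_HD123456.fits")
one$imDat[, 1] <- one$imDat[, 1] * 10000   # rescale the wavelength axis
spec <- as.data.frame(one$imDat[, 1:2])    # column 1: wavelength, column 2: flux
#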
#
scaling=function(x,xlim=NULL,xp=NULL) {
y=x
if (! is.null(y$factors)) {
y$data[,2]=y$data[,2]*y$factors
}
if ( is.null(xlim)){
ixp=rep(TRUE,nrow(y$data))
xlim=range(y$data[,1])
}
if ( ! is.null(xp)) {
xp = xp[(xp > xlim[1]) & (xp < xlim[2])]
xli0 = y$data[rev(which(y$data[,1] < xlim[1]))[1],1] - 0.001 # Rounding ...
xli1 = y$data[which(y$data[,1]>xlim[2])[1],1] + 0.001 # Rounding ...
ixp = (y$data[,1] >= xli0 & y$data[,1] <= xli1)
} else {
ixp = (y$data[,1]>= xlim[1] & y$data[,1] <= xlim[2])
}
z=y$data[ixp,]
jp=rep(0,nrow(z)+1)
# jp[is.finite(z[,2])]=1 # LSB has requested to interpolate the holes 30/7/2014
jp[nrow(z)+1]=1
zp=diff(jp)
ip=which(zp != 0)
ini=1
j=0
y$data=list()
for (i in ip) {
if (zp[i] != -1 ) {
j=j+1
y$data[[j]]=z[ini:i,]
} else {
ini=i+1
}
}
y$nch=j
y$factors=rep(NA,y$nch)
for (i in 1:y$nch) {
if ( is.na(y$data[[i]][nrow(y$data[[i]]),2])) {
kk = rev(which(! is.na(y$data[[i]][,2])))[1]
y$data[[i]][nrow(y$data[[i]]),2]=y$data[[i]][kk,2]
}
if ( is.na(y$data[[i]][1,2])) {
kk = which(! is.na(y$data[[i]][,2]))[1]
y$data[[i]][1,2]=y$data[[i]][kk,2]
}
if ( ! is.null(xp)) {
yp=(approx(y$data[[i]][,1],y$data[[i]][,2],xout=xp))$y
y$data[[i]]=data.frame(x=xp,y=yp)
} else {
yp=(approx(y$data[[i]][,1],y$data[[i]][,2],xout=y$data[[i]][,1]))$y
y$data[[i]]=data.frame(x=y$data[[i]][,1],y=yp)
}
y$factors[i]=sum(rollmean(y$data[[i]][,2],2)*diff(y$data[[i]][,1]))
}
ss=sum(y$factors)
for (i in 1:y$nch) {
y$data[[i]][,2]=y$data[[i]][,2]/ss
}
return(y)
}
#
pinta=function(x,cortes=NULL,borders=FALSE, boundary=FALSE, close=FALSE,
xlim=NULL, ylim=NULL,plt=NULL,width=12,height=8,...){
if (is.null(x$nch)){
nch=1
} else {
nch=x$nch
}
if ( length(xlim) < 2 ) {
xmin=min(x$data[[1]][,1])
xmax=max(x$data[[nch]][,1])
xl=c(xmin,xmax)
} else {
xl=xlim
}
if ( length(ylim) < 2 ) {
ymin=min(x$data[[1]][,2])
ymax=max(x$data[[1]][,2])
if ( nch > 1 ) {
for (i in 2:nch){
ymin=min(ymin,x$data[[i]][,2])
ymax=max(ymax,x$data[[i]][,2])
}
}
yl=c(ymin,ymax)
} else {
yl = ylim
}
if (! is.null(plt) ) {
pdf(file=plt,width,height)
}
if ( boundary) {
plot(0,xlim=xl,ylim=yl,...)
}
for (i in 1:nch) {
lines(x$data[[i]],...)
}
for (i in cortes) {
abline(v=i[1],col=2)
abline(v=i[2],col=2)
}
if ( borders) {
for (i in nch) {
abline(v=min(x$data[[i]][,1]),col=3)
abline(v=max(x$data[[i]][,1]),col=3)
}
}
if ( close ) {
dev.off()
}
}
#
if (file.exists("~/git/M_prep_IRTF/2011-IRTF.RData")) {
load("~/git/M_prep_IRTF/2011-IRTF.RData")
} else {
setwd('~/M')
files<-list.files(path="./irtf",pattern=".*fits$",ignore.case=TRUE)
nf <- length(files)
d_irf<-as.data.frame(matrix(0,nf,11))
d_irf[,1]<-files
colnames(d_irf)<-c("FileName","MinFL","MaxFL","MinWL","MaxWL","MinST","1Q_ST",
"Median","Mean","3Q_ST","Max_ST")
j<-0
irf<-list()
for (i in files) {
j = j+1
dat=readFITS(file=paste("irtf/",i,sep=""))
dat$imDat[,1]=dat$imDat[,1]*10000
ss =as.data.frame(dat$imDat[,1:2])
ssp=str_replace(i,'.fits','')
cmp=str_split(ssp,'_')
spt=cmp[[1]][1]
ppp=str_locate(ssp,'_')[1]
cpp= str_sub(ssp,(ppp+1),nchar(ssp))
starn = sub("^([^.]*).*", "\\1",cpp)
sp=""
if ( length(cmp[[1]]) == 3) {
sp="ext"
}
irf[[j]]<-list(data=ss,name=i,SpT=spt,starname=starn,type=sp)
d_irf[j,2:3]<-range(ss[,2],na.rm=T)
d_irf[j,4:5]<-range(ss[,1],na.rm=T)
d_irf[j,6:11]<-summary(diff(ss[,1],1))
}
#
sacax=function(x,cmp=1) {
return(ldply(x$data,function(x){return(x)})[,cmp])
}
indice=function(x,ip){
return(which(x<ip)[1]-1)
}
#
prepara_x=function(z,ip,tol=1){
zp = vector(mode="numeric",length=0)
zp[1]=floor(z[1])
xc = zp[1]
ix = 1
while (xc < ceiling(z[length(z)])) {
if ( (xc + tol * ip[ix]) >= z[ix] ) {
ix = ix + 1
}
if ( length(ip) < ix) {
ix = length(ip)
}
xc = xc + ip[ix]
zp[length(zp)+1] = xc
}
return(zp)
}
#
# We look for the sequence of common IRTF points onto which to interpolate
# vsp=ldply(irf,function(x){return(range(x$data[,1]))})
# Calculating the x positions from the common IRTF sampling
#
# bf_tot (the full rescaled IRTF spectra) must be built before the common grid is derived from it
bf_tot=lapply(irf,scaling)
z = sort(unlist(lapply(bf_tot,sacax,cmp=1)))
iip=floor(rollmedian(diff(bf_tot[[1]]$data[[1]][,1]),5))
ip = prepara_x(z,iip)
zip=apply(as.data.frame(z),1,indice,ip)
vp=as.numeric(tapply(z,zip,median))
vsp=ldply(irf,function(x){return(range(x$data[,1]))})
#
cortes=list(c(ceiling(max(vsp[,1])),floor(min(vsp[,2]))))
bf_clean=lapply(irf,scaling,xlim=cortes[[1]],xp=vp)
save(irf,bf_clean,bf_tot,d_irf,nf,files,cortes,vp,vsp,file="~/git/M_prep_IRTF/2011-IRTF.RData")
rm(ss,dat,spt,cmp,starn,sp,files)
}
setwd('~/git/M_prep_IRTF')
#
A total of 85 spectra have been read from the individual-spectra dataset.
Let's create an R list structure similar to pb_clean and bq_clean; it will be called bf. We will use the same interpolation and convolution tools as in the IPAC case.
Now it is time to prepare the bf list of spectra from the IRTF.
#
ordenar<-function(x){
x$data <- x$data[order(x$data[,1]),]
return(x)
}
#
rsmpl_irtf=function(x,irtf) {
y=x
nch=1
if (! is.null(x$nch)) {
nch = x$nch
}
dxorg=x$data
if ( ! is.null (x$factors) ) {
for ( i in 1:nch) {
dxorg[[i]][,2]=dxorg[[i]][,2]*x$factors[i]
}
}
ddx = ldply(dxorg,function(x){return(x)})
y$data=list()
y$nch=irtf$nch
for (i in 1:y$nch) {
ndat = matrix ( 0, nrow=nrow(irtf$data[[i]]), ncol=2)
ndat[,1] = irtf$data[[i]][,1]
ndat[,2] = NA
y$data[[i]] = ndat
y$data[[i]][,2]=(spline(ddx[,1],ddx[,2],xout=y$data[[i]][,1]))$y
y$factors[i]=sum(rollmean(y$data[[i]][,2],2)*diff(y$data[[i]][,1]))
}
ss=sum(y$factors)
for (i in 1:y$nch ) {
y$data[[i]][,2]=y$data[[i]][,2]/ss
}
return(y)
}
#
rsmpl=function(x,vp,cortes) {
y=x
nch=1
if (! is.null(x$nch)) {
nch = x$nch
}
y$data=list()
for (i in 1:length(cortes)) {
for (j in 1:nch) {
nx = vp > cortes[[i]][1] & vp < cortes[[i]][2]
if ( sum(nx > 0 )) {
ndat = matrix ( 0, nrow=sum(nx), ncol=2)
ndat[,1] = vp[nx]
ndat[,2] = NA
y$data[[i]] = ndat
y$data[[i]][,2]=(spline(x$data[,1],x$data[,2],xout=y$data[[i]][,1]))$y
y$factors[i]=sum(rollmean(y$data[[i]][,2],2)*diff(y$data[[i]][,1]))
}
}
}
y$nch=nch
ss= sum(y$factors)
for (i in 1:length(y$data)) {
y$data[[i]][,2]=y$data[[i]][,2]/ss
}
return(y)
}
#
cvol=function(x,vp,cortes=list(),R=2000,scale=TRUE){
xp=x$data[,1]
if (length(cortes)==0){
return(NULL)
}
y=x
y$data=list()
y$factors=rep(0,length(cortes))
for (i in 1:length(cortes)) {
org =cortes[[i]][1]
end =cortes[[i]][2]
iorg=rev(which(xp<org))[1]
iend=which(xp>end)[1]
sgm1=xp[iorg]/(R*2*sqrt(2.*log(2)))
sgm2=xp[iend]/(R*2*sqrt(2.*log(2)))
sioo =rev(which(xp < (xp[iorg]-3*sgm1)))[1]
siee =which(xp > (xp[iend]+3*sgm2))[1]
npm2 =max(iorg-sioo,siee-iend)
np = 2*npm2 + 1
xt =x$data[,1]
yt =x$data[,2]
xc=rep(0,(iend+1-iorg))
yc=xc
j=0
for ( ic in (iorg-npm2):(iend+npm2)) {
j=j+1
sgm = xt[ic]/(R*2*sqrt(2*log(2.)))
xsg = xt[(ic-npm2):(ic+npm2+1)]
flt = exp(-(xsg-xt[ic])^2/(2*sgm^2))/(sqrt(2*pi)*sgm)
yc[j] = sum(yt[(ic+npm2+1):(ic-npm2)]*flt*diff(xt[(ic-npm2-1):(ic+npm2+1)]))
xc[j] = xt[ic]
}
nx = vp > cortes[[i]][1] & vp < cortes[[i]][2]
ndat = matrix ( 0, nrow=sum(nx), ncol=2)
ndat[,1] = vp[nx]
ndat[,2] = NA
y$data[[i]] = ndat
y$data[[i]][,2]=(spline(xc,yc,xout=y$data[[i]][,1]))$y
y$factors[i]=sum(rollmean(y$data[[i]][,2],2)*diff(y$data[[i]][,1]))
y$data[[i]][,2]=y$data[[i]][,2]/y$factors[i]
}
return(y)
}
#
mettering = function(x){
nch=1
if (! is.null(x$nch)) {
nch = x$nch
}
dx = ldply(x$data,function(x){return(x)})
st = 0
yv = TRUE
md = 0
sv = 0
lg = 0
for ( i in 1:nch) {
y = x$data[[i]]
yv = yv & sum(is.na(y[,2]))==0
st = st + sum(diff(y[,1])*rollmean(y[,2],2))
md = md + mean(y[,2])* diff(range(y[,1]))/diff(range(dx[,1]))
sv = sv + sd(y[,2]) * diff(range(y[,1]))/diff(range(dx[,1]))
lg = lg + nrow(y)
}
return(list(ok=yv,area=st,mean=md,sdev=sv,lngth=lg,nch=nch))
}
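As a quick sanity check, mettering() can be run on one of the rescaled IRTF spectra (bf_clean); after scaling() the flux is divided by the summed per-channel factors, so the integrated area it reports should be close to 1:
#
# Bookkeeping of one rescaled IRTF spectrum (area should be ~1 after scaling()).
str(mettering(bf_clean[[1]]))
#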
#
dist_chk = function(x,y){
if ( x$nch != y$nch ) {
return(NULL)
}
}
scspec<-function(x,factor=1){
for ( i in 1:x$nch){
x$factors[i] <- ifelse(x$factors[i] == 0 , factor , x$factors[i]*factor)
x$data[[i]][,2] <- x$data[[i]][,2]*factor
}
return(x)
}
#
# C-style ternary operator hack: cond ? a : b (note that this masks the usual help operator)
`?` <- function(x, y)
eval(
sapply(
strsplit(
deparse(substitute(y)),
":"
),
function(e) parse(text = e)
)[[2 - as.logical(x)]])
#
fnspec=function(x,fn="max"){
FUN<-match.fun(fn)
return(unlist(lapply(x$data,function(x) FUN(x[,2]) )))
}
#
stellar=function(x,d_bt){
y=x
nm=paste(x$name,".bz2",sep="")
y$stellarp=d_bt[d_bt$FileName==nm,2:4]
colnames(y$stellarp)=c("T[K]","Log(g)[dex]","Met [dex]")
return(y)
}
#
if (file.exists("~/git/M_prep_IRTF/BT-Settl-2011-2013-IRTF.RData")) {
load("~/git/M_prep_IRTF/BT-Settl-2011-2013-IRTF.RData")
} else {
#
# iir = dnorm(seq(-50,50,1),0,3.6)
bp = lapply(bt,cvol,vp=vp,cortes=cortes,R=2000,scale=TRUE)
bp_clean=lapply(bp,stellar,d_bt)
#
# rm(bt)
for ( ip in 1:length(bf_clean)) {
i =bf_clean[[ip]]$name
ssp=str_replace(i,'.fits','')
cmp=str_split(ssp,'_')
spt=cmp[[1]][1]
ppp=str_locate(ssp,'_')[1]
cpp= str_sub(ssp,(ppp+1),nchar(ssp))
starn = sub("^([^.]*).*", "\\1",cpp)
bf_clean[[ip]]$starname = starn
}
save(bp,bp_clean,irf,bf_clean,bf_tot,d_bt,d_irf,grd,vp,cortes,
file="~/git/M_prep_IRTF/BT-Settl-2011-2013-IRTF.RData")
}
#
A total of 671 model spectra have been produced; they are now stored in the bp list.
#
# Function that degrades the signal with Gaussian noise to reach SNR=snr
ensucia=function(x,nr){
y = x+rnorm(length(x),0,x/nr)
return(y)
}
#
add_noise=function(x,snr){
if (is.null(x$nch)) {
nch=1
} else {
nch = x$nch
}
for (i in 1:nch) {
y=ensucia(x$data[[i]][,2],snr)
x$data[[i]][,2]=y
}
x$nch = nch
return(x)
}
#
prepare<-function(x,chunk=1){
tmp<-t(x$data[[chunk]])
colnames(tmp)<-paste("wl_",tmp[1,],sep="")
dtmp<-tmp[2,]
if ("stellarp" %in% (attributes(x)$names)) {
dtmp<-cbind(t(dtmp),x$stellarp)
}
return(dtmp)
}
#
# =================================================================================
# Processing starts
# =================================================================================
#
if ( file.exists("~/git/M_prep_IRTF/BT-Settl-2013-Noise.RData")) {
load("~/git/M_prep_IRTF/BT-Settl-2013-Noise.RData")
} else {
bp_50 = lapply(bp_clean,add_noise,50)
bp_10 = lapply(bp_clean,add_noise,10)
df_data_50 = t(sapply(bp_50,prepare,1))
df_data_10 = t(sapply(bp_10,prepare,1))
rownames(df_data_50) = d_bt[,1]
rownames(df_data_10) = d_bt[,1]
#
save(df_data_10,df_data_50,bp_50,bp_10,file="~/git/M_prep_IRTF/BT-Settl-2013-Noise.RData")
}
#
# Time to reinterpolate the IRTF
#
Let us find, for each IRTF spectrum, the closest model spectrum at the different SNR levels.
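For two spectra on the same wavelength grid of N pixels, the chid() helper below uses the mean quadratic distance

$$d(x,m)=\frac{1}{N}\sum_{i=1}^{N}\left(F_{x,i}-F_{m,i}\right)^{2},$$

and returns an arbitrarily large value (5000) when the two grids do not have the same length.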
minchid=function(x,lib){
chid=function(x,y){
if ( nrow(x$data[[1]]) == nrow(y$data[[1]])) {
return(sum((x$data[[1]][,2]-y$data[[1]][,2])^2)/nrow(x$data[[1]]))
} else {
return (5000) # Enormous distance as it is not valid
}
}
ldist=ldply(lib,chid,x)
idx = which.min(ldist[,1])[1]
res = data.frame(T=lib[[idx]]$stellarp[1],logg=lib[[idx]]$stellarp[2],
met=lib[[idx]]$stellarp[3],name=lib[[idx]]$name,
dist=ldist[idx,1])
return(res)
}
#
chi2d_50=ldply(bf_clean,minchid,bp_50)
#
And the same with SNR=10:
#
chi2d_10=ldply(bf_clean,minchid,bp_10)
#
Looking into the gravity properties.
## SNR=10
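In both blocks below, the JADE unmixing matrix W and the mean spectrum Xmu estimated from the noisy BT-Settl models are used to project the models and the IRTF spectra into the same independent-component space,

$$s = W\,(x-\mu),$$

and a projection pursuit regression (PPR) model is then trained on those components to predict Log(g).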
# We prepare the coefs
if ( file.exists("~/git/M_prep_IRTF/BT-Settl-2013-PPR_bp10.RData")) {
load("~/git/M_prep_IRTF/BT-Settl-2013-PPR_bp10.RData")
} else {
dd2=as.data.frame(do.call(rbind,lapply(bp_10,function(x){return(x$data[[1]][,2])})))
jd2=JADE(dd2,10)
ss2=as.data.frame(t(jd2$W %*% t(dd2-jd2$Xmu)))
ss2[,(ncol(ss2)+1)]=unlist(lapply(bp_10,function(x){return(x$stellarp[2])}))
colnames(ss2)[ncol(ss2)]="LG"
#
folds=5
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss2[,ncol(ss2)],
k=folds, times=repeats))
model2=train(ss2[,-ncol(ss2)], ss2[,ncol(ss2)], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=3:(ncol(ss2)-1)))
d2=as.data.frame(do.call(rbind,lapply(bf_clean,
function(x){return(ldply(x$data,
function(x){return(x)})[,2])})))
si2=as.data.frame(t(jd2$W %*% t(d2-jd2$Xmu)))
G_coef2=predict(model2,si2)
save(G_coef2,si2,d2,model2,myControl,folds,repeats,ss2,jd2,dd2,
file="~/git/M_prep_IRTF/BT-Settl-2013-PPR_bp10.RData")
}
#
# We prepare the coefs
if ( file.exists("~/git/M_prep_IRTF/BT-Settl-2013-PPR_bp50.RData")) {
load("~/git/M_prep_IRTF/BT-Settl-2013-PPR_bp50.RData")
} else {
dd=as.data.frame(do.call(rbind,lapply(bp_50,function(x){return(x$data[[1]][,2])})))
jd=JADE(dd,10)
ss=as.data.frame(t(jd$W %*% t(dd-jd$Xmu)))
ss[,(ncol(ss)+1)]=unlist(lapply(bp_50,function(x){return(x$stellarp[2])}))
colnames(ss)[ncol(ss)]="LG"
#
folds=5
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss[,ncol(ss)],
k=folds, times=repeats))
model=train(ss[,-ncol(ss)], ss[,ncol(ss)], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=3:(ncol(ss)-1)))
#
d1=as.data.frame(do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])})))
si=as.data.frame(t(jd$W %*% t(d1-jd$Xmu)))
G_coef=predict(model,si)
save(G_coef,si,dd,model,myControl,folds,repeats,ss,jd,dd,
file="~/git/M_prep_IRTF/BT-Settl-2013-PPR_bp50.RData")
}
#
#
clase_luminosidad = function(x) {
# We scan the spectral type string for the luminosity class:
# Roman numerals V => dwarfs; III => giants; I => supergiants (II => I)
cl=c("V","III","I")
i=1
res=NA
while(i <= length(cl) & is.na(res)) {
j=regexpr(cl[i],x)
if (j[1] > 0) {
res=cl[i]
} else {
i=i+1
}
}
return(res)
}
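A quick illustration of the luminosity-class extraction (the NA cases are defaulted to "III" a few lines below):
#
# Examples of the luminosity-class helper:
clase_luminosidad("M5V")    # "V"   (dwarf)
clase_luminosidad("M3III")  # "III" (giant)
clase_luminosidad("M7")     # NA, later defaulted to "III"
#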
setwd('~/git/M_prep_IRTF')
lit_val=read.csv2(file="tabla3_cesetti.csv",stringsAsFactors=FALSE)
lit_val[,6]=as.numeric(gsub(',','.',lit_val[,6]))
lit_val[,7]=as.numeric(gsub(',','.',lit_val[,7]))
lit_val[,8]=as.numeric(gsub(',','.',lit_val[,8]))
lit_val[,10]=as.numeric(gsub(',','.',lit_val[,10]))
# gsub() is applied repeatedly: each pass strips one leading zero after "HD"
lit_val[,1]=gsub('HD0','HD',lit_val[,1])
lit_val[,1]=gsub('HD0','HD',lit_val[,1])
lit_val[,1]=gsub('HD0','HD',lit_val[,1])
lit_val[,1]=gsub('HD0','HD',lit_val[,1])
colnames(lit_val)=c("Name","RA","DEC","SpT","Mv","Teff","Log_G","Met","Parallax","Ref")
m_with_spectra=as.vector(do.call(rbind,lapply(bf_clean,function(x){return(x$starname)})))
m_table_3 = lit_val[,1]
notfound=(1:length(m_with_spectra))[!m_with_spectra %in% m_table_3]
ddj=lit_val[lit_val[,1] %in% m_with_spectra,]
ddj$LC=sapply(ddj$SpT,clase_luminosidad)
ddj$LC[is.na(ddj$LC)] = "III"
#
#
# We apply the results of specific analyses carried out manually by Prof. Sarro:
# the outcome is that some stars are removed from the available list and some Teff values are updated.
toberemoved=c("HD35601","HD339034","IRAS01037+1219","IRAS15060+0947","IRAS14086-0703")
tobeupdated=data.frame(name=c("HD69243","HD14386","Gl752B",
"LP412-31","BRIB0021-0214","BRIB1219-1336",
"HD108849","HD156014"),
Teff=c(2000.,2400.,2557.,2557.,2322., 2400., 2700.,3200.))
ddj=ddj[! ddj$Name %in% toberemoved,]
for ( i in 1:nrow(tobeupdated)) {
ddj[ddj$Name %in% tobeupdated[i,"name"],"Teff"]=tobeupdated[i,"Teff"]
}
#
# We take the second component of the Chi2 result, which is Log(G)
ref=data.frame(Name=as.vector(do.call(rbind,lapply(bf_clean,function(x)
{return(x$starname)}))), chi2_50=chi2d_50[,2],chi2_10=chi2d_10[,2],
LG_ica_50=G_coef,LG_ica_10=G_coef2)
#
# We prepare the coefs
if ( file.exists("~/git/M_prep_IRTF/BT-Settl-2013-PPR_ica10.RData")) {
load("~/git/M_prep_IRTF/BT-Settl-2013-PPR_ica10.RData")
} else {
dd3=as.data.frame(do.call(rbind,lapply(bp_10,function(x){return(x$data[[1]][,2])})))
jd3=JADE(dd3,10)
ss3=as.data.frame(t(jd3$W %*% t(dd3-jd3$Xmu)))
ss3[,(ncol(ss3)+1)]=unlist(lapply(bp_10,function(x){return(x$stellarp[2])}))
colnames(ss3)[ncol(ss3)]="LG"
#
folds=10
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss3[,ncol(ss3)],
k=folds, times=repeats))
model3=train(ss3[,-ncol(ss3)], ss3[,ncol(ss3)], method='gbm',
trControl=myControl,
tuneGrid=expand.grid(.n.trees=seq(10,500,50),
.interaction.depth=seq(3,10,2),
.shrinkage = seq(0.01,0.1,0.02)),verbose=FALSE)
#
d3=as.data.frame(do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])})))
si3=as.data.frame(t(jd3$W %*% t(d3-jd3$Xmu)))
G_coef3=predict(model3,si3)
ref[,(ncol(ref)+1)]=G_coef3
colnames(ref)[ncol(ref)]="LG_icaGBM_10"
save(G_coef3,si3,d3,model3,myControl,folds,repeats,ss3,jd3,dd3,ref,
file="~/git/M_prep_IRTF/BT-Settl-2013-PPR_ica10.RData")
}
#
# We prepare the coefs
if ( file.exists("~/git/M_prep_IRTF/BT-Settl-2013-PPR_icam10.RData")) {
load("~/git/M_prep_IRTF/BT-Settl-2013-PPR_icam10.RData")
} else {
dd4=as.data.frame(do.call(rbind,lapply(bp_10,function(x){return(x$data[[1]][,2])})))
jd4=JADE(dd4,6)
ss4=as.data.frame(t(jd4$W %*% t(dd4-jd4$Xmu)))
ss4[,(ncol(ss4)+1)]=unlist(lapply(bp_10,function(x){return(x$stellarp[3])}))
colnames(ss4)[ncol(ss4)]="M"
#
folds=10
repeats=1
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,
index=createMultiFolds(ss4[,ncol(ss4)],
k=folds, times=repeats))
model4=train(ss4[,-ncol(ss4)], ss4[,ncol(ss4)], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=4:(ncol(ss4)-1)))
#
d4=as.data.frame(do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])})))
si4=as.data.frame(t(jd4$W %*% t(d4-jd4$Xmu)))
G_coef4=predict(model4,si4)
ref[,(ncol(ref)+1)]=G_coef4
colnames(ref)[ncol(ref)]="Met_ica_10"
#
reftot=merge(ref,ddj,by="Name")
save(G_coef4,si4,d4,model4,myControl,folds,repeats,ss4,jd4,dd4,reftot,
file="~/git/M_prep_IRTF/BT-Settl-2013-PPR_icam10.RData")
}
#
#
#
# Function that returns Root Mean Squared Error
rmse <- function(error) {
sqrt(mean(error^2,na.rm=TRUE))
}
# Function that returns Mean Absolute Error
mae <- function(error) {
mean(abs(error),na.rm=TRUE)
}
#
#
ddj=ddj[! ddj$Name %in% toberemoved,]
for ( i in 1:nrow(tobeupdated)) {
ddj[ddj$Name %in% tobeupdated[i,"name"],"Teff"]=tobeupdated[i,"Teff"]
}
ref_tot0i=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,
LG_ICA_10=G_coef2,LG_ICA_50=G_coef)
ref_tot1i=data.frame(Name=as.character(ddj$Name),SpT=ddj$SpT,Teff=ddj$Teff,LC=ddj$LC,
LogG=ddj$Log_G, Met=ddj$Met)
ref_tot2i=merge(ref_tot0i,ref_tot1i,by="Name")
diffs=apply(ref_tot2i[,! colnames(ref_tot2i) %in% c("Name","SpT","Teff","LC","Met","LogG")],
2,FUN="-",ref_tot2i[,"LogG"])
rownames(diffs)=ref_tot2i[,"Name"]
#
cat(paste("LogG Modelling with ICA. Error analysis follows.",sep=""))
LogG Modelling with ICA. Error analysis follows.
errors=data.frame(drmse = apply(diffs,2,rmse),dmae=apply(diffs,2,mae))
print(xtable(errors),type="html")
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| LG_ICA_10 | 0.54 | 0.49 |
| LG_ICA_50 | 0.34 | 0.24 |
#
reftot = ref_tot2i
reftot$CL = sapply(reftot$SpT,clase_luminosidad)
reftot$CL[is.na(reftot$CL)] = "III"
ggplot(data=reftot) +
geom_point(aes(x=LogG,y=chi2d_50,shape=CL),size=3) +
ylab("Chi2-50 LogG [dex]") + xlab("Cesetti LogG [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
# xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=reftot) +
geom_point(aes(x=LogG,y=chi2d_10,shape=CL),size=3) +
ylab("Chi2-10 LogG [dex]") + xlab("Cesetti LogG [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
# xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=reftot) +
geom_point(aes(x=LogG,y=LG_ICA_50,shape=CL),size=3) +
ylab("ICA-50 LogG [dex]") + xlab("Cesetti LogG [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
# xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
ggplot(data=reftot) +
geom_point(aes(x=LogG,y=LG_ICA_10,shape=CL),size=3) +
ylab("ICA-10 LogG [dex]") + xlab("Cesetti LogG [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") +
# xlim(2000,4200) + ylim(2000,4200) +
guides(col=guide_legend(ncol=2))
#
# ggplot(data=reftot) +
# geom_point(aes(x=Log_G,y=LG_ica_10,shape=CL),size=3) +
# ylab("ICA-10 Log(g) [dex]") + xlab("Theoretical Log(g) [dex]") +
# theme_bw() + # scale_colour_brewer(palette="Set1") +
# geom_abline(position="identity", colour="gray") +
# guides(col=guide_legend(ncol=2))
# #
save(bf_clean,bp_clean,bp_10,bp_50,ref,reftot,
model,model2,model3,model4,
file="~/git/M_prep_IRTF/Models_BT_settl-2013_self33_v2.RData")
#
#
if ( file.exists("~/git/M_prep_IRTF/Features_BT-settl-2013_v3.RData")) {
load("~/git/M_prep_IRTF/Features_BT-settl-2013_v3.RData")
} else {
bpy=do.call(rbind,lapply(bp_clean,function(x){return(x$data[[1]][,2])}))
bpy10=do.call(rbind,lapply(bp_10,function(x){return(x$data[[1]][,2])}))
bpy50=do.call(rbind,lapply(bp_50,function(x){return(x$data[[1]][,2])}))
YPT=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[1])}))
YPG=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[2])}))
YPM=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[3])}))
vf=as.character(bp_clean[[1]]$data[[1]][,1])
colnames(bpy)=vf
colnames(bpy10)=vf
colnames(bpy50)=vf
bfy=do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
#
area=function(v,nv) {
y=data.frame(x=as.numeric(nv),y=v)
xz = diff(as.numeric(y[,1]),1)
yz = as.numeric(y[,2])
z = sum(xz*rollmean(yz,2))
return(z)
}
#
######################################################################
#
siz=30
bandas=list()
bfndas=list()
bandas10=list()
bandas50=list()
for (i in seq(1,siz,15)) { # LSB asked for steps of 5 pixels
idx=paste("s=",i,sep="")
lvf=vf
if ( i > 1) {
lvf=vf[-c(1:(i-1))]
}
ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
st=split(lvf,ff)
for (j in 1:length(st)) {
if (length(st[[j]]) < siz)
st[[j]] = NULL
}
lst = length(st)
bandas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy)))
bfndas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bfy)))
bandas10[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy10)))
bandas50[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy50)))
for (j in 1:lst) {
bandas[[idx]]$mat[,j]=apply(bpy[,st[[j]]],1,area,st[[j]])
bandas10[[idx]]$mat[,j]=apply(bpy10[,st[[j]]],1,area,st[[j]])
bandas50[[idx]]$mat[,j]=apply(bpy50[,st[[j]]],1,area,st[[j]])
}
for (j in 1:lst) {
bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
}
}
#
save(bpy,bfy,bandas,bfndas,vf, bpy10,bpy50,st,
bandas10,bandas50,YPT,YPG,YPM,
file="~/git/M_prep_IRTF/Features_BT-settl-2013_v3.RData")
}
#
Other genetic-algorithm features, evolved by constructing the features themselves (population of 8000 individuals, 1000 generations). Each chromosome encodes NG = 10 pairs of (signal band, reference band) indices as Gray-coded binary strings; see decode() below.
#
#
require(GA)
## Loading required package: GA
## Package 'GA' version 2.2
## Type 'citation("GA")' for citing this R package in publications.
require(doParallel)
## Loading required package: doParallel
require(plyr)
#
#
area=function(v,nv) {
y=data.frame(x=as.numeric(nv),y=v)
xz = diff(as.numeric(y[,1]),1)
yz = as.numeric(y[,2])
z = sum(xz*rollmean(yz,2))
return(z)
}
fitnessG = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-1000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-1000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas[[cnjts[1]]]$mat),ncol=(nrow(par)+1)))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas[[cnjts[bi]]]$mat[,idxi] / (bandas[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
X[,ncol(X)]= YPT
colnames(X)[ncol(X)]="T"
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
fitnessG10 = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-1000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-1000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas10[[cnjts[1]]]$mat),ncol=(nrow(par)+1)))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas10[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas10[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas10[[cnjts[bi]]]$mat[,idxi] / (bandas10[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
X[,ncol(X)]= YPT
colnames(X)[ncol(X)]="T"
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
fitnessG50 = function(string) {
par = decode(string)
if ( 0 %in% unlist(apply(par,1,sd)) ) {
return (-1000*NBLK*NBITS)
}
if ( length(unique(sort(par[,1]))) < nrow(par) |
length(unique(sort(par[,2]))) < nrow(par) ) {
return (-1000*NBLK*NBITS)
}
X = as.data.frame(matrix(0,nrow=nrow(bandas50[[cnjts[1]]]$mat),ncol=(nrow(par)+1)))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
wini=-eval(parse(text=bandas50[[cnjts[bi]]]$names[idxi]))
winj=-eval(parse(text=bandas50[[cnjts[bj]]]$names[idxj]))
X[,(i)]=wini-bandas50[[cnjts[bi]]]$mat[,idxi] / (bandas50[[cnjts[bj]]]$mat[,idxj]/winj)
colnames(X)[(i)] = paste(par[i,],collapse="-")
}
X[,ncol(X)]= YPT
colnames(X)[ncol(X)]="T"
mod = lm.fit(as.matrix(X), y)
class(mod) = "lm"
return( -BIC(mod))
}
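Each feature column of the design matrix X built inside the three fitness functions above is, schematically, an equivalent-width-like quantity

$$C_k \;=\; w_s-\frac{A_s}{A_c/w_c},$$

where A_s is the flux integrated over the selected signal band of width w_s and A_c/w_c is the mean flux level in the selected reference (pseudo-continuum) band; the last column of X holds the model temperature, and the fitness is the negative BIC of a linear fit of y on those columns.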
#
SelFeatures=function(x,sets=cnjts,dat=bandas,fitness.=fitnessG,pp=FALSE){
pos=which.max(as.numeric(unlist(ldply(x@bestSol,fitness.,.parallel=pp))))
sol=x@bestSol[[pos]]
par = decode(sol)
res = list()
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
res[[i]]=list(set1=sets[bi],pos1=idxi,set2=sets[bj],pos2=idxj,par=par[i,],
x=dat[[sets[bi]]]$mat[,idxi],
x0=dat[[sets[bj]]]$mat[,idxj],
name=dat[[sets[bi]]]$nam[idxi],
name0=dat[[sets[bj]]]$nam[idxj])
}
return(res)
}
#
FeaturesExt=function(x,num,sets=cnjts,dat=bandas,fitness.=fitnessG,pp=FALSE){
vals=as.numeric(unlist(ldply(x@bestSol,fitness.,.parallel=pp)))
fiveval=summary(vals)
uv = unique(sort(vals,decreasing=TRUE))
uvals = uv[1:min(num,length(uv))]
isols=rep(0,length(uvals))
for (i in 1:length(uvals)) {
isols[i] = which(vals==uvals[i])[1]
}
res=NA
if ( length(isols) > 0) {
par=decode(x@bestSol[[1]])
res = as.data.frame(matrix(NA,nrow=nrow(par),ncol=(2*length(isols))))
nam = as.data.frame(matrix(NA,nrow=nrow(par),ncol=(2*length(isols))))
for (j in 1:length(isols)) {
sol=x@bestSol[[isols[j]]]
par = decode(sol)
res[,(2*(j-1)+1):(2*j)]=par
names(res)[(2*(j-1)+1):(2*j)] = c(paste(j,":s",sep=""),paste(j,":c",sep=""))
for ( i in 1:nrow(par)) {
bi=par[i,1] %% NBLK
bj=par[i,2] %% NBLK
if ( bi==0 ) bi=NBLK
if ( bj==0 ) bj=NBLK
idxi=(par[i,1] %/% NBLK) +1
idxj=(par[i,2] %/% NBLK) +1
nam[i,(2*(j-1)+1)]=dat[[sets[bi]]]$nam[idxi]
nam[i,(2*j)]=dat[[sets[bj]]]$nam[idxj]
}
names(nam)[(2*(j-1)+1):(2*j)] = c(paste(j,":s",sep=""),paste(j,":c",sep=""))
}
}
return(list(par=res,name=nam,pos=isols,vpos=vals[isols],fivevals=fiveval))
}
#
decode = function(string) {
string = gray2binary(string)
n = binary2decimal(string[1:NBITS])
c = binary2decimal(string[(NBITS + 1):(2*NBITS)])
res = data.frame(p1=n%%MAXV,p2=c%%MAXV)
ini = 2 * NBITS
if ( NG > 1) {
for (i in 2:NG) {
n = binary2decimal(string[(ini+1):(ini+NBITS)])
c = binary2decimal(string[(ini + NBITS + 1):(ini + 2*NBITS)])
res = rbind(res, c(n%%MAXV,c%%MAXV))
ini = ini + 2*NBITS
}
}
return(res)
}
#
encode = function(res,nbits=NBITS) {
if (! is.data.frame(res)) {
warning(paste("ENCODE: First parameter is not data.frame:",str(res),sep=" "))
return(NULL)
}
cad=rep(0,ncol(res)*nrow(res)*nbits)
for (i in 1:nrow(res)) {
for (j in 1:ncol(res)) {
k=((i-1)*ncol(res) + (j - 1))*nbits+1
cad[k:(k+nbits-1)]=decimal2binary(res[i,j],nbits)
}
}
return(binary2gray(cad))
}
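A toy round-trip check of the Gray encoding; the sizes below are made up for illustration only and are overwritten by the load()/assignments that follow:
#
# Illustrative sizes only; the real NG, NBLK, NBITS and MAXV are set further down.
NG <- 3; NBLK <- 2; MAXV <- 50; NBITS <- ceiling(log(NBLK * MAXV, 2))
pares <- data.frame(p1 = c(12, 40, 7), p2 = c(33, 5, 48))
crom <- encode(pares)   # Gray-coded chromosome of 2 * NG * NBITS bits
decode(crom)            # recovers the (p1, p2) band-index pairs (modulo MAXV)
#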
#
#
NITER=9
if ( file.exists("~/git/M_prep_IRTF/GA_NT11F2-2013_data.RData")) {
j=1
load("~/git/M_prep_IRTF/GA_NT11F2-2013_data.RData")
tt50=list()
GAs50 = list()
solsG50=list()
tt10=list()
GAs10 = list()
solsG10=list()
tt=list()
GAs=list()
solsG=list()
while(j <= NITER) {
cat(paste("Reading Iteration:",j,"<br>",sep=""))
flush.console()
load(paste("~/git/M_prep_IRTF/GA_2013_",sprintf("%02d",j),"_50_f2_data.RData",sep=""))
tt50[[j]]= t_tot
GAs50[[j]]=GA1
solsG50[[j]]=FeaturesExt(GA1,12)
load(paste("~/git/M_prep_IRTF/GA_2013_",sprintf("%02d",j),"_10_f2_data.RData",sep=""))
tt10[[j]]= t_tot
GAs10[[j]]=GA1
solsG10[[j]]=FeaturesExt(GA1,12)
load(paste("~/git/M_prep_IRTF/GA_2013_",sprintf("%02d",j),"_f2_data.RData",sep=""))
tt[[j]]= t_tot
GAs[[j]]=GA1
solsG[[j]]=FeaturesExt(GA1,12)
j = j+1
}
} else {
# cnjts = c("s=1","s=6","s=11","s=16","s=21","s=26")
cnjts = c("s=1","s=16")
load(file="~/git/M_prep_IRTF/GA5.RData")
bpy=do.call(rbind,lapply(bp_clean,function(x){return(x$data[[1]][,2])}))
YPT=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[1])}))
YPG=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[2])}))
YPM=do.call(rbind,lapply(bp_clean,function(x){return(x$stellarp[3])}))
vf=as.character(bp_clean[[1]]$data[[1]][,1])
colnames(bpy)=vf
bfy=do.call(rbind,lapply(bf_clean,function(x){return(x$data[[1]][,2])}))
colnames(bfy)=vf
#
siz=30
bandas=list()
bfndas=list()
bandas10=list()
bandas50=list()
for (i in seq(1,30,15)) { # LSB asked for steps of 5 pixels
idx=paste("s=",i,sep="")
lvf=vf
if ( i > 1) {
lvf=vf[-c(1:(i-1))]
}
ff=as.factor(sort(rep(1:ceiling(length(lvf)/siz),siz))[1:length(lvf)])
st=split(lvf,ff)
for (j in 1:length(st)) {
if (length(st[[j]]) < siz)
st[[j]] = NULL
}
lst = length(st)
bandas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy)))
bfndas[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bfy)))
bandas10[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy10)))
bandas50[[idx]]=list(names= as.vector(unlist(lapply(st,
function(x){return(paste(range(x),collapse="-"))}))),
mat=matrix(NA,ncol=lst,nrow=nrow(bpy50)))
for (j in 1:lst) {
bandas[[idx]]$mat[,j]=apply(bpy[,st[[j]]],1,area,st[[j]])
bandas10[[idx]]$mat[,j]=apply(bpy10[,st[[j]]],1,area,st[[j]])
bandas50[[idx]]$mat[,j]=apply(bpy50[,st[[j]]],1,area,st[[j]])
}
for (j in 1:lst) {
bfndas[[idx]]$mat[,j]=apply(bfy[,st[[j]]],1,area,st[[j]])
}
}
#
# BE CAREFUL !!! y must correspond to the variable being predicted
y=as.numeric(unlist(YPG))
cnjts = c("s=1","s=16")
NG=10 # Number of features
NBLK = length(cnjts)
NBITS= ceiling(log(NBLK*ncol(bandas[[cnjts[1]]]$mat),2))
MAXV = ncol(bandas[[cnjts[1]]]$mat)
#
save(bpy,bpy10,bpy50,bfy,YPT,YPG,YPM,vf,siz,bandas,bfndas, bandas10,bandas50,
cnjts,st,NG,NBLK,NBITS,MAXV,y,fitnessG,fitnessG10,fitnessG50, encode,decode,
SelFeatures,file="~/git/M_prep_IRTF/GA_NT11F2-2013_data.RData")
#
tt=list()
GAs = list()
solsG=list()
j=1
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt[[j]]= system.time({GA1 = ga(type="binary", fitness = fitnessG, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt[[j]]
GAs[[j]]=GA1
save(fitnessG,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IRTF/GA_2013_",sprintf("%02d",j),"_f2_data.RData",sep=""))
solsG[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsG,function(x){return(x$vpos[1])})),type="html")
#
j=1
tt10=list()
GAs10 = list()
solsG10=list()
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt10[[j]]= system.time({GA1 = ga("binary", fitness = fitnessG10, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt10[[j]]
GAs10[[j]]=GA1
save(fitnessG10,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IRTF/GA_2013_",sprintf("%02d",j),"_10_f2_data.RData",sep=""))
solsG10[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsG10,function(x){return(x$vpos[1])})),type="html")
#
j=1
tt50=list()
GAs50 = list()
solsG50=list()
while(j <= NITER) {
cat(paste("Starting Iteration:",j,"<br>",sep=""))
flush.console()
tt50[[j]]= system.time({GA1 = ga("binary", fitness = fitnessG50, nBits = NBITS*2*NG,
monitor = FALSE,parallel=18,maxiter=1000,popSize=8000,keepBest=TRUE)})
t_tot=tt50[[j]]
GAs50[[j]]=GA1
save(fitnessG50,encode,decode,SelFeatures,GA1,t_tot,j,
file=paste("~/git/M_prep_IRTF/GA_2013_",sprintf("%02d",j),"_50_f2_data.RData",sep=""))
solsG50[[j]]=FeaturesExt(GA1,12)
j=j+1
}
#
print(xtable(ldply(solsG50,function(x){return(x$vpos[1])})),type="html")
}
Reading Iteration:1
Reading Iteration:2
Reading Iteration:3
Reading Iteration:4
Reading Iteration:5
Reading Iteration:6
Reading Iteration:7
Reading Iteration:8
Reading Iteration:9
#
print(xtable(ldply(solsG,function(x){return(x$vpos[1])})),type="html")
| Run | Best fitness (-BIC) |
|---|---|
| 1 | 126.56 |
| 2 | 141.11 |
| 3 | 159.72 |
| 4 | 102.09 |
| 5 | 145.11 |
| 6 | 191.01 |
| 7 | 159.77 |
| 8 | 110.53 |
| 9 | 171.05 |
# ISOL holds the best solution !
isol = which.max(as.numeric(ldply(solsG,function(x){return(x$vpos[1])})[,1]))
isol
[1] 6
print(xtable(ldply(solsG10,function(x){return(x$vpos[1])})),type="html")
| Run | Best fitness (-BIC) |
|---|---|
| 1 | -299.04 |
| 2 | -317.45 |
| 3 | -386.54 |
| 4 | -344.19 |
| 5 | -299.17 |
| 6 | -317.68 |
| 7 | -385.64 |
| 8 | -426.41 |
| 9 | -404.51 |
# ISOL holds the best solution !
isol10 = which.max(as.numeric(ldply(solsG10,function(x){return(x$vpos[1])})[,1]))
isol10
[1] 1
#
print(xtable(ldply(solsG50,function(x){return(x$vpos[1])})),type="html")
| Run | Best fitness (-BIC) |
|---|---|
| 1 | -80.49 |
| 2 | 61.20 |
| 3 | -40.78 |
| 4 | 19.04 |
| 5 | 30.21 |
| 6 | 38.69 |
| 7 | 66.46 |
| 8 | -81.87 |
| 9 | -65.31 |
# ISOL holds the best solution !
isol50 = which.max(as.numeric(ldply(solsG50,function(x){return(x$vpos[1])})[,1]))
isol50
[1] 7
#
#
clase_luminosidad = function(x) {
# We scan the spectral type string for the luminosity class:
# Roman numerals V => dwarfs; III => giants; I => supergiants (II => I)
cl=c("V","III","I")
i=1
res=NA
while(i <= length(cl) & is.na(res)) {
j=regexpr(cl[i],x)
if (j[1] > 0) {
res=cl[i]
} else {
i=i+1
}
}
return(res)
}
#
prep_datosG=function(res,T){
X=matrix(NA,nrow=length(res[[1]]$x),ncol=(length(res)+1))
for (j in 1:length(res)) {
wini=-eval(parse(text=res[[j]]$name))
winj=-eval(parse(text=res[[j]]$name0))
X[,j]=wini-res[[j]]$x/(res[[j]]$x0/winj)
}
X[,(length(res)+1)] = T
colnames(X)=c(paste("C",1:length(res),sep=""),"T")
return(X)
}
#
modelado=function(X,Y) {
#
trn=1:nrow(X)
folds=5
repeats=1
#
mseeds <- vector(mode = "list", length = (folds+1))
for(i in 1:folds) mseeds[[i]] <- sample.int(nrow(X), 100)
mseeds[[(folds+1)]] <- sample.int(1000, 1)
myControl = trainControl(method='cv', number=folds,
repeats=repeats, returnResamp='none',
returnData=FALSE, savePredictions=TRUE,
verboseIter=FALSE, allowParallel=TRUE,seeds=mseeds)
#Train some models
model_1.1 = "train(X[trn,], Y[trn], method='gbm',
trControl=myControl,
tuneGrid=expand.grid(.n.trees=seq(200,500,100),
.interaction.depth=seq(3,15,2),
.shrinkage = seq(0.01,0.1,0.02)),verbose=FALSE)"
model_1.2 = "train(X[trn,], Y[trn], method='blackboost',
trControl=myControl, tuneGrid=expand.grid(.mstop=10^(2:4),
.maxdepth=seq(5,15,3)))"
model_1.3 = "train(X[trn,], Y[trn], method='rf',
trControl=myControl,
tuneGrid=expand.grid(.mtry=(2:ncol(X))),
proximity=TRUE)"
model_1.4 = "train(X[trn,], Y[trn], method='knn',
trControl=myControl, trace=FALSE,
tuneGrid=expand.grid(k=(2:min(9,ncol(X)))))"
model_1.5 = "train(X[trn,], Y[trn], method='ppr',
trControl=myControl,tuneGrid=expand.grid(.nterms=10))"
model_1.6 = "train(X[trn,], Y[trn], method='earth',
trControl=myControl,tuneGrid=expand.grid(.degree = 3,
.nprune = (1:10) * 2), metric = 'RMSE',
maximize = FALSE)"
model_1.7 = "train(X[trn,], Y[trn], method='glm',
trControl=myControl)"
model_1.8 = "train(X[trn,], Y[trn], method='svmRadial',
trControl=myControl,tuneGrid=expand.grid(.C=10^(-3:3),
.sigma=c(10^(-4:3))))"
model_1.9 = "train(X[trn,], Y[trn], method='cubist',
trControl=myControl,committees=10)"
model_1.10 = "train(X[trn,], Y[trn], method='kernelpls',
trControl=myControl,tuneGrid=expand.grid(.ncomp=seq(2,ncol(X))))"
model_1.11 = "train(X[trn,], Y[trn], method='nnet',
trControl=myControl,tuneGrid=expand.grid(.size=seq(2,round(2*ncol(X)),3),
.decay=c(1.e-5,1.e-2)),
linout=TRUE, rang = 300, trace=FALSE,
maxit=10000, reltol=1.0e-11, abstol=1.0e-6)"
model_1.12 = "train(X[trn,], Y[trn], method='bagEarth',
trControl=myControl,tuneGrid=expand.grid(.nprune=(1:10)*3,.degree=1:5))"
model_1.13 = "train(X[trn,], Y[trn], method='cubist', trControl=myControl,
tuneGrid=expand.grid(.committees=3:50, .neighbors=1:7))"
#
#Make a list of all the models
lmodels=c("model_1.1", "model_1.2", "model_1.3", "model_1.4",
"model_1.5", "model_1.6", "model_1.7", "model_1.8",
"model_1.9", "model_1.10","model_1.11","model_1.12",
"model_1.13")
#
all.models_5 =list()
for (i in lmodels) {
all.models_5[[length(all.models_5)+1]] = eval(parse(text=gsub('\n','',get(i))))
}
names(all.models_5) = sapply(all.models_5, function(x) x$method)
#
ensam_5 <- caretList(X[trn,],Y[trn],methodList=c('svmRadial',
'gbm','blackboost','rf','earth','bagEarth'))
ens <- caretEnsemble(ensam_5)
#Make a linear regression ensemble
if (exists("ens")){
all.models_5[[length(all.models_5)+1]] = get("ens")
names(all.models_5)[length(all.models_5)]="ENS"
}
return(all.models_5)
}
#
# Loading the predicted temperature
load(file="~/git/M_prep_IRTF/GA_predict_T_11F2.RData")
if ( file.exists("~/git/M_prep_IRTF/GA_data_MG-2013_TF2_Models.RData")) {
load("~/git/M_prep_IRTF/GA_data_MG-2013_TF2_Models.RData")
} else {
#
# Objects YTpknn00,YTpknn11,YTpknn55 are read from the file
# They were determined by the script prep_GA_case01_NT11F2_v1.Rmd
#
res00=SelFeatures(GAs[[isol]],cnjts,bandas)
res10=SelFeatures(GAs10[[isol10]],cnjts,bandas10)
res50=SelFeatures(GAs50[[isol50]],cnjts,bandas50)
#
X00 = prep_datosG(res00,as.numeric(YPT[,1]))
md5 = modelado(X00,as.numeric(YPG[,1]))
X10 = prep_datosG(res10,as.numeric(YPT[,1]))
md51= modelado(X10,as.numeric(YPG[,1]))
X50 = prep_datosG(res50,as.numeric(YPT[,1]))
md55= modelado(X50,as.numeric(YPG[,1]))
#
xf0 = prep_datosG(SelFeatures(GAs[[isol]],cnjts,bfndas),YTpknn00)
xf1 = prep_datosG(SelFeatures(GAs10[[isol10]],cnjts,bfndas),YTpknn11)
xf5 = prep_datosG(SelFeatures(GAs50[[isol50]],cnjts,bfndas),YTpknn55)
#
YGn00=predict(md5[[which(names(md5)=="rf")]],xf0)
YGn01=predict(md51[[which(names(md51)=="rf")]],xf0)
YGn05=predict(md55[[which(names(md55)=="rf")]],xf0)
YGn10=predict(md5[[which(names(md5)=="rf")]],xf1)
YGn11=predict(md51[[which(names(md51)=="rf")]],xf1)
YGn15=predict(md55[[which(names(md55)=="rf")]],xf1)
YGn50=predict(md5[[which(names(md5)=="rf")]],xf5)
YGn51=predict(md51[[which(names(md51)=="rf")]],xf5)
YGn55=predict(md55[[which(names(md55)=="rf")]],xf5)
# Corrections to the estimates from the Cesetti et al. table have already been applied above
# ddj=ddj[! ddj$Name %in% toberemoved,]
# for ( i in 1:nrow(tobeupdated)) {
# ddj[ddj$Name %in% tobeupdated[i,"name"],"Teff"]=tobeupdated[i,"Teff"]
# }
ref_tot0i=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,
LG_ICA_10=G_coef2,LG_ICA_50=G_coef,LG_rf_00=YGn00,LG_rf_11=YGn11,LG_rf_55=YGn55)
ref_tot1i=data.frame(Name=as.character(ddj$Name),SpT=ddj$SpT,Teff=ddj$Teff,LC=ddj$LC,
LogG=ddj$Log_G, Met=ddj$Met)
ref_tot2i=merge(ref_tot0i,ref_tot1i,by="Name")
#
save(bpy,bfy,YPT,YPG,YPM,vf,siz,bandas,bfndas, bandas10,bandas50,cnjts,ddj,
NG,NBLK,NBITS,MAXV,md5,md51,md55,res00,res10,res50,
X00,X10,X50,YGn00,YGn01,YGn05,YGn10,YGn11,YGn15,YGn50,YGn51,YGn55,xf0,xf1,xf5,
ref_tot0i,ref_tot1i,ref_tot2i,file="~/git/M_prep_IRTF/GA_data_MG-2013_TF2_Models.RData")
}
#
res00=SelFeatures(GAs[[isol]],cnjts,bandas)
res10=SelFeatures(GAs10[[isol10]],cnjts,bandas10)
res50=SelFeatures(GAs50[[isol50]],cnjts,bandas50)
ref_tot0=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,
LG_ICA_10=G_coef2,LG_ICA_50=G_coef,LG_rf_00=YGn00,LG_rf_11=YGn11,LG_rf_55=YGn55)
ddj$LC=sapply(ddj$SpT,clase_luminosidad)
ddj$LC[is.na(ddj$LC)] = "III"
ddj=ddj[! ddj$Name %in% toberemoved,]
for ( i in 1:nrow(tobeupdated)) {
ddj[ddj$Name %in% tobeupdated[i,"name"],"Teff"]=tobeupdated[i,"Teff"]
}
ref_tot1=data.frame(Name=as.character(ddj$Name),SpT=ddj$SpT,Teff=ddj$Teff,LC=ddj$LC,
LogG=ddj$Log_G, Met=ddj$Met)
ref_tot2=merge(ref_tot0,ref_tot1,by="Name")
#
print(xtable(ldply(res00,function(x){return(c(x$name,x$name0))})),type="html")
| Feature | Signal band [Å] | Reference band [Å] |
|---|---|---|
| 1 | 10245.8834648132-10304.0170669556 | 11241.2893772125-11328.536272049 |
| 2 | 8415.91477394104-8473.95598888397 | 11511.5076303482-11598.5083580017 |
| 3 | 12906.5597057343-12993.6075210571 | 13041.4843559265-13133.8226795197 |
| 4 | 8715.9988284111-8773.98669719696 | 10425.8972406387-10484.1268062592 |
| 5 | 8805.93180656433-8863.97480964661 | 12816.7152404785-12903.7344455719 |
| 6 | 10126.0197162628-10183.9262247086 | 13086.4554643631-13194.0937042236 |
| 7 | 8176.02604627609-8234.12597179413 | 10971.5676307678-11058.4557056427 |
| 8 | 8626.02293491364-8683.98904800415 | 10746.431350708-10833.5745334625 |
| 9 | 8536.02588176727-8594.05636787415 | 10215.9470319748-10274.1026878357 |
| 10 | 12951.6196250916-13038.6245250702 | 11196.5644359589-11283.243894577 |
print(xtable(ldply(res10,function(x){return(c(x$name,x$name0))})),type="html")
| Feature | Signal band [Å] | Reference band [Å] |
|---|---|---|
| 1 | 8176.02604627609-8234.12597179413 | 9165.87352752686-9223.91295433044 |
| 2 | 10485.9864711761-10563.4093284607 | 10002.0408630371-9999.92370605469 |
| 3 | 8656.085729599-8714.03753757477 | 10926.4612197876-11013.6014223099 |
| 4 | 9525.88737010956-9584.04839038849 | 10002.0408630371-9999.92370605469 |
| 5 | 8205.98006248474-8263.95511627197 | 13041.4843559265-13133.8226795197 |
| 6 | 10275.9730815887-10333.9564800262 | 11376.6288757324-11463.5121822357 |
| 7 | 10306.0281276703-10363.8756275177 | 11151.6261100769-11238.4647130966 |
| 8 | 9165.87352752686-9223.91295433044 | 8385.99264621735-8443.942964077 |
| 9 | 9645.81817388535-9704.14638519287 | 13137.9419565201-13253.9582252502 |
| 10 | 8325.99699497223-8383.94105434418 | 12726.6937494278-12813.7129545212 |
print(xtable(ldply(res50,function(x){return(c(x$name,x$name0))})),type="html")
| Feature | Signal band [Å] | Reference band [Å] |
|---|---|---|
| 1 | 11151.6261100769-11238.4647130966 | 13086.4554643631-13194.0937042236 |
| 2 | 8385.99264621735-8443.942964077 | 13618.2010173798-13734.142780304 |
| 3 | 8176.02604627609-8234.12597179413 | 11241.2893772125-11328.536272049 |
| 4 | 8536.02588176727-8594.05636787415 | 13041.4843559265-13133.8226795197 |
| 5 | 12771.6958522797-12858.7293624878 | 10306.0281276703-10363.8756275177 |
| 6 | 13378.1176805496-13494.1327571869 | 10002.0408630371-9999.92370605469 |
| 7 | 8626.02293491364-8683.98904800415 | 10926.4612197876-11013.6014223099 |
| 8 | 9826.0509967804-9883.90862941742 | 10006.0677528381-10064.013004303 |
| 9 | 10521.5620994568-10608.4597110748 | 11736.7100715637-11823.4866857529 |
| 10 | 8205.98006248474-8263.95511627197 | 9796.0889339447-9853.93643379211 |
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=chi2d_10,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") + ylab("Chi2 10 [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=chi2d_50,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") + ylab("Chi2 50 [dex]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=LG_rf_00,shape=LC),size=3) +
xlab("Theoretical Log(G) [K]") + ylab("ML predicted Msnr=oo Fsnr=oo [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=LG_rf_11,shape=LC),size=3) +
xlab("Theoretical Log(G) [K]") + ylab("ML predicted Msnr=10 Fsnr=10 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
ggplot(data=ref_tot2) +
geom_point(aes(x=LogG,y=LG_rf_55,shape=LC),size=3) +
xlab("Theoretical Log(G) [K]") + ylab("ML predicted Msnr=50 Fsnr=50 [K]") +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="gray") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2))
#
lmod=c("rf","gbm","svmRadial","nnet","knn","bagEarth","kernelpls","cubist")
nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
for (i in 1:length(lmod)) {
YGm00=predict(md5[[which(names(md5)==lmod[i])[1]]],xf0)
YGm11=predict(md51[[which(names(md51)==lmod[i])[1]]],xf1)
YGm55=predict(md55[[which(names(md55)==lmod[i])[1]]],xf5)
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,
YGm00,YGm11,YGm55,Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55)
ref_tot1m=data.frame(Name=as.character(ddj$Name),SpT=ddj$SpT,Teff=ddj$Teff,LC=ddj$LC,
LogG=ddj$Log_G, Met=ddj$Met)
ref_tot2m=merge(ref_tot0m,ref_tot1m,by="Name")
diffs=apply(ref_tot2m[,! colnames(ref_tot2m) %in% c("Name","SpT","Teff","LC","Met",
"LogG","Tknn0","Tknn1","Tknn5")],2,FUN="-",ref_tot2m[,"LogG"])
rownames(diffs)=ref_tot2m[,"Name"]
#
cat(paste("Log(G) Modelling with ",lmod[i],". Error analysis follows.",sep=""))
errors=data.frame(drmse = apply(diffs,2,rmse),dmae=apply(diffs,2,mae))
print(xtable(errors),type="html")
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=oo",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=LogG,y=YGm00,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") +
ylab(paste(nmod[i]," predicted SNR=oo [dex]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2)))
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=10",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=LogG,y=YGm11,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") +
ylab(paste(nmod[i]," predicted SNR=10 [dex]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2)))
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=50",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=LogG,y=YGm55,shape=LC),size=3) +
xlab("Theoretical Log(G) [dex]") +
ylab(paste(nmod[i]," predicted SNR=50 [dex]","")) +
theme_bw() + # scale_colour_brewer(palette="Set1") +
geom_abline(position="identity", colour="red") # +
# xlim(2000,4200) + ylim(2000,4200) +
# guides(col=guide_legend(ncol=2)))
)
}
## Loading required package: randomForest
## randomForest 4.6-7
## Type rfNews() to see new features/changes/bug fixes.
Log(G) Modelling with rf. Error analysis follows.
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 0.53 | 0.42 |
| YGm11 | 0.64 | 0.47 |
| YGm55 | 0.77 | 0.66 |
Log(G) Modelling with: RF. SNR=oo
Log(G) Modelling with: RF. SNR=10
Log(G) Modelling with: RF. SNR=50
## Loading required package: gbm
## Loading required package: survival
##
## Attaching package: 'survival'
##
## The following object is masked from 'package:caret':
##
## cluster
##
## Loading required package: splines
## Loaded gbm 2.1.1
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 0.49 | 0.42 |
| YGm11 | 0.48 | 0.40 |
| YGm55 | 0.61 | 0.53 |
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 0.46 | 0.36 |
| YGm11 | 0.66 | 0.55 |
| YGm55 | 0.63 | 0.54 |
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 1.20 | 1.09 |
| YGm11 | 0.78 | 0.65 |
| YGm55 | 0.47 | 0.43 |
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 1.60 | 1.26 |
| YGm11 | 1.23 | 1.01 |
| YGm55 | 1.39 | 1.21 |
Log(G) Modelling with: KNN. SNR=oo
Log(G) Modelling with: KNN. SNR=10
Log(G) Modelling with: KNN. SNR=50
## Loading required package: earth
## Loading required package: plotmo
## Loading required package: plotrix
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 0.99 | 0.87 |
| YGm11 | 0.84 | 0.68 |
| YGm55 | 0.54 | 0.42 |
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 0.96 | 0.84 |
| YGm11 | 0.99 | 0.88 |
| YGm55 | 0.51 | 0.46 |
| | drmse | dmae |
|---|---|---|
| chi2d_10 | 0.82 | 0.59 |
| chi2d_50 | 0.93 | 0.73 |
| YGm00 | 0.57 | 0.48 |
| YGm11 | 0.74 | 0.66 |
| YGm55 | 0.50 | 0.41 |
Log(G) Modelling with: Rule-Regression. SNR=oo
Log(G) Modelling with: Rule-Regression. SNR=10
Log(G) Modelling with: Rule-Regression. SNR=50
#
That's all for the error analysis.
Now it is time to have a look at the log g versus log Teff diagrams traced by the different estimators (both axes are reversed in the plots below).
#
ref_tot0=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,
LG_ICA_10=G_coef2,LG_ICA_50=G_coef,LG_rf_00=YGn00,LG_rf_11=YGn11,LG_rf_55=YGn55,
Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55)
ddj$LC=sapply(ddj$SpT,clase_luminosidad)
ddj$LC[is.na(ddj$LC)] = "III"
ddj=ddj[! ddj$Name %in% toberemoved,]
for ( i in 1:nrow(tobeupdated)) {
ddj[ddj$Name %in% tobeupdated[i,"name"],"Teff"]=tobeupdated[i,"Teff"]
}
ref_tot1=data.frame(Name=as.character(ddj$Name),SpT=ddj$SpT,Teff=ddj$Teff,LC=ddj$LC,
LogG=ddj$Log_G, Met=ddj$Met)
ref_tot2=merge(ref_tot0,ref_tot1,by="Name")
#
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=chi2d_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - Chi2-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=chi2d_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - Chi2-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=LG_ICA_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - ICA-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn0,10),y=LG_ICA_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab("Log(G) [dex] - ICA-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
# T snr=10
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=chi2d_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - Chi2-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=chi2d_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - Chi2-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=LG_ICA_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - ICA-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn1,10),y=LG_ICA_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab("Log(G) [dex] - ICA-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
# T snr=50
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=chi2d_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - Chi2-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=chi2d_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - Chi2-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=LG_ICA_10,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - ICA-10") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
print(ggplot(data=ref_tot2) +
geom_point(aes(x=log(Tknn5,10),y=LG_ICA_50,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab("Log(G) [dex] - ICA-50") +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
#
#
lmod=c("rf","gbm","svmRadial","nnet","knn","bagEarth","kernelpls","cubist")
nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
for (i in 1:length(lmod)) {
YGm00=predict(md5[[which(names(md5)==lmod[i])[1]]],xf0)
YGm11=predict(md51[[which(names(md51)==lmod[i])[1]]],xf1)
YGm55=predict(md55[[which(names(md55)==lmod[i])[1]]],xf5)
#
ref_tot0m=data.frame(Name=as.character(ref$Name),
chi2d_10=ref$chi2_10, chi2d_50=ref$chi2_50,
YGm00,YGm11,YGm55,Tknn0=YTpknn00,Tknn1=YTpknn11,Tknn5=YTpknn55)
ref_tot1m=data.frame(Name=as.character(ddj$Name),SpT=ddj$SpT,Teff=ddj$Teff,LC=ddj$LC,
LogG=ddj$Log_G, Met=ddj$Met)
ref_tot2m=merge(ref_tot0m,ref_tot1m,by="Name")
#
cat(paste("Log(G) Modelling with ",lmod[i],". Plot main sequence follows.",sep=""))
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=oo",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=log(Tknn0,10),y=YGm00,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=oo") +
ylab(paste("Log(G) ",nmod[i]," predicted SNR=oo [dex]")) +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=10",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=log(Tknn1,10),y=YGm11,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=10") +
ylab(paste("Log(G) ",nmod[i]," predicted SNR=10 [dex]")) +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
#
cat(paste("Log(G) Modelling with: ",nmod[i],". SNR=50",sep=""))
print(ggplot(data=ref_tot2m) +
geom_point(aes(x=log(Tknn5,10),y=YGm55,shape=LC),size=3) +
xlab("Log(T) [dex] - snr=50") +
ylab(paste("Log(G) ",nmod[i]," predicted SNR=50 [dex]")) +
theme_bw() + scale_y_reverse() + scale_x_reverse()
)
}
Log(G) Modelling with rf. Plot main sequence follows.
Log(G) Modelling with: RF. SNR=oo
Log(G) Modelling with: RF. SNR=10
Log(G) Modelling with: RF. SNR=50
Log(G) Modelling with gbm. Plot main sequence follows.
Log(G) Modelling with: GB. SNR=oo
Log(G) Modelling with: GB. SNR=10
Log(G) Modelling with: GB. SNR=50
Log(G) Modelling with svmRadial. Plot main sequence follows.
Log(G) Modelling with: SVR. SNR=oo
Log(G) Modelling with: SVR. SNR=10
Log(G) Modelling with: SVR. SNR=50
Log(G) Modelling with nnet. Plot main sequence follows.
Log(G) Modelling with: NNR. SNR=oo
Log(G) Modelling with: NNR. SNR=10
Log(G) Modelling with: NNR. SNR=50
Log(G) Modelling with knn. Plot main sequence follows.
Log(G) Modelling with: KNN. SNR=oo
Log(G) Modelling with: KNN. SNR=10
Log(G) Modelling with: KNN. SNR=50
Log(G) Modelling with bagEarth. Plot main sequence follows.
Log(G) Modelling with: MARS. SNR=oo
Log(G) Modelling with: MARS. SNR=10
Log(G) Modelling with: MARS. SNR=50
Log(G) Modelling with kernelpls. Plot main sequence follows.
Log(G) Modelling with: PLS. SNR=oo
Log(G) Modelling with: PLS. SNR=10
Log(G) Modelling with: PLS. SNR=50
Log(G) Modelling with cubist. Plot main sequence follows.
Log(G) Modelling with: Rule-Regression. SNR=oo
Log(G) Modelling with: Rule-Regression. SNR=10
Log(G) Modelling with: Rule-Regression. SNR=50
#