Caso de las estrellas IPAC

Lectura de Predicciones de Modelos con Doppler revisado

#
#
# Loading the predicted temperature
load("~/git/M_prep_IPAC/Trv_GA_models_IPAC.RData")
nomT_IPAC=gsub('.txt','',names(nomT))
T_IPAC = dataT
LCT=LC
SpTT=SpT
load("~/git/M_prep_IPAC/LGrv_GA_models_IPAC.RData")
nomG_IPAC=nomG
G_IPAC = dataG
nomG_IPAC=gsub('.txt','',names(nomG))
LCG=LC
SpTG=SpT
load("~/git/M_prep_IPAC/Mrv_GA_models_IPAC.RData")
nomM_IPAC=nomM
M_IPAC = dataM
LCM=LC
SpTM=SpT
nomM_IPAC=gsub('.txt','',names(nomM))
nomM_IPAC=unlist(lapply(strsplit(nomM_IPAC,'\\.'),function(x){return(gsub('_','',x[[1]][1]))}))
nomM_IPAC=gsub('MASP','MASS',nomM_IPAC)
#
rm(list=c("dataT","nomT","dataG","nomG","dataM","nomM","LC","SpT"))

Lectura de los datos de contraste de Neves III y Rojas y Ayala

#
dNeves=read.table(file="~/git/M_prep_TEO/NevesIII-IPAC-final.tsv",sep="|",
                  header=FALSE,stringsAsFactors=FALSE)
dNevesM=dNeves[,c(1,19,18)]
colnames(dNevesM)=c("Name1","Name2","Fe/H")
dNevesM[,1]=str_replace(dNevesM[,1],'\\+','p')
dNevesM[,1]=str_replace(dNevesM[,1],' ','_')
#
RA=read.table(file="~/git/M_prep_TEO/RA-IPAC-final.tsv",sep="|",
              header=FALSE,stringsAsFactors=FALSE)
RAf=RA[,c(1,5,18,19,22,23)]
colnames(RAf)=c("Name1","Name2","T","DT","Fe/H","DFe/H")
RAf[,1]=str_replace(RAf[,1],'\\+','p')
RAf[,1]=str_replace(RAf[,1],' ','_')
dd=merge(dNevesM,RAf,by="Name1")
#
#

Comparando las estiamciones para Fe/H+ de ambos tenemos 24 de un total de 28 muestras compatibles. Es decir un 85.7142857 %. O lo que es lo mismo un 14.2857143 % que no es compatible entre si.

Comparación de los modelos con Neves III

#
# extrae=function(x,nom,M,LC,SpT) {
#   tnm=x[2]
#   teo=x[3]
#   ip=which(tnm==nom)
#   fallo=NULL
#   mul=NULL
#   lst=NULL
#   if (length(ip) > 0 ) {
#       i = ip[1]
#       lst = c(tnm,teo,LC[i],SpT[i],M[1,,i],M[3,,i],M[2,,i])
#       names(lst) = c("Name","FeH","LC","SpT",paste("M_",1:length(M_IPAC[1,,1]),"_INF",sep=""),
#                    paste("M_",1:length(M_IPAC[1,,1]),"_50",sep=""),
#                    paste("M_",1:length(M_IPAC[1,,1]),"_10",sep=""))
#       if ( length(ip) > 1 ) {
#         mul=c(as.character(tnm),ip)
#       }
#   } else {
#     fallo=as.character(tnm)
#   }
#   return(list(datos=lst,falta=fallo,varios=mul))
# }
# #
# ref_tot2=apply(dNevesM,1,extrae,nomM_IPAC,M_IPAC,LCM,SpTM)
# datos=lapply(ref_tot2,function(x){if (length(x$datos) > 0) return(x$datos)})
# pos=0
# while(pos < length(datos)) {
#   pos = pos + 1
#   if (is.null(datos[[pos]])) {
#     datos[[pos]]=NULL
#   }
# }
# datos=ldply(datos,function(x){return(x)})
# datos$BE=apply(datos[,5:28],1,function(x){return(median(as.numeric(x)))})
# datos$FeH=as.numeric(datos$FeH)
# nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
# #
# ggplot(data=datos) + 
#          geom_point(aes(x=FeH,y=BE,shape=LC),size=3) +
#          xlab("Theoretical Met [dex]") + ylab("Ensemble median [dex]") +
#          theme_bw() + #  scale_colour_brewer(palette="Set1") +
#          geom_abline(position="identity", colour="gray") # +
# #         xlim(-1,0) + ylim(-1,0)   #+
# #         guides(col=guide_legend(ncol=2))
# #
# txtj=c("INF","50","10")
# for(i in 1:length(nmod)) {
#   for (j in 1:length(txtj)) {
#     datos$actual=as.numeric(datos[,(4+i)+(j-1)*length(nmod)])
#     plot(ggplot(data=datos) + 
#          geom_point(aes(x=FeH,y=actual,shape=LC),size=3) +
#          xlab("Theoretical Met [dex]") + ylab(paste(nmod[i]," SNR=",txtj[j]," [dex]",sep="")) +
#          theme_bw() + #  scale_colour_brewer(palette="Set1") +
#          geom_abline(position="identity", colour="gray") 
#          )
#   }
# }
# #
# datosN=datos
# rm(datos)
#

Comparación de los modelos con Rojas y Ayala

#
# delta=function(x,RA) {
#   nm=which(RA[,1]==x[1])[1]
#   if ( nm > 0 )
#     return(RA[nm,2])
# }
# #
# RAf[,2]=gsub(' ','',RAf[,2])
# ref_tot2=apply(RAf[,c(1,2,5)],1,extrae,nomM_IPAC,M_IPAC,LCM,SpTM)
# datos=lapply(ref_tot2,function(x){if (length(x$datos) > 0) return(x$datos)})
# pos=0
# while(pos < length(datos)) {
#   pos = pos + 1
#   if (is.null(datos[[pos]])) {
#     datos[[pos]]=NULL
#   }
# }
# datos=ldply(datos,function(x){return(x)})
# datos$BE=apply(datos[,5:28],1,function(x){return(median(as.numeric(x)))})
# datos$FeH=as.numeric(datos$FeH)
# nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
# #
# DFeH = apply(datos[,c(1,2)],1,delta,RAf[,c(2,6)])
# ggplot(data=datos) + 
#          geom_point(aes(x=FeH,y=BE,shape=LC),size=3) +
#          xlab("Theoretical Met [dex]") + ylab("Ensemble median [dex]") +
#          theme_bw() + #  scale_colour_brewer(palette="Set1") +
#          geom_abline(position="identity", colour="gray") # +
# #         xlim(-1,0) + ylim(-1,0)   #+
# #         guides(col=guide_legend(ncol=2))
# #
# cat(paste("Modelo ENSEMBLE: Número de estrellas en tolerancia",sum(abs(datos$FeH-datos$BE) <= DFeH),
#           "frente al total de",nrow(datos),sep=" "))
# txtj=c("INF","50","10")
# for(i in 1:length(nmod)) {
#   for (j in 1:length(txtj)) {
#     datos$actual=as.numeric(datos[,(4+i)+(j-1)*length(nmod)])    
#     cat(paste("Modelo",nmod[i],"SNR",txtj[j],": Número de estrellas en tolerancia",
#               sum(abs(datos$FeH-datos$actual) <= DFeH),
#           "frente al total de",nrow(datos),sep=" "))
#     plot(ggplot(data=datos) + 
#          geom_point(aes(x=FeH,y=actual,shape=LC),size=3) +
#          xlab("Theoretical Met [dex]") + ylab(paste(nmod[i]," SNR=",txtj[j]," [dex]",sep="")) +
#          theme_bw() + #  scale_colour_brewer(palette="Set1") +
#          geom_abline(position="identity", colour="gray") 
#          )
#   }
# }
#

Comparison between Rojas-Ayala and GA for Tmeperature

#
extraeT=function(x,nom,T,LC,SpT) {
  tnm=x[2]
  teo=x[3]
  ip=which(tnm==nom)
  fallo=NULL
  mul=NULL
  lst=NULL
  if (length(ip) > 0 ) {
      i = ip[1]
      lst = c(tnm,teo,LC[i],SpT[i],T[1,,i],T[3,,i],T[2,,i])
      names(lst) = c("Name","T","LC","SpT",
                   paste("T_",1:length(T_IPAC[1,,1]),"_INF",sep=""),
                   paste("T_",1:length(T_IPAC[1,,1]),"_50",sep=""),
                   paste("T_",1:length(T_IPAC[1,,1]),"_10",sep=""))
      if ( length(ip) > 1 ) {
        mul=c(as.character(tnm),ip)
      }
  } else {
    fallo=as.character(tnm)
  }
  return(list(datos=lst,falta=fallo,varios=mul))
}
#
RAf[,2]=gsub(' ','',RAf[,2])
ref_tot2T=apply(RAf[,c(1,2,3)],1,extraeT,nomM_IPAC,T_IPAC,LCM,SpTM)
datosT=lapply(ref_tot2T,function(x){if (length(x$datos) > 0) return(x$datos)})
pos=0
while(pos < length(datosT)) {
  pos = pos + 1
  if (is.null(datosT[[pos]])) {
    datosT[[pos]]=NULL
  }
}
datosT=ldply(datosT,function(x){return(x)})
datosT[,2]=log(as.numeric(datosT[,2]),10)
for (i in 5:ncol(datosT)){
  datosT[,i]=log(as.numeric(datosT[,i]),10)
}
datosT[which(datosT[,1]=="Gl849"),"LC"]="V"
nmod=c("RF","GB","SVR","NNR","KNN","MARS","PLS","Rule-Regression")
txtj=c("INF","50","10")
#
for(i in 1:length(nmod)) {
  cat (paste("Processing models type:",nmod[i],"\n",sep=""))
  for (j in 1:length(txtj)) {
      cat(paste("  Working out case SNR=",txtj[j],"\n",sep=""))
      datosT$actual=as.numeric(datosT[,(4+i)+(j-1)*length(nmod)])   
      plot(ggplot(data=datosT) + 
        geom_point(aes(x=T,y=actual,shape=LC),size=3) +
        xlab("Log(T) Rojas-Ayala [dex]") + 
        ylab(paste("Log(T) GA-",nmod[i]," [dex]",sep="")) +
        theme_bw() + #  scale_colour_brewer(palette="Set1") +
        scale_y_reverse() + scale_x_reverse() + 
        theme(axis.title=
             element_text(size=14,face="bold"),
                     legend.title=element_text(size=14,face="bold"),
                     legend.text=element_text(size=13)) +
        geom_abline(position="identity", colour="gray") 
      )
  }
}

Processing models type:RF Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:GB Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:SVR Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:NNR Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:KNN Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:MARS Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:PLS Working out case SNR=INF Working out case SNR=50 Working out case SNR=10 Processing models type:Rule-Regression Working out case SNR=INF Working out case SNR=50 Working out case SNR=10

#
save(datosT,file="datosT.RData")