Curvas reales, media y simuladas¶

In [ ]:
library(iAR)
library(polynom)
library(dplyr)
library(readr)
library(simex)
library(xtable)
require(Rdrw)
setwd("C:/Users/cataluna/OneDrive/Escritorio/usach/Tesis/AGN/AGN")  
AGN1=read.table("AGN1.csv",sep=",",header=T)
AGN2=read.table("AGN2.csv",sep=",",header=T)
AGN3=read.table("AGN3.csv",sep=",",header=T)
AGN4=read.table("AGN4.csv",sep=",",header=T)
AGN5=read.table("AGN5.csv",sep=",",header=T)
AGN6=read.table("AGN6.csv",sep=",",header=T)
AGN7=read.table("AGN7.csv",sep=",",header=T)
AGNF=rbind(AGN1,AGN2,AGN3,AGN4,AGN5,AGN6,AGN7)
aggregate(mjd~fid,AGNF,range)
object=names(table(AGNF[,1]))
l0=length(object)
crossmatchedobjects=read.csv("ts_v9.0.1_SMBH_ZTF_xmatch.csv")
object=data.frame("oid"=object)
join=merge(object, crossmatchedobjects, by = "oid")
MSEMLEg=rep(0,l0)
o2=iAR::utilities()
o2<-gentime(o2, n=400, distribution = "expmixture", lambda1 = 55, lambda2 = 3,p1 = 0.15, p2 = 0.85)
newdata=NULL
simdata=NULL
for(i in 1:l0)
{
  obj=join$oid[i]
  class=join$survey_class_mapped[i]
  pos=which(AGNF[,1]==obj)
  lc=AGNF[pos,]
  lcg=na.omit(lc[which(lc[,3]==1),c(2,4,5)])
  lcr=na.omit(lc[which(lc[,3]==2),c(2,4,5)])
  lcg=lcg[order(lcg[,1]),] 
  pos=which(diff(lcg[,1])==0)  
  if(length(pos)>0)
  {
    for(k in pos)
    {
      lcg[k,]=apply(lcg[c(k,k+1),],2,mean)
    }
    lcg=lcg[-c(pos+1),]
  }
  p=which(lcg[,2]>=30)  
  if(length(p)>0)
    lcg=lcg[-p,]
  if(dim(lcg)[1]<10) next 
  p=which(lcr[,2]>=30)
  if(length(p)>0)
    lcr=lcr[-p,]
  lcr=lcr[order(lcr[,1]),]
  pos=which(diff(lcr[,1])==0)
  if(length(pos)>0)
  {
    for(k in pos)
    {
      lcr[k,]=apply(lcr[c(k,k+1),],2,mean)
    }
    lcr=lcr[-c(pos+1),]
  }
  if(dim(lcr)[1]<10) next
  media=mean(lcg[,2])
  primervalor=c(58242.22,media,0.01)
  ultimovalor=c(60316.16,media,0.01)
  lcg=rbind(primervalor,lcg,ultimovalor)
  
  lcg <- lcg[order(lcg[,1]), , drop = FALSE]         
  lcg <- lcg[!duplicated(lcg[,1]), , drop = FALSE]   
  d <- diff(lcg[,1])
  if (any(d <= 0)) {                                  
    eps <- .Machine$double.eps^0.5
    for (j in which(d <= 0)) lcg[j+1,1] <- lcg[j,1] + eps
  }
  
  
  fid=1
  new=data.frame(obj,lcg[,1],fid,lcg[,2],lcg[,3])
  names(new)=names(AGNF)
  newdata=rbind(newdata,new)
  plot(lcg[,1],lcg[,2],type="l")
  points(lcg[,1],lcg[,2],pch=20)
  n=dim(lcg)[1]
  x <- iAR(family = "norm", times = lcg[,1], series = lcg[,2], series_esd = lcg[,3])
  x = loglik(x)
  #st=sort(o2@times)
  #simiar<- iAR(family = "norm", times=st,coef = x@coef)
  #simiar<- iAR::sim(simiar)
  #newsim=data.frame(obj,simiar@times,fid,simiar@series+media,0)
  
  st <- sort(unique(as.numeric(o2@times)))  
  simiar <- iAR(family = "norm", times = st, coef = x@coef)
  simiar <- iAR::sim(simiar)
  newsim <- data.frame(obj, st, fid, simiar@series + media, 0)
  
  names(newsim)=names(AGNF)
  simdata=rbind(simdata,newsim)
}

library(ggplot2)
library(ggridges)
library(patchwork)

purple_palette <- c(
  "AGN"    = "#240046",  
  "Blazar" = "#5A189A",  
  "QSO"    = "#E0AAFF"   
)

panel_theme <- theme_minimal(base_size = 13) +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.9),
    plot.margin = margin(6, 6, 6, 6),
    panel.grid.minor = element_blank()
  )

g1 <- ggplot(
  phi_no_out,
  aes(x = survey_class_mapped, y = phi, fill = survey_class_mapped)
) +
  geom_boxplot(
    color = "black",
    alpha = 0.70,
    outlier.shape = NA,
    linewidth = 0.4,
    width = 0.6
  ) +
  scale_fill_manual(values = purple_palette) +
  scale_y_continuous(
    limits = c(0.99, 1.0001),
    breaks = c(0.99, 0.995, 1.0),
    expand = expansion(mult = c(0.02, 0.08))
  ) +
  labs(
    x = "Clase de objeto",
    y = expression(phi)
  ) +
  panel_theme +
  theme(
    axis.text.x = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(margin = margin(t = 6)),
    axis.title.y = element_text(margin = margin(r = 6)),
    legend.position = "none"   
  )

phi_plot <- phi_no_out
phi_plot$survey_class_mapped <- reorder(
  phi_plot$survey_class_mapped,
  phi_plot$phi,
  FUN = median
)

g2 <- ggplot(
  phi_plot,
  aes(x = phi, y = survey_class_mapped, fill = survey_class_mapped)
) +
  geom_density_ridges(
    color = "#2D004D",
    alpha = 0.75,
    scale = 1.15,
    rel_min_height = 0.01,
    linewidth = 0.6
  ) +
  scale_fill_manual(values = purple_palette) +
  labs(
    x = expression(phi),
    y = "Clase de objeto"
  ) +
  panel_theme +
  theme(
    axis.text.y = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(margin = margin(t = 6)),
    axis.title.y = element_text(margin = margin(r = 6)),
    legend.position = "none"   
  )

final_plot <- (g1 | g2) +
  plot_layout(widths = c(1, 1)) +
  plot_annotation(
    title = "Distribución del coeficiente iAR (φ) por clase",
    theme = theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
      plot.margin = margin(6, 6, 6, 6)
    )
  )

final_plot

library(dplyr)
library(iAR)
library(ggplot2)

cat("Filas newdata:", nrow(newdata), "\n")
cat("Nombres newdata:\n"); print(names(newdata))

time_col <- if ("mjd" %in% names(newdata)) "mjd" else names(newdata)[2]
mag_col  <- if ("magpsf_corr" %in% names(newdata)) "magpsf_corr" else names(newdata)[4]
esd_col  <- if ("sigmapsf_corr" %in% names(newdata)) "sigmapsf_corr" else names(newdata)[5]

newdata[[time_col]] <- as.numeric(as.character(newdata[[time_col]]))
newdata[[mag_col]]  <- as.numeric(as.character(newdata[[mag_col]]))
newdata[[esd_col]]  <- as.numeric(as.character(newdata[[esd_col]]))

if ("fid" %in% names(newdata)) newdata$fid <- as.numeric(as.character(newdata$fid))

cat("Valores únicos de fid en newdata:\n")
print(unique(newdata$fid))

use_fid <- 1
if (!any(newdata$fid == 1, na.rm = TRUE)) {
  use_fid <- sort(unique(na.omit(newdata$fid)))[1]
  cat("OJO: no existe fid==1. Usaré fid =", use_fid, "\n")
}

get_phi_from_fit <- function(fit) {
  cc <- fit@coef
  nms <- names(cc)
  if (!is.null(nms) && "phi" %in% nms) return(as.numeric(cc["phi"]))
  if (!is.null(nms)) {
    idx <- which(grepl("phi|ar", nms, ignore.case = TRUE))
    if (length(idx) > 0) return(as.numeric(cc[idx[1]]))
  }
  if (length(cc) > 0) return(as.numeric(cc[1]))
  NA_real_
}

calc_phi_one <- function(df) {
  t   <- df[[time_col]]
  y   <- df[[mag_col]]
  esd <- df[[esd_col]]
  
  ok <- is.finite(t) & is.finite(y) & is.finite(esd)
  t <- t[ok]; y <- y[ok]; esd <- esd[ok]
  esd[!is.finite(esd) | esd <= 0] <- 0.01
  
  o <- order(t)
  t <- t[o]; y <- y[o]; esd <- esd[o]
  dup <- duplicated(t)
  if (any(dup)) { t <- t[!dup]; y <- y[!dup]; esd <- esd[!dup] }
  
  if (length(t) < 10) return(NA_real_)
  
  tryCatch({
    fit <- iAR(family="norm", times=t, series=y, series_esd=esd)
    fit <- loglik(fit)
    get_phi_from_fit(fit)
  }, error = function(e) {
    NA_real_
  })
}

dat_f <- newdata %>% filter(fid == use_fid)

cat("Filas después de filtrar fid:", nrow(dat_f), "\n")
cat("OIDs distintos:", length(unique(dat_f$oid)), "\n")

phi_df <- dat_f %>%
  group_by(oid) %>%
  summarise(phi = calc_phi_one(cur_data()), .groups = "drop") %>%
  filter(is.finite(phi))

cat("Phi calculados:", nrow(phi_df), "\n")

phi_class <- phi_df %>%
  left_join(join %>% select(oid, survey_class_mapped), by="oid") %>%
  filter(!is.na(survey_class_mapped))

cat("Con clase asignada:", nrow(phi_class), "\n")
print(table(phi_class$survey_class_mapped))

phi_no_out <- phi_class %>%
  group_by(survey_class_mapped) %>%
  filter(n() >= 5) %>%
  mutate(
    q1 = quantile(phi, 0.25, na.rm=TRUE),
    q3 = quantile(phi, 0.75, na.rm=TRUE),
    iqr = q3 - q1,
    low = q1 - 1.5*iqr,
    high = q3 + 1.5*iqr
  ) %>%
  filter(phi >= low, phi <= high) %>%
  ungroup()

cat("Post-outliers:", nrow(phi_no_out), "\n")

if (nrow(phi_no_out) == 0) {
  cat("No hay datos para graficar tras IQR. Probemos graficar sin filtrar outliers.\n")
} else {
  ggplot(phi_no_out, aes(x = survey_class_mapped, y = phi)) +
    geom_boxplot(fill="#7B2CBF", color="black", alpha=0.35, outlier.shape=NA) +
    scale_y_continuous(limits=c(0.99, 1.0001), breaks=c(0.99, 0.995, 1.0)) +
    labs(title="Distribución del Coeficiente iAR (phi) por Clase",
         x="Clase de Objeto", y="Coeficiente (phi)") +
    theme_minimal(base_size=12) +
    theme(
      plot.title = element_text(hjust=0.5, face="bold"),
      panel.grid.major = element_line(color="grey85"),
      panel.grid.minor = element_line(color="grey92")
    )
}

library(dplyr)
library(iAR)
library(ggplot2)
library(ggridges)
library(patchwork)

cat("Filas newdata:", nrow(newdata), "\n")
cat("Nombres newdata:\n"); print(names(newdata))

time_col <- if ("mjd" %in% names(newdata)) "mjd" else names(newdata)[2]
mag_col  <- if ("magpsf_corr" %in% names(newdata)) "magpsf_corr" else names(newdata)[4]
esd_col  <- if ("sigmapsf_corr" %in% names(newdata)) "sigmapsf_corr" else names(newdata)[5]

newdata[[time_col]] <- as.numeric(as.character(newdata[[time_col]]))
newdata[[mag_col]]  <- as.numeric(as.character(newdata[[mag_col]]))
newdata[[esd_col]]  <- as.numeric(as.character(newdata[[esd_col]]))

if ("fid" %in% names(newdata)) newdata$fid <- as.numeric(as.character(newdata$fid))

cat("Valores únicos de fid en newdata:\n")
print(unique(newdata$fid))

use_fid <- 1
if (!any(newdata$fid == 1, na.rm = TRUE)) {
  use_fid <- sort(unique(na.omit(newdata$fid)))[1]
  cat("OJO: no existe fid==1. Usaré fid =", use_fid, "\n")
}

get_phi_from_fit <- function(fit) {
  cc <- fit@coef
  nms <- names(cc)
  if (!is.null(nms) && "phi" %in% nms) return(as.numeric(cc["phi"]))
  if (!is.null(nms)) {
    idx <- which(grepl("phi|ar", nms, ignore.case = TRUE))
    if (length(idx) > 0) return(as.numeric(cc[idx[1]]))
  }
  if (length(cc) > 0) return(as.numeric(cc[1]))
  NA_real_
}

calc_phi_one <- function(df) {
  t   <- df[[time_col]]
  y   <- df[[mag_col]]
  esd <- df[[esd_col]]
  
  ok <- is.finite(t) & is.finite(y) & is.finite(esd)
  t <- t[ok]; y <- y[ok]; esd <- esd[ok]
  esd[!is.finite(esd) | esd <= 0] <- 0.01
  
  o <- order(t)
  t <- t[o]; y <- y[o]; esd <- esd[o]
  dup <- duplicated(t)
  if (any(dup)) { t <- t[!dup]; y <- y[!dup]; esd <- esd[!dup] }
  
  if (length(t) < 10) return(NA_real_)
  
  tryCatch({
    fit <- iAR(family="norm", times=t, series=y, series_esd=esd)
    fit <- loglik(fit)
    get_phi_from_fit(fit)
  }, error = function(e) NA_real_)
}

dat_f <- newdata %>% filter(fid == use_fid)

cat("Filas después de filtrar fid:", nrow(dat_f), "\n")
cat("OIDs distintos:", length(unique(dat_f$oid)), "\n")

phi_df <- dat_f %>%
  group_by(oid) %>%
  summarise(phi = calc_phi_one(cur_data()), .groups = "drop") %>%
  filter(is.finite(phi))

cat("Phi calculados:", nrow(phi_df), "\n")

phi_class <- phi_df %>%
  left_join(join %>% select(oid, survey_class_mapped), by="oid") %>%
  filter(!is.na(survey_class_mapped))

cat("Con clase asignada:", nrow(phi_class), "\n")
print(table(phi_class$survey_class_mapped))

phi_no_out <- phi_class %>%
  group_by(survey_class_mapped) %>%
  filter(n() >= 5) %>%
  mutate(
    q1 = quantile(phi, 0.25, na.rm=TRUE),
    q3 = quantile(phi, 0.75, na.rm=TRUE),
    iqr = q3 - q1,
    low = q1 - 1.5*iqr,
    high = q3 + 1.5*iqr
  ) %>%
  filter(phi >= low, phi <= high) %>%
  ungroup()

cat("Post-outliers:", nrow(phi_no_out), "\n")

if (nrow(phi_no_out) == 0) {
  cat("No hay datos para graficar tras IQR.\n")
} else {
  
  g1 <- ggplot(phi_no_out, aes(x = survey_class_mapped, y = phi)) +
    geom_boxplot(fill="#7B2CBF", color="black", alpha=0.35, outlier.shape=NA,
                 linewidth=0.4) +
    scale_y_continuous(limits=c(0.998, 1.0001), breaks=c(0.998, 0.999, 1.0)) +
    labs(title="(a) Boxplot de \u03C6 por clase",
         x="Clase de Objeto", y=expression(phi)) +
    theme_minimal(base_size=12) +
    theme(
      plot.title = element_text(hjust=0.5, face="bold"),
      axis.text.x = element_text(size = 14, face="bold"),
      panel.grid.major = element_line(color="grey85"),
      panel.grid.minor = element_line(color="grey92")
    )
  
  phi_plot <- phi_no_out
  phi_plot$survey_class_mapped <- reorder(phi_plot$survey_class_mapped, phi_plot$phi, FUN = median)
  
  g2 <- ggplot(phi_plot, aes(x = phi, y = survey_class_mapped)) +
    geom_density_ridges(
      fill = "#7B2CBF",
      alpha = 0.60,
      color = "#2D004D",
      scale = 1.2,
      rel_min_height = 0.01
    ) +
    labs(
      title = "(b) Densidad de \u03C6 por clase",
      x = expression(phi),
      y = "Clase de objeto"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      axis.text.y = element_text(size = 14, face = "bold")  # <- agranda clases
    )
  
  (g1 | g2) + plot_layout(widths = c(1, 1.15))
}

library(ggplot2)
library(ggridges)
library(patchwork)

panel_theme <- theme_minimal(base_size = 13) +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.9),
    plot.margin = margin(6, 6, 6, 6),
    panel.grid.minor = element_blank()
  )

g1 <- ggplot(
  phi_no_out,
  aes(x = survey_class_mapped, y = phi, fill = survey_class_mapped)
) +
  geom_boxplot(
    color = "black",
    alpha = 0.70,
    outlier.shape = NA,
    linewidth = 0.4,
    width = 0.6
  ) +
  scale_fill_manual(values = purple_palette) +
  scale_y_continuous(
    limits = c(0.998, 1.0001),
    breaks = c(0.998, 0.999, 1.0),
    expand = expansion(mult = c(0.02, 0.08))
  ) +
  labs(
    x = "Clase de objeto",
    y = expression(phi)
  ) +
  panel_theme +
  theme(
    axis.text.x = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(margin = margin(t = 6)),
    axis.title.y = element_text(margin = margin(r = 6)),
    legend.position = "none"
  )

phi_plot <- phi_no_out
phi_plot$survey_class_mapped <- reorder(
  phi_plot$survey_class_mapped,
  phi_plot$phi,
  FUN = median
)

g2 <- ggplot(
  phi_plot,
  aes(x = phi, y = survey_class_mapped, fill = survey_class_mapped)
) +
  geom_density_ridges(
    color = "#2D004D",
    alpha = 0.75,
    scale = 1.15,
    rel_min_height = 0.01,
    linewidth = 0.6
  ) +
  scale_fill_manual(values = purple_palette) +
  labs(
    x = expression(phi),
    y = "Clase de objeto"
  ) +
  panel_theme +
  theme(
    axis.text.y = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(margin = margin(t = 6)),
    axis.title.y = element_text(margin = margin(r = 6)),
    legend.position = "none"   
  )

final_plot <- (g1 | g2) +
  plot_layout(widths = c(1, 1)) +
  plot_annotation(
    title = "Distribución del coeficiente iAR (φ) por clase",
    theme = theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
      plot.margin = margin(6, 6, 6, 6)
    )
  )

final_plot

if (!exists("newdata")) stop("No existe 'newdata'. Ejecuta primero el loop.")

primeros_obj <- unique(newdata$oid)[1:3]

data_plot <- newdata[newdata$oid %in% primeros_obj, ]

par(mfrow = c(3,1), mar = c(4,4,2,1))

for(obj in primeros_obj){
  
  d <- data_plot[data_plot$oid == obj, ]
  d <- d[order(d$mjd), ]   
  
  plot(
    d$mjd, d$magpsf_corr,
    type = "l",
    main = paste("Objeto:", obj),
    xlab = "Tiempo (MJD)",
    ylab = "Magnitud",
    col = "black",
    lwd = 1.6
  )
  
  points(
    d$mjd, d$magpsf_corr,
    pch = 20,
    col = "black",
    cex = 0.8
  )
}

# Volver a layout normal
par(mfrow = c(1,1))

if (!exists("newdata")) stop("No existe 'newdata'. Ejecuta primero el loop.")

primeros_obj <- unique(AGNF$oid)[1:3]
data_plot <- newdata[AGNF$oid %in% primeros_obj, ]

par(mfrow = c(3,1),
    mar = c(4,4,2,1),
    oma = c(0,0,3,0))

for(obj in primeros_obj){
  
  d <- data_plot[data_plot$oid == obj, ]
  d <- d[order(d$mjd), ]
  
  plot(
    d$mjd, d$magpsf_corr,
    type = "l",
    main = paste("Objeto:", obj),
    xlab = "Tiempo (MJD)",
    ylab = "Magnitud",
    col = "black",
    lwd = 1.7
  )
  
  points(
    d$mjd, d$magpsf_corr,
    pch = 20,
    col = "black",
    cex = 0.8
  )
}

# Título general
mtext("Primeras tres curvas de luz reales (banda g)",
      outer = TRUE,
      cex = 1,
      font = 2)

par(mfrow = c(1,1))

# Mostrar primeras 3 curvas ORIGINALES (sin corrección artificial)

primeros_obj <- unique(AGNF$oid)[1:3]

par(mfrow = c(3,1),
    mar = c(4,4,2,1),
    oma = c(0,0,3,0))

for(obj in primeros_obj){
  
  lc <- AGNF[AGNF$oid == obj, ]
  
  lc <- lc[lc$fid == 1, c("mjd","magpsf_corr","sigmapsf_corr_ext")]
  lc <- na.omit(lc)
  
  lc <- lc[order(lc$mjd), ]
  
  lc <- lc[!duplicated(lc$mjd), ]
  
  if(nrow(lc) < 2) next
  
  plot(
    lc$mjd, lc$magpsf_corr,
    type = "l",
    main = paste("Objeto:", obj),
    xlab = "Tiempo (MJD)",
    ylab = "Magnitud",
    col = "black",
    lwd = 1.7
  )
  
  points(
    lc$mjd, lc$magpsf_corr,
    pch = 20,
    col = "black",
    cex = 0.8
  )
}

mtext("Primeras tres curvas de luz (banda g)",
      outer = TRUE,
      cex = 1.3,
      font = 2)

par(mfrow = c(1,1))