# Univariate Exploration ----
# Correlation heat map over every column of `train`.
#
# The three columns below are turned into factors and then replaced by their
# integer level codes so they can be passed to cor().
# NOTE(review): the codes follow factor level order, so any correlation that
# involves these columns depends on that ordering — interpret with care.
# NOTE(review): assumes every other column of `train` is already numeric;
# cor() will fail otherwise — confirm against the data.
train_numeric <- train |>
  mutate(
    across(
      c(spectral_type, galaxy_population, class),
      \(x) as.numeric(as.factor(x))
    )
  )

# use = "complete.obs": rows containing any missing value are dropped.
pit_corr <- stats::cor(train_numeric, use = "complete.obs")

# Coloured tiles with the coefficient printed in black on each cell.
# (The original call carried a stray empty argument after `method`.)
corrplot(
  pit_corr,
  method = "color",
  addCoef.col = "black",
  col = COL2("RdYlBu")
)

# Per-class plots of the raw features.
#
# Every panel uses the same fill palette, label layout and theme, so the plot
# construction lives in small helpers rather than being repeated per column.
# The helpers are defined here so this section can be run on its own.

# Fill colours handed to scale_fill_manual() for the levels of factor(class).
class_fill_palette <- c(
  "#E69F00", "#56B4E9", "#009E73", "#F0E442",
  "#0072B2", "#D55E00", "#CC79A7"
)

# Shared look: minimal theme, legend at the bottom without a title, and no
# vertical-major or minor grid lines.
theme_class_eda <- function() {
  theme_minimal(base_size = 12, base_family = "sans") +
    theme(
      plot.title = element_text(
        face = "bold", size = 14, margin = margin(b = 4)
      ),
      plot.subtitle = element_text(
        color = "grey40", size = 11, margin = margin(b = 12)
      ),
      plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
      plot.margin = margin(16, 16, 16, 16),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "grey30", size = 10),
      axis.text = element_text(color = "grey30"),
      axis.ticks.x = element_line(color = "grey80"),
      legend.position = "bottom",
      legend.title = element_blank()
    )
}

# Overlaid density curves of one column, one fill colour per class.
#   data    data frame holding `column` and a `class` column
#   column  name of the column to plot, as a string
#   title   plot title (defaults to the column name)
#   x_label x-axis label (defaults to the title)
# Returns a ggplot object. Only `fill` is mapped, so no colour scale is added.
plot_class_density <- function(data, column, title = column,
                               x_label = title) {
  ggplot(data, aes(x = .data[[column]], fill = factor(class))) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = class_fill_palette) +
    labs(title = title, y = "Density", x = x_label) +
    theme_class_eda()
}

# Stacked bar counts of one column (treated as a factor), filled by class.
# Arguments and return value mirror plot_class_density().
plot_class_bar <- function(data, column, title = column, x_label = title) {
  ggplot(data, aes(x = factor(.data[[column]]), fill = factor(class))) +
    geom_bar() +
    scale_fill_manual(values = class_fill_palette) +
    labs(title = title, y = "Count", x = x_label) +
    theme_class_eda()
}

p1 <- plot_class_density(train, "alpha")
p2 <- plot_class_density(train, "delta")
p3 <- plot_class_density(train, "u")
p4 <- plot_class_density(train, "g")
p5 <- plot_class_density(train, "r")
p6 <- plot_class_density(train, "i")
p7 <- plot_class_density(train, "z")
p8 <- plot_class_density(train, "redshift", title = "Redshift")
# Show the redshift panel on its own before the combined grid.
p8

p9 <- plot_class_bar(train, "spectral_type", title = "Spectral Type")
p10 <- plot_class_bar(
  train, "galaxy_population",
  title = "Galaxy Population"
)

# All ten panels on one page, two per row.
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, ncol = 2, nrow = 5)

# Feature Engineering ----
# Append engineered columns to `train` in two groups; existing columns are
# left untouched and the new ones keep the order rs*, then the differences.
train <- train |>
  # Redshift multiplied by each band magnitude: rsu, rsg, rsr, rsi, rsz.
  mutate(
    across(c(u, g, r, i, z), \(band) redshift * band, .names = "rs{.col}")
  ) |>
  # Pairwise band differences, named <first>_<second> = first - second.
  # NOTE(review): z_i is z - i, the reverse of the u, g, r, i, z order the
  # other nine differences follow — confirm this is intended.
  mutate(
    u_g = u - g,
    u_r = u - r,
    u_i = u - i,
    u_z = u - z,
    g_r = g - r,
    g_i = g - i,
    g_z = g - z,
    r_i = r - i,
    r_z = r - z,
    z_i = z - i
  )
# Per-class density plots of the engineered features.
#
# All fifteen panels differ only in the column, title and x label, so they
# are built by one helper. The helper, palette and theme are defined here so
# this section can be run on its own.

# Fill colours handed to scale_fill_manual() for the levels of factor(class).
class_fill_palette <- c(
  "#E69F00", "#56B4E9", "#009E73", "#F0E442",
  "#0072B2", "#D55E00", "#CC79A7"
)

# Shared look: minimal theme, legend at the bottom without a title, and no
# vertical-major or minor grid lines.
theme_class_eda <- function() {
  theme_minimal(base_size = 12, base_family = "sans") +
    theme(
      plot.title = element_text(
        face = "bold", size = 14, margin = margin(b = 4)
      ),
      plot.subtitle = element_text(
        color = "grey40", size = 11, margin = margin(b = 12)
      ),
      plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
      plot.margin = margin(16, 16, 16, 16),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "grey30", size = 10),
      axis.text = element_text(color = "grey30"),
      axis.ticks.x = element_line(color = "grey80"),
      legend.position = "bottom",
      legend.title = element_blank()
    )
}

# Overlaid density curves of one column, one fill colour per class.
#   data    data frame holding `column` and a `class` column
#   column  name of the column to plot, as a string
#   title   plot title (defaults to the column name)
#   x_label x-axis label (defaults to the title)
# Returns a ggplot object. Only `fill` is mapped, so no colour scale is added.
plot_class_density <- function(data, column, title = column,
                               x_label = title) {
  ggplot(data, aes(x = .data[[column]], fill = factor(class))) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = class_fill_palette) +
    labs(title = title, y = "Density", x = x_label) +
    theme_class_eda()
}

# Redshift x magnitude products.
p11 <- plot_class_density(train, "rsu", title = "Redshift * u", x_label = "rsu")
p12 <- plot_class_density(train, "rsg", title = "Redshift * g", x_label = "rsg")
p13 <- plot_class_density(train, "rsr", title = "Redshift * r", x_label = "rsr")
p14 <- plot_class_density(train, "rsi", title = "Redshift * i", x_label = "rsi")
p15 <- plot_class_density(train, "rsz", title = "Redshift * z", x_label = "rsz")

# Band differences; titles use "a-b", x labels keep the column name.
p16 <- plot_class_density(train, "u_g", title = "u-g", x_label = "u_g")
p17 <- plot_class_density(train, "u_r", title = "u-r", x_label = "u_r")
p18 <- plot_class_density(train, "u_i", title = "u-i", x_label = "u_i")
p19 <- plot_class_density(train, "u_z", title = "u-z", x_label = "u_z")
p20 <- plot_class_density(train, "g_r", title = "g-r", x_label = "g_r")
p21 <- plot_class_density(train, "g_i", title = "g-i", x_label = "g_i")
p22 <- plot_class_density(train, "g_z", title = "g-z", x_label = "g_z")
p23 <- plot_class_density(train, "r_i", title = "r-i", x_label = "r_i")
p24 <- plot_class_density(train, "r_z", title = "r-z", x_label = "r_z")
# Title was "z_i"; written as "z-i" to match the other difference panels.
p25 <- plot_class_density(train, "z_i", title = "z-i", x_label = "z_i")

# All fifteen panels on one page, three per row.
grid.arrange(
  p11, p12, p13, p14, p15,
  p16, p17, p18, p19, p20,
  p21, p22, p23, p24, p25,
  ncol = 3, nrow = 5
)

# Add three derived columns built from the colour indices computed earlier:
#   redshift_u_g        redshift scaled by the square root of u_g
#   stellar_locus_dist  Euclidean distance of (g_r, r_i) from (0.52, 0.25)
#   qso_locus_dist      Euclidean distance of (g_r, r_i) from (0.24, 0.15)
# NOTE(review): sqrt(u_g) yields NaN (with a warning) wherever u_g is
# negative — confirm whether such rows exist in `train`.
train <- mutate(
  train,
  redshift_u_g = redshift * sqrt(u_g),
  stellar_locus_dist = sqrt((g_r - 0.52)^2 + (r_i - 0.25)^2),
  qso_locus_dist = sqrt((g_r - 0.24)^2 + (r_i - 0.15)^2)
)
# Per-class density plots of the two locus-distance features.
#
# Both plots differ only in the column and its label, so they share one
# helper. The helper, palette and theme are defined here so this section can
# be run on its own.

# Fill colours handed to scale_fill_manual() for the levels of factor(class).
class_fill_palette <- c(
  "#E69F00", "#56B4E9", "#009E73", "#F0E442",
  "#0072B2", "#D55E00", "#CC79A7"
)

# Shared look: minimal theme, legend at the bottom without a title, and no
# vertical-major or minor grid lines.
theme_class_eda <- function() {
  theme_minimal(base_size = 12, base_family = "sans") +
    theme(
      plot.title = element_text(
        face = "bold", size = 14, margin = margin(b = 4)
      ),
      plot.subtitle = element_text(
        color = "grey40", size = 11, margin = margin(b = 12)
      ),
      plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
      plot.margin = margin(16, 16, 16, 16),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "grey30", size = 10),
      axis.text = element_text(color = "grey30"),
      axis.ticks.x = element_line(color = "grey80"),
      legend.position = "bottom",
      legend.title = element_blank()
    )
}

# Overlaid density curves of one column, one fill colour per class.
#   data    data frame holding `column` and a `class` column
#   column  name of the column to plot, as a string
#   title   plot title (defaults to the column name)
#   x_label x-axis label (defaults to the title)
# Returns a ggplot object. Only `fill` is mapped, so no colour scale is added.
plot_class_density <- function(data, column, title = column,
                               x_label = title) {
  ggplot(data, aes(x = .data[[column]], fill = factor(class))) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = class_fill_palette) +
    labs(title = title, y = "Density", x = x_label) +
    theme_class_eda()
}

# Top-level calls, so each plot is printed as soon as it is built.
plot_class_density(
  train, "stellar_locus_dist",
  title = "Stellar Locus Distance"
)

plot_class_density(
  train, "qso_locus_dist",
  title = "Quasar Locus Distance"
)
