library(dplyr)
library(ggplot2)
library(gt)
library(ggiraph)
library(tidyr)
library(tidyverse)
library(corrplot)
library(bestNormalize)
library(patchwork)
library(gridExtra)
library(zoo)
library(stringr)
library(grid)
library(pracma)

Missing Values

# Load the raw training and test splits from the working directory.
train <- read.csv("train.csv")
test <- read.csv("test.csv")

# Tally missing entries per column of `data`.
#
# Empty strings in character columns are converted to NA first so they are
# counted as missing too. The result is a long table with one row per column:
# the column name in `Variables` and the count in a column named `total_name`.
count_missing <- function(data, total_name) {
  blanks_as_na <- dplyr::mutate(
    data,
    across(where(is.character), function(col) dplyr::na_if(col, ""))
  )
  na_totals <- dplyr::summarise(
    blanks_as_na,
    across(everything(), function(col) sum(is.na(col)))
  )
  tidyr::pivot_longer(
    na_totals,
    cols = everything(),
    names_to = "Variables",
    values_to = total_name
  )
}

missing_train <- count_missing(train, "Total_train")
missing_test <- count_missing(test, "Total_test")

# Keep every training column; a column absent from the test split gets NA in
# Total_test.
missing <- dplyr::left_join(missing_train, missing_test, by = "Variables")

gt(missing)
Variables Total_train Total_test
id 0 0
alpha 0 0
delta 0 0
u 0 0
g 0 0
r 0 0
i 0 0
z 0 0
redshift 0 0
spectral_type 0 0
galaxy_population 0 0
class 0 NA

Exploratory Data Analysis

# Summarise only the numeric columns. For character columns summary() reports
# Length / Class / Mode, and because the row labels below are taken from the
# first column those cells would be filed under Min. / 1st Qu. / Median.
numeric_train <- train[vapply(train, is.numeric, logical(1))]
sum_raw <- summary(numeric_train)

# Every cell of the summary table has the form "label:value". The labels of
# the first column name the rows.
stat_names <- vapply(
  strsplit(sum_raw[, 1], ":", fixed = TRUE),
  function(parts) trimws(parts[1]),
  character(1)
)

# Keep only the value part (the text after the last ":") of every cell.
sum_clean <- apply(sum_raw, 2, function(col) {
  vapply(
    strsplit(col, ":", fixed = TRUE),
    function(parts) trimws(tail(parts, 1)),
    character(1)
  )
})

sum_df <- as.data.frame(sum_clean, stringsAsFactors = FALSE)
# summary() pads its column names with spaces for printing; strip the padding
# so the table headers are the plain variable names.
names(sum_df) <- trimws(names(sum_df))
sum_df <- cbind(Statistic = stat_names, sum_df)

sum_df |>
  gt(rowname_col = "Statistic") |>
  tab_header(
    title = "Data Summary of Training Set",
    subtitle = "Summary statistics for all columns"
  ) |>
  tab_options(
    table.font.size = px(14),
    heading.align = "left",
    stub.font.weight = "bold"
  )
Data Summary of Training Set
Summary statistics for all columns
id alpha delta u g r i z redshift spectral_type galaxy_population class
Min. 0 0.0117 -17.967 -0.1392 13.54 12.58 11.96 11.68 -0.00997 577347 577347 577347
1st Qu. 144336 132.1615 2.474 20.9771 19.87 18.82 18.31 17.97 0.18105 character character character
Median 288673 188.6815 21.484 22.5702 21.47 20.43 19.63 19.19 0.49752 character character character
Mean 288673 181.6167 21.835 22.4419 21.01 19.96 19.38 19.04 0.72313 NA NA NA
3rd Qu. 433010 231.8297 36.988 23.8691 22.29 21.16 20.61 20.16 0.88139 NA NA NA
Max. 577346 359.9998 79.158 28.2533 27.62 25.25 27.91 26.83 7.01078 NA NA NA

Class Imbalance Check

# Frequency of each target class in the training set, one row per class.
target_imbalance <- setNames(
  as.data.frame(table(train$class)),
  c("Target", "Frequency")
)

# Add each class's share of all rows as a percentage (two decimals) and render
# the result as a gt table.
gt(
  dplyr::mutate(
    target_imbalance,
    Per_Freq = round(Frequency / sum(Frequency) * 100, 2)
  )
)
Target Frequency Per_Freq
GALAXY 377480 65.38
QSO 117143 20.29
STAR 82724 14.33

Correlation and Univariate Exploration

# Encode the three character columns as integer factor codes so that every
# column can enter the correlation matrix. The codes follow the default factor
# level order, so they are labels rather than a meaningful numeric scale.
train_numeric <- train |>
  mutate(
    spectral_type = as.numeric(as.factor(spectral_type)),
    galaxy_population = as.numeric(as.factor(galaxy_population)),
    class = as.numeric(as.factor(class))
  )

# Pairwise correlations using only rows with no missing values.
# NOTE(review): `id` is still included here; it looks like a row identifier,
# so its correlations are probably not informative. Confirm before dropping.
pit_corr <- stats::cor(train_numeric, use = "complete.obs")

# Coloured correlation matrix with the coefficients printed in black.
# The stray empty argument from the original call is removed; it only worked
# because the blank was silently matched to the next formal argument.
corrplot(
  pit_corr,
  method = "color",
  addCoef.col = "black",
  col = COL2("RdYlBu")
)

# Build one per-class distribution plot for a single column.
#
# data    : data frame holding `var` and a `class` column.
# var     : name of the column to plot, given as a string.
# title   : plot title; defaults to the column name.
# x_label : x-axis label; defaults to the title.
# type    : "density" draws overlaid per-class density curves of `var`;
#           "bar" treats `var` as a factor and draws stacked counts.
#
# Returns a ggplot object. The styling matches the hand-written plots this
# helper replaces, except that the colour scale is omitted: no colour
# aesthetic is mapped, so that scale never had any effect.
plot_by_class <- function(data, var, title = var, x_label = title,
                          type = c("density", "bar")) {
  type <- match.arg(type)

  class_palette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
                     "#0072B2", "#D55E00", "#CC79A7")

  # `.data[[var]]` looks the column up by its string name inside aes().
  mapping <- switch(
    type,
    density = aes(x = .data[[var]], fill = factor(.data[["class"]])),
    bar = aes(x = factor(.data[[var]]), fill = factor(.data[["class"]]))
  )
  distribution_layer <- switch(
    type,
    density = geom_density(alpha = 0.5),
    bar = geom_bar()
  )
  y_label <- switch(type, density = "Density", bar = "Count")

  ggplot(data, mapping) +
    distribution_layer +
    scale_fill_manual(values = class_palette) +
    labs(title = title, y = y_label, x = x_label) +
    theme_minimal(base_size = 12, base_family = "sans") +
    theme(
      plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
      plot.subtitle = element_text(color = "grey40", size = 11, margin = margin(b = 12)),
      plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
      plot.margin = margin(16, 16, 16, 16),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "grey30", size = 10),
      axis.text = element_text(color = "grey30"),
      axis.ticks.x = element_line(color = "grey80"),
      legend.position = "bottom",
      legend.title = element_blank()
    )
}

# Per-class densities of the raw numeric columns.
p1 <- plot_by_class(train, "alpha")
p2 <- plot_by_class(train, "delta")
p3 <- plot_by_class(train, "u")
p4 <- plot_by_class(train, "g")
p5 <- plot_by_class(train, "r")
p6 <- plot_by_class(train, "i")
p7 <- plot_by_class(train, "z")
p8 <- plot_by_class(train, "redshift", title = "Redshift")
# Show the redshift plot on its own before the combined grid.
p8

# Stacked class counts for the two categorical columns.
p9 <- plot_by_class(train, "spectral_type", title = "Spectral Type", type = "bar")
p10 <- plot_by_class(train, "galaxy_population", title = "Galaxy Population", type = "bar")
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, ncol = 2, nrow = 5)

Feature Engineering

# Add derived photometric columns to `train`.
train <- train |>
  # Product of redshift with each band column, named rsu, rsg, rsr, rsi, rsz.
  mutate(
    across(c(u, g, r, i, z), \(band) redshift * band, .names = "rs{.col}")
  ) |>
  # Pairwise differences between band columns. Note that z_i is z minus i,
  # whereas every other pair subtracts the later band from the earlier one.
  mutate(
    u_g = u - g,
    u_r = u - r,
    u_i = u - i,
    u_z = u - z,
    g_r = g - r,
    g_i = g - i,
    g_z = g - z,
    r_i = r - i,
    r_z = r - z,
    z_i = z - i
  )
# Per-class density plot of one engineered feature.
#
# data  : data frame holding `var` and a `class` column.
# var   : name of the feature column, given as a string; also used as the
#         x-axis label.
# title : plot title; defaults to the column name.
#
# Returns a ggplot object. The styling matches the hand-written plots this
# helper replaces, except that the colour scale is omitted: no colour
# aesthetic is mapped, so that scale never had any effect.
plot_feature_density <- function(data, var, title = var) {
  class_palette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
                     "#0072B2", "#D55E00", "#CC79A7")

  # `.data[[var]]` looks the column up by its string name inside aes().
  ggplot(data, aes(x = .data[[var]], fill = factor(.data[["class"]]))) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = class_palette) +
    labs(title = title, y = "Density", x = var) +
    theme_minimal(base_size = 12, base_family = "sans") +
    theme(
      plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
      plot.subtitle = element_text(color = "grey40", size = 11, margin = margin(b = 12)),
      plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
      plot.margin = margin(16, 16, 16, 16),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "grey30", size = 10),
      axis.text = element_text(color = "grey30"),
      axis.ticks.x = element_line(color = "grey80"),
      legend.position = "bottom",
      legend.title = element_blank()
    )
}

# Redshift-by-band products.
p11 <- plot_feature_density(train, "rsu", title = "Redshift * u")
p12 <- plot_feature_density(train, "rsg", title = "Redshift * g")
p13 <- plot_feature_density(train, "rsr", title = "Redshift * r")
p14 <- plot_feature_density(train, "rsi", title = "Redshift * i")
p15 <- plot_feature_density(train, "rsz", title = "Redshift * z")

# Band differences.
p16 <- plot_feature_density(train, "u_g", title = "u-g")
p17 <- plot_feature_density(train, "u_r", title = "u-r")
p18 <- plot_feature_density(train, "u_i", title = "u-i")
p19 <- plot_feature_density(train, "u_z", title = "u-z")
p20 <- plot_feature_density(train, "g_r", title = "g-r")
p21 <- plot_feature_density(train, "g_i", title = "g-i")
p22 <- plot_feature_density(train, "g_z", title = "g-z")
p23 <- plot_feature_density(train, "r_i", title = "r-i")
p24 <- plot_feature_density(train, "r_z", title = "r-z")
p25 <- plot_feature_density(train, "z_i")

grid.arrange(
  p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25,
  ncol = 3, nrow = 5
)

# Add a redshift/colour interaction and two colour-plane distance features.
train <- train |>
  mutate(
    # Signed square root: u_g can be negative, and plain sqrt() would return
    # NaN (with a warning) for those rows. For u_g >= 0 this equals
    # redshift * sqrt(u_g). Use the same formula wherever this feature is
    # built for another data split so the values stay comparable.
    redshift_u_g = redshift * sign(u_g) * sqrt(abs(u_g)),
    # Euclidean distance in the (g_r, r_i) plane from a fixed reference point.
    # NOTE(review): (0.52, 0.25) and (0.24, 0.15) are presumably typical
    # stellar and quasar colours; confirm where these constants come from.
    stellar_locus_dist = sqrt((g_r - 0.52)^2 + (r_i - 0.25)^2),
    qso_locus_dist = sqrt((g_r - 0.24)^2 + (r_i - 0.15)^2)
  )
# Per-class density curves of stellar_locus_dist.
# Only x and fill are mapped; no colour aesthetic is mapped here, and the
# colour scale is retained unchanged.
train |>
  ggplot(mapping = aes(x = stellar_locus_dist, fill = factor(class))) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  labs(
    title = "Stellar Locus Distance",
    x = "Stellar Locus Distance",
    y = "Density"
  ) +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    # Text elements
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(
      color = "grey40", size = 11, margin = margin(b = 12)
    ),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    # Panel and axis decoration
    axis.ticks.x = element_line(color = "grey80"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    # Legend placed under the panel, without a title
    legend.title = element_blank(),
    legend.position = "bottom"
  )

# Per-class density curves of qso_locus_dist.
# Only x and fill are mapped; no colour aesthetic is mapped here, and the
# colour scale is retained unchanged.
train |>
  ggplot(mapping = aes(x = qso_locus_dist, fill = factor(class))) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  labs(
    title = "Quasar Locus Distance",
    x = "Quasar Locus Distance",
    y = "Density"
  ) +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    # Text elements
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(
      color = "grey40", size = 11, margin = margin(b = 12)
    ),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    # Panel and axis decoration
    axis.ticks.x = element_line(color = "grey80"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    # Legend placed under the panel, without a title
    legend.title = element_blank(),
    legend.position = "bottom"
  )

Coordinate Mapping

# Add angular coordinate features to train.
# alpha and delta are treated as degrees (they are passed through deg2rad());
# presumably right ascension / declination -- confirm against the data source.
#   alpha_rad, delta_rad          - the two angles converted to radians
#   coord_sin_* / coord_cos_*     - sine and cosine of each angle
#   xc, yc                        - cos(delta) * cos(alpha) and
#                                   cos(delta) * sin(alpha)
# R's sin()/cos() expect radians, so every trigonometric feature is computed
# from the *_rad columns; feeding the raw degree values would give
# meaningless, rapidly oscillating results.
train <- train |>
  mutate(
    alpha_rad = deg2rad(alpha),
    delta_rad = deg2rad(delta),
    coord_sin_alpha = sin(alpha_rad),
    coord_cos_alpha = cos(alpha_rad),
    coord_sin_delta = sin(delta_rad),
    coord_cos_delta = cos(delta_rad),
    xc = cos(delta_rad) * cos(alpha_rad),
    yc = cos(delta_rad) * sin(alpha_rad)
  )
# Jittered scatter of alpha against delta (each rounded to one decimal place),
# with points coloured by class.
# Only colour is mapped; no fill aesthetic is mapped here, and the fill scale
# is retained unchanged.
train |>
  ggplot(
    mapping = aes(
      x = round(alpha, 1),
      y = round(delta, 1),
      color = factor(class)
    )
  ) +
  geom_jitter(alpha = 0.3) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  labs(title = "Coordinate Star Map", x = "Alpha", y = "Delta") +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    # Text elements
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(
      color = "grey40", size = 11, margin = margin(b = 12)
    ),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    # Panel and axis decoration
    axis.ticks.x = element_line(color = "grey80"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    # Legend placed under the panel, without a title
    legend.title = element_blank(),
    legend.position = "bottom"
  )

# Jittered scatter of the converted coordinates xc against yc (each rounded to
# one decimal place), with points coloured by class.
# Only colour is mapped; no fill aesthetic is mapped here, and the fill scale
# is retained unchanged.
train |>
  ggplot(
    mapping = aes(
      x = round(xc, 1),
      y = round(yc, 1),
      color = factor(class)
    )
  ) +
  geom_jitter(alpha = 0.3) +
  scale_fill_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  scale_color_manual(
    values = c(
      "#E69F00", "#56B4E9", "#009E73", "#F0E442",
      "#0072B2", "#D55E00", "#CC79A7"
    )
  ) +
  labs(
    title = "Coordinate Star Map",
    x = "X Converted Coordinate",
    y = "Y Converted Coordinate"
  ) +
  theme_minimal(base_size = 12, base_family = "sans") +
  theme(
    # Text elements
    plot.title = element_text(face = "bold", size = 14, margin = margin(b = 4)),
    plot.subtitle = element_text(
      color = "grey40", size = 11, margin = margin(b = 12)
    ),
    plot.caption = element_text(color = "grey60", size = 9, hjust = 0),
    axis.title = element_text(color = "grey30", size = 10),
    axis.text = element_text(color = "grey30"),
    # Panel and axis decoration
    axis.ticks.x = element_line(color = "grey80"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(16, 16, 16, 16),
    # Legend placed under the panel, without a title
    legend.title = element_blank(),
    legend.position = "bottom"
  )