# Install the required packages only when they are not already available.
# An unconditional install.packages() call re-downloads everything on every
# run and fails when the machine is offline or no CRAN mirror is configured.
required_pkgs <- c("tidyverse", "readr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
# load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load data directly from the NASA Exoplanet Archive TAP service.
# The query selects pl_orbper, pl_bmassj and discoverymethod from the
# `pscomppars` table and requests CSV output. The URL is assembled from
# pieces only to keep the lines short; the resulting string is unchanged.
url <- paste0(
  "https://exoplanetarchive.ipac.caltech.edu/TAP/sync",
  "?query=select+pl_orbper,pl_bmassj,discoverymethod+from+pscomppars",
  "&format=csv"
)

# Declare the column types up front instead of letting readr guess them from
# the downloaded rows; this also silences the column-specification message.
exo <- read_csv(
  url,
  col_types = cols(
    pl_orbper = col_double(),
    pl_bmassj = col_double(),
    discoverymethod = col_character()
  )
)
## Rows: 6273 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): discoverymethod
## dbl (2): pl_orbper, pl_bmassj
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Clean the data and group the detection methods in one pipeline:
#   1. drop rows where the period or the mass is missing,
#   2. keep only strictly positive values (needed for the log-scaled axes),
#   3. add `method_group`: "Transit" is relabelled "Transits", four other
#      methods keep their own name, and everything else becomes "Other".
data_clean <- exo %>%
  drop_na(pl_orbper, pl_bmassj) %>%
  filter(pl_orbper > 0, pl_bmassj > 0) %>%
  mutate(
    method_group = case_when(
      discoverymethod == "Transit" ~ "Transits",
      discoverymethod %in% c(
        "Radial Velocity",
        "Microlensing",
        "Imaging",
        "Pulsar Timing"
      ) ~ discoverymethod,
      TRUE ~ "Other"
    )
  )

# Scatter plot of planet mass against orbital period, one colour per
# detection-method group, with both axes on a log10 scale.
data_clean %>%
  ggplot(aes(pl_orbper, pl_bmassj, colour = method_group)) +
  theme_minimal() +
  geom_point(size = 2, alpha = 0.7) +
  scale_y_log10() +
  scale_x_log10() +
  # fixed colour for every detection-method group
  scale_colour_manual(
    values = c(
      "Radial Velocity" = "red",
      "Transits" = "green",
      "Microlensing" = "blue",
      "Imaging" = "purple",
      "Pulsar Timing" = "orange",
      "Other" = "gray"
    )
  ) +
  labs(
    colour = "Detection Method",
    y = "Mass (Jupiter Masses)",
    x = "Period (days)"
  )

I recreated the graph using data from the NASA Exoplanet Archive. I used orbital period, planet mass, and discovery method. I removed missing values so the graph would work correctly. Then I made a scatter plot with period on the x-axis and mass on the y-axis and used log scales for both to match the original. I used a cleaner style and transparency to make it easier to read, but it still shows the same overall patterns as the original graph. Also, R wasn't reading the CSV file correctly, so I asked ChatGPT for an alternative and used the URL to read the data instead.

library(tidyverse)

# Load data from the NASA Exoplanet Archive TAP service.
# The query selects disc_year and discoverymethod from the `pscomppars`
# table and requests CSV output. The URL is assembled from pieces only to
# keep the lines short; the resulting string is unchanged.
url <- paste0(
  "https://exoplanetarchive.ipac.caltech.edu/TAP/sync",
  "?query=select+disc_year,discoverymethod+from+pscomppars",
  "&format=csv"
)

# Declare the column types up front instead of letting readr guess them from
# the downloaded rows; this also silences the column-specification message.
exo <- read_csv(
  url,
  col_types = cols(
    disc_year = col_double(),
    discoverymethod = col_character()
  )
)
## Rows: 6273 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): discoverymethod
## dbl (1): disc_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Clean the data and group the detection methods in one pipeline:
#   1. drop rows where the discovery year or the method is missing,
#   2. keep only discoveries made before 2013 (this removes 2013 itself and
#      every later year),
#   3. add `method_group`: "Transit" is relabelled "Transits", four other
#      methods keep their own name, and everything else becomes "Other".
data_clean <- exo %>%
  drop_na(disc_year, discoverymethod) %>%
  filter(disc_year < 2013) %>%
  mutate(
    method_group = case_when(
      discoverymethod == "Transit" ~ "Transits",
      discoverymethod %in% c(
        "Radial Velocity",
        "Microlensing",
        "Imaging",
        "Pulsar Timing"
      ) ~ discoverymethod,
      TRUE ~ "Other"
    )
  )

# Stacked bar chart: number of detections per discovery year, with each bar
# split by detection-method group.
data_clean %>%
  ggplot(aes(disc_year, fill = method_group)) +
  theme_minimal() +
  geom_bar() +
  # fixed fill colour for every detection-method group
  scale_fill_manual(
    values = c(
      "Radial Velocity" = "red",
      "Transits" = "green",
      "Microlensing" = "blue",
      "Imaging" = "purple",
      "Pulsar Timing" = "orange",
      "Other" = "gray"
    )
  ) +
  labs(
    fill = "Detection Method",
    y = "Number of Detections",
    x = "Discovery Year"
  )

```

For this graph I used NASA exoplanet data with discovery year and method, removed missing values, and removed the data from 2013 onward because the original figure only included partial data for 2013.