# Install required packages only when they are not already available, so
# re-running or knitting this script does not reinstall them every time.
required_pkgs <- c("tidyverse", "readr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
# load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Query the NASA Exoplanet Archive TAP service for orbital period, planet
# mass, and discovery method, and read the CSV response straight from the URL.
url <- "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=select+pl_orbper,pl_bmassj,discoverymethod+from+pscomppars&format=csv"
exo <- readr::read_csv(file = url)
## Rows: 6273 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): discoverymethod
## dbl (2): pl_orbper, pl_bmassj
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only rows whose period and mass are both present and strictly
# positive, since both are later drawn on log10 axes.
data_clean <- exo %>%
  drop_na(pl_orbper, pl_bmassj) %>%
  filter(pl_orbper > 0, pl_bmassj > 0)
# Collapse the archive's discovery methods into the six legend categories;
# anything not listed explicitly (including missing values) becomes "Other".
data_clean <- data_clean %>%
  mutate(
    method_group = case_when(
      discoverymethod == "Radial Velocity" ~ "Radial Velocity",
      discoverymethod == "Transit" ~ "Transits",
      discoverymethod == "Microlensing" ~ "Microlensing",
      discoverymethod == "Imaging" ~ "Imaging",
      discoverymethod == "Pulsar Timing" ~ "Pulsar Timing",
      .default = "Other"
    )
  )
# Scatter plot of planet mass against orbital period on log-log axes,
# coloured by detection-method group.
method_palette <- c(
  "Radial Velocity" = "red",
  "Transits" = "green",
  "Microlensing" = "blue",
  "Imaging" = "purple",
  "Pulsar Timing" = "orange",
  "Other" = "gray"
)

ggplot(
  data_clean,
  aes(x = pl_orbper, y = pl_bmassj, color = method_group)
) +
  geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +
  scale_y_log10() +
  scale_color_manual(values = method_palette) +
  theme_minimal() +
  labs(
    x = "Period (days)",
    y = "Mass (Jupiter Masses)",
    color = "Detection Method"
  )
I recreated the graph using data from the NASA Exoplanet Archive. I used orbital period, planet mass, and discovery method. I removed missing values so the graph would work correctly. Then I made a scatter plot with period on the x-axis and mass on the y-axis and used log scales for both to match the original. I used a cleaner style and transparency to make it easier to read, but it still shows the same overall patterns as the original graph. Also, R wasn't reading the CSV file correctly, so I asked ChatGPT for an alternative and used the URL to read the data instead.
library(tidyverse)
# Query the NASA Exoplanet Archive TAP service for discovery year and
# discovery method, and read the CSV response straight from the URL.
url <- "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=select+disc_year,discoverymethod+from+pscomppars&format=csv"
exo <- readr::read_csv(file = url)
## Rows: 6273 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): discoverymethod
## dbl (1): disc_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop rows missing the year or the method, then keep discoveries made
# before 2013 (this removes 2013 and every later year).
data_clean <- exo %>%
  drop_na(disc_year, discoverymethod) %>%
  filter(disc_year < 2013)
# Collapse the archive's discovery methods into the six legend categories;
# anything not listed explicitly becomes "Other".
data_clean <- data_clean %>%
  mutate(
    method_group = case_when(
      discoverymethod == "Radial Velocity" ~ "Radial Velocity",
      discoverymethod == "Transit" ~ "Transits",
      discoverymethod == "Microlensing" ~ "Microlensing",
      discoverymethod == "Imaging" ~ "Imaging",
      discoverymethod == "Pulsar Timing" ~ "Pulsar Timing",
      .default = "Other"
    )
  )
# Stacked bar chart of the number of detections per discovery year,
# filled by detection-method group.
method_palette <- c(
  "Radial Velocity" = "red",
  "Transits" = "green",
  "Microlensing" = "blue",
  "Imaging" = "purple",
  "Pulsar Timing" = "orange",
  "Other" = "gray"
)

ggplot(data_clean, aes(x = disc_year, fill = method_group)) +
  geom_bar() +
  scale_fill_manual(values = method_palette) +
  theme_minimal() +
  labs(
    x = "Discovery Year",
    y = "Number of Detections",
    fill = "Detection Method"
  )
```
For this graph I used NASA exoplanet data with discovery year and method, removed missing values, and excluded 2013 (and later years) because the original figure only included partial data for 2013.