New hypothesis

The method used to detect exoplanets significantly influences the observed planetary characteristics due to detection bias. Planets detected by the transit method will have shorter orbital periods and lower masses, while planets detected by radial velocity will have higher masses.

# Load libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read data directly from the NASA Exoplanet Archive TAP service.
# The `ps` table stores one row per planet per published parameter set, so an
# unfiltered query counts well-studied planets many times over and skews the
# distributions. Restricting to default_flag = 1 keeps only the archive's
# default parameter set for each planet.
url <- paste0(
  "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=",
  "select+pl_bmasse,pl_orbper,discoverymethod+from+ps",
  "+where+default_flag=1",
  "&format=csv"
)
exo <- read.csv(url)

# Clean data: keep rows with a usable mass that were discovered by one of the
# two methods being compared. Masses must be strictly positive because the
# plot below uses a log10 axis (non-positive values cannot be drawn on it).
exo_clean <- exo %>%
  filter(
    !is.na(pl_bmasse),
    pl_bmasse > 0,
    discoverymethod %in% c("Transit", "Radial Velocity")
  )

# Create boxplot of planet mass per detection method (log-scaled y axis)
ggplot(
  exo_clean,
  aes(x = discoverymethod, y = pl_bmasse, fill = discoverymethod)
) +
  geom_boxplot(alpha = 0.7) +
  scale_y_log10() +
  labs(
    title = "Planet Mass by Detection Method",
    subtitle = "Evidence of Detection Bias",
    x = "Detection Method",
    y = "Planet Mass (Earth Masses, log scale)"
  ) +
  theme_minimal()

# Load libraries
library(ggplot2)
library(dplyr)
library(ggridges)

# Read data from the NASA Exoplanet Archive TAP service.
# The `ps` table stores one row per planet per published parameter set, so an
# unfiltered query would let heavily studied planets dominate the density
# estimate. default_flag = 1 keeps only the archive's default parameter set
# for each planet.
url <- paste0(
  "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=",
  "select+pl_bmasse,pl_orbper,discoverymethod+from+ps",
  "+where+default_flag=1",
  "&format=csv"
)
exo <- read.csv(url)

# Clean data: keep rows with a usable mass that were discovered by one of the
# two methods being compared. Masses must be strictly positive because the
# plot below uses a log10 axis (non-positive values cannot be drawn on it).
exo_clean <- exo %>%
  filter(
    !is.na(pl_bmasse),
    pl_bmasse > 0,
    discoverymethod %in% c("Transit", "Radial Velocity")
  )

# Ridgeline plot: one mass density curve per detection method (log-scaled x)
ggplot(
  exo_clean,
  aes(x = pl_bmasse, y = discoverymethod, fill = discoverymethod)
) +
  geom_density_ridges(alpha = 0.7) +
  scale_x_log10() +
  labs(
    title = "Distribution of Planet Mass by Detection Method",
    subtitle = "Ridgeline Plot Showing Detection Bias",
    x = "Planet Mass (Earth Masses, log scale)",
    y = "Detection Method"
  ) +
  theme_minimal()
## Picking joint bandwidth of 0.171