# A tibble: 6 × 5
SOURCE_ID measurement_type band replicate value
<dbl> <chr> <chr> <chr> <dbl>
1 4.04e18 dec bp 1 -34.4
2 4.04e18 dec bp 2 -34.4
3 4.04e18 dec g 1 -34.4
4 4.04e18 dec g 2 -34.4
5 4.04e18 dec rp 1 -34.4
6 4.04e18 dec rp 2 -34.4
# Data for the scatter plot of Proper Motion in RA vs Dec ----
# Keep only the "pmra" and "pmdec" rows of the long-format table and spread
# them into one column per attribute.
# NOTE(review): assumes the remaining columns of gaiadf2_tidy (presumably a
# star identifier) uniquely identify each output row; otherwise pivot_wider()
# will warn and produce list-columns -- TODO confirm.
gaiadf2_pm <- gaiadf2_tidy %>%
  filter(attribute %in% c("pmra", "pmdec")) %>%
  pivot_wider(names_from = attribute, values_from = value)

# Data for the parallax vs G-band magnitude scatter plot ----
# Same reshaping, restricted to the "parallax" and "phot_g_mean_mag" rows.
parallax_magnitude <- gaiadf2_tidy %>%
  filter(attribute %in% c("parallax", "phot_g_mean_mag")) %>%
  pivot_wider(names_from = attribute, values_from = value)
Research Question
How do stellar kinematics and luminosity distributions vary based on the position within the Milky Way galaxy, as observed in the Gaia catalog?
Cases
The cases in the Gaia dataset are individual stars observed in the Milky Way galaxy. The dataset contains millions of cases, but we will focus on a manageable subset based on specific criteria, such as brightness and distance.
Data Collection
The data was collected through the Gaia space observatory, a mission by the European Space Agency. Gaia continuously surveys the sky to map the positions, velocities, and physical characteristics of billions of stars with unprecedented precision.
Type of Study
This is an observational study. The data was collected without manipulating any variables; the characteristics of stars were simply observed and recorded in their natural state.
Data Source
The data is sourced from the European Space Agency’s Gaia catalog. Citation: European Space Agency, Gaia Archive (access the Gaia Archive). The dataset includes various measurements and attributes of stars observed by the Gaia space observatory, and it will be available on my GitHub page.
Variables
Stellar Parallax: The apparent shift in a star’s position due to the Earth’s orbit around the Sun.
Proper Motion: The angular change in a star’s position over time.
Radial Velocity: The speed at which a star moves towards or away from the observer.
Apparent Magnitude: The brightness of a star as seen from Earth.
Luminosity: The total amount of energy emitted by a star per unit time.
Color Index: A measure of a star’s color based on the difference in brightness between two spectral bands.
Spectral Classification: The classification of stars based on their spectral characteristics.
Star Types: Categories of stars based on their evolutionary stage, such as main sequence, giant, supergiant, etc.
Describe Your Variables
Quantitative Variables: Stellar parallax, proper motion, radial velocity, apparent magnitude, luminosity, and color index.
Qualitative Variables: Spectral classification and star types (e.g., main sequence, giant, etc.).
Dependent Variable: If running a regression analysis, the dependent variable could be luminosity or apparent magnitude, depending on the research focus.
Relevant Summary Statistics
Below are summary statistics for key variables and visualizations that help address the research question.
# Summary statistics
# Print per-column summaries (quartiles, mean, NA counts) for gaiadf.
summary(gaiadf)
SOURCE_ID dec_bp_1 dec_bp_2 dec_g_1
Min. :4.045e+18 Min. :-70.19 Min. :-70.19 Min. :-70.19
1st Qu.:4.668e+18 1st Qu.:-67.31 1st Qu.:-67.31 1st Qu.:-67.31
Median :4.671e+18 Median :-66.85 Median :-66.85 Median :-66.85
Mean :4.670e+18 Mean :-66.22 Mean :-66.22 Mean :-66.22
3rd Qu.:4.674e+18 3rd Qu.:-63.71 3rd Qu.:-63.71 3rd Qu.:-63.71
Max. :4.676e+18 Max. :-34.35 Max. :-34.35 Max. :-34.35
dec_g_2 dec_rp_1 dec_rp_2 magnitude_bp_1
Min. :-70.19 Min. :-70.19 Min. :-70.19 Min. : 7.612
1st Qu.:-67.31 1st Qu.:-67.31 1st Qu.:-67.31 1st Qu.:18.015
Median :-66.85 Median :-66.85 Median :-66.85 Median :19.715
Mean :-66.22 Mean :-66.22 Mean :-66.22 Mean :19.137
3rd Qu.:-63.71 3rd Qu.:-63.71 3rd Qu.:-63.71 3rd Qu.:20.775
Max. :-34.35 Max. :-34.35 Max. :-34.35 Max. :22.459
NA's :36
magnitude_bp_2 magnitude_g_1 magnitude_g_2 magnitude_rp_1
Min. : 7.712 Min. : 7.278 Min. : 7.378 Min. : 5.929
1st Qu.:18.115 1st Qu.:17.415 1st Qu.:17.515 1st Qu.:16.636
Median :19.815 Median :18.974 Median :19.074 Median :18.106
Mean :19.237 Mean :18.466 Mean :18.566 Mean :17.640
3rd Qu.:20.875 3rd Qu.:19.993 3rd Qu.:20.093 3rd Qu.:19.058
Max. :22.559 Max. :20.954 Max. :21.054 Max. :20.672
NA's :36 NA's :1 NA's :1 NA's :35
magnitude_rp_2 parallax_bp_1 parallax_bp_2 parallax_g_1
Min. : 6.029 Min. : 0.00052 Min. : 0.00052 Min. : 0.00052
1st Qu.:16.736 1st Qu.: 0.34031 1st Qu.: 0.34031 1st Qu.: 0.34031
Median :18.206 Median : 0.68036 Median : 0.68036 Median : 0.68036
Mean :17.740 Mean : 1.00771 Mean : 1.00771 Mean : 1.00771
3rd Qu.:19.158 3rd Qu.: 1.29593 3rd Qu.: 1.29593 3rd Qu.: 1.29593
Max. :20.772 Max. :61.22712 Max. :61.22712 Max. :61.22712
NA's :35
parallax_g_2 parallax_rp_1 parallax_rp_2 ra_bp_1
Min. : 0.00052 Min. : 0.00052 Min. : 0.00052 Min. : 50.72
1st Qu.: 0.34031 1st Qu.: 0.34031 1st Qu.: 0.34031 1st Qu.: 53.32
Median : 0.68036 Median : 0.68036 Median : 0.68036 Median : 57.66
Mean : 1.00771 Mean : 1.00771 Mean : 1.00771 Mean : 58.08
3rd Qu.: 1.29593 3rd Qu.: 1.29593 3rd Qu.: 1.29593 3rd Qu.: 62.78
Max. :61.22712 Max. :61.22712 Max. :61.22712 Max. :274.43
ra_bp_2 ra_g_1 ra_g_2 ra_rp_1
Min. : 50.72 Min. : 50.72 Min. : 50.72 Min. : 50.72
1st Qu.: 53.32 1st Qu.: 53.32 1st Qu.: 53.32 1st Qu.: 53.32
Median : 57.66 Median : 57.66 Median : 57.66 Median : 57.66
Mean : 58.08 Mean : 58.08 Mean : 58.08 Mean : 58.08
3rd Qu.: 62.78 3rd Qu.: 62.78 3rd Qu.: 62.78 3rd Qu.: 62.78
Max. :274.43 Max. :274.43 Max. :274.43 Max. :274.43
ra_rp_2
Min. : 50.72
1st Qu.: 53.32
Median : 57.66
Mean : 58.08
3rd Qu.: 62.78
Max. :274.43
# Print per-column summary statistics for gaiadf2.
gaiadf2 %>%
  summary()
source_id ra dec parallax
Min. :1.117e+16 Min. : 0.3976 Min. :-83.668 Min. : 0.119
1st Qu.:1.891e+18 1st Qu.: 95.8385 1st Qu.:-37.057 1st Qu.: 5.722
Median :3.511e+18 Median :183.8217 Median : -3.801 Median : 10.257
Mean :3.582e+18 Mean :182.8800 Mean : -3.431 Mean : 18.254
3rd Qu.:5.381e+18 3rd Qu.:271.3863 3rd Qu.: 28.793 3rd Qu.: 21.315
Max. :6.914e+18 Max. :359.8285 Max. : 87.020 Max. :310.577
phot_g_mean_mag phot_bp_mean_mag phot_rp_mean_mag pmra
Min. :1.943 Min. :2.884 Min. :1.844 Min. :-2240.085
1st Qu.:3.491 1st Qu.:3.926 1st Qu.:2.845 1st Qu.: -31.536
Median :3.941 Median :4.335 Median :3.281 Median : -3.744
Mean :3.796 Mean :4.397 Mean :3.284 Mean : 5.689
3rd Qu.:4.224 3rd Qu.:4.803 3rd Qu.:3.726 3rd Qu.: 28.247
Max. :4.437 Max. :8.051 Max. :4.590 Max. : 3966.661
pmdec
Min. :-3421.809
1st Qu.: -41.816
Median : -10.685
Mean : -27.139
3rd Qu.: 8.686
Max. : 1164.959
# Boxplots of the measured values per band, one panel per measurement type.
# Each panel gets its own y-axis range (scales = "free_y").
ggplot(
  data = gaiadf_tidy,
  mapping = aes(x = band, y = as.numeric(value), fill = band)
) +
  geom_boxplot() +
  facet_wrap(~ measurement_type, scales = "free_y") +
  labs(
    title = "Distribution of Measurements by Band and Type",
    x = "Band",
    y = "Value"
  ) +
  theme_minimal() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
# Histogram of every attribute in gaiadf2_tidy, one panel per attribute, with
# both axes scaled independently per panel (scales = "free").
# NOTE(review): a single binwidth of 0.5 is applied to all panels; attributes
# with a very wide range will get a very large number of narrow bins --
# confirm this is intended.
ggplot(
  data = gaiadf2_tidy,
  mapping = aes(x = as.numeric(value), fill = attribute)
) +
  geom_histogram(binwidth = 0.5, color = "black", alpha = 0.6) +
  facet_wrap(~ attribute, scales = "free") +
  labs(
    title = "Histogram of Attributes in gaiadf2_tidy",
    x = "Value",
    y = "Frequency"
  ) +
  theme_minimal() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
# Scatter plot of proper motion in RA (x) against proper motion in Dec (y),
# drawn from the wide-format gaiadf2_pm table.
ggplot(
  data = gaiadf2_pm,
  mapping = aes(x = as.numeric(pmra), y = as.numeric(pmdec))
) +
  geom_point(alpha = 0.6, color = "red") +
  labs(
    title = "Proper Motion: PMRA vs PMDEC",
    x = "Proper Motion in RA",
    y = "Proper Motion in Dec"
  ) +
  theme_minimal() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
# Scatter plot of parallax (x) against G-band mean magnitude (y), drawn from
# the wide-format parallax_magnitude table.
ggplot(
  data = parallax_magnitude,
  mapping = aes(x = as.numeric(parallax), y = as.numeric(phot_g_mean_mag))
) +
  geom_point(color = "purple", alpha = 0.6) +
  labs(
    title = "Parallax vs G-band Magnitude",
    x = "Parallax",
    y = "G-band Magnitude"
  ) +
  theme_minimal() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))