Instead of simply looking at diamond pricing in isolation, this analysis explores how a diamond’s physical characteristics — specifically carat weight and cut quality — interact to influence its market value. The goal is to understand not just whether larger diamonds are more expensive, but how cut quality may amplify or moderate that relationship.
# Print a compact, column-wise overview of the diamonds data (one line per
# column: name, type, and the first few values).
diamonds %>%
  glimpse()
## Rows: 53,940
## Columns: 10
## $ carat <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
## $ cut <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
## $ color <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
## $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
## $ depth <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
## $ table <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
## $ price <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
## $ x <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
## $ y <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
## $ z <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…
# Rename the `carat` column to `karat`, overwriting `diamonds` in place.
# NOTE(review): "karat" conventionally denotes gold purity, while gemstone
# weight is spelled "carat". The new name is kept here because the statements
# that follow refer to `karat`; confirm whether the rename is intentional.
diamonds <- rename(diamonds, karat = carat)
# Collapse the whole data set into a single row of headline figures:
# mean weight, mean price, and the highest and lowest price.
summary_stats <- summarize(
  diamonds,
  avg_karat = mean(karat),
  avg_price = mean(price),
  max_price = max(price),
  min_price = min(price)
)

# Display the one-row summary.
summary_stats
## # A tibble: 1 × 4
## avg_karat avg_price max_price min_price
## <dbl> <dbl> <int> <int>
## 1 0.798 3933. 18823 326
# Scatter plot of price against weight for every diamond. Points are drawn
# semi-transparent (alpha = 0.4) so that overlapping points remain visible.
diamonds %>%
  ggplot(mapping = aes(x = karat, y = price)) +
  geom_point(alpha = 0.4) +
  ggtitle("Diamond Price vs Carat Weight") +
  xlab("Carat") +
  ylab("Price (USD)")
# Same price-vs-weight scatter, with each point coloured by its cut grade so
# the cut categories can be compared within a single panel.
ggplot(data = diamonds, mapping = aes(x = karat, y = price)) +
  geom_point(mapping = aes(color = cut), alpha = 0.5) +
  labs(
    title = "Diamond Price vs Carat Weight by Cut Quality",
    x = "Carat",
    y = "Price (USD)",
    color = "Cut Quality"
  )
# Price against weight, drawn as one panel per cut grade (facet_wrap(~cut)).
# Points keep their per-cut colour, but the colour legend is suppressed:
# each panel's strip label already names the cut, so a legend would only
# repeat that information (under the raw variable name "cut") and take
# horizontal space away from the panels.
ggplot(diamonds, aes(x = karat, y = price, color = cut)) +
  geom_point(alpha = 0.5, show.legend = FALSE) +
  facet_wrap(~cut) +
  labs(
    title = "Diamond Price Distribution Across Cut Categories",
    x = "Carat",
    y = "Price (USD)"
  )