---
title: "diamond_eda"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: cosmo
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
# Packages used by the dashboard.
library(flexdashboard)
library(MASS)
# Install DT only when it is missing: an unconditional install.packages()
# re-downloads the package on every knit and fails when no network or CRAN
# mirror is available.
if (!requireNamespace("DT", quietly = TRUE)) {
  install.packages("DT")
}
library(DT)
library(ggplot2)
# dplyr is attached after MASS, so dplyr::select() masks MASS::select().
library(dplyr)
```
## Dataset description {.tabset}
### Histogram of Diamond Prices
```{r}
# Histogram of diamond prices, in bins 500 price units wide.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(
    binwidth = 500,
    fill = "blue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram of Diamond Prices",
    x = "Price",
    y = "Frequency"
  ) +
  theme_minimal()

# Histogram of diamond carat, in bins 0.1 carat wide.
# theme_minimal() is applied here as well so this plot matches every other
# chart in the dashboard.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(
    binwidth = 0.1,
    fill = "green",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram of Diamond Carat",
    x = "Carat",
    y = "Frequency"
  ) +
  theme_minimal()
```
### BAR PLOT OF DIAMOND CUT
```{r}
# Number of diamonds in each cut category.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar(alpha = 0.7, color = "black", fill = "purple") +
  theme_minimal() +
  labs(
    x = "Cut",
    y = "Count",
    title = "Bar Plot of Diamond Cut"
  )
```
-----------------------------------------------------------------------
### Bivariate Analysis
```{r}
# Price against carat, one semi-transparent point per diamond, coloured by cut.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point(alpha = 0.5) +
  theme_minimal() +
  labs(
    x = "Carat",
    y = "Price",
    title = "Scatter Plot of Carat vs. Price by Cut"
  )
```
### BOXPLOT
```{r}
# Distribution of price within each cut; boxes are filled by cut.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, y = price, fill = cut)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    x = "Cut",
    y = "Price",
    title = "Boxplot of Price by Cut"
  )
```
### Multivariate Analysis
```{r}
# Price against carat with points coloured by diamond colour grade,
# drawn as one panel per cut.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = color)
) +
  geom_point(alpha = 0.5) +
  facet_wrap(~cut) +
  theme_minimal() +
  labs(
    x = "Carat",
    y = "Price",
    title = "Scatter Plot of Carat vs. Price by Cut and Color"
  )
```
### Remove outliers
```{r}
# Drop the rows of `data` whose value in `variable` is an IQR outlier.
#
# A row is kept when its value lies within
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], where Q1 and Q3 are the 25th and 75th
# percentiles of the column computed over all rows of `data`.
#
# Arguments:
#   data      A data frame or tibble.
#   variable  Name of a numeric column of `data`, as a single string.
#
# Returns `data` restricted to the rows inside the fences. Missing values are
# ignored when the quartiles are computed, and rows where the column is NA
# are dropped because filter() discards rows whose condition is NA.
remove_outliers <- function(data, variable) {
  stopifnot(
    is.character(variable),
    length(variable) == 1L,
    variable %in% names(data)
  )

  quartiles <- quantile(
    data[[variable]],
    probs = c(0.25, 0.75),
    na.rm = TRUE,
    names = FALSE
  )
  # Called `spread` rather than `IQR` so stats::IQR() is not shadowed.
  spread <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - 1.5 * spread
  upper_bound <- quartiles[2] + 1.5 * spread

  # `.data[[variable]]` resolves the column inside filter()'s data mask, so
  # the comparison always has the length filter() expects (including on
  # grouped input) instead of re-indexing the outer `data` object.
  dplyr::filter(
    data,
    .data[[variable]] >= lower_bound,
    .data[[variable]] <= upper_bound
  )
}
# Filter on price first, then on carat using the rows that survived the
# price filter.
diamonds_no_outliers <- remove_outliers(
  remove_outliers(diamonds, "price"),
  "carat"
)

# Single boxplot of price for the filtered data. Any points still beyond the
# whiskers are drawn as red filled circles.
ggplot(data = diamonds_no_outliers, mapping = aes(y = price)) +
  geom_boxplot(
    fill = "lightblue",
    outlier.size = 2,
    outlier.shape = 16,
    outlier.colour = "red"
  ) +
  theme_minimal() +
  labs(
    y = "Price",
    title = "Boxplot of Diamond Prices Without Outliers"
  )
```
### Boxplot of Diamond Carat Without Outliers
```{r}
# Single boxplot of carat for the filtered data. Any points still beyond the
# whiskers are drawn as red filled circles.
ggplot(data = diamonds_no_outliers, mapping = aes(y = carat)) +
  geom_boxplot(
    fill = "lightgreen",
    outlier.size = 2,
    outlier.shape = 16,
    outlier.colour = "red"
  ) +
  theme_minimal() +
  labs(
    y = "Carat",
    title = "Boxplot of Diamond Carat Without Outliers"
  )
```