# A tibble: 6 × 15
price cost sales profit_margin inventory discount_percentage delivery_days
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 219. 182. 40 16.9 105 27.8 9
2 478. 385. 7 19.4 192 26.9 6
3 379. 277. 32 27.1 59 21.9 2
4 319. 282. 48 11.8 45 11.0 2
5 120. 69.7 19 42.0 35 3.18 9
6 120. 65.4 6 45.6 185 20.7 8
# ℹ 8 more variables: category <chr>, material <chr>, color <chr>,
# location <chr>, season <chr>, store_type <chr>, brand <chr>, revenue <dbl>
# A tibble: 6 × 15
price cost sales profit_margin inventory discount_percentage delivery_days
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 280. 148. 26 47.3 33 25.0 8
2 272. 233. 8 14.1 102 9.79 9
3 310. 170. 24 45.2 9 21.8 8
4 440. 277. 48 36.9 127 2.82 6
5 491. 406. 38 17.4 6 19.8 4
6 233. 172. 6 26.1 55 9.50 9
# ℹ 8 more variables: category <chr>, material <chr>, color <chr>,
# location <chr>, season <chr>, store_type <chr>, brand <chr>, revenue <dbl>
spc_tbl_ [2,500 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ price : num [1:2500] 219 478 379 319 120 ...
$ cost : num [1:2500] 181.6 385 276.7 281.8 69.7 ...
$ sales : num [1:2500] 40 7 32 48 19 6 20 27 43 37 ...
$ profit_margin : num [1:2500] 16.9 19.4 27.1 11.8 42 ...
$ inventory : num [1:2500] 105 192 59 45 35 185 165 10 11 169 ...
$ discount_percentage: num [1:2500] 27.8 26.94 21.95 11.01 3.18 ...
$ delivery_days : num [1:2500] 9 6 2 2 9 8 9 4 4 2 ...
$ category : chr [1:2500] "Bed" "Chair" "Table" "Table" ...
$ material : chr [1:2500] "Plastic" "Glass" "Metal" "Glass" ...
$ color : chr [1:2500] "Red" "Blue" "Black" "Green" ...
$ location : chr [1:2500] "Rural" "Rural" "Suburban" "Rural" ...
$ season : chr [1:2500] "Spring" "Summer" "Fall" "Summer" ...
$ store_type : chr [1:2500] "Online" "Online" "Online" "Retail" ...
$ brand : chr [1:2500] "BrandA" "BrandD" "BrandD" "BrandD" ...
$ revenue : num [1:2500] 3949 -3521 14286 12261 -4588 ...
- attr(*, "spec")=
.. cols(
.. price = col_double(),
.. cost = col_double(),
.. sales = col_double(),
.. profit_margin = col_double(),
.. inventory = col_double(),
.. discount_percentage = col_double(),
.. delivery_days = col_double(),
.. category = col_character(),
.. material = col_character(),
.. color = col_character(),
.. location = col_character(),
.. season = col_character(),
.. store_type = col_character(),
.. brand = col_character(),
.. revenue = col_double()
.. )
- attr(*, "problems")=<externalptr>
price cost sales profit_margin
Min. : 50.7 Min. : 26.51 Min. : 1.00 Min. :10.02
1st Qu.:159.1 1st Qu.:106.40 1st Qu.:13.00 1st Qu.:20.23
Median :277.6 Median :189.34 Median :25.00 Median :30.30
Mean :274.5 Mean :191.93 Mean :24.92 Mean :30.21
3rd Qu.:387.4 3rd Qu.:263.19 3rd Qu.:37.00 3rd Qu.:40.13
Max. :499.9 Max. :447.02 Max. :49.00 Max. :50.00
inventory discount_percentage delivery_days category
Min. : 0.00 Min. : 0.005556 Min. :1.000 Length:2500
1st Qu.: 50.00 1st Qu.: 7.760214 1st Qu.:3.000 Class :character
Median : 94.00 Median :14.915143 Median :5.000 Mode :character
Mean : 97.72 Mean :14.947616 Mean :4.894
3rd Qu.:147.00 3rd Qu.:22.292661 3rd Qu.:7.000
Max. :199.00 Max. :29.991229 Max. :9.000
material color location season
Length:2500 Length:2500 Length:2500 Length:2500
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
store_type brand revenue
Length:2500 Length:2500 Min. :-14215
Class :character Class :character 1st Qu.: 1217
Mode :character Mode :character Median : 5523
Mean : 5927
3rd Qu.: 10234
Max. : 32922
# A tibble: 6 × 15
price cost sales profit_margin inventory discount_percentage delivery_days
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 219. 182. 40 16.9 105 27.8 9
2 478. 385. 7 19.4 192 26.9 6
3 379. 277. 32 27.1 59 21.9 2
4 319. 282. 48 11.8 45 11.0 2
5 120. 69.7 19 42.0 35 3.18 9
6 120. 65.4 6 45.6 185 20.7 8
# ℹ 8 more variables: category <chr>, material <chr>, color <chr>,
# location <chr>, season <chr>, store_type <chr>, brand <chr>, revenue <dbl>
price cost sales profit_margin
0 0 0 0
inventory discount_percentage delivery_days category
0 0 0 0
material color location season
0 0 0 0
store_type brand revenue
0 0 0
---
title: "EDA_furniture"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: flatly
    social: menu
    source_code: embed
    # Navbar links jump to the four level-2 sections of this document.
    navbar:
      - { title: "Dataset Description", href: "#dataset-description" }
      - { title: "Univariate Analysis", href: "#univariate-analysis" }
      - { title: "Bivariate Analysis", href: "#bivariate-analysis" }
      - { title: "Multivariate Analysis", href: "#multivariate-analysis" }
---
```{r setup, include=FALSE}
library(flexdashboard)
library(readr)
library(ggplot2)
library(dplyr)
library(reshape2)
library(lubridate)
library(corrplot)
```
## Dataset Description {.tabset}
### View of the dataset
```{r}
# Load the dataset. The path is relative, so Furniture.csv must be reachable
# from the directory the document is knitted in.
# show_col_types = FALSE keeps readr's column-specification message out of
# the rendered dashboard tab; the parsed data is unchanged.
df <- read_csv("Furniture.csv", show_col_types = FALSE)
# Preview the first and the last six rows.
head(df)
tail(df)
```
### About the dataset
```{r}
# Compact overview of df: its class, dimensions, and each column's type
# together with its first few values.
df %>% str()
```
### Summary of the dataset
```{r}
# Per-column summary of every column in df: quartiles and mean for numeric
# columns, length/class/mode for character columns.
df %>% summary()
```
### Head of the dataset
```{r}
# Show the first six rows (head()'s default row count).
# NOTE(review): the chunk that loads the data already prints this preview.
df %>% head(n = 6)
```
### Checking for missing values
```{r}
# Count the missing (NA) entries in each column: flag every cell with
# is.na(), then total the flags column by column.
df %>%
  is.na() %>%
  colSums()
```
## Univariate Analysis {.tabset}
### Histogram for 'price'
```{r}
# Distribution of price, counted in bins 50 units wide.
df %>%
  ggplot(mapping = aes(x = price)) +
  theme_minimal() +
  geom_histogram(color = "black", fill = "blue", binwidth = 50) +
  labs(y = "Frequency", x = "Price", title = "Histogram of Price")
```
### Histogram for 'cost'
```{r}
# Distribution of cost, counted in bins 50 units wide.
df %>%
  ggplot(mapping = aes(x = cost)) +
  theme_minimal() +
  geom_histogram(color = "black", fill = "green", binwidth = 50) +
  labs(y = "Frequency", x = "Cost", title = "Histogram of Cost")
```
### Histogram for 'sales'
```{r}
# Distribution of sales, counted in bins 5 units wide.
df %>%
  ggplot(mapping = aes(x = sales)) +
  theme_minimal() +
  geom_histogram(color = "black", fill = "orange", binwidth = 5) +
  labs(y = "Frequency", x = "Sales", title = "Histogram of Sales")
```
### Histogram for 'profit_margin'
```{r}
# Distribution of profit_margin, counted in bins 5 units wide.
df %>%
  ggplot(mapping = aes(x = profit_margin)) +
  theme_minimal() +
  geom_histogram(color = "black", fill = "purple", binwidth = 5) +
  labs(
    y = "Frequency",
    x = "Profit Margin (%)",
    title = "Histogram of Profit Margin"
  )
```
### Histogram for 'inventory'
```{r}
# Distribution of inventory, counted in bins 10 units wide.
df %>%
  ggplot(mapping = aes(x = inventory)) +
  theme_minimal() +
  geom_histogram(color = "black", fill = "red", binwidth = 10) +
  labs(y = "Frequency", x = "Inventory", title = "Histogram of Inventory")
```
### Histogram for 'discount_percentage'
```{r}
# Distribution of discount_percentage, counted in bins 5 units wide.
df %>%
  ggplot(mapping = aes(x = discount_percentage)) +
  theme_minimal() +
  geom_histogram(color = "black", fill = "cyan", binwidth = 5) +
  labs(
    y = "Frequency",
    x = "Discount Percentage",
    title = "Histogram of Discount Percentage"
  )
```
### Histogram for 'delivery_days'
```{r}
# Histogram for 'delivery_days'
# delivery_days appears to hold whole-day counts, so use one bin per day:
# each bar is then the number of rows with exactly that delivery time. A
# wider bin would merge neighbouring days into a single bar.
ggplot(df, aes(x = delivery_days)) +
  geom_histogram(binwidth = 1, fill = "yellow", color = "black") +
  labs(
    title = "Histogram of Delivery Days",
    x = "Delivery Days",
    y = "Frequency"
  ) +
  theme_minimal()
```
### Histogram for 'revenue'
```{r}
# Histogram for 'revenue'
# revenue can be negative as well as positive in this dataset; a bin width of
# 2500 yields roughly 20 bins over the range reported by summary(df).
ggplot(df, aes(x = revenue)) +
  geom_histogram(binwidth = 2500, fill = "brown", color = "black") +
  labs(title = "Histogram of Revenue", x = "Revenue", y = "Frequency") +
  theme_minimal()
```
## Bivariate Analysis {.tabset}
### Scatter plot for 'price' vs 'sales'
```{r}
# Price (x) against sales (y): one semi-transparent point per row, so
# overlapping observations show up as darker areas.
df %>%
  ggplot(mapping = aes(x = price, y = sales)) +
  theme_minimal() +
  geom_point(alpha = 0.6, size = 3, color = "blue") +
  labs(y = "Sales", x = "Price", title = "Scatter Plot of Price vs Sales")
```
### Scatter plot for 'cost' vs 'revenue'
```{r}
# Cost (x) against revenue (y): one semi-transparent point per row.
df %>%
  ggplot(mapping = aes(x = cost, y = revenue)) +
  theme_minimal() +
  geom_point(alpha = 0.6, size = 3, color = "green") +
  labs(y = "Revenue", x = "Cost", title = "Scatter Plot of Cost vs Revenue")
```
### Scatter plot for 'inventory' vs 'profit_margin'
```{r}
# Inventory (x) against profit margin (y): one semi-transparent point per row.
df %>%
  ggplot(mapping = aes(x = inventory, y = profit_margin)) +
  theme_minimal() +
  geom_point(alpha = 0.6, size = 3, color = "red") +
  labs(
    y = "Profit Margin",
    x = "Inventory",
    title = "Scatter Plot of Inventory vs Profit Margin"
  )
```
### Scatter plot for 'discount_percentage' vs 'delivery_days'
```{r}
# Discount percentage (x) against delivery days (y): one semi-transparent
# point per row.
df %>%
  ggplot(mapping = aes(x = discount_percentage, y = delivery_days)) +
  theme_minimal() +
  geom_point(alpha = 0.6, size = 3, color = "purple") +
  labs(
    y = "Delivery Days",
    x = "Discount Percentage",
    title = "Scatter Plot of Discount Percentage vs Delivery Days"
  )
```
## Multivariate Analysis {.tabset}
### Correlation heatmap
```{r}
# Keep only the numeric columns: cor() cannot work on character columns.
# select(where(...)) is the current tidyselect form of the superseded
# select_if().
numeric_df <- df %>% select(where(is.numeric))
# Correlation matrix of the numeric columns; "complete.obs" drops any row
# that has a missing value in at least one of them.
cor_matrix <- cor(numeric_df, use = "complete.obs")
# Draw the matrix as a colour-coded grid (blue -> white -> red across 200
# shades) with the coefficient printed in every cell. mar reserves one line
# of top margin so the title has room above the plot.
corrplot(
  cor_matrix,
  method = "color", type = "full",
  col = colorRampPalette(c("blue", "white", "red"))(200),
  tl.col = "black", tl.srt = 45,
  addCoef.col = "black", number.cex = 0.7,
  title = "Correlation Heatmap", mar = c(0, 0, 1, 0)
)
```