Dataset Description

View of the dataset

# A tibble: 6 × 15
  price  cost sales profit_margin inventory discount_percentage delivery_days
  <dbl> <dbl> <dbl>         <dbl>     <dbl>               <dbl>         <dbl>
1  219. 182.     40          16.9       105               27.8              9
2  478. 385.      7          19.4       192               26.9              6
3  379. 277.     32          27.1        59               21.9              2
4  319. 282.     48          11.8        45               11.0              2
5  120.  69.7    19          42.0        35                3.18             9
6  120.  65.4     6          45.6       185               20.7              8
# ℹ 8 more variables: category <chr>, material <chr>, color <chr>,
#   location <chr>, season <chr>, store_type <chr>, brand <chr>, revenue <dbl>
# A tibble: 6 × 15
  price  cost sales profit_margin inventory discount_percentage delivery_days
  <dbl> <dbl> <dbl>         <dbl>     <dbl>               <dbl>         <dbl>
1  280.  148.    26          47.3        33               25.0              8
2  272.  233.     8          14.1       102                9.79             9
3  310.  170.    24          45.2         9               21.8              8
4  440.  277.    48          36.9       127                2.82             6
5  491.  406.    38          17.4         6               19.8              4
6  233.  172.     6          26.1        55                9.50             9
# ℹ 8 more variables: category <chr>, material <chr>, color <chr>,
#   location <chr>, season <chr>, store_type <chr>, brand <chr>, revenue <dbl>

About the dataset

spc_tbl_ [2,500 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ price              : num [1:2500] 219 478 379 319 120 ...
 $ cost               : num [1:2500] 181.6 385 276.7 281.8 69.7 ...
 $ sales              : num [1:2500] 40 7 32 48 19 6 20 27 43 37 ...
 $ profit_margin      : num [1:2500] 16.9 19.4 27.1 11.8 42 ...
 $ inventory          : num [1:2500] 105 192 59 45 35 185 165 10 11 169 ...
 $ discount_percentage: num [1:2500] 27.8 26.94 21.95 11.01 3.18 ...
 $ delivery_days      : num [1:2500] 9 6 2 2 9 8 9 4 4 2 ...
 $ category           : chr [1:2500] "Bed" "Chair" "Table" "Table" ...
 $ material           : chr [1:2500] "Plastic" "Glass" "Metal" "Glass" ...
 $ color              : chr [1:2500] "Red" "Blue" "Black" "Green" ...
 $ location           : chr [1:2500] "Rural" "Rural" "Suburban" "Rural" ...
 $ season             : chr [1:2500] "Spring" "Summer" "Fall" "Summer" ...
 $ store_type         : chr [1:2500] "Online" "Online" "Online" "Retail" ...
 $ brand              : chr [1:2500] "BrandA" "BrandD" "BrandD" "BrandD" ...
 $ revenue            : num [1:2500] 3949 -3521 14286 12261 -4588 ...
 - attr(*, "spec")=
  .. cols(
  ..   price = col_double(),
  ..   cost = col_double(),
  ..   sales = col_double(),
  ..   profit_margin = col_double(),
  ..   inventory = col_double(),
  ..   discount_percentage = col_double(),
  ..   delivery_days = col_double(),
  ..   category = col_character(),
  ..   material = col_character(),
  ..   color = col_character(),
  ..   location = col_character(),
  ..   season = col_character(),
  ..   store_type = col_character(),
  ..   brand = col_character(),
  ..   revenue = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 

Summary of the dataset

     price            cost            sales       profit_margin  
 Min.   : 50.7   Min.   : 26.51   Min.   : 1.00   Min.   :10.02  
 1st Qu.:159.1   1st Qu.:106.40   1st Qu.:13.00   1st Qu.:20.23  
 Median :277.6   Median :189.34   Median :25.00   Median :30.30  
 Mean   :274.5   Mean   :191.93   Mean   :24.92   Mean   :30.21  
 3rd Qu.:387.4   3rd Qu.:263.19   3rd Qu.:37.00   3rd Qu.:40.13  
 Max.   :499.9   Max.   :447.02   Max.   :49.00   Max.   :50.00  
   inventory      discount_percentage delivery_days     category        
 Min.   :  0.00   Min.   : 0.005556   Min.   :1.000   Length:2500       
 1st Qu.: 50.00   1st Qu.: 7.760214   1st Qu.:3.000   Class :character  
 Median : 94.00   Median :14.915143   Median :5.000   Mode  :character  
 Mean   : 97.72   Mean   :14.947616   Mean   :4.894                     
 3rd Qu.:147.00   3rd Qu.:22.292661   3rd Qu.:7.000                     
 Max.   :199.00   Max.   :29.991229   Max.   :9.000                     
   material            color             location            season         
 Length:2500        Length:2500        Length:2500        Length:2500       
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
  store_type           brand              revenue      
 Length:2500        Length:2500        Min.   :-14215  
 Class :character   Class :character   1st Qu.:  1217  
 Mode  :character   Mode  :character   Median :  5523  
                                       Mean   :  5927  
                                       3rd Qu.: 10234  
                                       Max.   : 32922  

Head of the dataset

# A tibble: 6 × 15
  price  cost sales profit_margin inventory discount_percentage delivery_days
  <dbl> <dbl> <dbl>         <dbl>     <dbl>               <dbl>         <dbl>
1  219. 182.     40          16.9       105               27.8              9
2  478. 385.      7          19.4       192               26.9              6
3  379. 277.     32          27.1        59               21.9              2
4  319. 282.     48          11.8        45               11.0              2
5  120.  69.7    19          42.0        35                3.18             9
6  120.  65.4     6          45.6       185               20.7              8
# ℹ 8 more variables: category <chr>, material <chr>, color <chr>,
#   location <chr>, season <chr>, store_type <chr>, brand <chr>, revenue <dbl>

Checking for missing values

              price                cost               sales       profit_margin 
                  0                   0                   0                   0 
          inventory discount_percentage       delivery_days            category 
                  0                   0                   0                   0 
           material               color            location              season 
                  0                   0                   0                   0 
         store_type               brand             revenue 
                  0                   0                   0 

Univariate Analysis

Histogram for ‘price’

Histogram for ‘cost’

Histogram for ‘sales’

Histogram for ‘profit_margin’

Histogram for ‘inventory’

Histogram for ‘discount_percentage’

Histogram for ‘delivery_days’

Histogram for ‘price’

Bivariate Analysis

Scatter plot for ‘price’ vs ‘sales’

Scatter plot for ‘cost’ vs ‘revenue’

Scatter plot for ‘inventory’ vs ‘profit_margin’

Scatter plot for ‘discount_percentage’ vs ‘delivery_days’

Multivariate Analysis

Correlation heatmap

---
title: "EDA_furniture"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: flatly
    social: menu
    source_code: embed
    navbar:
      - { title: "Dataset Description", href: "#dataset-description" }
      - { title: "Univariate Analysis", href: "#univariate-analysis" }
      - { title: "Bivariate Analysis", href: "#bivariate-analysis" }
      - { title: "Multivariate Analysis", href: "#multivariate-analysis" }
---


```{r setup, include=FALSE}
library(flexdashboard)
library(readr)
library(ggplot2)
library(dplyr)

library(reshape2)
library(lubridate)
library(corrplot)
```
## Dataset Description {.tabset}

### View of the dataset
```{r}
# Read the furniture CSV into `df`; the other chunks in this document use it.
df <- readr::read_csv(file = "Furniture.csv")

# Preview both ends of the table: first six rows, then last six rows.
head(df, n = 6L)
tail(df, n = 6L)
```

### About the dataset
```{r}
# Compact overview of `df`: its dimensions, then each column's type and
# leading values.
utils::str(object = df)
```

### Summary of the dataset

```{r}
# Per-column summary of `df`: min/quartiles/mean/max for numeric columns,
# and length/class/mode for character columns.
summary(object = df)

```

### Head of the dataset

```{r}
# Show the first six rows of `df`.
head(df, n = 6L)
```
### Checking for missing values
```{r}
# Count missing values per column: flag each NA cell, then total the flags
# column by column.
df %>%
  is.na() %>%
  colSums()

```
## Univariate Analysis {.tabset} 

###  Histogram for 'price'


```{r}
# Distribution of `price`, grouped into bins 50 units wide.
ggplot(data = df, mapping = aes(price)) +
  geom_histogram(fill = "blue", colour = "black", binwidth = 50) +
  ggtitle("Histogram of Price") +
  xlab("Price") +
  ylab("Frequency") +
  theme_minimal()

```


###  Histogram for 'cost'


```{r}

# Distribution of `cost`, grouped into bins 50 units wide.
ggplot(data = df, mapping = aes(cost)) +
  geom_histogram(fill = "green", colour = "black", binwidth = 50) +
  ggtitle("Histogram of Cost") +
  xlab("Cost") +
  ylab("Frequency") +
  theme_minimal()

```


###  Histogram for 'sales'


```{r}

# Distribution of `sales`, grouped into bins 5 units wide.
ggplot(data = df, mapping = aes(sales)) +
  geom_histogram(fill = "orange", colour = "black", binwidth = 5) +
  ggtitle("Histogram of Sales") +
  xlab("Sales") +
  ylab("Frequency") +
  theme_minimal()

```


###  Histogram for 'profit_margin'


```{r}
# Distribution of `profit_margin`, grouped into bins 5 units wide.
ggplot(data = df, mapping = aes(profit_margin)) +
  geom_histogram(fill = "purple", colour = "black", binwidth = 5) +
  ggtitle("Histogram of Profit Margin") +
  xlab("Profit Margin (%)") +
  ylab("Frequency") +
  theme_minimal()

```




###  Histogram for 'inventory'


```{r}
# Distribution of `inventory`, grouped into bins 10 units wide.
ggplot(data = df, mapping = aes(inventory)) +
  geom_histogram(fill = "red", colour = "black", binwidth = 10) +
  ggtitle("Histogram of Inventory") +
  xlab("Inventory") +
  ylab("Frequency") +
  theme_minimal()


```


###  Histogram for 'discount_percentage'


```{r}
# Distribution of `discount_percentage`, grouped into bins 5 units wide.
ggplot(data = df, mapping = aes(discount_percentage)) +
  geom_histogram(fill = "cyan", colour = "black", binwidth = 5) +
  ggtitle("Histogram of Discount Percentage") +
  xlab("Discount Percentage") +
  ylab("Frequency") +
  theme_minimal()

```


###  Histogram for 'delivery_days'


```{r}
# Histogram for 'delivery_days'
# delivery_days appears to hold whole-day counts (1 to 9 in this dataset), so
# use one bin per day. A bin width of 2 cannot split nine distinct values
# evenly, which made bars cover different numbers of days and distorted the
# comparison between them.
ggplot(df, aes(x = delivery_days)) +
  geom_histogram(binwidth = 1, fill = "yellow", color = "black") +
  labs(
    title = "Histogram of Delivery Days",
    x = "Delivery Days",
    y = "Frequency"
  ) +
  theme_minimal()
```


###  Histogram for 'revenue'


```{r}
# Histogram for 'revenue'
# revenue spans roughly -14,000 to 33,000 in this dataset (it can be
# negative), so bins 2,500 wide give about twenty bars.
ggplot(df, aes(x = revenue)) +
  geom_histogram(binwidth = 2500, fill = "brown", color = "black") +
  labs(title = "Histogram of Revenue", x = "Revenue", y = "Frequency") +
  theme_minimal()

```



## Bivariate Analysis {.tabset}

### Scatter plot for 'price' vs 'sales'

```{r}
# `price` (x) against `sales` (y), drawn as semi-transparent blue points.
ggplot(data = df) +
  geom_point(
    mapping = aes(x = price, y = sales),
    colour = "blue", size = 3, alpha = 0.6
  ) +
  labs(
    title = "Scatter Plot of Price vs Sales",
    x = "Price",
    y = "Sales"
  ) +
  theme_minimal()

```


### Scatter plot for 'cost' vs 'revenue'

```{r}

# `cost` (x) against `revenue` (y), drawn as semi-transparent green points.
ggplot(data = df) +
  geom_point(
    mapping = aes(x = cost, y = revenue),
    colour = "green", size = 3, alpha = 0.6
  ) +
  labs(
    title = "Scatter Plot of Cost vs Revenue",
    x = "Cost",
    y = "Revenue"
  ) +
  theme_minimal()

```



### Scatter plot for 'inventory' vs 'profit_margin'

```{r}

# `inventory` (x) against `profit_margin` (y), drawn as semi-transparent
# red points.
ggplot(data = df) +
  geom_point(
    mapping = aes(x = inventory, y = profit_margin),
    colour = "red", size = 3, alpha = 0.6
  ) +
  labs(
    title = "Scatter Plot of Inventory vs Profit Margin",
    x = "Inventory",
    y = "Profit Margin"
  ) +
  theme_minimal()


```


### Scatter plot for 'discount_percentage' vs 'delivery_days'

```{r}

# `discount_percentage` (x) against `delivery_days` (y), drawn as
# semi-transparent purple points.
ggplot(data = df) +
  geom_point(
    mapping = aes(x = discount_percentage, y = delivery_days),
    colour = "purple", size = 3, alpha = 0.6
  ) +
  labs(
    title = "Scatter Plot of Discount Percentage vs Delivery Days",
    x = "Discount Percentage",
    y = "Delivery Days"
  ) +
  theme_minimal()


```


## Multivariate Analysis {.tabset}


### Correlation heatmap


```{r}

# Correlation heatmap across every numeric column of `df`.

# Keep only the numeric columns. `select(where(...))` is the current
# tidyselect form; it replaces the superseded `select_if()`.
numeric_df <- df %>% select(where(is.numeric))

# Pairwise correlation matrix; use = "complete.obs" drops any row that
# contains an NA before computing.
cor_matrix <- cor(numeric_df, use = "complete.obs")

# Draw the matrix as coloured tiles on a blue-white-red palette of 200
# shades, with each coefficient printed in its tile.
corrplot(
  cor_matrix,
  method = "color",
  type = "full",
  col = colorRampPalette(c("blue", "white", "red"))(200),
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.7,
  title = "Correlation Heatmap",
  # Margins are c(bottom, left, top, right); the top margin of 1 leaves room
  # for the title.
  mar = c(0, 0, 1, 0)
)




```