# Attach the packages the rest of this script relies on: dplyr supplies the
# pipe and the grouping/summarising verbs, tidyr the reshaping verbs, and
# plotly the charting functions. Without them the script stops at the first
# `%>%` when run in a fresh session.
library(dplyr)
library(tidyr)
library(plotly)

# Load the dataset
data <- read.csv("data.split.csv")

# Convert the date columns from "YYYY/MM/DD" text to Date.
date_cols <- c("conventional.date", "organic.date")
data[date_cols] <- lapply(data[date_cols], as.Date, format = "%Y/%m/%d")

# Check for NA values: as.Date() returns NA (without an error) for any value
# that does not match the format, so count the NAs per column explicitly
# before relying on the data, then print the usual overview.
colSums(is.na(data))
summary(data)
## conventional.date conventional.average_price conventional.total_volume
## Min. :2017-12-03 Min. :0.500 Min. : 43610
## 1st Qu.:2018-08-19 1st Qu.:0.980 1st Qu.: 233778
## Median :2019-06-12 Median :1.130 Median : 429995
## Mean :2019-06-02 Mean :1.143 Mean : 625297
## 3rd Qu.:2020-03-08 3rd Qu.:1.300 3rd Qu.: 787970
## Max. :2020-11-29 Max. :2.020 Max. :5660216
## conventional.type conventional.year conventional.geography
## Length:6314 Min. :2017 Length:6314
## Class :character 1st Qu.:2018 Class :character
## Mode :character Median :2019 Mode :character
## Mean :2019
## 3rd Qu.:2020
## Max. :2020
## conventional.Mileage conventional.total_sales organic.date
## Min. : 111 Min. : 63986 Min. :2017-12-03
## 1st Qu.:1097 1st Qu.: 253914 1st Qu.:2018-08-19
## Median :2193 Median : 485151 Median :2019-06-12
## Mean :1911 Mean : 689118 Mean :2019-06-02
## 3rd Qu.:2632 3rd Qu.: 926973 3rd Qu.:2020-03-08
## Max. :2998 Max. :4815268 Max. :2020-11-29
## organic.average_price organic.total_volume organic.type organic.year
## Min. :0.690 Min. : 253 Length:6314 Min. :2017
## 1st Qu.:1.350 1st Qu.: 8698 Class :character 1st Qu.:2018
## Median :1.550 Median : 15730 Mode :character Median :2019
## Mean :1.575 Mean : 25221 Mean :2019
## 3rd Qu.:1.770 3rd Qu.: 30548 3rd Qu.:2020
## Max. :2.780 Max. :495084 Max. :2020
## organic.geography organic.Mileage organic.total_sales
## Length:6314 Min. : 111 Min. : 432.6
## Class :character 1st Qu.:1097 1st Qu.: 12690.8
## Mode :character Median :2193 Median : 24619.5
## Mean :1911 Mean : 40450.3
## 3rd Qu.:2632 3rd Qu.: 46821.5
## Max. :2998 Max. :673314.2
## organic.share_total_sales conventional.share_total_sales ave.con.share
## Min. :0.0006739 Min. :0.7225 Min. :0.8704
## 1st Qu.:0.0375196 1st Qu.:0.9295 1st Qu.:0.9334
## Median :0.0547658 Median :0.9452 Median :0.9419
## Mean :0.0563783 Mean :0.9436 Mean :0.9436
## 3rd Qu.:0.0705413 3rd Qu.:0.9625 3rd Qu.:0.9534
## Max. :0.2775082 Max. :0.9993 Max. :0.9873
## ave.org.share
## Min. :0.01272
## 1st Qu.:0.04656
## Median :0.05813
## Mean :0.05638
## 3rd Qu.:0.06656
## Max. :0.12958
# Bar chart comparing total sales of the two product types.
# Stack the two per-type sales columns into long form -- `product_type` holds
# the source column name, `total_sales` the value -- then add up each type.
# pivot_longer() is used instead of the superseded gather().
sales_data <- data %>%
  pivot_longer(
    cols = c(conventional.total_sales, organic.total_sales),
    names_to = "product_type",
    values_to = "total_sales"
  ) %>%
  group_by(product_type) %>%
  summarise(total_sales = sum(total_sales))

plot_ly(
  sales_data,
  x = ~product_type,
  y = ~total_sales,
  type = "bar",
  name = "Total Sales"
) %>%
  layout(
    title = "Total Sales by Product Type",
    xaxis = list(title = "Product Type"),
    yaxis = list(title = "Total Sales")
  )
# Line chart of the mean price per date for each product type.
# Stack the two per-type price columns into long form; `product_type` holds
# the full source column name ("conventional.average_price" or
# "organic.average_price").
price_data <- data %>%
  pivot_longer(
    cols = c(conventional.average_price, organic.average_price),
    names_to = "product_type",
    values_to = "price"
  ) %>%
  mutate(
    # Pick the date column that belongs to the row's product type. The
    # comparison must use the full column name stored in `product_type`;
    # comparing against the bare "conventional" never matches. if_else()
    # keeps the Date class, whereas base ifelse() strips it to a number.
    date = if_else(
      product_type == "conventional.average_price",
      conventional.date,
      organic.date
    )
  ) %>%
  group_by(date, product_type) %>%
  # Drop the grouping: summarise() would otherwise leave the result grouped
  # by `date`, and plotly breaks lines between dplyr groups, which would
  # leave each date as an isolated point instead of a connected line.
  summarise(average_price = mean(price), .groups = "drop")

plot_ly(
  price_data,
  x = ~date,
  y = ~average_price,
  color = ~product_type,
  type = "scatter",
  mode = "lines"
) %>%
  layout(
    title = "Price Trends Over Time",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Average Price")
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Dashboard: the sales bar chart and the price line chart side by side.
# subplot() merges the layouts of its panels, with later panels overriding
# earlier ones, so a per-panel `layout(title = ...)` leaves only the last
# title, shown as a heading for the whole figure. Each panel therefore gets
# its title as an annotation anchored to the top centre of its own plotting
# area ("paper" coordinates are rescaled to each panel by subplot()).
subplot(
  plot_ly(
    sales_data,
    x = ~product_type,
    y = ~total_sales,
    type = "bar",
    name = "Total Sales"
  ) %>%
    layout(
      annotations = list(
        list(
          text = "Total Sales by Product Type",
          x = 0.5, y = 1,
          xref = "paper", yref = "paper",
          xanchor = "center", yanchor = "bottom",
          showarrow = FALSE
        )
      )
    ),
  plot_ly(
    # ungroup() is a no-op on ungrouped data; it guards against a leftover
    # grouping by date, which plotly would turn into breaks in the lines.
    ungroup(price_data),
    x = ~date,
    y = ~average_price,
    color = ~product_type,
    type = "scatter",
    mode = "lines"
  ) %>%
    layout(
      annotations = list(
        list(
          text = "Price Trends Over Time",
          x = 0.5, y = 1,
          xref = "paper", yref = "paper",
          xanchor = "center", yanchor = "bottom",
          showarrow = FALSE
        )
      )
    ),
  nrows = 1
) %>%
  # Leave room above the panels so the annotation titles are not clipped.
  layout(margin = list(t = 60))
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Conventional total sales over time, drawn as one coloured line per
# geography; the colour mapping is what separates the regions.
data %>%
  plot_ly(
    x = ~conventional.date,
    y = ~conventional.total_sales,
    color = ~conventional.geography,
    type = "scatter",
    mode = "lines"
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors