# Read the split conventional/organic dataset from the working directory
data <- read.csv("data.split.csv")

# Parse both date columns into Date objects; values are expected to be
# written as year/month/day (e.g. "2019/06/12")
data[c("conventional.date", "organic.date")] <- lapply(
  data[c("conventional.date", "organic.date")],
  as.Date,
  format = "%Y/%m/%d"
)

# Print per-column summaries; columns containing missing values also report
# an NA count here. No NA handling is performed at this step.
summary(data)
##  conventional.date    conventional.average_price conventional.total_volume
##  Min.   :2017-12-03   Min.   :0.500              Min.   :  43610          
##  1st Qu.:2018-08-19   1st Qu.:0.980              1st Qu.: 233778          
##  Median :2019-06-12   Median :1.130              Median : 429995          
##  Mean   :2019-06-02   Mean   :1.143              Mean   : 625297          
##  3rd Qu.:2020-03-08   3rd Qu.:1.300              3rd Qu.: 787970          
##  Max.   :2020-11-29   Max.   :2.020              Max.   :5660216          
##  conventional.type  conventional.year conventional.geography
##  Length:6314        Min.   :2017      Length:6314           
##  Class :character   1st Qu.:2018      Class :character      
##  Mode  :character   Median :2019      Mode  :character      
##                     Mean   :2019                            
##                     3rd Qu.:2020                            
##                     Max.   :2020                            
##  conventional.Mileage conventional.total_sales  organic.date       
##  Min.   : 111         Min.   :  63986          Min.   :2017-12-03  
##  1st Qu.:1097         1st Qu.: 253914          1st Qu.:2018-08-19  
##  Median :2193         Median : 485151          Median :2019-06-12  
##  Mean   :1911         Mean   : 689118          Mean   :2019-06-02  
##  3rd Qu.:2632         3rd Qu.: 926973          3rd Qu.:2020-03-08  
##  Max.   :2998         Max.   :4815268          Max.   :2020-11-29  
##  organic.average_price organic.total_volume organic.type        organic.year 
##  Min.   :0.690         Min.   :   253       Length:6314        Min.   :2017  
##  1st Qu.:1.350         1st Qu.:  8698       Class :character   1st Qu.:2018  
##  Median :1.550         Median : 15730       Mode  :character   Median :2019  
##  Mean   :1.575         Mean   : 25221                          Mean   :2019  
##  3rd Qu.:1.770         3rd Qu.: 30548                          3rd Qu.:2020  
##  Max.   :2.780         Max.   :495084                          Max.   :2020  
##  organic.geography  organic.Mileage organic.total_sales
##  Length:6314        Min.   : 111    Min.   :   432.6   
##  Class :character   1st Qu.:1097    1st Qu.: 12690.8   
##  Mode  :character   Median :2193    Median : 24619.5   
##                     Mean   :1911    Mean   : 40450.3   
##                     3rd Qu.:2632    3rd Qu.: 46821.5   
##                     Max.   :2998    Max.   :673314.2   
##  organic.share_total_sales conventional.share_total_sales ave.con.share   
##  Min.   :0.0006739         Min.   :0.7225                 Min.   :0.8704  
##  1st Qu.:0.0375196         1st Qu.:0.9295                 1st Qu.:0.9334  
##  Median :0.0547658         Median :0.9452                 Median :0.9419  
##  Mean   :0.0563783         Mean   :0.9436                 Mean   :0.9436  
##  3rd Qu.:0.0705413         3rd Qu.:0.9625                 3rd Qu.:0.9534  
##  Max.   :0.2775082         Max.   :0.9993                 Max.   :0.9873  
##  ave.org.share    
##  Min.   :0.01272  
##  1st Qu.:0.04656  
##  Median :0.05813  
##  Mean   :0.05638  
##  3rd Qu.:0.06656  
##  Max.   :0.12958
# Create a bar plot for total sales comparison
# Stack the two sales columns into long form (one row per original row and
# product type), then total the sales for each product type.
# pivot_longer() is used in place of the superseded gather(); the aggregated
# totals are the same.
sales_data <- data %>%
  pivot_longer(
    cols = c(conventional.total_sales, organic.total_sales),
    names_to = "product_type",
    values_to = "total_sales"
  ) %>%
  group_by(product_type) %>%
  summarise(total_sales = sum(total_sales))

# One bar per product type; the x labels are the original column names
# ("conventional.total_sales", "organic.total_sales").
plot_ly(
  sales_data,
  x = ~product_type,
  y = ~total_sales,
  type = "bar",
  name = "Total Sales"
) %>%
  layout(
    title = "Total Sales by Product Type",
    xaxis = list(title = "Product Type"),
    yaxis = list(title = "Total Sales")
  )
# Line plot for price trends over time
# Stack the two price columns into long form, attach the date that belongs to
# each product type, and average the price per date and product type.
price_data <- data %>%
  pivot_longer(
    cols = c(conventional.average_price, organic.average_price),
    names_to = "product_type",
    values_to = "price"
  ) %>%
  mutate(
    # After reshaping, product_type holds the source column name, so the
    # comparison must use the full name "conventional.average_price".
    # if_else() (unlike base ifelse()) keeps the Date class, so no
    # as.Date() round-trip through a bare number is needed.
    date = if_else(
      product_type == "conventional.average_price",
      conventional.date,
      organic.date
    )
  ) %>%
  group_by(date, product_type) %>%
  # Drop the grouping explicitly: it silences the summarise() grouping
  # message, and avoids handing plotly a tibble still grouped by date
  # (plotly appears to treat dplyr groups as separate line segments, which
  # would break the lines apart -- NOTE(review): confirm against plotly docs).
  summarise(average_price = mean(price), .groups = "drop")
# Draw the averaged prices as lines over time, one colour per product type,
# then attach the chart title and axis labels.
layout(
  plot_ly(
    price_data,
    x = ~date,
    y = ~average_price,
    color = ~product_type,
    type = "scatter",
    mode = "lines"
  ),
  title = "Price Trends Over Time",
  xaxis = list(title = "Date"),
  yaxis = list(title = "Average Price")
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Create a dashboard with multiple plots
# subplot() merges the layouts of its panels (with the default
# which_layout = "merge", options from later panels override earlier ones),
# so a per-panel layout(title = ...) cannot label each panel: only one title
# survives for the whole figure. Each panel heading is therefore added as an
# annotation in paper coordinates, which subplot() is expected to rescale to
# the panel's own domain.
subplot(
  # Left panel: total sales per product type
  plot_ly(
    sales_data,
    x = ~product_type,
    y = ~total_sales,
    type = "bar",
    name = "Total Sales"
  ) %>%
    add_annotations(
      text = "Total Sales by Product Type",
      x = 0.5,
      y = 1,
      xref = "paper",
      yref = "paper",
      xanchor = "center",
      yanchor = "bottom",
      showarrow = FALSE
    ),

  # Right panel: average price over time, one line per product type
  plot_ly(
    price_data,
    x = ~date,
    y = ~average_price,
    color = ~product_type,
    type = "scatter",
    mode = "lines"
  ) %>%
    add_annotations(
      text = "Price Trends Over Time",
      x = 0.5,
      y = 1,
      xref = "paper",
      yref = "paper",
      xanchor = "center",
      yanchor = "bottom",
      showarrow = FALSE
    ),

  nrows = 1
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Conventional total sales over time, drawn as lines and coloured by
# geography. No filter control is added here; geography is only mapped to
# colour.
plot_ly(
  data,
  x = ~conventional.date,
  y = ~conventional.total_sales,
  color = ~conventional.geography,
  type = "scatter",
  mode = "lines"
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors