Load Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Read the Data

# Load the CSV from the working directory into the data frame `data`.
data <- read.csv(file = "data.split.csv")

Preview the Data

# Print the first six rows to check column names and value formats.
head(data, n = 6L)
##   conventional.date conventional.average_price conventional.total_volume
## 1         2017/12/3                       1.39                    139970
## 2         2017/12/3                       1.07                    504933
## 3         2017/12/3                       1.43                    658939
## 4         2017/12/3                       1.14                     86646
## 5         2017/12/3                       1.40                    488588
## 6         2017/12/3                       1.13                    153282
##   conventional.type conventional.year conventional.geography
## 1      conventional              2017                 Albany
## 2      conventional              2017                Atlanta
## 3      conventional              2017   Baltimore/Washington
## 4      conventional              2017                  Boise
## 5      conventional              2017                 Boston
## 6      conventional              2017      Buffalo/Rochester
##   conventional.Mileage conventional.total_sales organic.date
## 1                 2832                194558.30    2017/12/3
## 2                 2199                540278.31    2017/12/3
## 3                 2679                942282.77    2017/12/3
## 4                  827                 98776.44    2017/12/3
## 5                 2998                684023.20    2017/12/3
## 6                 2552                173208.66    2017/12/3
##   organic.average_price organic.total_volume organic.type organic.year
## 1                  1.44                 3577      organic         2017
## 2                  1.62                10609      organic         2017
## 3                  1.58                38754      organic         2017
## 4                  1.77                 1829      organic         2017
## 5                  1.88                21338      organic         2017
## 6                  1.18                 7575      organic         2017
##      organic.geography organic.Mileage organic.total_sales
## 1               Albany            2832             5150.88
## 2              Atlanta            2199            17186.58
## 3 Baltimore/Washington            2679            61231.32
## 4                Boise             827             3237.33
## 5               Boston            2998            40115.44
## 6    Buffalo/Rochester            2552             8938.50
##   organic.share_total_sales conventional.share_total_sales ave.con.share
## 1                0.02579190                      0.9742081     0.9675597
## 2                0.03082989                      0.9691701     0.9501917
## 3                0.06101690                      0.9389831     0.9449357
## 4                0.03173425                      0.9682658     0.9438373
## 5                0.05539746                      0.9446025     0.9444026
## 6                0.04907296                      0.9509270     0.9261546
##   ave.org.share
## 1    0.03244026
## 2    0.04980829
## 3    0.05506429
## 4    0.05616271
## 5    0.05559737
## 6    0.07384535

Summary Statistics

# Per-column overview: quartiles and mean for numeric columns,
# length/class/mode for character columns.
summary(object = data)
##  conventional.date  conventional.average_price conventional.total_volume
##  Length:6314        Min.   :0.500              Min.   :  43610          
##  Class :character   1st Qu.:0.980              1st Qu.: 233778          
##  Mode  :character   Median :1.130              Median : 429995          
##                     Mean   :1.143              Mean   : 625297          
##                     3rd Qu.:1.300              3rd Qu.: 787970          
##                     Max.   :2.020              Max.   :5660216          
##  conventional.type  conventional.year conventional.geography
##  Length:6314        Min.   :2017      Length:6314           
##  Class :character   1st Qu.:2018      Class :character      
##  Mode  :character   Median :2019      Mode  :character      
##                     Mean   :2019                            
##                     3rd Qu.:2020                            
##                     Max.   :2020                            
##  conventional.Mileage conventional.total_sales organic.date      
##  Min.   : 111         Min.   :  63986          Length:6314       
##  1st Qu.:1097         1st Qu.: 253914          Class :character  
##  Median :2193         Median : 485151          Mode  :character  
##  Mean   :1911         Mean   : 689118                            
##  3rd Qu.:2632         3rd Qu.: 926973                            
##  Max.   :2998         Max.   :4815268                            
##  organic.average_price organic.total_volume organic.type        organic.year 
##  Min.   :0.690         Min.   :   253       Length:6314        Min.   :2017  
##  1st Qu.:1.350         1st Qu.:  8698       Class :character   1st Qu.:2018  
##  Median :1.550         Median : 15730       Mode  :character   Median :2019  
##  Mean   :1.575         Mean   : 25221                          Mean   :2019  
##  3rd Qu.:1.770         3rd Qu.: 30548                          3rd Qu.:2020  
##  Max.   :2.780         Max.   :495084                          Max.   :2020  
##  organic.geography  organic.Mileage organic.total_sales
##  Length:6314        Min.   : 111    Min.   :   432.6   
##  Class :character   1st Qu.:1097    1st Qu.: 12690.8   
##  Mode  :character   Median :2193    Median : 24619.5   
##                     Mean   :1911    Mean   : 40450.3   
##                     3rd Qu.:2632    3rd Qu.: 46821.5   
##                     Max.   :2998    Max.   :673314.2   
##  organic.share_total_sales conventional.share_total_sales ave.con.share   
##  Min.   :0.0006739         Min.   :0.7225                 Min.   :0.8704  
##  1st Qu.:0.0375196         1st Qu.:0.9295                 1st Qu.:0.9334  
##  Median :0.0547658         Median :0.9452                 Median :0.9419  
##  Mean   :0.0563783         Mean   :0.9436                 Mean   :0.9436  
##  3rd Qu.:0.0705413         3rd Qu.:0.9625                 3rd Qu.:0.9534  
##  Max.   :0.2775082         Max.   :0.9993                 Max.   :0.9873  
##  ave.org.share    
##  Min.   :0.01272  
##  1st Qu.:0.04656  
##  Median :0.05813  
##  Mean   :0.05638  
##  3rd Qu.:0.06656  
##  Max.   :0.12958

Basic Plot

# Plot the distribution of one automatically chosen column: a histogram of the
# first numeric column, or a bar plot of the first column when no column is
# numeric.
#
# vapply() is used instead of sapply() so the result is always a logical
# vector, even when `data` has zero columns (sapply() would return a list).
numeric_cols <- vapply(data, is.numeric, logical(1))

# Fail early with a clear message instead of an obscure error from ggplot2
# when there is nothing to plot.
if (length(numeric_cols) == 0L) {
  stop("`data` has no columns to plot.", call. = FALSE)
}

if (any(numeric_cols)) {
  first_numeric_col <- names(data)[which(numeric_cols)[1]]
  # `.data[[name]]` looks the column up by its string name inside aes().
  ggplot(data, aes(x = .data[[first_numeric_col]])) +
    geom_histogram(bins = 30, fill = "blue", color = "white") +
    theme_minimal() +
    labs(
      x = first_numeric_col,
      y = "Count",
      title = paste("Histogram of", first_numeric_col)
    )
} else {
  first_col <- names(data)[1]
  ggplot(data, aes(x = .data[[first_col]])) +
    geom_bar(fill = "orange") +
    theme_minimal() +
    labs(
      x = first_col,
      y = "Count",
      title = paste("Bar Plot of", first_col)
    )
}