Load Libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Read the Data
# Load the CSV file "data.split.csv" (path relative to the working directory)
# into a base data frame named `data`.
data <- read.csv(file = "data.split.csv")
Preview the Data
# Show the first six rows of `data` as a quick sanity check of the import.
head(data, n = 6L)
## conventional.date conventional.average_price conventional.total_volume
## 1 2017/12/3 1.39 139970
## 2 2017/12/3 1.07 504933
## 3 2017/12/3 1.43 658939
## 4 2017/12/3 1.14 86646
## 5 2017/12/3 1.40 488588
## 6 2017/12/3 1.13 153282
## conventional.type conventional.year conventional.geography
## 1 conventional 2017 Albany
## 2 conventional 2017 Atlanta
## 3 conventional 2017 Baltimore/Washington
## 4 conventional 2017 Boise
## 5 conventional 2017 Boston
## 6 conventional 2017 Buffalo/Rochester
## conventional.Mileage conventional.total_sales organic.date
## 1 2832 194558.30 2017/12/3
## 2 2199 540278.31 2017/12/3
## 3 2679 942282.77 2017/12/3
## 4 827 98776.44 2017/12/3
## 5 2998 684023.20 2017/12/3
## 6 2552 173208.66 2017/12/3
## organic.average_price organic.total_volume organic.type organic.year
## 1 1.44 3577 organic 2017
## 2 1.62 10609 organic 2017
## 3 1.58 38754 organic 2017
## 4 1.77 1829 organic 2017
## 5 1.88 21338 organic 2017
## 6 1.18 7575 organic 2017
## organic.geography organic.Mileage organic.total_sales
## 1 Albany 2832 5150.88
## 2 Atlanta 2199 17186.58
## 3 Baltimore/Washington 2679 61231.32
## 4 Boise 827 3237.33
## 5 Boston 2998 40115.44
## 6 Buffalo/Rochester 2552 8938.50
## organic.share_total_sales conventional.share_total_sales ave.con.share
## 1 0.02579190 0.9742081 0.9675597
## 2 0.03082989 0.9691701 0.9501917
## 3 0.06101690 0.9389831 0.9449357
## 4 0.03173425 0.9682658 0.9438373
## 5 0.05539746 0.9446025 0.9444026
## 6 0.04907296 0.9509270 0.9261546
## ave.org.share
## 1 0.03244026
## 2 0.04980829
## 3 0.05506429
## 4 0.05616271
## 5 0.05559737
## 6 0.07384535
Summary Statistics
# Print per-column summaries of `data` (quantiles and mean for numeric columns,
# length/class/mode for character columns).
summary(object = data)
## conventional.date conventional.average_price conventional.total_volume
## Length:6314 Min. :0.500 Min. : 43610
## Class :character 1st Qu.:0.980 1st Qu.: 233778
## Mode :character Median :1.130 Median : 429995
## Mean :1.143 Mean : 625297
## 3rd Qu.:1.300 3rd Qu.: 787970
## Max. :2.020 Max. :5660216
## conventional.type conventional.year conventional.geography
## Length:6314 Min. :2017 Length:6314
## Class :character 1st Qu.:2018 Class :character
## Mode :character Median :2019 Mode :character
## Mean :2019
## 3rd Qu.:2020
## Max. :2020
## conventional.Mileage conventional.total_sales organic.date
## Min. : 111 Min. : 63986 Length:6314
## 1st Qu.:1097 1st Qu.: 253914 Class :character
## Median :2193 Median : 485151 Mode :character
## Mean :1911 Mean : 689118
## 3rd Qu.:2632 3rd Qu.: 926973
## Max. :2998 Max. :4815268
## organic.average_price organic.total_volume organic.type organic.year
## Min. :0.690 Min. : 253 Length:6314 Min. :2017
## 1st Qu.:1.350 1st Qu.: 8698 Class :character 1st Qu.:2018
## Median :1.550 Median : 15730 Mode :character Median :2019
## Mean :1.575 Mean : 25221 Mean :2019
## 3rd Qu.:1.770 3rd Qu.: 30548 3rd Qu.:2020
## Max. :2.780 Max. :495084 Max. :2020
## organic.geography organic.Mileage organic.total_sales
## Length:6314 Min. : 111 Min. : 432.6
## Class :character 1st Qu.:1097 1st Qu.: 12690.8
## Mode :character Median :2193 Median : 24619.5
## Mean :1911 Mean : 40450.3
## 3rd Qu.:2632 3rd Qu.: 46821.5
## Max. :2998 Max. :673314.2
## organic.share_total_sales conventional.share_total_sales ave.con.share
## Min. :0.0006739 Min. :0.7225 Min. :0.8704
## 1st Qu.:0.0375196 1st Qu.:0.9295 1st Qu.:0.9334
## Median :0.0547658 Median :0.9452 Median :0.9419
## Mean :0.0563783 Mean :0.9436 Mean :0.9436
## 3rd Qu.:0.0705413 3rd Qu.:0.9625 3rd Qu.:0.9534
## Max. :0.2775082 Max. :0.9993 Max. :0.9873
## ave.org.share
## Min. :0.01272
## 1st Qu.:0.04656
## Median :0.05813
## Mean :0.05638
## 3rd Qu.:0.06656
## Max. :0.12958
Basic Plot
# Draw a quick overview plot of one column of `data`:
#   * if `data` has at least one numeric column, a 30-bin histogram of the
#     first such column;
#   * otherwise, a bar chart of value counts for the first column.
# The plot is the value of the top-level `if` expression, so it is auto-printed.

# Fail early with a clear message instead of an obscure downstream error.
if (ncol(data) == 0L) {
  stop("`data` has no columns to plot", call. = FALSE)
}

# vapply() always yields a named logical vector (one entry per column), whereas
# sapply() changes its return type depending on the input.
numeric_cols <- vapply(data, is.numeric, logical(1))

if (any(numeric_cols)) {
  # Name of the first column that is numeric (integer or double).
  first_numeric_col <- names(data)[which(numeric_cols)[1]]
  ggplot(data, aes(x = .data[[first_numeric_col]])) +
    geom_histogram(bins = 30, fill = "blue", color = "white") +
    theme_minimal() +
    labs(
      x = first_numeric_col,
      y = "Count",
      title = paste("Histogram of", first_numeric_col)
    )
} else {
  # No numeric columns: fall back to counting values of the first column.
  first_col <- names(data)[1]
  ggplot(data, aes(x = .data[[first_col]])) +
    geom_bar(fill = "orange") +
    theme_minimal() +
    labs(
      x = first_col,
      y = "Count",
      title = paste("Bar Plot of", first_col)
    )
}
