This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Note: This analysis was performed using the open-source software R and RStudio.
# Load the conventional data set from the working directory.
library(readr)
data <- read_csv(file = "conventional.csv")
## Rows: 6314 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of total volume against average price, then a
# per-column summary of the loaded table.
plot(total_volume ~ average_price, data = data)
summary(data)
## date average_price total_volume type
## Length:6314 Min. :0.500 Min. : 43610 Length:6314
## Class :character 1st Qu.:0.980 1st Qu.: 233778 Class :character
## Mode :character Median :1.130 Median : 429995 Mode :character
## Mean :1.143 Mean : 625297
## 3rd Qu.:1.300 3rd Qu.: 787970
## Max. :2.020 Max. :5660216
## year geography Mileage
## Min. :2017 Length:6314 Min. : 111
## 1st Qu.:2018 Class :character 1st Qu.:1097
## Median :2019 Mode :character Median :2193
## Mean :2019 Mean :1911
## 3rd Qu.:2020 3rd Qu.:2632
## Max. :2020 Max. :2998
# Attach ggplot2 for the plots below and show the first six rows.
library(ggplot2)
head(data, n = 6L)
## # A tibble: 6 × 7
## date average_price total_volume type year geography Mileage
## <chr> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 12/3/2017 1.39 139970 conventional 2017 Albany 2832
## 2 12/3/2017 1.07 504933 conventional 2017 Atlanta 2199
## 3 12/3/2017 1.43 658939 conventional 2017 Baltimore/Was… 2679
## 4 12/3/2017 1.14 86646 conventional 2017 Boise 827
## 5 12/3/2017 1.4 488588 conventional 2017 Boston 2998
## 6 12/3/2017 1.13 153282 conventional 2017 Buffalo/Roche… 2552
# Histogram of average price.
# The bin count is set explicitly to ggplot2's default of 30, so the
# plot is unchanged but `stat_bin()` no longer emits its
# "Pick better value with `binwidth`" message.
ggplot(data, aes(x = average_price)) +
  geom_histogram(bins = 30)
# Load the organic data set; this replaces the table held in `data`.
library(readr)
data <- read_csv(file = "organic2.csv")
## Rows: 6312 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of total volume against average price, followed by
# summary statistics for every column.
plot(total_volume ~ average_price, data = data)
summary(data)
## date average_price total_volume type
## Length:6312 Min. :0.690 Min. : 253 Length:6312
## Class :character 1st Qu.:1.350 1st Qu.: 8698 Class :character
## Mode :character Median :1.550 Median : 15740 Mode :character
## Mean :1.575 Mean : 25227
## 3rd Qu.:1.770 3rd Qu.: 30554
## Max. :2.780 Max. :495084
## year geography Mileage
## Min. :2017 Length:6312 Min. : 111
## 1st Qu.:2018 Class :character 1st Qu.:1097
## Median :2019 Mode :character Median :2193
## Mean :2019 Mean :1910
## 3rd Qu.:2020 3rd Qu.:2632
## Max. :2020 Max. :2998
# Attach ggplot2 and print the first six rows of the table.
library(ggplot2)
head(data, n = 6L)
## # A tibble: 6 × 7
## date average_price total_volume type year geography Mileage
## <chr> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 12/3/2017 1.58 38754 organic 2017 Baltimore/Washingt… 2679
## 2 12/3/2017 1.77 1829 organic 2017 Boise 827
## 3 12/3/2017 1.88 21338 organic 2017 Boston 2998
## 4 12/3/2017 1.18 7575 organic 2017 Buffalo/Rochester 2552
## 5 12/3/2017 1.9 9558 organic 2017 Charlotte 2428
## 6 12/3/2017 1.75 32233 organic 2017 Chicago 2030
# Histogram of average price.
# `bins = 30` matches ggplot2's default, so the figure is the same;
# stating it explicitly silences the `stat_bin()` message about
# picking a bin width.
ggplot(data, aes(x = average_price)) +
  geom_histogram(bins = 30)
# Load the avocado data set; this replaces the table held in `data`.
library(readr)
data <- read_csv(file = "avocado.csv")
## Rows: 12628 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of total volume against average price, then summary
# statistics for each column.
plot(total_volume ~ average_price, data = data)
summary(data)
## date average_price total_volume type
## Length:12628 Min. :0.500 Min. : 253 Length:12628
## Class :character 1st Qu.:1.100 1st Qu.: 15733 Class :character
## Mode :character Median :1.320 Median : 94806 Mode :character
## Mean :1.359 Mean : 325259
## 3rd Qu.:1.570 3rd Qu.: 430222
## Max. :2.780 Max. :5660216
## year geography Mileage
## Min. :2017 Length:12628 Min. : 111
## 1st Qu.:2018 Class :character 1st Qu.:1097
## Median :2019 Mode :character Median :2193
## Mean :2019 Mean :1911
## 3rd Qu.:2020 3rd Qu.:2632
## Max. :2020 Max. :2998
# Histogram of average price, with bars filled by `type`.
# Fill colours are assigned to the levels of `type` in order; every
# bar is outlined in red.
ggplot(data = data, mapping = aes(x = average_price, fill = type)) +
  geom_histogram(bins = 30, colour = "red") +
  scale_fill_manual(values = c("green", "pink")) +
  labs(title = "Test 1")
# Stacked bar chart of total volume per geography, filled by year.
#
# * `geom_col()` stacks one segment per row, so each bar's height is
#   the SUM of `total_volume` for that geography. `reorder()` defaults
#   to the mean, which can order bars differently from their heights
#   when geographies have unequal row counts, so `FUN = sum` is used.
# * `year` is read as a double; wrapping it in `factor()` gives one
#   discrete fill colour per year instead of a continuous gradient.
ggplot(
  data,
  aes(
    x = reorder(geography, total_volume, FUN = sum),
    y = total_volume,
    fill = factor(year)
  )
) +
  geom_col() +
  labs(x = "geography", y = "total_volume", fill = "year")
This is the end of Part 1 of my analysis.