R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

Note: this analysis was performed using the open-source software R and RStudio.

# Attach readr for read_csv(), then read conventional.csv into `data`.
library(readr)
data <- read_csv(file = "conventional.csv")
## Rows: 12628 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot: total_volume on the y-axis against average_price on the x-axis.
plot(
  total_volume ~ average_price,
  data = data
)

Answers

# Attach ggplot2 and preview the first six rows of `data`.
library(ggplot2)
head(data, n = 6L)
## # A tibble: 6 × 7
##   date      average_price total_volume type          year geography      Mileage
##   <chr>             <dbl>        <dbl> <chr>        <dbl> <chr>            <dbl>
## 1 12/3/2017          1.39       139970 conventional  2017 Albany            2832
## 2 12/3/2017          1.07       504933 conventional  2017 Atlanta           2199
## 3 12/3/2017          1.43       658939 conventional  2017 Baltimore/Was…    2679
## 4 12/3/2017          1.14        86646 conventional  2017 Boise              827
## 5 12/3/2017          1.4        488588 conventional  2017 Boston            2998
## 6 12/3/2017          1.13       153282 conventional  2017 Buffalo/Roche…    2552
# Histogram of average_price. `bins = 30` spells out the value stat_bin()
# would otherwise fall back to, so the plot is unchanged but ggplot2 no longer
# emits its "Pick better value with `binwidth`" message; tune as needed.
ggplot(data = data, aes(x = average_price)) +
  geom_histogram(bins = 30)

For organic avocados

# Attach readr for read_csv(), then read organic.csv into `data`.
library(readr)
data <- read_csv(file = "organic.csv")
## Rows: 12628 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot: total_volume on the y-axis against average_price on the x-axis.
plot(
  total_volume ~ average_price,
  data = data
)

Answers

# Attach ggplot2 and preview the first six rows of `data`.
library(ggplot2)
head(data, n = 6L)
## # A tibble: 6 × 7
##   date      average_price total_volume type     year geography           Mileage
##   <chr>             <dbl>        <dbl> <chr>   <dbl> <chr>                 <dbl>
## 1 12/3/2017          1.44         3577 organic  2017 Albany                 2832
## 2 12/3/2017          1.62        10609 organic  2017 Atlanta                2199
## 3 12/3/2017          1.58        38754 organic  2017 Baltimore/Washingt…    2679
## 4 12/3/2017          1.77         1829 organic  2017 Boise                   827
## 5 12/3/2017          1.88        21338 organic  2017 Boston                 2998
## 6 12/3/2017          1.18         7575 organic  2017 Buffalo/Rochester      2552
# Histogram of average_price. `bins = 30` spells out the value stat_bin()
# would otherwise fall back to, so the plot is unchanged but ggplot2 no longer
# emits its "Pick better value with `binwidth`" message; tune as needed.
ggplot(data = data, aes(x = average_price)) +
  geom_histogram(bins = 30)