R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

Note: this analysis was performed using the open-source software R and RStudio.

For Conventional Avocados

# Attach readr for read_csv(), then read conventional.csv into `data`.
library("readr")
data <- read_csv(file = "conventional.csv")
## Rows: 6314 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot: total_volume (y) against average_price (x), formula interface.
plot(total_volume ~ average_price, data = data)

# Column-by-column summary of the loaded table.
summary(data)
##      date           average_price    total_volume         type          
##  Length:6314        Min.   :0.500   Min.   :  43610   Length:6314       
##  Class :character   1st Qu.:0.980   1st Qu.: 233778   Class :character  
##  Mode  :character   Median :1.130   Median : 429995   Mode  :character  
##                     Mean   :1.143   Mean   : 625297                     
##                     3rd Qu.:1.300   3rd Qu.: 787970                     
##                     Max.   :2.020   Max.   :5660216                     
##       year       geography            Mileage    
##  Min.   :2017   Length:6314        Min.   : 111  
##  1st Qu.:2018   Class :character   1st Qu.:1097  
##  Median :2019   Mode  :character   Median :2193  
##  Mean   :2019                      Mean   :1911  
##  3rd Qu.:2020                      3rd Qu.:2632  
##  Max.   :2020                      Max.   :2998
# Attach ggplot2 for the histogram that follows, then preview the first rows.
library("ggplot2")
head(data, n = 6L)
## # A tibble: 6 × 7
##   date      average_price total_volume type          year geography      Mileage
##   <chr>             <dbl>        <dbl> <chr>        <dbl> <chr>            <dbl>
## 1 12/3/2017          1.39       139970 conventional  2017 Albany            2832
## 2 12/3/2017          1.07       504933 conventional  2017 Atlanta           2199
## 3 12/3/2017          1.43       658939 conventional  2017 Baltimore/Was…    2679
## 4 12/3/2017          1.14        86646 conventional  2017 Boise              827
## 5 12/3/2017          1.4        488588 conventional  2017 Boston            2998
## 6 12/3/2017          1.13       153282 conventional  2017 Buffalo/Roche…    2552
# Distribution of average_price for the conventional data.
# `bins` is set explicitly to 30, the value stat_bin() falls back to when
# nothing is given, so the plot is unchanged but ggplot2 no longer emits its
# "Pick better value with `binwidth`" message. This also matches the combined
# histogram further down, which already passes bins = 30.
ggplot(data = data, mapping = aes(x = average_price)) +
  geom_histogram(bins = 30)

# Read organic2.csv; this overwrites the table previously held in `data`.
library("readr")
data <- read_csv(file = "organic2.csv")
## Rows: 6312 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot: total_volume (y) against average_price (x), formula interface.
plot(total_volume ~ average_price, data = data)

# Column-by-column summary of the loaded table.
summary(data)
##      date           average_price    total_volume        type          
##  Length:6312        Min.   :0.690   Min.   :   253   Length:6312       
##  Class :character   1st Qu.:1.350   1st Qu.:  8698   Class :character  
##  Mode  :character   Median :1.550   Median : 15740   Mode  :character  
##                     Mean   :1.575   Mean   : 25227                     
##                     3rd Qu.:1.770   3rd Qu.: 30554                     
##                     Max.   :2.780   Max.   :495084                     
##       year       geography            Mileage    
##  Min.   :2017   Length:6312        Min.   : 111  
##  1st Qu.:2018   Class :character   1st Qu.:1097  
##  Median :2019   Mode  :character   Median :2193  
##  Mean   :2019                      Mean   :1910  
##  3rd Qu.:2020                      3rd Qu.:2632  
##  Max.   :2020                      Max.   :2998
# Attach ggplot2 for the histogram that follows, then preview the first rows.
library("ggplot2")
head(data, n = 6L)
## # A tibble: 6 × 7
##   date      average_price total_volume type     year geography           Mileage
##   <chr>             <dbl>        <dbl> <chr>   <dbl> <chr>                 <dbl>
## 1 12/3/2017          1.58        38754 organic  2017 Baltimore/Washingt…    2679
## 2 12/3/2017          1.77         1829 organic  2017 Boise                   827
## 3 12/3/2017          1.88        21338 organic  2017 Boston                 2998
## 4 12/3/2017          1.18         7575 organic  2017 Buffalo/Rochester      2552
## 5 12/3/2017          1.9          9558 organic  2017 Charlotte              2428
## 6 12/3/2017          1.75        32233 organic  2017 Chicago                2030
# Distribution of average_price for the organic data.
# `bins` is set explicitly to 30, the value stat_bin() falls back to when
# nothing is given, so the plot is unchanged but ggplot2 no longer emits its
# "Pick better value with `binwidth`" message. This also matches the combined
# histogram further down, which already passes bins = 30.
ggplot(data = data, mapping = aes(x = average_price)) +
  geom_histogram(bins = 30)

# Read avocado.csv; this overwrites the table previously held in `data`.
library("readr")
data <- read_csv(file = "avocado.csv")
## Rows: 12628 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot: total_volume (y) against average_price (x), formula interface.
plot(total_volume ~ average_price, data = data)

# Column-by-column summary of the loaded table.
summary(data)
##      date           average_price    total_volume         type          
##  Length:12628       Min.   :0.500   Min.   :    253   Length:12628      
##  Class :character   1st Qu.:1.100   1st Qu.:  15733   Class :character  
##  Mode  :character   Median :1.320   Median :  94806   Mode  :character  
##                     Mean   :1.359   Mean   : 325259                     
##                     3rd Qu.:1.570   3rd Qu.: 430222                     
##                     Max.   :2.780   Max.   :5660216                     
##       year       geography            Mileage    
##  Min.   :2017   Length:12628       Min.   : 111  
##  1st Qu.:2018   Class :character   1st Qu.:1097  
##  Median :2019   Mode  :character   Median :2193  
##  Mean   :2019                      Mean   :1911  
##  3rd Qu.:2020                      3rd Qu.:2632  
##  Max.   :2020                      Max.   :2998
# Histogram of average_price with bars filled by `type`.
# Fill colours are assigned to the `type` values in order (green, then pink);
# every bar gets a red outline.
ggplot(
  data = data,
  mapping = aes(x = average_price, fill = type)
) +
  geom_histogram(bins = 30, colour = "red") +
  scale_fill_manual(values = c("green", "pink")) +
  ggtitle("Test 1")

# Stacked bar chart of total_volume per geography, coloured by year.
# - reorder(..., FUN = sum) orders the bars by the quantity geom_col() actually
#   draws (the stacked sum of total_volume); reorder()'s default FUN = mean
#   orders by the per-row average, which can disagree with the bar heights
#   when geographies have different numbers of rows.
# - factor(year) gives each year its own discrete fill colour; `year` is read
#   as a numeric column and would otherwise be mapped to a continuous gradient.
# - coord_flip() puts the geography names on the vertical axis, which keeps
#   long names such as "Buffalo/Rochester" readable.
# - labs() replaces the raw "reorder(geography, total_volume)" axis title.
ggplot(
  data = data,
  mapping = aes(
    x = reorder(geography, total_volume, FUN = sum),
    y = total_volume,
    fill = factor(year)
  )
) +
  geom_col() +
  coord_flip() +
  labs(x = "geography", y = "total_volume", fill = "year")

This is the end of part 1 of my analysis.