library(ggplot2)
library(readr)
# Load the avocado dataset from the CSV file (path is relative to the
# current working directory).
avocado <- read_csv(file = 'avocado.csv')
## Rows: 12628 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, type, geography
## dbl (4): average_price, total_volume, year, Mileage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print per-column summary statistics for the loaded data.
summary(object = avocado)
## date average_price total_volume type
## Length:12628 Min. :0.500 Min. : 253 Length:12628
## Class :character 1st Qu.:1.100 1st Qu.: 15733 Class :character
## Mode :character Median :1.320 Median : 94806 Mode :character
## Mean :1.359 Mean : 325259
## 3rd Qu.:1.570 3rd Qu.: 430222
## Max. :2.780 Max. :5660216
## year geography Mileage
## Min. :2017 Length:12628 Min. : 111
## 1st Qu.:2018 Class :character 1st Qu.:1097
## Median :2019 Mode :character Median :2193
## Mean :2019 Mean :1911
## 3rd Qu.:2020 3rd Qu.:2632
## Max. :2020 Max. :2998
# Plot average price against product type.
# The title names the variable actually mapped to the x axis (`type`), so the
# figure label matches what is drawn.
ggplot(avocado, aes(x = type, y = average_price)) +
  geom_point() +
  ggtitle("Avocado Prices by Type")
# Comparing avocado prices by type (conventional vs. organic) shows that organic avocados tend to be priced noticeably higher.
# Draw a regression plot using the variables average_price and total_volume.
# Scatter plot of average price against total volume with a fitted smooth.
# Every layer, including the title, must be chained with `+`: a ggtitle() call
# written as its own statement only prints a labels object and leaves the
# plot untitled.
ggplot(avocado, aes(x = total_volume, y = average_price)) +
  geom_point() +
  stat_smooth() +
  ggtitle("Avocado Prices by Volume")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## $title
## [1] "Avocado Prices by Volume"
##
## attr(,"class")
## [1] "labels"