Now for the package
# Loading just the "tidyverse" library
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# For this exercise we are using a dataset from kaggle.com which contains
# information about avocado sales in various cities in the USA
# ---------- Using the "readr" function "read_csv" -------
# Download the avocado sales CSV and convert the result of read_csv() into a
# base data frame. data.frame() also rewrites non-syntactic column names
# (e.g. `Total Volume` becomes Total.Volume and `4046` becomes X4046).
avacado <- read_csv(
  file = "https://raw.githubusercontent.com/tagensingh/SPS-DATA607-TIDYVERSE/main/avocado.csv"
) %>%
  data.frame()
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## Date = col_date(format = ""),
## AveragePrice = col_double(),
## `Total Volume` = col_double(),
## `4046` = col_double(),
## `4225` = col_double(),
## `4770` = col_double(),
## `Total Bags` = col_double(),
## `Small Bags` = col_double(),
## `Large Bags` = col_double(),
## `XLarge Bags` = col_double(),
## type = col_character(),
## year = col_double(),
## region = col_character()
## )
# ---------- Using the "as_tibble" function from "tibble" to look at a snapshot of the dataframe -------
# as_tibble() is the coercion function for an existing data frame; printing the
# resulting tibble shows only the first rows and the columns that fit on screen.
as_tibble(avacado)
## # A tibble: 18,249 x 14
## X1 Date AveragePrice Total.Volume X4046 X4225 X4770 Total.Bags
## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 2015-12-27 1.33 64237. 1037. 54455. 48.2 8697.
## 2 1 2015-12-20 1.35 54877. 674. 44639. 58.3 9506.
## 3 2 2015-12-13 0.93 118220. 795. 109150. 130. 8145.
## 4 3 2015-12-06 1.08 78992. 1132 71976. 72.6 5811.
## 5 4 2015-11-29 1.28 51040. 941. 43838. 75.8 6184.
## 6 5 2015-11-22 1.26 55980. 1184. 48068. 43.6 6684.
## 7 6 2015-11-15 0.99 83454. 1369. 73673. 93.3 8319.
## 8 7 2015-11-08 0.98 109428. 704. 101815. 80 6829.
## 9 8 2015-11-01 1.02 99811. 1022. 87316. 85.3 11388.
## 10 9 2015-10-25 1.07 74339. 842. 64757. 113 8626.
## # ... with 18,239 more rows, and 6 more variables: Small.Bags <dbl>,
## # Large.Bags <dbl>, XLarge.Bags <dbl>, type <chr>, year <dbl>, region <chr>
# ---------- Using the "arrange" function from "dplyr" to sort the dataframe by date -------
# Sort the rows by ascending Date and keep the result in a new object.
avacado_date <- avacado %>%
  arrange(Date)

# Print a snapshot of the sorted data; as_tibble() coerces the data frame to a
# tibble so only the first rows are shown.
as_tibble(avacado_date)
## # A tibble: 18,249 x 14
## X1 Date AveragePrice Total.Volume X4046 X4225 X4770 Total.Bags
## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 51 2015-01-04 1.22 40873. 2.82e3 2.83e4 4.99e1 9716.
## 2 51 2015-01-04 1 435021. 3.64e5 2.38e4 8.22e1 46816.
## 3 51 2015-01-04 1.08 788025. 5.40e4 5.53e5 4.00e4 141137.
## 4 51 2015-01-04 1.01 80034. 4.46e4 2.50e4 2.75e3 7756.
## 5 51 2015-01-04 1.02 491738 7.19e3 3.97e5 1.29e2 87663.
## 6 51 2015-01-04 1.4 116253. 3.27e3 5.57e4 1.10e2 57183.
## 7 51 2015-01-04 0.93 5777335. 2.84e6 2.27e6 1.37e5 528452.
## 8 51 2015-01-04 1.19 166006. 2.94e4 4.72e4 3.86e4 50798.
## 9 51 2015-01-04 1.11 783068. 3.03e4 5.51e5 1.25e5 77539.
## 10 51 2015-01-04 0.88 228570. 3.27e3 1.69e5 1.45e3 55083.
## # ... with 18,239 more rows, and 6 more variables: Small.Bags <dbl>,
## # Large.Bags <dbl>, XLarge.Bags <dbl>, type <chr>, year <dbl>, region <chr>
# ---------- Using the "ggplot" function from "ggplot2" to chart the pricing density of avocados -------
# Histogram overlaid with kernel density curve
ggplot(avacado, aes(x = AveragePrice)) +
  # Histogram with density instead of count on the y-axis, so it shares a scale
  # with the density curve. after_stat(density) replaces the deprecated
  # ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.1,
    colour = "black",
    fill = "white"
  ) +
  # Overlay with transparent density plot
  geom_density(alpha = 0.1, fill = "#FF6666") +
  ggtitle("Avacados Pricing Density")
