This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:
# Download the avocado data set. The "UTF-8-BOM" encoding makes R drop a
# leading byte-order mark instead of gluing it onto the first column name.
urlfile <- "https://raw.github.com/utjimmyx/resources/master/avocado_HAA.csv"
data <- read.csv(file = urlfile, fileEncoding = "UTF-8-BOM")

# Per-column overview: quantiles for numeric columns, length/class for text
summary(data)
## date average_price total_volume type
## Length:12628 Min. :0.500 Min. : 253 Length:12628
## Class :character 1st Qu.:1.100 1st Qu.: 15733 Class :character
## Mode :character Median :1.320 Median : 94806 Mode :character
## Mean :1.359 Mean : 325259
## 3rd Qu.:1.570 3rd Qu.: 430222
## Max. :2.780 Max. :5660216
## year geography
## Min. :2017 Length:12628
## 1st Qu.:2018 Class :character
## Median :2019 Mode :character
## Mean :2019
## 3rd Qu.:2020
## Max. :2020
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Inspect the structure of the data frame: column names, types, first values
str(data)
## 'data.frame': 12628 obs. of 6 variables:
## $ date : chr "2017/12/3" "2017/12/3" "2017/12/3" "2017/12/3" ...
## $ average_price: num 1.39 1.44 1.07 1.62 1.43 1.58 1.14 1.77 1.4 1.88 ...
## $ total_volume : int 139970 3577 504933 10609 658939 38754 86646 1829 488588 21338 ...
## $ type : chr "conventional" "organic" "conventional" "organic" ...
## $ year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ geography : chr "Albany" "Albany" "Atlanta" "Atlanta" ...
# Base-graphics histogram of the average_price column
hist(
  x = data[["average_price"]],
  main = "Histogram of average_price",
  xlab = "Price in USD (US Dollar)"
)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Stacked histogram of average price, filled by avocado type.
# The fill colours are named so each type keeps its colour regardless of the
# order in which the type values happen to sort.
ggplot(data, aes(x = average_price, fill = type)) +
  geom_histogram(bins = 30, colour = "red") +
  scale_fill_manual(values = c(conventional = "blue", organic = "green")) +
  ggtitle("Frequency of Average Price - Organic vs. Conventional")
# Stacked bar chart of total_volume per geography, filled by year.
# geom_col() stacks every row of a geography, so the drawn bar height is the
# SUM of total_volume. reorder() defaults to the MEAN, so FUN = sum is passed
# to sort the bars by the height that is actually drawn.
p <- ggplot(
  data,
  aes(
    x = reorder(geography, total_volume, FUN = sum),
    y = total_volume,
    fill = year
  )
) +
  geom_col() +
  # Explicit axis titles; otherwise the x title would be the reorder() call
  xlab("geography") +
  ylab("total_volume") +
  # Rotate the x-axis labels by 90 degrees and use a small font
  theme(axis.text.x = element_text(angle = 90, size = 7))

# Render p with comma-formatted y-axis labels; label_comma() is from scales
library(scales)
p + scale_y_continuous(labels = label_comma())
# Box plot of total_volume for each geography, one fill colour per geography
ggplot(
  data = data,
  mapping = aes(x = factor(geography), y = total_volume, fill = geography)
) +
  geom_boxplot()
# Box plot of total_volume per geography with rotated, small x-axis labels
p1 <- ggplot(
  data = data,
  mapping = aes(x = factor(geography), y = total_volume)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, size = 7))

# Render p1 with comma-formatted y-axis labels; label_comma() is from scales
library(scales)
p1 +
  scale_y_continuous(labels = label_comma())
# Render the bar chart stored in p with comma-formatted y-axis labels
library(scales)
p +
  scale_y_continuous(labels = label_comma())