1 Introduction

The goal is to read the data as provided from the following suggestion in discussion board (Forum: Discussion 5: Untidy Data) by Samriti Malhotra https://bbhosted.cuny.edu/webapps/discussionboard/do/message?action=list_messages&course_id=_1705328_1&nav=discussion_board&conf_id=_1845527_1&forum_id=_1908779_1&message_id=_31309467_1

2 Challenges

3 Pre-Requistes : Available Libraries

  • readxl
  • DT
  • data.table
  • tidyr
  • dplyr

4 Read File into R from local working directory

5 Read XLSX into R

excel_sheets(path = mySourceFileConventional)
## [1] "Worksheet"
excel_sheets(path = mySourceFileOrganic)
## [1] "Worksheet"

5.1 Data Table Conventional

5.2 Data Table Organic

6 Data Scrubbing

6.1 Grab the data description

df_Conventional_description <- basename(mySourceFileConventional)
df_Conventional_description
## [1] "HAB_Retail_Volume_and_Price_2018_conventional_TotalUS.xls"
df_Organic_description <- basename(mySourceFileOrganic)
df_Organic_description
## [1] "HAB_Retail_Volume_and_Price_2018_organic_TotalUS.xls"

6.2 Update the header

colnames(df_Conventional) <- c("Date","Conventional Average Sales Price","Conventional Total Volume Sold","Conventional Volume Sold for PLU4046","Conventional Volume Sold for PLU4225","Conventional Volume Sold for PLU4770","Conventional Total Volume Bagged","Conventional Volume SM Bagged","Conventional Volume L Bagged","Conventional Volume XL Bagged")
colnames(df_Organic) <- str_replace_all(colnames(df_Conventional), "Conventional", "Organic")

6.3 Modify data type of Date column to be of type Date

typeof(df_Conventional$Date)
## [1] "character"
typeof(df_Organic$Date)
## [1] "character"
df_Conventional <- transform(df_Conventional, Date = as.Date(Date,"%Y-%m-%d"))
df_Organic <- transform(df_Organic, Date = as.Date(Date,"%Y-%m-%d"))
typeof(df_Conventional$Date)
## [1] "double"
typeof(df_Organic$Date)
## [1] "double"

6.4 Add new column - Type

df_Conventional$Type="Conventional"
df_Organic <- cbind(df_Organic, Type="Organic")

6.5 Merge the data frames By Date

df_Avocados_HorizontalMerge <-  full_join(df_Conventional, df_Organic, by = "Date")

#df_Avocados_VerticalMerge <- union_all(df_Conventional, df_Organic)
df_Conventional2 <- df_Conventional
df_Organic2 <- df_Organic
colnames(df_Conventional2) <- str_replace_all(colnames(df_Conventional2), "Conventional.", "")
colnames(df_Organic2) <- str_replace_all(colnames(df_Organic2), "Organic.", "")
df_Avocados_VerticalMerge <- union_all(df_Conventional2, df_Organic2)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

7 Show tidy data

7.1 Data Table Conventional

DT::datatable(df_Conventional, options = list(pagelength=5))

7.2 Data Table Organic

DT::datatable(df_Organic, options = list(pagelength=5))

7.3 Data Table Horizontal Merge Conventional+Organic

DT::datatable(df_Avocados_HorizontalMerge, options = list(pagelength=5))

7.4 Data Table Vertical Merge Conventional+Organic

DT::datatable(df_Avocados_VerticalMerge, options = list(pagelength=5))

8 Visualizations

df_Avocados_HorizontalMerge %>% ggplot(aes(x=Date)) + 
  geom_point(aes(y=Conventional.Average.Sales.Price)) + 
    geom_line(aes(y=Conventional.Average.Sales.Price, color="Conventional Average Sales Price")) + 
  geom_point(aes(y=Organic.Average.Sales.Price)) + 
    geom_line(aes(y=Organic.Average.Sales.Price, color="Organic Average Sales Price")) + 
  labs(title="Avocado ASP - Average Sales Price", x="Date", y="Dollars", colour="") + 
  scale_colour_manual(values=c("red", "green"))

df_Avocados_VerticalMerge$Month <- month(df_Avocados_VerticalMerge$Date)
ggplot(df_Avocados_VerticalMerge, aes(x = Month, y = Total.Volume.Sold/1000, fill = Type)) + 
  geom_bar(position = "stack", stat = "identity") + facet_grid(~ Month) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  scale_fill_discrete(labels = c("Conventional", "Organic")) +
  ylab("Volume")

ggplot(df_Avocados_VerticalMerge, aes(x=Date, y=Total.Volume.Sold/1000)) + 
  geom_bar(aes(fill = Type), position = "dodge", stat = "identity") + 
  xlab("Date") +
  ylab("Total Volume Sold")

9 Conclusion

  • The average price of Conventional is consistently lower than Organic by considerable amount
  • The total sales of Conventional is consistently higher than Organic by considerable amount