Setting the working directory, loading libraries, and reading the file into a data frame

# Setting the working directory
# The path below is specific to the original author's machine. Only switch to
# it when it exists, so the script can still run elsewhere with the CSV placed
# in the current working directory instead of aborting at setwd().
data_dir <- "C:/Users/keiva/Dropbox (Personal)/GW/06- Fall 2017/01- Programming in business analytics/03- Week 03 ( 14 Sep 2017)/Assignment"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Data directory not found; reading files from ", getwd())
}

#Calling libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.1
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.4.1
## Warning: package 'tibble' was built under R version 3.4.1
## Warning: package 'tidyr' was built under R version 3.4.1
## Warning: package 'readr' was built under R version 3.4.1
## Warning: package 'purrr' was built under R version 3.4.1
## Warning: package 'dplyr' was built under R version 3.4.1
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(dplyr)


# Load the supermarket transactions CSV (resolved against the current working
# directory) into the data frame used by every analysis section below.
mydf <- readr::read_csv(file = "SupermarketTransactions.csv")
## Parsed with column specification:
## cols(
##   Transaction = col_integer(),
##   `Purchase Date` = col_character(),
##   `Customer ID` = col_integer(),
##   Gender = col_character(),
##   `Marital Status` = col_character(),
##   Homeowner = col_character(),
##   Children = col_integer(),
##   `Annual Income` = col_character(),
##   City = col_character(),
##   `State or Province` = col_character(),
##   Country = col_character(),
##   `Product Family` = col_character(),
##   `Product Department` = col_character(),
##   `Product Category` = col_character(),
##   `Units Sold` = col_integer(),
##   Revenue = col_double()
## )

B.a — All purchases made during January and February of 2008

# Convert the purchase date text (month/day/4-digit year) to Date.
# This overwrites the column in mydf for the rest of the script.
mydf$`Purchase Date` <- as.Date(mydf$`Purchase Date`, format = "%m/%d/%Y")

# Keep purchases from January and February 2008. 2008 is a leap year, so the
# upper bound is the half-open limit "before 1 March" rather than "<= 28 Feb",
# which would silently drop 29 Feb. which() discards rows whose date is NA
# instead of turning them into all-NA rows.
sort1 <- mydf[which(
  mydf$`Purchase Date` >= as.Date("2008-01-01") &
    mydf$`Purchase Date` < as.Date("2008-03-01")
), ]

# Plot layout: fixed plot region size plus inner and outer margins. The setting
# is not restored afterwards, so it stays active for later plots on this device.
par(pin = c(6, 2), mar = c(2, 2, 2, 2), oma = c(7, 4, 0, 2))

# Frequency of each "Units Sold" value within the period.
barplot(
  table(sort1$`Units Sold`),
  main = "Bar plot for Units sold between Jan & Feb 2008",
  xlab = "Units Sold",
  ylab = "Frequency",
  border = "red",
  col = "yellow",
  las = 1
)

# Density-scaled histogram of revenue within the period.
hist(
  sort1$Revenue,
  col = "brown",
  main = "Histogram for Revenue between Jan & Feb 2008",
  xlab = "Revenue",
  las = 1,
  breaks = 10,
  border = "red",
  probability = TRUE
)

B.b — All purchases made by married female homeowners.

# Purchases made by married female homeowners. The task asks for homeowners,
# so the Homeowner condition is required in addition to gender and marital
# status. which() discards rows where any of the three fields is NA.
# NOTE(review): assumes Homeowner is coded "Y"/"N" -- confirm against the data.
sort2 <- mydf[which(
  mydf$Gender == "F" &
    mydf$`Marital Status` == "M" &
    mydf$Homeowner == "Y"
), ]

# Frequency of each "Units Sold" value for this customer group.
barplot(
  table(sort2$`Units Sold`),
  main = "Bar plot for Units sold to married female homeowners",
  xlab = "Units sold",
  ylab = "Frequency",
  border = "red",
  col = "yellow",
  las = 1
)

# Density-scaled histogram of revenue for this customer group.
hist(
  sort2$Revenue,
  col = "brown",
  main = "Histogram for Revenue from married female homeowners",
  xlab = "Revenue",
  las = 1,
  breaks = 10,
  border = "red",
  probability = TRUE
)

B.c — All purchases made in the state of California

# Rows whose "State or Province" equals "CA".
sort3 <- mydf[mydf$`State or Province` == "CA", ]

# Frequency of each "Units Sold" value among those rows.
barplot(
  table(sort3$`Units Sold`),
  las = 1,
  col = "yellow",
  border = "red",
  xlab = "Units sold",
  ylab = "Frequency",
  main = "Bar plot for Units sold in California"
)

# Density-scaled histogram of revenue among those rows.
hist(
  sort3$Revenue,
  breaks = 10,
  probability = TRUE,
  las = 1,
  col = "brown",
  border = "red",
  xlab = "Revenue",
  main = "Histogram for Revenue in California"
)

B.d — All purchases made in the Produce product department

# Rows whose "Product Department" equals "Produce".
sort4 <- mydf[mydf$`Product Department` == "Produce", ]

# Frequency of each "Units Sold" value among those rows.
barplot(
  table(sort4$`Units Sold`),
  las = 1,
  col = "yellow",
  border = "red",
  xlab = "Units sold",
  ylab = "Frequency",
  main = "Bar plot for Units sold in Produce product department"
)

# Density-scaled histogram of revenue among those rows.
hist(
  sort4$Revenue,
  breaks = 10,
  probability = TRUE,
  las = 1,
  col = "brown",
  border = "red",
  xlab = "Revenue",
  main = "Histogram for Revenue in Produce product department"
)

Write a report that is less than 250 words that summarizes your analysis:

None of these filters produces a dramatic change in the distribution of revenue or units sold. This suggests that these two variables are not correlated with the variables used for filtering. For example, revenue does not appear to be correlated with gender.