Part B
a
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Parsed with column specification:
## cols(
## Transaction = col_integer(),
## `Purchase Date` = col_character(),
## `Customer ID` = col_integer(),
## Gender = col_character(),
## `Marital Status` = col_character(),
## Homeowner = col_character(),
## Children = col_integer(),
## `Annual Income` = col_character(),
## City = col_character(),
## `State or Province` = col_character(),
## Country = col_character(),
## `Product Family` = col_character(),
## `Product Department` = col_character(),
## `Product Category` = col_character(),
## `Units Sold` = col_integer(),
## Revenue = col_double()
## )
b, c, d
# b ----
# Keep only the rows whose `Marital Status` is "M". subset() also drops rows
# where the condition evaluates to NA.
mydf_b <- subset(mydf, `Marital Status` == "M")

# table() counts how many rows share each distinct `Units Sold` value, so the
# bars are indexed by units sold (x) and their height is a row count (y).
barplot(
  table(mydf_b$`Units Sold`),
  main = "Units sold distribution",
  xlab = "Units sold",
  ylab = "Number of transactions"
)

# Distribution of Revenue within the same subset.
hist(
  mydf_b$Revenue,
  main = "Revenue distribution",
  xlab = "Revenue"
)
# c ----
# Keep only the rows whose `State or Province` is "CA". subset() also drops
# rows where the condition evaluates to NA.
mydf_c <- subset(mydf, `State or Province` == "CA")

# table() counts how many rows share each distinct `Units Sold` value, so the
# x-axis holds the units-sold values and the y-axis holds their frequency.
# (The axis labels were previously swapped/mislabelled.)
barplot(
  table(mydf_c$`Units Sold`),
  main = "Units sold distribution",
  xlab = "Units sold",
  ylab = "Number of transactions",
  border = "yellow"
)

# Distribution of Revenue within the same subset.
hist(
  mydf_c$Revenue,
  main = "Revenue distribution",
  xlab = "Revenue"
)
# d ----
# Keep only the rows whose `Product Department` is "Produce". subset() also
# drops rows where the condition evaluates to NA.
mydf_d <- subset(mydf, `Product Department` == "Produce")

# table() counts how many rows share each distinct `Units Sold` value, so the
# x-axis holds the units-sold values and the y-axis holds their frequency.
# (The axis labels were previously swapped/mislabelled.)
barplot(
  table(mydf_d$`Units Sold`),
  main = "Units sold distribution",
  xlab = "Units sold",
  ylab = "Number of transactions",
  border = "yellow"
)

# Distribution of Revenue within the same subset.
hist(
  mydf_d$Revenue,
  main = "Revenue distribution",
  xlab = "Revenue"
)