Part B

a

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Parsed with column specification:
## cols(
##   Transaction = col_integer(),
##   `Purchase Date` = col_character(),
##   `Customer ID` = col_integer(),
##   Gender = col_character(),
##   `Marital Status` = col_character(),
##   Homeowner = col_character(),
##   Children = col_integer(),
##   `Annual Income` = col_character(),
##   City = col_character(),
##   `State or Province` = col_character(),
##   Country = col_character(),
##   `Product Family` = col_character(),
##   `Product Department` = col_character(),
##   `Product Category` = col_character(),
##   `Units Sold` = col_integer(),
##   Revenue = col_double()
## )

b, c, d

# b
# Keep only the rows whose `Marital Status` equals "M"; subset() also drops
# rows where that comparison evaluates to NA.
mydf_b <- subset(mydf, `Marital Status` == "M")

# Bar chart with one bar per distinct `Units Sold` value; bar height is the
# number of rows carrying that value.
barplot(table(mydf_b[["Units Sold"]]))

# Histogram of `Revenue` for the same subset. The title and x-axis label are
# written out so the figure keeps the text hist() would otherwise derive from
# the `mydf_b$Revenue` argument expression.
hist(
  mydf_b[["Revenue"]],
  main = "Histogram of mydf_b$Revenue",
  xlab = "mydf_b$Revenue"
)

# c
# Keep only the rows whose `State or Province` equals "CA"; subset() also
# drops rows where that comparison evaluates to NA.
mydf_c <- subset(mydf, `State or Province` == "CA")

# table() counts how many rows share each distinct `Units Sold` value, so the
# bars are positioned by units-sold value (x axis) and their heights are row
# counts (y axis). The axis labels follow that orientation; each row is
# presumably one transaction -- confirm against the data dictionary.
barplot(
  table(mydf_c$`Units Sold`),
  main = "Units sold distribution",
  xlab = "Units sold",
  ylab = "Number of transactions",
  border = "yellow"
)

# Histogram of `Revenue` for the same subset, using hist() defaults.
hist(mydf_c$Revenue)

# d
# Keep only the rows whose `Product Department` equals "Produce"; subset()
# also drops rows where that comparison evaluates to NA.
mydf_d <- subset(mydf, `Product Department` == "Produce")

# table() counts how many rows share each distinct `Units Sold` value, so the
# bars are positioned by units-sold value (x axis) and their heights are row
# counts (y axis). The axis labels follow that orientation; each row is
# presumably one transaction -- confirm against the data dictionary.
barplot(
  table(mydf_d$`Units Sold`),
  main = "Units sold distribution",
  xlab = "Units sold",
  ylab = "Number of transactions",
  border = "yellow"
)

# Histogram of `Revenue` for the same subset, using hist() defaults.
hist(mydf_d$Revenue)