R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents.

For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Setting up my environment

Notes: Install dplyr and ggplot2

# Attach the packages this analysis depends on.
# Each package is installed only when it is not already available, so
# re-running the document does not reinstall it every time. library() is used
# instead of require() because it stops with an error when a package cannot be
# loaded, rather than returning FALSE and letting later code fail.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
## Loading required package: ggplot2

## Let's load the dataset and check the summary

# Read the department-store data from the CSV file (path is relative to the
# working directory) and print a per-column summary of it.
store <- read.csv(file = "Department Store.csv")
summary(object = store)
##   UNIQUE_ID         PRODUCT_NAME         COMPANY          PRODUCT_TYPE      
##  Length:550         Length:550         Length:550         Length:550        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  PRODUCT_CATEGORY     COST_PRICE     SELLING_PRICE    QUANTITY_DEMANDED
##  Length:550         Min.   :  9.00   Min.   :  10.0   Min.   :   2.0   
##  Class :character   1st Qu.: 72.97   1st Qu.:  98.5   1st Qu.: 120.0   
##  Mode  :character   Median :180.06   Median : 229.0   Median : 316.0   
##                     Mean   :216.53   Mean   : 274.9   Mean   : 327.2   
##                     3rd Qu.:320.54   3rd Qu.: 422.0   3rd Qu.: 500.0   
##                     Max.   :912.00   Max.   :1200.0   Max.   :1100.0   
##      PROFIT       PROFIT_PERCENT    NET_PROFIT      
##  Min.   :  0.66   Min.   : 5.00   Min.   :   223.4  
##  1st Qu.: 18.71   1st Qu.:15.00   1st Qu.:  4299.4  
##  Median : 41.16   Median :20.00   Median :  9350.0  
##  Mean   : 58.41   Mean   :29.82   Mean   : 12092.5  
##  3rd Qu.: 80.81   3rd Qu.:31.58   3rd Qu.: 13661.9  
##  Max.   :288.00   Max.   :90.00   Max.   :136800.0

## PART 1: Data Transformation

i. GET THE INFORMATION OF COLUMNS 4-10
ii. WHERE PRODUCT_TYPE IS 'household'
iii. ARRANGED IN ASCENDING ORDER OF QUANTITY_DEMANDED.

# Part 1 pipeline: keep columns 4 through 10 of `store`, retain the rows whose
# PRODUCT_TYPE is "household", then order them by QUANTITY_DEMANDED
# (arrange() sorts ascending by default). Each stage keeps its own name.
store1 <- store %>% select(4:10)
store2 <- store1 %>% filter(PRODUCT_TYPE == "household")
store3 <- store2 %>% arrange(QUANTITY_DEMANDED)
summary(store3)
##  PRODUCT_TYPE       PRODUCT_CATEGORY     COST_PRICE     SELLING_PRICE  
##  Length:21          Length:21          Min.   : 47.88   Min.   : 63.0  
##  Class :character   Class :character   1st Qu.:171.00   1st Qu.:225.0  
##  Mode  :character   Mode  :character   Median :218.88   Median :288.0  
##                                        Mean   :257.78   Mean   :339.2  
##                                        3rd Qu.:277.40   3rd Qu.:365.0  
##                                        Max.   :649.80   Max.   :855.0  
##  QUANTITY_DEMANDED     PROFIT       PROFIT_PERCENT 
##  Min.   : 70.0     Min.   : 15.12   Min.   :31.58  
##  1st Qu.:223.0     1st Qu.: 54.00   1st Qu.:31.58  
##  Median :311.0     Median : 69.12   Median :31.58  
##  Mean   :276.5     Mean   : 81.41   Mean   :31.58  
##  3rd Qu.:350.0     3rd Qu.: 87.60   3rd Qu.:31.58  
##  Max.   :425.0     Max.   :205.20   Max.   :31.58

## PART 2: Statistical Interpretation

  1. FIND THE AVERAGE AND SUMMATION OF QUANTITY_DEMANDED
  2. GROUPED BY PRODUCT CATEGORY
# Average and total QUANTITY_DEMANDED for each PRODUCT_CATEGORY in store3.
# The summary must be computed on the grouped table (store4): summarising
# store3 directly ignores the grouping and collapses everything into one row.
# NOTE: the printed result that follows this code was produced by the
# ungrouped call; re-knit the document to regenerate it.
store4 <- group_by(store3, PRODUCT_CATEGORY)
summarise(
  store4,
  AVERAGE = mean(QUANTITY_DEMANDED),
  SUM = sum(QUANTITY_DEMANDED)
)
##    AVERAGE  SUM
## 1 276.5238 5807

## PART 3: DATA VISUALIZATION

  1. BUILD A LINE PLOT FOR AVERAGE_PROFIT & COMPANY
# Line plot of the mean PROFIT per COMPANY. `group = 1` puts every company in
# a single group so geom_line() joins the points across the discrete x axis.
store %>%
  group_by(COMPANY) %>%
  summarise(AVERAGE_PROFIT = mean(PROFIT)) %>%
  ggplot(mapping = aes(x = COMPANY, y = AVERAGE_PROFIT, group = 1)) +
  geom_line(colour = "PURPLE")

  2. BUILD A COXCOMB CHART OF THE QUANTITY DEMANDED FOR EACH PRODUCT_CATEGORY WHERE THE PRODUCT_TYPE IS “Organic food”.
# Total QUANTITY_DEMANDED per PRODUCT_CATEGORY among "Organic food" products.
store1 <- store %>%
  filter(PRODUCT_TYPE == "Organic food") %>%
  group_by(PRODUCT_CATEGORY) %>%
  summarise(QUANTITY_DEMANDED = sum(QUANTITY_DEMANDED))

# Sort the categories in descending name order, then add each category's
# rounded percentage share and the midpoint of its cumulative share (y_pos).
store2 <- store1 %>%
  arrange(desc(PRODUCT_CATEGORY)) %>%
  mutate(
    percentage = round(QUANTITY_DEMANDED * 100 / sum(QUANTITY_DEMANDED)),
    y_pos = cumsum(percentage) - 0.5 * percentage
  )

# Coxcomb chart: one bar per category, drawn in polar coordinates.
ggplot(
  store2,
  aes(PRODUCT_CATEGORY, QUANTITY_DEMANDED, fill = PRODUCT_CATEGORY)
) +
  geom_col() +
  coord_polar(theta = "x", start = 0, direction = -1) +
  labs(x = "DIFFERENT PRODUCTS", y = "QUANTITY")