This is an R Markdown document. Markdown is a simple formatting syntax for authoring
HTML, PDF, and MS Word documents.
For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## Setting up my environment
Notes: Install and load dplyr and ggplot2.
# Install dplyr only when it is not already available, so that re-running or
# re-knitting the document does not reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
# Attach dplyr. library() stops with an error when the package is missing,
# whereas require() only warns and returns FALSE, letting later calls fail
# with a less obvious message.
library(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Install ggplot2 only when it is not already available, so that re-running or
# re-knitting the document does not reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
# Attach ggplot2. library() stops with an error when the package is missing,
# whereas require() only warns and returns FALSE, letting later calls fail
# with a less obvious message.
library(ggplot2)
## Loading required package: ggplot2
## Let’s load the dataset and check the summary
# Read the department-store data set from the working directory and print
# summary statistics for every column.
store <- read.csv(file = "Department Store.csv")
summary(object = store)
## UNIQUE_ID PRODUCT_NAME COMPANY PRODUCT_TYPE
## Length:550 Length:550 Length:550 Length:550
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## PRODUCT_CATEGORY COST_PRICE SELLING_PRICE QUANTITY_DEMANDED
## Length:550 Min. : 9.00 Min. : 10.0 Min. : 2.0
## Class :character 1st Qu.: 72.97 1st Qu.: 98.5 1st Qu.: 120.0
## Mode :character Median :180.06 Median : 229.0 Median : 316.0
## Mean :216.53 Mean : 274.9 Mean : 327.2
## 3rd Qu.:320.54 3rd Qu.: 422.0 3rd Qu.: 500.0
## Max. :912.00 Max. :1200.0 Max. :1100.0
## PROFIT PROFIT_PERCENT NET_PROFIT
## Min. : 0.66 Min. : 5.00 Min. : 223.4
## 1st Qu.: 18.71 1st Qu.:15.00 1st Qu.: 4299.4
## Median : 41.16 Median :20.00 Median : 9350.0
## Mean : 58.41 Mean :29.82 Mean : 12092.5
## 3rd Qu.: 80.81 3rd Qu.:31.58 3rd Qu.: 13661.9
## Max. :288.00 Max. :90.00 Max. :136800.0
## PART 1: Data Transformation
i. GET THE INFORMATION OF THE COLUMNS 4-10, ii. WHERE PRODUCT_TYPE IS ‘household’, iii. ARRANGED IN ASCENDING ORDER OF QUANTITY_DEMANDED.
# Keep columns 4 through 10 of the raw data.
store1 <- store %>%
  select(4:10)
# Keep only the rows whose PRODUCT_TYPE is "household".
store2 <- store1 %>%
  filter(PRODUCT_TYPE == "household")
# Sort the remaining rows by ascending QUANTITY_DEMANDED.
store3 <- store2 %>%
  arrange(QUANTITY_DEMANDED)
summary(store3)
## PRODUCT_TYPE PRODUCT_CATEGORY COST_PRICE SELLING_PRICE
## Length:21 Length:21 Min. : 47.88 Min. : 63.0
## Class :character Class :character 1st Qu.:171.00 1st Qu.:225.0
## Mode :character Mode :character Median :218.88 Median :288.0
## Mean :257.78 Mean :339.2
## 3rd Qu.:277.40 3rd Qu.:365.0
## Max. :649.80 Max. :855.0
## QUANTITY_DEMANDED PROFIT PROFIT_PERCENT
## Min. : 70.0 Min. : 15.12 Min. :31.58
## 1st Qu.:223.0 1st Qu.: 54.00 1st Qu.:31.58
## Median :311.0 Median : 69.12 Median :31.58
## Mean :276.5 Mean : 81.41 Mean :31.58
## 3rd Qu.:350.0 3rd Qu.: 87.60 3rd Qu.:31.58
## Max. :425.0 Max. :205.20 Max. :31.58
## PART 2: Statistical Interpretation
# Mean and total QUANTITY_DEMANDED per PRODUCT_CATEGORY.
# The summary must be computed on the grouped table (store4); summarising
# store3 ignores the grouping and collapses everything into one overall row.
# NOTE: the printed output recorded below predates this fix and shows the
# ungrouped totals; re-knit the document to refresh it.
store4 <- group_by(store3, PRODUCT_CATEGORY)
summarise(
  store4,
  AVERAGE = mean(QUANTITY_DEMANDED),
  SUM = sum(QUANTITY_DEMANDED)
)
## AVERAGE SUM
## 1 276.5238 5807
## PART 3: Data Visualization
# Line chart of the mean PROFIT for each COMPANY.
# group = 1 places every company in a single group so that geom_line()
# connects the points across the discrete x axis.
store %>%
  group_by(COMPANY) %>%
  summarise(AVERAGE_PROFIT = mean(PROFIT)) %>%
  ggplot(mapping = aes(x = COMPANY, y = AVERAGE_PROFIT, group = 1)) +
  geom_line(color = "PURPLE")
# Total QUANTITY_DEMANDED per PRODUCT_CATEGORY, restricted to the rows whose
# PRODUCT_TYPE is "Organic food".
store1 <- store %>%
  filter(PRODUCT_TYPE == "Organic food") %>%
  group_by(PRODUCT_CATEGORY) %>%
  summarise(QUANTITY_DEMANDED = sum(QUANTITY_DEMANDED))
# Order categories in descending name order, then add each category's rounded
# share of the total quantity (percentage) and the midpoint of its slice on
# the cumulative percentage scale (y_pos).
store2 <- store1 %>%
  arrange(desc(PRODUCT_CATEGORY)) %>%
  mutate(
    percentage = round(QUANTITY_DEMANDED * 100 / sum(QUANTITY_DEMANDED)),
    y_pos = cumsum(percentage) - 0.5 * percentage
  )
# Bar chart of QUANTITY_DEMANDED per PRODUCT_CATEGORY drawn in polar
# coordinates, with the category mapped to the angle and the fill colour.
store2 %>%
  ggplot(
    mapping = aes(
      x = PRODUCT_CATEGORY,
      y = QUANTITY_DEMANDED,
      fill = PRODUCT_CATEGORY
    )
  ) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "x", start = 0, direction = -1) +
  xlab("DIFFERENT PRODUCTS") +
  ylab("QUANTITY")