##(王府井)购物中心化妆品数据分析
## 该数据集为经过预处理的数据集
## 2016.07.04
## Daitu
## Switch the working directory to the folder that holds the 2015 data.
## NOTE(review): this is an absolute, user-specific path; the CSV is later
## read by a bare file name, so the script depends on this directory layout.
setwd("/Users/daitu/数据分析/化妆品/2015年")
## Print the working directory to confirm the change.
getwd()
## [1] "/Users/daitu/数据分析/化妆品/2015年"
## 加载包####
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(treemap)
## Load the (王府井) sales data ####
## The CSV is read as GBK; text columns stay character instead of factor.
filename <- "王府井商场2015年化妆品销售月报(改)副本.csv"
tongluowan_2015 <- read.csv(
  file = filename,
  fileEncoding = "GBK",
  stringsAsFactors = FALSE
)
## Rows and columns of the raw table.
dim(tongluowan_2015)
## [1] 30350 12
## First rows of the raw table.
head(tongluowan_2015)
## 年 月 类别 商品编码 品牌
## 1 2015 1 301101 化妆品香水类 266501 AURA BY SWAROVSKI
## 2 2015 1 301101 化妆品香水类 266503 AURA BY SWAROVSKI
## 3 2015 1 301101 化妆品香水类 266505 AURA BY SWAROVSKI
## 4 2015 1 301101 化妆品香水类 264356 BENEFIT
## 5 2015 1 301101 化妆品香水类 41532 BURBERRY
## 6 2015 1 301101 化妆品香水类 41542 BURBERRY
## 商品名称 单位 零售金额 规格 数量 实际销售额 折扣金额
## 1 AURA BY SWAROVSKI魅力女香 瓶 495 30ml 1 495 0
## 2 AURA BY SWAROVSKI淡香氛 瓶 430 30ml 1 430 0
## 3 AURA BY SWAROVSKI淡香氛 瓶 750 75ml 1 750 0
## 4 BENEFIT香氛露 瓶 380 30ml 1 380 0
## 5 BURBERRY新伦敦男士 甁 630 50ml 1 630 0
## 6 BURBERRY粉红恋歌 甁 650 50ml 1 650 0
## Show the structure of the table: one line per column with its type and first values.
str(tongluowan_2015)
## 'data.frame': 30350 obs. of 12 variables:
## $ 年 : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ 月 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ 类别 : chr "301101 化妆品香水类" "301101 化妆品香水类" "301101 化妆品香水类" "301101 化妆品香水类" ...
## $ 商品编码 : int 266501 266503 266505 264356 41532 41542 41547 94462 192402 248187 ...
## $ 品牌 : chr "AURA BY SWAROVSKI" "AURA BY SWAROVSKI" "AURA BY SWAROVSKI" "BENEFIT" ...
## $ 商品名称 : chr "AURA BY SWAROVSKI魅力女香" "AURA BY SWAROVSKI淡香氛" "AURA BY SWAROVSKI淡香氛" "BENEFIT香氛露" ...
## $ 单位 : chr "瓶" "瓶" "瓶" "瓶" ...
## $ 零售金额 : num 495 430 750 380 630 650 870 665 515 1010 ...
## $ 规格 : chr "30ml" "30ml" "75ml" "30ml" ...
## $ 数量 : num 1 1 1 1 1 1 1 1 2 1 ...
## $ 实际销售额: num 495 430 750 380 630 650 870 665 1030 1010 ...
## $ 折扣金额 : num 0 0 0 0 0 0 0 0 0 0 ...
## Preparation before the analysis ####
## Count the distinct products ------------------------------------
## Some product names carry stray marker characters (@, #, *, $). Remove every
## occurrence in a single pass. A character class is used because a bare "*"
## is not a valid regular expression, and str_replace() would only remove the
## first match in each name.
proname <- str_replace_all(tongluowan_2015$商品名称, "[@#\\*\\$]", "")
tongluowan_2015$商品名称 <- proname
## Frequency table: one row per distinct cleaned product name.
proname <- data.frame(table(tongluowan_2015$商品名称))
dim(proname) # about 5321 products in the original run
## [1] 5321 2
## Add a date column ------------------------------------------
## Each record is dated to the first day of its year/month.
tongluowan_2015[["时间"]] <- as.Date(
  str_c(tongluowan_2015$年, tongluowan_2015$月, "1", sep = "/"),
  format = "%Y/%m/%d"
)
head(tongluowan_2015)
## 年 月 类别 商品编码 品牌
## 1 2015 1 301101 化妆品香水类 266501 AURA BY SWAROVSKI
## 2 2015 1 301101 化妆品香水类 266503 AURA BY SWAROVSKI
## 3 2015 1 301101 化妆品香水类 266505 AURA BY SWAROVSKI
## 4 2015 1 301101 化妆品香水类 264356 BENEFIT
## 5 2015 1 301101 化妆品香水类 41532 BURBERRY
## 6 2015 1 301101 化妆品香水类 41542 BURBERRY
## 商品名称 单位 零售金额 规格 数量 实际销售额 折扣金额
## 1 AURA BY SWAROVSKI魅力女香 瓶 495 30ml 1 495 0
## 2 AURA BY SWAROVSKI淡香氛 瓶 430 30ml 1 430 0
## 3 AURA BY SWAROVSKI淡香氛 瓶 750 75ml 1 750 0
## 4 BENEFIT香氛露 瓶 380 30ml 1 380 0
## 5 BURBERRY新伦敦男士 甁 630 50ml 1 630 0
## 6 BURBERRY粉红恋歌 甁 650 50ml 1 650 0
## 时间
## 1 2015-01-01
## 2 2015-01-01
## 3 2015-01-01
## 4 2015-01-01
## 5 2015-01-01
## 6 2015-01-01
## Add a character year/month label column, e.g. "2015/1".
tongluowan_2015[["Time"]] <- str_c(
  tongluowan_2015$年,
  tongluowan_2015$月,
  sep = "/"
)
head(tongluowan_2015)
## 年 月 类别 商品编码 品牌
## 1 2015 1 301101 化妆品香水类 266501 AURA BY SWAROVSKI
## 2 2015 1 301101 化妆品香水类 266503 AURA BY SWAROVSKI
## 3 2015 1 301101 化妆品香水类 266505 AURA BY SWAROVSKI
## 4 2015 1 301101 化妆品香水类 264356 BENEFIT
## 5 2015 1 301101 化妆品香水类 41532 BURBERRY
## 6 2015 1 301101 化妆品香水类 41542 BURBERRY
## 商品名称 单位 零售金额 规格 数量 实际销售额 折扣金额
## 1 AURA BY SWAROVSKI魅力女香 瓶 495 30ml 1 495 0
## 2 AURA BY SWAROVSKI淡香氛 瓶 430 30ml 1 430 0
## 3 AURA BY SWAROVSKI淡香氛 瓶 750 75ml 1 750 0
## 4 BENEFIT香氛露 瓶 380 30ml 1 380 0
## 5 BURBERRY新伦敦男士 甁 630 50ml 1 630 0
## 6 BURBERRY粉红恋歌 甁 650 50ml 1 650 0
## 时间 Time
## 1 2015-01-01 2015/1
## 2 2015-01-01 2015/1
## 3 2015-01-01 2015/1
## 4 2015-01-01 2015/1
## 5 2015-01-01 2015/1
## 6 2015-01-01 2015/1
## Inspect the data: per-column summary of the whole table.
summary(tongluowan_2015)
## 年 月 类别 商品编码
## Min. :2015 Min. : 1.000 Length:30350 Min. : 2187
## 1st Qu.:2015 1st Qu.: 3.000 Class :character 1st Qu.:238172
## Median :2015 Median : 6.000 Mode :character Median :266728
## Mean :2015 Mean : 6.246 Mean :249795
## 3rd Qu.:2015 3rd Qu.: 9.000 3rd Qu.:282843
## Max. :2015 Max. :12.000 Max. :319175
## 品牌 商品名称 单位
## Length:30350 Length:30350 Length:30350
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## 零售金额 规格 数量 实际销售额
## Min. : 0.4 Length:30350 Min. : -10.00 Min. : -5900
## 1st Qu.: 170.0 Class :character 1st Qu.: 1.00 1st Qu.: 340
## Median : 320.0 Mode :character Median : 3.00 Median : 980
## Mean : 541.2 Mean : 16.62 Mean : 3964
## 3rd Qu.: 620.0 3rd Qu.: 8.00 3rd Qu.: 3180
## Max. :23800.0 Max. :27309.00 Max. :347760
## 折扣金额 时间 Time
## Min. : -40.000 Min. :2015-01-01 Length:30350
## 1st Qu.: 0.000 1st Qu.:2015-03-01 Class :character
## Median : 0.000 Median :2015-06-01 Mode :character
## Mean : 1.601 Mean :2015-06-08
## 3rd Qu.: 0.000 3rd Qu.:2015-09-01
## Max. :1100.000 Max. :2015-12-01
## Quantity sold: why are some quantities negative? ------------------------
## Author's reading: the item may have expired unsold and been booked as a
## loss to the seller, i.e. the product is not popular.
## NOTE(review): these rows could also be customer returns — confirm with the data owner.
head(tongluowan_2015[!is.na(tongluowan_2015$数量) & tongluowan_2015$数量 < 0, ])
## 年 月 类别 商品编码 品牌
## 1895 2015 1 301103 化妆品护肤类 276943 茱莉蔻
## 2029 2015 1 301105 化妆品美容类 266864 BOBBI BROWN
## 2056 2015 1 301105 化妆品美容类 266736 BOBBI BROWN
## 2143 2015 1 301105 化妆品美容类 282472 CHANEL
## 2734 2015 1 301107 化妆品礼品套装 205685 碧欧泉
## 3223 2015 2 301103 化妆品护肤类 282305 CHANEL
## 商品名称 单位 零售金额 规格 数量 实际销售额
## 1895 茱莉蔻玫瑰衡肤花卉水沁乳限量 瓶 600 200ml -1 -600
## 2029 BOBBI BROWN润色护唇膏 支 180 3.7g -1 -180
## 2056 BOBBI BROWN弹力修护粉底霜 瓶 780 30mL -1 -780
## 2143 CHANEL四色眼影37 支 580 2g -1 -580
## 2734 碧欧泉干性礼盒 支 750 <NA> -1 -750
## 3223 CHANEL山茶花保湿滋养唇霜 瓶 355 10g -1 -355
## 折扣金额 时间 Time
## 1895 0 2015-01-01 2015/1
## 2029 0 2015-01-01 2015/1
## 2056 0 2015-01-01 2015/1
## 2143 0 2015-01-01 2015/1
## 2734 0 2015-01-01 2015/1
## 3223 0 2015-02-01 2015/2
## Discount amount: list the records with a negative discount ----------------------------------------
tongluowan_2015[!is.na(tongluowan_2015$折扣金额) & tongluowan_2015$折扣金额 < 0, ]
## 年 月 类别 商品编码 品牌 商品名称 单位
## 4044 2015 2 301103 化妆品护肤类 161993 兰蔻 兰蔻男士保湿霜 瓶
## 4070 2015 2 301103 化妆品护肤类 161915 兰蔻 兰蔻小雪杉水 瓶
## 5200 2015 2 301105 化妆品美容类 170028 TRIM TRIM男士豪华硅胶鞋垫 个
## 11583 2015 5 301103 化妆品护肤类 161914 兰蔻 兰蔻大雪杉水 瓶
## 零售金额 规格 数量 实际销售额 折扣金额 时间 Time
## 4044 650.0 50ml 34 22100.00 -20.1 2015-02-01 2015/2
## 4070 320.0 200ml 12 3840.00 -9.9 2015-02-01 2015/2
## 5200 132.6 <NA> -1 -125.97 -6.7 2015-02-01 2015/2
## 11583 420.0 400ml 152 63840.00 -40.0 2015-05-01 2015/5
## Three of the negative discounts do not reconcile with the other amounts on
## their rows, so they are reset to 0. The remaining negative discount sits on
## a record whose quantity is also negative; that record is hard to interpret
## and is dropped.
## Rows are selected by condition rather than by position: which(...)[-3]
## depended on the row order of this particular file, and df[-which(cond), ]
## returns zero rows when nothing matches.
neg_discount <- !is.na(tongluowan_2015$折扣金额) & tongluowan_2015$折扣金额 < 0
neg_quantity <- !is.na(tongluowan_2015$数量) & tongluowan_2015$数量 < 0
tongluowan_2015$折扣金额[neg_discount & !neg_quantity] <- 0
tongluowan_2015 <- tongluowan_2015[!(neg_discount & neg_quantity), ]
## Look at the quantities: list records whose quantity is not a whole number.
tongluowan_2015[which(tongluowan_2015$数量 %% 1 != 0), ]
## 年 月 类别 商品编码 品牌 商品名称 单位 零售金额
## 4415 2015 2 301103 化妆品护肤类 174820 施丹兰 施丹兰洛奶 1g 0.4
## 11626 2015 5 301103 化妆品护肤类 174820 施丹兰 施丹兰洛奶 1g 0.4
## 规格 数量 实际销售额 折扣金额 时间 Time
## 4415 散称 15852.5 6341.0 0 2015-02-01 2015/2
## 11626 散称 17766.5 7106.6 0 2015-05-01 2015/5
## 规格为散称,所以允许数量为 0.5g
## Preliminary analysis ####
## Yearly figures per brand ####---------------------------------------
## brand_year: one row per brand, sorted by revenue (highest first).
brand_year <- tongluowan_2015 %>%
  group_by(品牌) %>% # one group per brand
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2), # average price per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2), # average discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 4) * 100 # discount / revenue, in percent
  ) %>%
  arrange(desc(SumrealPrice))
head(brand_year)
## # A tibble: 6 × 7
## 品牌 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 兰蔻 0 21686553 34077 636.40 0
## 2 雅诗兰黛 0 15588065 23922 651.62 0
## 3 CHANEL 0 11757723 18155 647.63 0
## 4 SISLEY 0 6030860 3844 1568.90 0
## 5 海蓝之谜 0 5944640 2954 2012.40 0
## 6 娇兰 0 5533267 5944 930.90 0
## # ... with 1 more variables: Discount_rate <dbl>
dim(brand_year)
## [1] 167 7
## Treemap of the full year, one tile per brand (167 brands):
## tile area = revenue, tile colour = discount rate.
treemap(
  brand_year,
  index = "品牌",
  vSize = "SumrealPrice",
  vColor = "Discount_rate",
  type = "value",
  palette = "RdBu",
  title = "2015年销售额",
  title.legend = "折扣百分比",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Author's observation: 兰蔻 has the highest revenue, and revenue shows
## little relation to whether a discount was given.
## Bar chart of the first 30 rows of brand_year (the highest-revenue brands),
## bars filled by discount rate.
ggplot(brand_year[1:30, ], aes(x = 品牌, y = SumrealPrice, fill = Discount_rate)) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_y_continuous(labels = function(x) paste(x / 1e6, "m")) +
  scale_fill_gradient(name = "折扣\n百分比") +
  labs(x = "", y = "销售额", title = "2015年前30个品牌") +
  theme_bw(base_family = "STKaiti") +
  theme(
    axis.text.x = element_text(size = 9, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Cumulative share of total revenue across brands, in the row order of
## brand_year.
brand_year$accumulation <- cumsum(brand_year$SumrealPrice) / sum(brand_year$SumrealPrice)
## Row positions 1..n; seq_len() stays empty for an empty table.
s <- seq_len(nrow(brand_year))
## The plotted values are passed as a data frame instead of being pulled in
## with `$` inside aes(); the fixed reference marks are drawn via annotate().
ggplot(
  data.frame(rank = s, accumulation = brand_year$accumulation),
  aes(x = rank, y = accumulation)
) +
  theme_bw(base_family = "STKaiti") +
  geom_line() +
  labs(x = "品牌", y = "累积比率", title = "2015销售额") +
  geom_vline(xintercept = 30) +
  geom_hline(yintercept = 0.95) +
  annotate("point", x = 30, y = 0.95, colour = "red") +
  annotate("text", x = 40, y = 0.9, label = "(30,0.95)") +
  scale_x_continuous(breaks = seq(0, length(s), 20)) +
  theme(plot.title = element_text(hjust = 0.5))

## 前30个品牌的产品占据了全年超过95%的销售额
## Monthly sales figures for the whole store #### ---------------------------------------
## Price_month: one row per month, sorted by revenue (highest first).
Price_month <- tongluowan_2015 %>%
  group_by(月) %>% # one group per month
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2), # average price per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2), # average discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 4) * 100 # discount / revenue, in percent
  ) %>%
  arrange(desc(SumrealPrice))
head(Price_month)
## # A tibble: 6 × 7
## 月 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5 9653.10 21975636 90879.5 241.81 0.11
## 2 3 4133.01 14100855 50881.0 277.13 0.08
## 3 9 7143.25 12459808 28280.0 440.59 0.25
## 4 1 3020.27 12139839 77911.0 155.82 0.04
## 5 2 2263.70 11248734 58830.5 191.21 0.04
## 6 12 7412.06 11187530 37252.0 300.32 0.20
## # ... with 1 more variables: Discount_rate <dbl>
dim(Price_month)
## [1] 12 7
## Treemap of the 12 months: tile area = revenue, tile colour = discount rate.
treemap(
  Price_month,
  index = "月",
  vSize = "SumrealPrice",
  vColor = "Discount_rate",
  type = "value",
  palette = "RdBu",
  title = "2015年销售额",
  title.legend = "折扣百分比",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Author's note on the treemap: monthly revenues do not differ very much.
## Bar chart of revenue per calendar month, bars filled by discount rate.
ggplot(Price_month, aes(x = 月, y = SumrealPrice, fill = Discount_rate)) +
  geom_bar(stat = "identity", width = 0.7) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_y_continuous(labels = function(x) paste(x / 1e6, "m")) +
  scale_fill_gradient(name = "折扣\n百分比") +
  labs(x = "月份", y = "销售额", title = "2015年销售额") +
  theme_bw(base_family = "STKaiti") +
  theme(
    axis.text.x = element_text(size = 9),
    plot.title = element_text(hjust = 0.5)
  )

## May has the highest revenue, above 20,000,000 yuan. Why revenue is
## distributed this way needs investigating, in particular why April, June,
## July and August fall so far below May.
## Cumulative share of yearly revenue over the months.
Price_month$accumulation <- cumsum(Price_month$SumrealPrice) / sum(Price_month$SumrealPrice)
## Row positions 1..n; seq_len() stays empty for an empty table.
s <- seq_len(nrow(Price_month))
## Price_month is sorted by revenue, not by calendar month, so position k on
## the x axis is the k-th best month. Each tick is labelled with the month
## that actually sits at that position rather than with the position number.
ggplot(
  data.frame(rank = s, accumulation = Price_month$accumulation),
  aes(x = rank, y = accumulation)
) +
  theme_bw(base_family = "STKaiti") +
  geom_line() +
  scale_x_continuous(breaks = s, labels = paste(Price_month$月, "月")) +
  labs(x = "月份", y = "累积比率", title = "2015销售额") +
  theme(plot.title = element_text(hjust = 0.5))

## Which products sold best over the year ####------------------------------------
## Product: one row per product name, sorted by revenue (highest first).
Product <- tongluowan_2015 %>%
  group_by(商品名称) %>% # one group per product name
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2), # average price per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2), # average discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 6) * 100 # discount / revenue, in percent
  ) %>%
  arrange(desc(SumrealPrice))
head(Product)
## # A tibble: 6 × 7
## 商品名称 SumDiscount SumrealPrice SumAcount MeanPrice
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 雅诗兰黛小棕瓶R1 0 1524640 1712 890.56
## 2 兰芝雪凝自由礼盒 0 1505300 75265 20.00
## 3 兰蔻大黑瓶 0 1489320 1379 1080.00
## 4 兰蔻超大黑瓶 0 951640 643 1480.00
## 5 SK-Ⅱ护肤精华露 0 791840 587 1348.96
## 6 CHANEL可可小姐香水 0 728310 560 1300.55
## # ... with 2 more variables: MeanDiscount <dbl>, Discount_rate <dbl>
dim(Product) # 5321 products
## [1] 5321 7
## Treemap of the first 100 rows of Product (highest revenue):
## tile area = revenue, tile colour = average unit price.
treemap(
  Product[1:100, ],
  index = "商品名称",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年前100产品销售额",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Same treemap for the first 200 rows of Product.
treemap(
  Product[1:200, ],
  index = "商品名称",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年前200产品销售额",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Cumulative share of total revenue across products, in the row order of
## Product.
Product$accumulation <- cumsum(Product$SumrealPrice) / sum(Product$SumrealPrice)
## Row positions 1..n; seq_len() stays empty for an empty table.
s <- seq_len(nrow(Product))
## The plotted values are passed as a data frame instead of being pulled in
## with `$` inside aes().
ggplot(
  data.frame(rank = s, accumulation = Product$accumulation),
  aes(x = rank, y = accumulation)
) +
  theme_bw(base_family = "STKaiti") +
  geom_line() +
  labs(x = "产品(种)", y = "累积比率", title = "2015年销售额") +
  scale_x_continuous(breaks = seq(0, nrow(Product), 500)) +
  theme(plot.title = element_text(hjust = 0.5))

## Author's observation: the first 1000 products account for about 85% of
## yearly revenue.
## Bar chart of the first 50 rows of Product, bars filled by average unit price.
ggplot(Product[1:50, ], aes(x = 商品名称, y = SumrealPrice, fill = MeanPrice)) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_y_continuous(labels = function(x) paste(x / 1e3, "k")) +
  scale_fill_gradient(name = "单价") +
  labs(x = "", y = "销售额", title = "2015年前50个产品") +
  theme_bw(base_family = "STKaiti") +
  theme(
    axis.text.x = element_text(size = 6, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Same 50 products, bars filled by total quantity sold.
ggplot(Product[1:50, ], aes(x = 商品名称, y = SumrealPrice, fill = SumAcount)) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_y_continuous(labels = function(x) paste(x / 1e3, "k")) +
  scale_fill_gradient(name = "销量") +
  labs(x = "", y = "销售额", title = "2015年前50个产品") +
  theme_bw(base_family = "STKaiti") +
  theme(
    axis.text.x = element_text(size = 6, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## First 100 rows of Product, bars filled by average unit price.
ggplot(Product[1:100, ], aes(x = 商品名称, y = SumrealPrice, fill = MeanPrice)) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_y_continuous(labels = function(x) paste(x / 1e3, "k")) +
  scale_fill_gradient(name = "单价") +
  labs(x = "", y = "销售额", title = "2015年前100个产品") +
  theme_bw(base_family = "STKaiti") +
  theme(
    axis.text.x = element_text(size = 3, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Monthly figures per brand for 2015 ####------------------------------------
## brand_month: one row per (brand, month), sorted by revenue (highest first).
brand_month <- tongluowan_2015 %>%
  group_by(品牌, 月) %>% # one group per brand and month
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  ## summarise() removes only the last grouping level, so the result would
  ## still be grouped by brand; drop the grouping so the sorting and
  ## subsetting below act on the whole table.
  ungroup() %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2), # average price per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2), # average discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 6) * 100 # discount / revenue, in percent
  )
brand_month <- arrange(brand_month, desc(SumrealPrice))
## Label combining brand and month, used as the treemap index.
brand_month$brandmonth <- paste(brand_month$品牌, "(", brand_month$月, ")")
head(brand_month)
## Source: local data frame [6 x 9]
## Groups: 品牌 [3]
##
## 品牌 月 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 兰蔻 5 0 5057200 8562 590.66 0
## 2 兰蔻 1 0 3330825 4554 731.41 0
## 3 雅诗兰黛 5 0 3326270 5766 576.88 0
## 4 兰蔻 9 0 2995100 4172 717.91 0
## 5 兰蔻 3 0 2925063 5276 554.41 0
## 6 CHANEL 5 0 2696830 4151 649.68 0
## # ... with 2 more variables: Discount_rate <dbl>, brandmonth <chr>
## Size of the brand-by-month table.
dim(brand_month)
## [1] 1223 9
## Alternative way of sorting: base order() on revenue, descending.
# brand_month2 <- brand_month$SumrealPrice[order(brand_month$SumrealPrice,decreasing = TRUE)]
brand_month2 <- brand_month[order(brand_month$SumrealPrice,decreasing = TRUE),]
## First rows of the re-sorted table.
head(brand_month2)
## Source: local data frame [6 x 9]
## Groups: 品牌 [3]
##
## 品牌 月 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 兰蔻 5 0 5057200 8562 590.66 0
## 2 兰蔻 1 0 3330825 4554 731.41 0
## 3 雅诗兰黛 5 0 3326270 5766 576.88 0
## 4 兰蔻 9 0 2995100 4172 717.91 0
## 5 兰蔻 3 0 2925063 5276 554.41 0
## 6 CHANEL 5 0 2696830 4151 649.68 0
## # ... with 2 more variables: Discount_rate <dbl>, brandmonth <chr>
## Treemap of the first 50 rows of brand_month2 (highest-revenue brand/month
## pairs): tile area = revenue, tile colour = average unit price.
treemap(
  brand_month2[1:50, ],
  index = "brandmonth",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年月销售额前50",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Same treemap for the first 100 rows of brand_month2.
treemap(
  brand_month2[1:100, ],
  index = "brandmonth",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年月销售额前100",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Brand names from the first 30 rows of brand_year, used to filter the
## monthly brand plots.
brand_30 <- brand_year[["品牌"]][1:30]
brand_30
## [1] "兰蔻" "雅诗兰黛" "CHANEL" "SISLEY"
## [5] "海蓝之谜" "娇兰" "碧欧泉" "BOBBI BROWN"
## [9] "la prairie" "植村秀" "娇韵诗" "兰芝"
## [13] "ORIGINS" "SK-Ⅱ" "赫莲娜" "GIORGIO ARMANI"
## [17] "L’OCCITANE" "SHISEIDO" "欧珀莱" "BENEFIT"
## [21] "BODYSTYLE" "科颜氏" "YSL" "欧莱雅"
## [25] "茱莉蔻" "CLARISONIC" "ckone" "施丹兰"
## [29] "羽西" "安娜苏"
## Helper for the three faceted plots below, which differed only in the brand
## subset, the title and the facet scales.
## data:   brand-by-month table with columns 品牌, 月, SumrealPrice, MeanPrice
## brands: brand names to keep
## title:  plot title
## scales: passed to facet_wrap(); "fixed" shares axes, "free" does not
## Returns the ggplot object: monthly revenue bars, one facet per brand,
## bars filled by average unit price.
plot_brand_months <- function(data, brands, title, scales = "fixed") {
  ggplot(data[data$品牌 %in% brands, ], aes(月, SumrealPrice, fill = MeanPrice)) +
    theme_bw(base_family = "STKaiti") +
    geom_bar(stat = "identity", width = 0.8) +
    facet_wrap(~品牌, scales = scales) +
    theme(axis.text.x = element_text(size = 5)) +
    labs(x = "月份", y = "销售额", title = title) +
    scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
    scale_fill_gradient(name = "单价") +
    theme(plot.title = element_text(hjust = 0.5))
}
## The facets are brands, so the titles now say 品牌 (brand) rather than
## 产品 (product).
## The 30 brands in brand_30, each facet with its own axes.
plot_brand_months(brand_month, brand_30, "2015年前30个品牌", scales = "free")

## The same brands on shared axes.
plot_brand_months(brand_month, brand_30, "2015年前30个品牌")

## The first 12 brands of brand_30 on shared axes.
plot_brand_months(brand_month, brand_30[1:12], "2015年前12个品牌")

## Group by category and month to study revenue ####--------------------------------
## classfiy_month: one row per (category, month), sorted by revenue.
classfiy_month <- tongluowan_2015 %>%
  group_by(类别, 月) %>% # one group per category and month
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  ## summarise() removes only the last grouping level, so the result would
  ## still be grouped by category; drop the grouping so arrange() and the
  ## column assignment below act on the whole table.
  ungroup() %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2), # average price per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2), # average discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 6) * 100 # discount / revenue, in percent
  ) %>%
  arrange(desc(SumrealPrice))
dim(classfiy_month)
## [1] 48 8
## Short category label: the first run of two or more characters in the
## \u4e00-\u9fff range, which drops the leading numeric code of 类别.
classfiy_month$class <- str_extract(classfiy_month$类别, "[\u4e00-\u9fff]+[\u4e00-\u9fff]")
head(classfiy_month)
## Source: local data frame [6 x 9]
## Groups: 类别 [1]
##
## 类别 月 SumDiscount SumrealPrice SumAcount MeanPrice
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 301103 化妆品护肤类 5 7442.40 13123972 40752.5 322.04
## 2 301103 化妆品护肤类 3 3553.55 10903563 36797.0 296.32
## 3 301103 化妆品护肤类 9 6009.05 9839010 17536.0 561.07
## 4 301103 化妆品护肤类 1 2601.95 8773794 65829.0 133.28
## 5 301103 化妆品护肤类 2 1703.87 8028806 45288.5 177.28
## 6 301103 化妆品护肤类 12 6455.57 7647073 16055.0 476.30
## # ... with 3 more variables: MeanDiscount <dbl>, Discount_rate <dbl>,
## # class <chr>
## Monthly revenue for the 4 categories, one facet per category.
## Bars filled by average unit price; each facet has its own axes.
ggplot(classfiy_month, aes(x = 月, y = SumrealPrice, fill = MeanPrice)) +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~class, scales = "free") +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  labs(x = "月份", y = "销售额", title = "2015年") +
  theme_bw(base_family = "STKaiti") +
  theme(axis.text.x = element_text(size = 5))

## Same plot with axes shared across facets.
ggplot(classfiy_month, aes(x = 月, y = SumrealPrice, fill = MeanPrice)) +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~class) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  labs(x = "月份", y = "销售额", title = "2015年") +
  theme_bw(base_family = "STKaiti") +
  theme(axis.text.x = element_text(size = 5))

## Monthly revenue and discount per category.
## The fill is MeanDiscount, the average discount amount per unit, so the
## legend is titled "平均折扣" (average discount) rather than a percentage.
## Each facet has its own axes.
ggplot(classfiy_month, aes(月, SumrealPrice, fill = MeanDiscount)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~class, scales = "free") +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_fill_gradient(name = "平均\n折扣")

## Same plot with axes shared across facets.
ggplot(classfiy_month, aes(月, SumrealPrice, fill = MeanDiscount)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~class) +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_fill_gradient(name = "平均\n折扣")

## Table used to analyse each brand's sales over the year ####
## Columns: 品牌, 月, SumDiscount, SumrealPrice, SumAcount.
## The result stays grouped by 品牌 (see the "Groups" line in the printed head).
brand_every <- tongluowan_2015 %>%
  group_by(品牌, 月) %>% # one group per brand and month
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  arrange(desc(SumrealPrice))
head(brand_every)
## Source: local data frame [6 x 5]
## Groups: 品牌 [3]
##
## 品牌 月 SumDiscount SumrealPrice SumAcount
## <chr> <int> <dbl> <dbl> <dbl>
## 1 兰蔻 5 0 5057200 8562
## 2 兰蔻 1 0 3330825 4554
## 3 雅诗兰黛 5 0 3326270 5766
## 4 兰蔻 9 0 2995100 4172
## 5 兰蔻 3 0 2925063 5276
## 6 CHANEL 5 0 2696830 4151
## Size of the brand-by-month table.
dim(brand_every)
## [1] 1223 5
## Load helper functions from an external script.
## NOTE(review): absolute, user-specific path. brand_seal() is not defined in
## this file, so it presumably comes from this script — confirm.
source("/Users/daitu/数据分析/化妆品/need_function_hzp.R")
## Call brand_seal() for the brand 兰蔻, passing color = "lightblue".
brand_seal(brand_every,"兰蔻",color = "lightblue")

## Same call for 植村秀, without a color argument.
brand_seal(brand_every,"植村秀")

# brand_seal(brand_every,"植村秀2")
## Table used to analyse each product's sales over the year ####
## Columns: 商品名称, 月, SumDiscount, SumrealPrice, SumAcount.
## The result stays grouped by 商品名称 (see the "Groups" line in the printed head).
Product_every <- tongluowan_2015 %>%
  group_by(商品名称, 月) %>% # one group per product name and month
  summarise(
    SumDiscount = sum(折扣金额), # total discount amount
    SumrealPrice = sum(实际销售额), # total actual sales revenue
    SumAcount = sum(数量) # total quantity sold
  ) %>%
  arrange(desc(SumrealPrice))
head(Product_every)
## Source: local data frame [6 x 5]
## Groups: 商品名称 [5]
##
## 商品名称 月 SumDiscount SumrealPrice SumAcount
## <chr> <int> <dbl> <dbl> <dbl>
## 1 兰蔻大黑瓶 5 0 695520 644
## 2 兰芝雪凝自由礼盒 5 0 520200 26010
## 3 SISLEY 154062S 5 0 274560 22
## 4 兰蔻大黑瓶 1 0 273240 253
## 5 兰蔻超大黑瓶 5 0 260480 176
## 6 雅诗兰黛小棕瓶R1 9 0 259390 307
## Size of the product-by-month table.
dim(Product_every)
## [1] 28046 5
## Call product_seal() once per product name; the first call also passes
## color = "lightblue".
## NOTE(review): product_seal() is not defined in this file; presumably it
## comes from the script sourced earlier — confirm.
product_seal(Product_every,"海蓝之谜活颜焕肤精华露",color = "lightblue")

product_seal(Product_every,"赫莲娜极致之美升华液")

product_seal(Product_every,"娇韵诗美胸紧实乳")

product_seal(Product_every,"兰蔻高清微整美容液")

product_seal(Product_every,"兰蔻黑膜6片")
