##(王府井)购物中心化妆品数据分析
## 该数据集为经过预处理的数据集
## 2016.07.04
## Daitu
## Change the working directory, then print it to confirm.
## NOTE(review): the path is absolute and machine-specific, so every relative
## file name used later in the script is resolved against this directory.


setwd("/Users/daitu/数据分析/化妆品/2015年")
getwd()
## [1] "/Users/daitu/数据分析/化妆品/2015年"
## 加载包####
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(treemap)


## Load the Wangfujing sales data ####
## The CSV is decoded as GBK and text columns are kept as character vectors.
filename <- "王府井商场2015年化妆品销售月报(改)副本.csv"
tongluowan_2015 <- read.csv(
  file = filename,
  stringsAsFactors = FALSE,
  fileEncoding = "GBK"
)
dim(tongluowan_2015)
## [1] 30350    12
## Preview the first rows of the freshly loaded table.
head(tongluowan_2015)
##     年 月                类别 商品编码              品牌
## 1 2015  1 301101 化妆品香水类   266501 AURA BY SWAROVSKI
## 2 2015  1 301101 化妆品香水类   266503 AURA BY SWAROVSKI
## 3 2015  1 301101 化妆品香水类   266505 AURA BY SWAROVSKI
## 4 2015  1 301101 化妆品香水类   264356           BENEFIT
## 5 2015  1 301101 化妆品香水类    41532          BURBERRY
## 6 2015  1 301101 化妆品香水类    41542          BURBERRY
##                    商品名称 单位 零售金额 规格 数量 实际销售额 折扣金额
## 1 AURA BY SWAROVSKI魅力女香   瓶      495 30ml    1        495        0
## 2   AURA BY SWAROVSKI淡香氛   瓶      430 30ml    1        430        0
## 3   AURA BY SWAROVSKI淡香氛   瓶      750 75ml    1        750        0
## 4             BENEFIT香氛露   瓶      380 30ml    1        380        0
## 5        BURBERRY新伦敦男士   甁      630 50ml    1        630        0
## 6          BURBERRY粉红恋歌   甁      650 50ml    1        650        0
## Show the type of every column.
str(tongluowan_2015)
## 'data.frame':    30350 obs. of  12 variables:
##  $ 年        : int  2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
##  $ 月        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ 类别      : chr  "301101 化妆品香水类" "301101 化妆品香水类" "301101 化妆品香水类" "301101 化妆品香水类" ...
##  $ 商品编码  : int  266501 266503 266505 264356 41532 41542 41547 94462 192402 248187 ...
##  $ 品牌      : chr  "AURA BY SWAROVSKI" "AURA BY SWAROVSKI" "AURA BY SWAROVSKI" "BENEFIT" ...
##  $ 商品名称  : chr  "AURA BY SWAROVSKI魅力女香" "AURA BY SWAROVSKI淡香氛" "AURA BY SWAROVSKI淡香氛" "BENEFIT香氛露" ...
##  $ 单位      : chr  "瓶" "瓶" "瓶" "瓶" ...
##  $ 零售金额  : num  495 430 750 380 630 650 870 665 515 1010 ...
##  $ 规格      : chr  "30ml" "30ml" "75ml" "30ml" ...
##  $ 数量      : num  1 1 1 1 1 1 1 1 2 1 ...
##  $ 实际销售额: num  495 430 750 380 630 650 870 665 1030 1010 ...
##  $ 折扣金额  : num  0 0 0 0 0 0 0 0 0 0 ...
## Preparation before the analysis ####

## Count the distinct products ------------------------------------
## Some product names carry stray marker characters (@, #, *, $) that have to
## be stripped, otherwise the same product is counted under several spellings.
## One character class removes every occurrence of each marker. A bare "*" is
## not a valid regular expression, so it is matched inside the class instead.
proname <- str_replace_all(tongluowan_2015$商品名称, "[@#*$]", "")

## Write the cleaned names back so every later grouping uses them.
tongluowan_2015$商品名称 <- proname

## One row per distinct cleaned product name together with its record count.
proname <- data.frame(table(tongluowan_2015$商品名称))
dim(proname)  # number of distinct products (5321 in the recorded run)
## [1] 5321    2
## Add a Date column ------------------------------------------
## Each record is stamped with the first day of its year/month.
tongluowan_2015[["时间"]] <- as.Date(
  with(tongluowan_2015, str_c(年, 月, "1", sep = "/")),
  format = "%Y/%m/%d"
)
head(tongluowan_2015)
##     年 月                类别 商品编码              品牌
## 1 2015  1 301101 化妆品香水类   266501 AURA BY SWAROVSKI
## 2 2015  1 301101 化妆品香水类   266503 AURA BY SWAROVSKI
## 3 2015  1 301101 化妆品香水类   266505 AURA BY SWAROVSKI
## 4 2015  1 301101 化妆品香水类   264356           BENEFIT
## 5 2015  1 301101 化妆品香水类    41532          BURBERRY
## 6 2015  1 301101 化妆品香水类    41542          BURBERRY
##                    商品名称 单位 零售金额 规格 数量 实际销售额 折扣金额
## 1 AURA BY SWAROVSKI魅力女香   瓶      495 30ml    1        495        0
## 2   AURA BY SWAROVSKI淡香氛   瓶      430 30ml    1        430        0
## 3   AURA BY SWAROVSKI淡香氛   瓶      750 75ml    1        750        0
## 4             BENEFIT香氛露   瓶      380 30ml    1        380        0
## 5        BURBERRY新伦敦男士   甁      630 50ml    1        630        0
## 6          BURBERRY粉红恋歌   甁      650 50ml    1        650        0
##         时间
## 1 2015-01-01
## 2 2015-01-01
## 3 2015-01-01
## 4 2015-01-01
## 5 2015-01-01
## 6 2015-01-01
## Add a character column holding "year/month"
tongluowan_2015[["Time"]] <- with(tongluowan_2015, str_c(年, 月, sep = "/"))
head(tongluowan_2015)
##     年 月                类别 商品编码              品牌
## 1 2015  1 301101 化妆品香水类   266501 AURA BY SWAROVSKI
## 2 2015  1 301101 化妆品香水类   266503 AURA BY SWAROVSKI
## 3 2015  1 301101 化妆品香水类   266505 AURA BY SWAROVSKI
## 4 2015  1 301101 化妆品香水类   264356           BENEFIT
## 5 2015  1 301101 化妆品香水类    41532          BURBERRY
## 6 2015  1 301101 化妆品香水类    41542          BURBERRY
##                    商品名称 单位 零售金额 规格 数量 实际销售额 折扣金额
## 1 AURA BY SWAROVSKI魅力女香   瓶      495 30ml    1        495        0
## 2   AURA BY SWAROVSKI淡香氛   瓶      430 30ml    1        430        0
## 3   AURA BY SWAROVSKI淡香氛   瓶      750 75ml    1        750        0
## 4             BENEFIT香氛露   瓶      380 30ml    1        380        0
## 5        BURBERRY新伦敦男士   甁      630 50ml    1        630        0
## 6          BURBERRY粉红恋歌   甁      650 50ml    1        650        0
##         时间   Time
## 1 2015-01-01 2015/1
## 2 2015-01-01 2015/1
## 3 2015-01-01 2015/1
## 4 2015-01-01 2015/1
## 5 2015-01-01 2015/1
## 6 2015-01-01 2015/1
## Inspect the data: per-column summary statistics
summary(tongluowan_2015)
##        年             月             类别              商品编码     
##  Min.   :2015   Min.   : 1.000   Length:30350       Min.   :  2187  
##  1st Qu.:2015   1st Qu.: 3.000   Class :character   1st Qu.:238172  
##  Median :2015   Median : 6.000   Mode  :character   Median :266728  
##  Mean   :2015   Mean   : 6.246                      Mean   :249795  
##  3rd Qu.:2015   3rd Qu.: 9.000                      3rd Qu.:282843  
##  Max.   :2015   Max.   :12.000                      Max.   :319175  
##      品牌             商品名称             单位          
##  Length:30350       Length:30350       Length:30350      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##     零售金额           规格                数量            实际销售额    
##  Min.   :    0.4   Length:30350       Min.   :  -10.00   Min.   : -5900  
##  1st Qu.:  170.0   Class :character   1st Qu.:    1.00   1st Qu.:   340  
##  Median :  320.0   Mode  :character   Median :    3.00   Median :   980  
##  Mean   :  541.2                      Mean   :   16.62   Mean   :  3964  
##  3rd Qu.:  620.0                      3rd Qu.:    8.00   3rd Qu.:  3180  
##  Max.   :23800.0                      Max.   :27309.00   Max.   :347760  
##     折扣金额             时间                Time          
##  Min.   : -40.000   Min.   :2015-01-01   Length:30350      
##  1st Qu.:   0.000   1st Qu.:2015-03-01   Class :character  
##  Median :   0.000   Median :2015-06-01   Mode  :character  
##  Mean   :   1.601   Mean   :2015-06-08                     
##  3rd Qu.:   0.000   3rd Qu.:2015-09-01                     
##  Max.   :1100.000   Max.   :2015-12-01
## Quantity sold (why are some quantities negative?) ------------------------
## Author's reading: possibly stock that expired unsold and is booked as a loss
## for the seller, i.e. the product is not popular -- an interpretation, not
## something the data itself states.
head(subset(tongluowan_2015, 数量 < 0))
##        年 月                  类别 商品编码        品牌
## 1895 2015  1   301103 化妆品护肤类   276943      茱莉蔻
## 2029 2015  1   301105 化妆品美容类   266864 BOBBI BROWN
## 2056 2015  1   301105 化妆品美容类   266736 BOBBI BROWN
## 2143 2015  1   301105 化妆品美容类   282472      CHANEL
## 2734 2015  1 301107 化妆品礼品套装   205685      碧欧泉
## 3223 2015  2   301103 化妆品护肤类   282305      CHANEL
##                          商品名称 单位 零售金额  规格 数量 实际销售额
## 1895 茱莉蔻玫瑰衡肤花卉水沁乳限量   瓶      600 200ml   -1       -600
## 2029        BOBBI BROWN润色护唇膏   支      180  3.7g   -1       -180
## 2056    BOBBI BROWN弹力修护粉底霜   瓶      780  30mL   -1       -780
## 2143             CHANEL四色眼影37   支      580    2g   -1       -580
## 2734               碧欧泉干性礼盒   支      750  <NA>   -1       -750
## 3223     CHANEL山茶花保湿滋养唇霜   瓶      355   10g   -1       -355
##      折扣金额       时间   Time
## 1895        0 2015-01-01 2015/1
## 2029        0 2015-01-01 2015/1
## 2056        0 2015-01-01 2015/1
## 2143        0 2015-01-01 2015/1
## 2734        0 2015-01-01 2015/1
## 3223        0 2015-02-01 2015/2
## Discount amount: list every record with a negative discount ---------------
subset(tongluowan_2015, 折扣金额 < 0)
##         年 月                类别 商品编码 品牌             商品名称 单位
## 4044  2015  2 301103 化妆品护肤类   161993 兰蔻       兰蔻男士保湿霜   瓶
## 4070  2015  2 301103 化妆品护肤类   161915 兰蔻         兰蔻小雪杉水   瓶
## 5200  2015  2 301105 化妆品美容类   170028 TRIM TRIM男士豪华硅胶鞋垫   个
## 11583 2015  5 301103 化妆品护肤类   161914 兰蔻         兰蔻大雪杉水   瓶
##       零售金额  规格 数量 实际销售额 折扣金额       时间   Time
## 4044     650.0  50ml   34   22100.00    -20.1 2015-02-01 2015/2
## 4070     320.0 200ml   12    3840.00     -9.9 2015-02-01 2015/2
## 5200     132.6  <NA>   -1    -125.97     -6.7 2015-02-01 2015/2
## 11583    420.0 400ml  152   63840.00    -40.0 2015-05-01 2015/5
## Clean up negative discounts.
## In the recorded run four rows are affected: three are ordinary sales with a
## positive quantity whose discount does not add up, so it is reset to 0; the
## fourth also has a negative quantity, is hard to interpret, and is dropped.
## Logical masks replace `which(...)[-3]` and `df[-which(...), ]`: the
## positional `-3` targets the wrong row as soon as the data changes, and a
## negative index built from an empty `which()` would delete every row.
negative_discount <- !is.na(tongluowan_2015$折扣金额) &
  tongluowan_2015$折扣金额 < 0
negative_quantity <- !is.na(tongluowan_2015$数量) & tongluowan_2015$数量 < 0
tongluowan_2015$折扣金额[negative_discount & !negative_quantity] <- 0
tongluowan_2015 <- tongluowan_2015[!(negative_discount & negative_quantity), ]

## Quantity check: list the records whose quantity is not a whole number
subset(tongluowan_2015, 数量 %% 1 != 0)
##         年 月                类别 商品编码   品牌   商品名称 单位 零售金额
## 4415  2015  2 301103 化妆品护肤类   174820 施丹兰 施丹兰洛奶   1g      0.4
## 11626 2015  5 301103 化妆品护肤类   174820 施丹兰 施丹兰洛奶   1g      0.4
##       规格    数量 实际销售额 折扣金额       时间   Time
## 4415  散称 15852.5     6341.0        0 2015-02-01 2015/2
## 11626 散称 17766.5     7106.6        0 2015-05-01 2015/5
## 规格为散称,所以允许数量为 0.5g


## 数据的初步分析####

## Yearly figures per brand ####---------------------------------------
## Result: brand_year, one row per brand, highest revenue first
brand_year <- group_by(tongluowan_2015, 品牌)
brand_year <- summarise(
  brand_year,
  SumDiscount = sum(折扣金额),     # total discount amount
  SumrealPrice = sum(实际销售额),  # total actual sales revenue
  SumAcount = sum(数量)            # total quantity sold
)
brand_year <- mutate(
  brand_year,
  MeanPrice = round(SumrealPrice / SumAcount, 2),     # revenue per unit
  MeanDiscount = round(SumDiscount / SumAcount, 2),   # discount per unit
  Discount_rate = round(SumDiscount / SumrealPrice, 4) * 100  # discount, % of revenue
)
brand_year <- arrange(brand_year, desc(SumrealPrice))
head(brand_year)
## # A tibble: 6 × 7
##       品牌 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
##      <chr>       <dbl>        <dbl>     <dbl>     <dbl>        <dbl>
## 1     兰蔻           0     21686553     34077    636.40            0
## 2 雅诗兰黛           0     15588065     23922    651.62            0
## 3   CHANEL           0     11757723     18155    647.63            0
## 4   SISLEY           0      6030860      3844   1568.90            0
## 5 海蓝之谜           0      5944640      2954   2012.40            0
## 6     娇兰           0      5533267      5944    930.90            0
## # ... with 1 more variables: Discount_rate <dbl>
## Number of brands (rows) and summary columns.
dim(brand_year)
## [1] 167   7
## 对一年的数据使用树图,167个品牌

## Treemap of all brands: tile area is yearly revenue, colour is discount rate.
treemap(
  dtf = brand_year,
  index = "品牌",
  vSize = "SumrealPrice",
  vColor = "Discount_rate",
  type = "value",
  palette = "RdBu",
  title = "2015年销售额",
  title.legend = "折扣百分比",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## 兰蔻销售额最高,而且销售额和是否打折没有太大的关系。

## Bar chart of the 30 highest-revenue brands (the first 30 rows of the sorted
## brand_year), filled by discount rate; the y axis is shown in millions.
ggplot(
  data = brand_year[seq_len(30), ],
  mapping = aes(x = 品牌, y = SumrealPrice, fill = Discount_rate)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.8, stat = "identity") +
  scale_y_continuous(labels = function(x) paste(x / 1e6, "m")) +
  scale_fill_gradient(name = "折扣\n百分比") +
  labs(title = "2015年前30个品牌", x = "", y = "销售额") +
  theme(
    axis.text.x = element_text(size = 9, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Cumulative share of yearly revenue across brands
## brand_year is sorted by revenue (largest first), so row k holds the share of
## total revenue contributed by the k best-selling brands.
brand_year$accumulation <- cumsum(brand_year$SumrealPrice) /
  sum(brand_year$SumrealPrice)
## seq_len() stays empty for an empty table, unlike seq(1:n), which yields 1:2.
s <- seq_len(nrow(brand_year))
## The curve gets its own data frame so aes() refers to plain column names
## rather than `$` expressions.
brand_cumulative <- data.frame(rank = s, share = brand_year$accumulation)

ggplot() +
  theme_bw(base_family = "STKaiti") +
  geom_line(data = brand_cumulative, aes(x = rank, y = share)) +
  labs(x = "品牌", y = "累积比率", title = "2015销售额") +
  ## Reference lines and marker at a hard-coded (30, 0.95) position.
  geom_vline(xintercept = 30) + geom_hline(yintercept = 0.95) +
  geom_point(aes(30, 0.95), color = "red") +
  geom_text(aes(40, 0.9), label = "(30,0.95)") +
  scale_x_continuous(breaks = seq(0, length(s), 20)) +
  theme(plot.title = element_text(hjust = 0.5))

## 前30个品牌的产品占据了全年超过95%的销售额




## Monthly sales of the whole store #### ---------------------------------------

## Totals per calendar month.
Price_month <- tongluowan_2015 %>%
  group_by(月) %>%
  summarise(
    SumDiscount = sum(折扣金额),     # total discount amount
    SumrealPrice = sum(实际销售额),  # total actual sales revenue
    SumAcount = sum(数量)            # total quantity sold
  )
## Per-unit ratios, then sort with the highest-revenue month first.
Price_month <- Price_month %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2),     # revenue per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2),   # discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 4) * 100  # discount, % of revenue
  ) %>%
  arrange(desc(SumrealPrice))
head(Price_month)
## # A tibble: 6 × 7
##      月 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
##   <int>       <dbl>        <dbl>     <dbl>     <dbl>        <dbl>
## 1     5     9653.10     21975636   90879.5    241.81         0.11
## 2     3     4133.01     14100855   50881.0    277.13         0.08
## 3     9     7143.25     12459808   28280.0    440.59         0.25
## 4     1     3020.27     12139839   77911.0    155.82         0.04
## 5     2     2263.70     11248734   58830.5    191.21         0.04
## 6    12     7412.06     11187530   37252.0    300.32         0.20
## # ... with 1 more variables: Discount_rate <dbl>
## Number of months (rows) and summary columns.
dim(Price_month)
## [1] 12  7
## Treemap of the monthly totals: tile area is revenue, colour is discount rate.

treemap(
  dtf = Price_month,
  index = "月",
  vSize = "SumrealPrice",
  vColor = "Discount_rate",
  type = "value",
  palette = "RdBu",
  title = "2015年销售额",
  title.legend = "折扣百分比",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## 每月的销售额相差并不是很大

## Bar chart of revenue per month, filled by discount rate; the y axis is shown
## in millions and the x axis carries one tick per month.
ggplot(
  data = Price_month,
  mapping = aes(x = 月, y = SumrealPrice, fill = Discount_rate)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.7, stat = "identity") +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_y_continuous(labels = function(x) paste(x / 1e6, "m")) +
  scale_fill_gradient(name = "折扣\n百分比") +
  labs(title = "2015年销售额", x = "月份", y = "销售额") +
  theme(
    axis.text.x = element_text(size = 9),
    plot.title = element_text(hjust = 0.5)
  )

## 5月份销售额最高超过20,000,000 元 ,销售情况为何是这样分布的,
## 需要查找一下原因,尤其为何4、6、7、8四个月和5月相差这么大


## Cumulative share of yearly revenue, month by month
## Price_month is sorted by revenue (largest first), but the x axis below is
## labelled with calendar months. The running total is therefore accumulated
## in calendar order; otherwise the point drawn at "1 月" would show the share
## of the best-selling month rather than of January.
month_order <- order(Price_month$月)
accumulation <- numeric(nrow(Price_month))
accumulation[month_order] <- cumsum(Price_month$SumrealPrice[month_order]) /
  sum(Price_month$SumrealPrice)
Price_month$accumulation <- accumulation
## Row index kept because the original script defines it here; seq_len() stays
## empty for an empty table.
s <- seq_len(nrow(Price_month))

ggplot(Price_month, aes(x = 月, y = accumulation)) +
  theme_bw(base_family = "STKaiti") +
  geom_line() +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  labs(x = "月份", y = "累积比率", title = "2015销售额") +
  theme(plot.title = element_text(hjust = 0.5))

## Which products sold best over the year ####------------------------------------

## Totals per product name.
Product <- summarise(
  group_by(tongluowan_2015, 商品名称),
  SumDiscount = sum(折扣金额),     # total discount amount
  SumrealPrice = sum(实际销售额),  # total actual sales revenue
  SumAcount = sum(数量)            # total quantity sold
)
## Per-unit ratios, then sort with the highest-revenue product first.
Product <- arrange(
  mutate(
    Product,
    MeanPrice = round(SumrealPrice / SumAcount, 2),     # revenue per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2),   # discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 6) * 100  # discount, % of revenue
  ),
  desc(SumrealPrice)
)
head(Product)
## # A tibble: 6 × 7
##             商品名称 SumDiscount SumrealPrice SumAcount MeanPrice
##                <chr>       <dbl>        <dbl>     <dbl>     <dbl>
## 1   雅诗兰黛小棕瓶R1           0      1524640      1712    890.56
## 2   兰芝雪凝自由礼盒           0      1505300     75265     20.00
## 3         兰蔻大黑瓶           0      1489320      1379   1080.00
## 4       兰蔻超大黑瓶           0       951640       643   1480.00
## 5    SK-Ⅱ护肤精华露           0       791840       587   1348.96
## 6 CHANEL可可小姐香水           0       728310       560   1300.55
## # ... with 2 more variables: MeanDiscount <dbl>, Discount_rate <dbl>
## Number of distinct products (rows) and summary columns.
dim(Product)  # 5321 products in the recorded run
## [1] 5321    7
## Treemap of the 100 highest-revenue products: area is revenue, colour is the
## mean unit price.
treemap(
  dtf = Product[seq_len(100), ],
  index = "商品名称",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年前100产品销售额",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Same treemap for the 200 highest-revenue products.
treemap(
  dtf = Product[seq_len(200), ],
  index = "商品名称",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年前200产品销售额",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Cumulative share of yearly revenue across products
## Product is sorted by revenue (largest first), so row k holds the share of
## total revenue contributed by the k best-selling products.
Product$accumulation <- cumsum(Product$SumrealPrice) / sum(Product$SumrealPrice)
## seq_len() stays empty for an empty table, unlike seq(1:n), which yields 1:2.
s <- seq_len(nrow(Product))
## The curve gets its own data frame so aes() refers to plain column names
## rather than `$` expressions.
product_cumulative <- data.frame(rank = s, share = Product$accumulation)

ggplot() +
  theme_bw(base_family = "STKaiti") +
  geom_line(data = product_cumulative, aes(x = rank, y = share)) +
  labs(x = "产品(种)", y = "累积比率", title = "2015年销售额") +
  scale_x_continuous(breaks = seq(0, nrow(Product), 500)) +
  theme(plot.title = element_text(hjust = 0.5))

## 前1000个产品占据了全年约85%的销售额


## Bar charts of the 50 highest-revenue products (first 50 rows of the sorted
## Product table); the y axis is shown in thousands.
## First chart: bars filled by mean unit price.
ggplot(
  data = Product[seq_len(50), ],
  mapping = aes(x = 商品名称, y = SumrealPrice, fill = MeanPrice)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.8, stat = "identity") +
  scale_y_continuous(labels = function(x) paste(x / 1e3, "k")) +
  scale_fill_gradient(name = "单价") +
  labs(title = "2015年前50个产品", x = "", y = "销售额") +
  theme(
    axis.text.x = element_text(size = 6, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Second chart: same bars, filled by total quantity sold.
ggplot(
  data = Product[seq_len(50), ],
  mapping = aes(x = 商品名称, y = SumrealPrice, fill = SumAcount)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.8, stat = "identity") +
  scale_y_continuous(labels = function(x) paste(x / 1e3, "k")) +
  scale_fill_gradient(name = "销量") +
  labs(title = "2015年前50个产品", x = "", y = "销售额") +
  theme(
    axis.text.x = element_text(size = 6, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Bar chart of the 100 highest-revenue products, filled by mean unit price;
## the y axis is shown in thousands.
ggplot(
  data = Product[seq_len(100), ],
  mapping = aes(x = 商品名称, y = SumrealPrice, fill = MeanPrice)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.8, stat = "identity") +
  scale_y_continuous(labels = function(x) paste(x / 1e3, "k")) +
  scale_fill_gradient(name = "单价") +
  labs(title = "2015年前100个产品", x = "", y = "销售额") +
  theme(
    axis.text.x = element_text(size = 3, angle = 90),
    plot.title = element_text(hjust = 0.5)
  )

## Monthly figures per brand for 2015 ####------------------------------------

## Totals per (brand, month) pair.
brand_month <- summarise(
  group_by(tongluowan_2015, 品牌, 月),
  SumDiscount = sum(折扣金额),     # total discount amount
  SumrealPrice = sum(实际销售额),  # total actual sales revenue
  SumAcount = sum(数量)            # total quantity sold
)
brand_month <- mutate(
  brand_month,
  MeanPrice = round(SumrealPrice / SumAcount, 2),     # revenue per unit
  MeanDiscount = round(SumDiscount / SumAcount, 2),   # discount per unit
  Discount_rate = round(SumDiscount / SumrealPrice, 6) * 100  # discount, % of revenue
)
## Highest-revenue brand/month combination first.
brand_month <- arrange(brand_month, desc(SumrealPrice))
## Label of the form "brand ( month )", used as the treemap index further down.
brand_month$brandmonth <- paste(brand_month$品牌, "(", brand_month$月, ")")
head(brand_month)
## Source: local data frame [6 x 9]
## Groups: 品牌 [3]
## 
##       品牌    月 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
##      <chr> <int>       <dbl>        <dbl>     <dbl>     <dbl>        <dbl>
## 1     兰蔻     5           0      5057200      8562    590.66            0
## 2     兰蔻     1           0      3330825      4554    731.41            0
## 3 雅诗兰黛     5           0      3326270      5766    576.88            0
## 4     兰蔻     9           0      2995100      4172    717.91            0
## 5     兰蔻     3           0      2925063      5276    554.41            0
## 6   CHANEL     5           0      2696830      4151    649.68            0
## # ... with 2 more variables: Discount_rate <dbl>, brandmonth <chr>
## Number of brand/month combinations (rows) and columns.
dim(brand_month)  
## [1] 1223    9
## The same ordering obtained with base R: reorder the rows by decreasing
## revenue.
brand_month2 <- brand_month[
  order(brand_month[["SumrealPrice"]], decreasing = TRUE),
]
head(brand_month2)
## Source: local data frame [6 x 9]
## Groups: 品牌 [3]
## 
##       品牌    月 SumDiscount SumrealPrice SumAcount MeanPrice MeanDiscount
##      <chr> <int>       <dbl>        <dbl>     <dbl>     <dbl>        <dbl>
## 1     兰蔻     5           0      5057200      8562    590.66            0
## 2     兰蔻     1           0      3330825      4554    731.41            0
## 3 雅诗兰黛     5           0      3326270      5766    576.88            0
## 4     兰蔻     9           0      2995100      4172    717.91            0
## 5     兰蔻     3           0      2925063      5276    554.41            0
## 6   CHANEL     5           0      2696830      4151    649.68            0
## # ... with 2 more variables: Discount_rate <dbl>, brandmonth <chr>
## Treemap of the 50 highest-revenue brand/month combinations: area is
## revenue, colour is the mean unit price.
treemap(
  dtf = brand_month2[seq_len(50), ],
  index = "brandmonth",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年月销售额前50",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Same treemap for the 100 highest-revenue brand/month combinations.
treemap(
  dtf = brand_month2[seq_len(100), ],
  index = "brandmonth",
  vSize = "SumrealPrice",
  vColor = "MeanPrice",
  type = "value",
  palette = "RdBu",
  title = "2015年月销售额前100",
  title.legend = "单价",
  fontfamily.title = "STKaiti",
  fontfamily.labels = "STKaiti",
  fontfamily.legend = "STKaiti"
)

## Names of the 30 highest-revenue brands (brand_year is already sorted by
## revenue), used to filter the monthly per-brand plots.
brand_30 <- brand_year[["品牌"]][seq_len(30)]
brand_30
##  [1] "兰蔻"           "雅诗兰黛"       "CHANEL"         "SISLEY"        
##  [5] "海蓝之谜"       "娇兰"           "碧欧泉"         "BOBBI BROWN"   
##  [9] "la prairie"     "植村秀"         "娇韵诗"         "兰芝"          
## [13] "ORIGINS"        "SK-Ⅱ"          "赫莲娜"         "GIORGIO ARMANI"
## [17] "L’OCCITANE"    "SHISEIDO"       "欧珀莱"         "BENEFIT"       
## [21] "BODYSTYLE"      "科颜氏"         "YSL"            "欧莱雅"        
## [25] "茱莉蔻"         "CLARISONIC"     "ckone"          "施丹兰"        
## [29] "羽西"           "安娜苏"
## Monthly revenue of the top brands, one panel per brand, bars filled by mean
## unit price; the y axis is shown in units of 10,000.
## The panels are brands, so the titles say "品牌" (brand); the earlier
## "产品" (product) wording was a leftover from the product charts.

## Top 30 brands, each panel with its own axis ranges.
ggplot(brand_month[brand_month$品牌 %in% brand_30, ],
       aes(月, SumrealPrice, fill = MeanPrice)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~品牌, scales = "free") +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年前30个品牌") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  theme(plot.title = element_text(hjust = 0.5))

## Top 30 brands on shared axes, so panels are directly comparable.
ggplot(brand_month[brand_month$品牌 %in% brand_30, ],
       aes(月, SumrealPrice, fill = MeanPrice)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~品牌) +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年前30个品牌") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  theme(plot.title = element_text(hjust = 0.5))

## Top 12 brands on shared axes.
ggplot(brand_month[brand_month$品牌 %in% brand_30[1:12], ],
       aes(月, SumrealPrice, fill = MeanPrice)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~品牌) +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年前12个品牌") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  theme(plot.title = element_text(hjust = 0.5))

## Group by category and month to compare revenue ####--------------------------------
## Totals per (category, month) pair.
classfiy_month <- tongluowan_2015 %>%
  group_by(类别, 月) %>%
  summarise(
    SumDiscount = sum(折扣金额),     # total discount amount
    SumrealPrice = sum(实际销售额),  # total actual sales revenue
    SumAcount = sum(数量)            # total quantity sold
  )
## Per-unit ratios, then sort with the highest revenue first.
classfiy_month <- classfiy_month %>%
  mutate(
    MeanPrice = round(SumrealPrice / SumAcount, 2),     # revenue per unit
    MeanDiscount = round(SumDiscount / SumAcount, 2),   # discount per unit
    Discount_rate = round(SumDiscount / SumrealPrice, 6) * 100  # discount, % of revenue
  ) %>%
  arrange(desc(SumrealPrice))
dim(classfiy_month)  
## [1] 48  8
## Short category label: the first run of two or more CJK characters in the
## category string, i.e. the text after the numeric code.
classfiy_month[["class"]] <- str_extract(
  classfiy_month[["类别"]],
  "[\u4e00-\u9fff]+[\u4e00-\u9fff]"
)
head(classfiy_month)
## Source: local data frame [6 x 9]
## Groups: 类别 [1]
## 
##                  类别    月 SumDiscount SumrealPrice SumAcount MeanPrice
##                 <chr> <int>       <dbl>        <dbl>     <dbl>     <dbl>
## 1 301103 化妆品护肤类     5     7442.40     13123972   40752.5    322.04
## 2 301103 化妆品护肤类     3     3553.55     10903563   36797.0    296.32
## 3 301103 化妆品护肤类     9     6009.05      9839010   17536.0    561.07
## 4 301103 化妆品护肤类     1     2601.95      8773794   65829.0    133.28
## 5 301103 化妆品护肤类     2     1703.87      8028806   45288.5    177.28
## 6 301103 化妆品护肤类    12     6455.57      7647073   16055.0    476.30
## # ... with 3 more variables: MeanDiscount <dbl>, Discount_rate <dbl>,
## #   class <chr>
## Monthly revenue per category, one panel per category
## Bars are filled by mean unit price; the y axis is shown in units of 10,000.

## Each panel with its own axis ranges.
ggplot(
  data = classfiy_month,
  mapping = aes(x = 月, y = SumrealPrice, fill = MeanPrice)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.8, stat = "identity") +
  facet_wrap(~class, scales = "free") +
  theme(axis.text.x = element_text(size = 5)) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  labs(title = "2015年", x = "月份", y = "销售额")

## All panels on shared axes.
ggplot(
  data = classfiy_month,
  mapping = aes(x = 月, y = SumrealPrice, fill = MeanPrice)
) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(width = 0.8, stat = "identity") +
  facet_wrap(~class) +
  theme(axis.text.x = element_text(size = 5)) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_fill_gradient(name = "单价") +
  labs(title = "2015年", x = "月份", y = "销售额")

## Monthly revenue per category, filled by the mean discount per unit
## The fill is MeanDiscount (discount amount divided by quantity), not the
## percentage column Discount_rate, so the legend is titled "平均\n折扣"
## (mean discount) instead of the former "折扣\n百分比" (discount percentage).
## NOTE(review): if the percentage was intended, map fill to Discount_rate and
## restore the old legend title instead.

## Each panel with its own axis ranges.
ggplot(classfiy_month, aes(月, SumrealPrice, fill = MeanDiscount)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~class, scales = "free") +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_fill_gradient(name = "平均\n折扣")

## All panels on shared axes.
ggplot(classfiy_month, aes(月, SumrealPrice, fill = MeanDiscount)) +
  theme_bw(base_family = "STKaiti") +
  geom_bar(stat = "identity", width = 0.8) +
  facet_wrap(~class) +
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "月份", y = "销售额", title = "2015年") +
  scale_y_continuous(labels = function(x) paste(x / 1e4, "万")) +
  scale_x_continuous(breaks = 1:12, labels = function(x) paste(x, "月")) +
  scale_fill_gradient(name = "平均\n折扣")

## Table used to study one brand's sales across the year ####
## Columns: 品牌 (brand), 月 (month), SumDiscount, SumrealPrice, SumAcount
brand_every <- group_by(tongluowan_2015, 品牌, 月)
brand_every <- summarise(
  brand_every,
  SumDiscount = sum(折扣金额),     # total discount amount
  SumrealPrice = sum(实际销售额),  # total actual sales revenue
  SumAcount = sum(数量)            # total quantity sold
)
## Highest-revenue brand/month combination first.
brand_every <- arrange(brand_every, desc(SumrealPrice))
head(brand_every)
## Source: local data frame [6 x 5]
## Groups: 品牌 [3]
## 
##       品牌    月 SumDiscount SumrealPrice SumAcount
##      <chr> <int>       <dbl>        <dbl>     <dbl>
## 1     兰蔻     5           0      5057200      8562
## 2     兰蔻     1           0      3330825      4554
## 3 雅诗兰黛     5           0      3326270      5766
## 4     兰蔻     9           0      2995100      4172
## 5     兰蔻     3           0      2925063      5276
## 6   CHANEL     5           0      2696830      4151
## Number of brand/month combinations (rows) and columns.
dim(brand_every)  
## [1] 1223    5
## Load helper functions from an external file.
## NOTE(review): brand_seal() is not defined in this script; presumably it comes
## from the sourced file -- confirm. The path is absolute and machine-specific.
source("/Users/daitu/数据分析/化妆品/need_function_hzp.R")

## Call brand_seal() on the per-brand monthly table for one brand, passing a
## colour.
brand_seal(brand_every,"兰蔻",color = "lightblue")

## Same call for another brand, without the colour argument.
brand_seal(brand_every,"植村秀")

# brand_seal(brand_every,"植村秀2")


## Table used to study one product's sales across the year ####
## Columns: 商品名称 (product name), 月 (month), SumDiscount, SumrealPrice, SumAcount
Product_every <- arrange(
  summarise(
    group_by(tongluowan_2015, 商品名称, 月),  # one group per product and month
    SumDiscount = sum(折扣金额),     # total discount amount
    SumrealPrice = sum(实际销售额),  # total actual sales revenue
    SumAcount = sum(数量)            # total quantity sold
  ),
  desc(SumrealPrice)
)
head(Product_every)
## Source: local data frame [6 x 5]
## Groups: 商品名称 [5]
## 
##           商品名称    月 SumDiscount SumrealPrice SumAcount
##              <chr> <int>       <dbl>        <dbl>     <dbl>
## 1       兰蔻大黑瓶     5           0       695520       644
## 2 兰芝雪凝自由礼盒     5           0       520200     26010
## 3   SISLEY 154062S     5           0       274560        22
## 4       兰蔻大黑瓶     1           0       273240       253
## 5     兰蔻超大黑瓶     5           0       260480       176
## 6 雅诗兰黛小棕瓶R1     9           0       259390       307
## Number of product/month combinations (rows) and columns.
dim(Product_every)  
## [1] 28046     5
## Call product_seal() on the per-product monthly table, once per product name.
## NOTE(review): product_seal() is not defined in this script; presumably it is
## provided by the helper file sourced earlier -- confirm.
## The first call also passes a colour.
product_seal(Product_every,"海蓝之谜活颜焕肤精华露",color = "lightblue")

product_seal(Product_every,"赫莲娜极致之美升华液")

product_seal(Product_every,"娇韵诗美胸紧实乳")

product_seal(Product_every,"兰蔻高清微整美容液")

product_seal(Product_every,"兰蔻黑膜6片")