자료 출처

https://www.kaggle.com/datasets/hosubjeong/bakery-sales

자료 전처리

자료 읽어오기

# Load the menu price list from the local bakery folder.
Price <- read.csv(file = './bakery/Bakery price.csv')
# Load the sales log and keep only its first 2420 rows.
# NOTE(review): presumably the rows after 2420 are not order records -
# confirm against the raw CSV.
Sales <- read.csv(file = './bakery/Bakery Sales.csv')[seq_len(2420), ]
# Inspect the column types of the price table.
str(Price)
## 'data.frame':    22 obs. of  2 variables:
##  $ Name : chr  "angbutter" "plain bread" "jam" "ice coffe" ...
##  $ price: chr  "4800" "3500" "1500" "4000" ...
# Inspect the column types of the trimmed sales table.
str(object = Sales)
## 'data.frame':    2420 obs. of  27 variables:
##  $ datetime          : chr  "2019-07-11 15:35" "2019-07-11 16:10" "2019-07-12 11:49" "2019-07-13 13:19" ...
##  $ day.of.week       : chr  "Thur" "Thur" "Fri" "Sat" ...
##  $ total             : int  23800 15800 58000 14800 15600 15800 15800 14000 19100 22300 ...
##  $ place             : chr  "" "" "" "" ...
##  $ angbutter         : int  1 1 NA 1 2 1 1 NA 2 1 ...
##  $ plain.bread       : int  NA NA NA 1 NA NA NA NA 1 1 ...
##  $ jam               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ americano         : int  1 NA NA NA NA NA NA NA NA NA ...
##  $ croissant         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ caffe.latte       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ tiramisu.croissant: int  3 1 14 NA 1 NA NA 2 1 1 ...
##  $ cacao.deep        : int  NA NA NA NA NA NA NA 1 NA 1 ...
##  $ pain.au.chocolat  : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ almond.croissant  : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ croque.monsieur   : logi  NA NA NA NA NA NA ...
##  $ mad.garlic        : logi  NA NA NA NA NA NA ...
##  $ milk.tea          : int  NA NA NA NA NA 1 NA NA NA NA ...
##  $ gateau.chocolat   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ pandoro           : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ cheese.cake       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ lemon.ade         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ orange.pound      : int  NA 1 NA NA NA NA 1 NA NA NA ...
##  $ wiener            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ vanila.latte      : int  1 NA NA 1 NA 1 1 NA NA NA ...
##  $ berry.ade         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ tiramisu          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ merinque.cookies  : int  NA NA NA NA NA NA NA NA NA NA ...

결측값 확인

library('VIM')
## 필요한 패키지를 로딩중입니다: colorspace
## 필요한 패키지를 로딩중입니다: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## 다음의 패키지를 부착합니다: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Plot missing-value counts (prop = FALSE) and the frequency of each
# missingness combination (numbers = TRUE) for the first four columns:
# datetime, day.of.week, total and place.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): `place` holds empty strings ("") rather than NA, so blank
# places are presumably not counted as missing here - confirm this is intended.
aggr(Sales[, 1:4], prop = FALSE, numbers = TRUE)

Price 자료 정리

# Show the product names exactly as they appear in the price table.
print(Price$Name)
##  [1] "angbutter"          "plain bread"        "jam"               
##  [4] "ice coffe"          "croissant"          "ice coffe latter"  
##  [7] "tiramisu croissant" "cacao deep"         "pain au chocolat"  
## [10] "almond croissant"   "ice milk tea"       "gateau chocolat"   
## [13] "pandoro"            "cheese cake"        "lemon ade"         
## [16] "orange pound"       "wiener"             "valina latte"      
## [19] "berry ade"          "tiramisu"           "merinque cookies"  
## [22] "delivery fee"
# Rename the 'valina latte' entry to 'vanila latte', the spelling used by the
# corresponding sales column.
Price$Name <- replace(
  Price$Name,
  Price$Name == 'valina latte',
  'vanila latte'
)

판매 기록(Sales)에 가격 자료(Price)와 다른 상품 이름 존재 - 가격 자료에 임의로 추론한 가격의 데이터 추가

# Product names known to the price table.
menus <- Price[['Name']]
# Product columns of the sales table: every column after the first four
# (datetime, day.of.week, total, place).
saled_menus <- colnames(Sales)[-seq_len(4)]
cat('\n판매기록 상의 상품 명 :', '\n', sep = ' ')
## 
## 판매기록 상의 상품 명 :
# Show the product column names taken from the sales table.
print(saled_menus)
##  [1] "angbutter"          "plain.bread"        "jam"               
##  [4] "americano"          "croissant"          "caffe.latte"       
##  [7] "tiramisu.croissant" "cacao.deep"         "pain.au.chocolat"  
## [10] "almond.croissant"   "croque.monsieur"    "mad.garlic"        
## [13] "milk.tea"           "gateau.chocolat"    "pandoro"           
## [16] "cheese.cake"        "lemon.ade"          "orange.pound"      
## [19] "wiener"             "vanila.latte"       "berry.ade"         
## [22] "tiramisu"           "merinque.cookies"
# Print the heading for the list of sales names that are absent from Price.
cat('\nPrice 자료와 다른 상품 명 :\n', sep = '')
## 
## Price 자료와 다른 상품 명 :
# List the sales-table products that have no entry in the price table.
# The sales column names use dots where the price names use spaces, so the
# dots are converted to spaces before comparing; comparing the raw names
# reports every multi-word product as missing even when Price contains it.
setdiff(gsub('.', ' ', saled_menus, fixed = TRUE), menus)
## [1] "americano"       "caffe latte"     "croque monsieur" "mad garlic"     
## [5] "milk tea"
# Replace the dots in the product column names with spaces and write the
# cleaned names back onto the sales table.
saled_menus <- gsub(pattern = '[.]', replacement = ' ', x = saled_menus)
names(Sales)[-seq_len(4)] <- saled_menus

# Append five products to the price table in one step. Per the section
# heading these prices are estimates; they are written as strings because
# Price$price is a character column.
Price <- rbind(
  Price,
  data.frame(
    Name = c('americano', 'caffe latte', 'croque monsieur', 'mad garlic',
             'milk tea'),
    price = c('4000', '4500', '4000', '3000', '4500'),
    stringsAsFactors = FALSE
  )
)
cat('\n판매기록 상의 상품 명 추가후 Price :\n')
## 
## 판매기록 상의 상품 명 추가후 Price :
# Show the price-table product names after the extra rows were appended.
print(Price$Name)
##  [1] "angbutter"          "plain bread"        "jam"               
##  [4] "ice coffe"          "croissant"          "ice coffe latter"  
##  [7] "tiramisu croissant" "cacao deep"         "pain au chocolat"  
## [10] "almond croissant"   "ice milk tea"       "gateau chocolat"   
## [13] "pandoro"            "cheese cake"        "lemon ade"         
## [16] "orange pound"       "wiener"             "vanila latte"      
## [19] "berry ade"          "tiramisu"           "merinque cookies"  
## [22] "delivery fee"       "americano"          "caffe latte"       
## [25] "croque monsieur"    "mad garlic"         "milk tea"

이상치 탐색

# Box plot of the order totals, used to look for extreme values.
boxplot(x = Sales[['total']])

# Treat the order(s) with the largest total as outliers: keep a copy in
# `outlier`, drop them from Sales, then display the removed row(s).
# The mask is built once so both subsets use the same rows, and it is NA-safe:
# a missing total never matches, so it cannot produce all-NA rows or be
# dropped by accident.
is_max_total <- !is.na(Sales$total) &
  Sales$total == max(Sales$total, na.rm = TRUE)
outlier <- Sales[is_max_total, ]
Sales <- Sales[!is_max_total, ]
outlier
##            datetime day.of.week   total place angbutter plain bread jam
## 90 2019-07-26 11:36         Fri 1293000               6           5  NA
##    americano croissant caffe latte tiramisu croissant cacao deep
## 90        NA         5          NA                 NA         NA
##    pain au chocolat almond croissant croque monsieur mad garlic milk tea
## 90                5                5              NA         NA       NA
##    gateau chocolat pandoro cheese cake lemon ade orange pound wiener
## 90              NA       5          NA        NA           NA     NA
##    vanila latte berry ade tiramisu merinque cookies
## 90           NA        NA       NA               NA
# Redraw the box plot of order totals after the largest order was removed.
boxplot(x = Sales[['total']])

시각화

요일별 주문횟수

## 
##  Fri  Mon  Sat  Sun Thur Tues  Wed 
##  333  332  452  554  385    3  360

요일별 판매 금액 평균

##   day.of.week        x
## 1         Fri 20674.77
## 2         Mon 20791.57
## 3         Sat 20128.32
## 4         Sun 20346.03
## 5        Thur 21144.68
## 6        Tues 18666.67
## 7         Wed 21085.00

월별 매출 합계

상품별 판매 수

##          angbutter        plain bread                jam          americano 
##               3223               1023                249                513 
##          croissant        caffe latte tiramisu croissant         cacao deep 
##               1044                214                945                364 
##   pain au chocolat   almond croissant    croque monsieur         mad garlic 
##                721                230                  0                  0 
##           milk tea    gateau chocolat            pandoro        cheese cake 
##                160                210                389                 92 
##          lemon ade       orange pound             wiener       vanila latte 
##                 38                566                476                241 
##          berry ade           tiramisu   merinque cookies 
##                 55                  7                 49

음료, 제빵/ 제과 류 구분

커피