1.1 데이터 불러오기

library(readxl)
order <- read_excel("~/Desktop/Order1501.xlsx")

1.2 데이터 정제

order2=subset(order,ItemType=="TAN") ## TAN 데이터만 남기기
order3=subset(order2, is.na(order2$CancelCode)) ## CancelCode NA 남기기

1.3 정제된 데이터 확인

head(order3) ; dim(order3)
## # A tibble: 6 x 17
##   OrderDate           SapID OrderNo    SoldToParty FdealerName    ItemType
##   <dttm>              <chr> <chr>      <chr>       <chr>          <chr>   
## 1 2015-01-05 00:00:00 15699 ORDERW_00~ 1006063     "\u5218\u4f1a~ TAN     
## 2 2015-01-05 00:00:00 15699 ORDERW_00~ 1006063     "\u5218\u4f1a~ TAN     
## 3 2015-01-05 00:00:00 15699 ORDERW_00~ 1006063     "\u5218\u4f1a~ TAN     
## 4 2015-01-05 00:00:00 15699 ORDERW_00~ 1006063     "\u5218\u4f1a~ TAN     
## 5 2015-01-05 00:00:00 15699 ORDERW_00~ 1006063     "\u5218\u4f1a~ TAN     
## 6 2015-01-05 00:00:00 15699 ORDERW_00~ 1006063     "\u5218\u4f1a~ TAN     
## # ... with 11 more variables: ItemNo <chr>, ItemName <chr>,
## #   MarketPriceAmt <dbl>, RDC <chr>, CancelCode <chr>,
## #   DealerPriceAmt <dbl>, C53Amt <dbl>, DeliveryStatus <chr>,
## #   OrderStatus <chr>, Code <chr>, OrderQty <dbl>
## [1] 165064     17
dim(order2)-dim(order3)
## [1] 2562    0

1.4 새로운 변수 생성

market=aggregate(MarketPriceAmt~SapID,order3,sum) # 대리점별 소매가격 합 
orderqty=aggregate(OrderQty~SapID,order3,sum) # 대리점별 주문량 합
dealer=aggregate(DealerPriceAmt~SapID,order3,sum) # 대리점별 도매가격 합  

margin=(market[2]-dealer[2])/100 # 대리점별 margin 변수 생성
names(margin)="margin"
head(margin)
##    margin
## 1 148.986
## 2  26.367
## 3 146.542
## 4 128.160
## 5 147.166
## 6 150.045
sales=market[2]*orderqty[2] # 대리점별 sales 변수 생성
names(sales)="sales"
head(sales)
##     sales
## 1 1360000
## 2  162690
## 3 1960000
## 4  160000
## 5 2000000
## 6 1200360
marketprice=market[2]/orderqty[2] # 소매가 단위 수정 
dealerprice=dealer[2]/orderqty[2] # 도매가 단위 수정
newmargin=marketprice-dealerprice # 마진 단위 수정 
names(newmargin)="newmargin"
head(newmargin)
##    newmargin
## 1  219.09706
## 2   90.92069
## 3  149.53265
## 4 1602.00000
## 5  147.16600
## 6  250.07500

1.5 데이터 합치기

data=cbind(marketprice,orderqty)
data2=cbind(data,newmargin)
head(data2,20)
##    MarketPriceAmt SapID OrderQty  newmargin
## 1        294.1176 15699       68  219.09706
## 2        193.4483 15700       29   90.92069
## 3        204.0816 15701       98  149.53265
## 4       2500.0000 15702        8 1602.00000
## 5        200.0000 15703      100  147.16600
## 6        333.4333 15704       60  250.07500
## 7        237.5297 15706      421  175.67696
## 8        205.1282 15707      195  152.34615
## 9        229.3578 15709      436  165.57798
## 10       311.5000 15710       80  232.40625
## 11       219.7802 15711      182  164.83516
## 12       213.5231 15712      281  157.21851
## 13       254.4733 15713      393  190.16692
## 14       224.7191 15714       89  167.96966
## 15       246.9136 15715       81  185.18519
## 16       208.3333 15716       96  156.25000
## 17       173.9652 15717      115  129.71652
## 18       301.2349 15718      332  215.78855
## 19       204.1020 15719       98  150.75000
## 20       235.7647 15720      170  172.49529
summary(data2)
##  MarketPriceAmt       SapID              OrderQty        newmargin     
##  Min.   :   50.0   Length:6568        Min.   :   1.0   Min.   :  23.5  
##  1st Qu.:  192.3   Class :character   1st Qu.:  55.0   1st Qu.: 109.3  
##  Median :  241.0   Mode  :character   Median :  91.0   Median : 149.2  
##  Mean   :  373.9                      Mean   : 110.6   Mean   : 225.1  
##  3rd Qu.:  328.6                      3rd Qu.: 135.0   3rd Qu.: 208.9  
##  Max.   :11100.0                      Max.   :1515.0   Max.   :5217.0

1.6 회귀분석 시행

lm2=lm(newmargin~OrderQty,data=data2)
summary(lm2)
## 
## Call:
## lm(formula = newmargin ~ OrderQty, data = data2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -290.2 -121.3  -67.6    6.3 4904.1 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 314.48905    4.78335   65.75   <2e-16 ***
## OrderQty     -0.80838    0.03185  -25.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 262.2 on 6566 degrees of freedom
## Multiple R-squared:  0.08936,    Adjusted R-squared:  0.08923 
## F-statistic: 644.4 on 1 and 6566 DF,  p-value: < 2.2e-16

1.7 예측

printsam=print(sample(x = 1:200,size = 10))
##  [1] 170 156 111  24  72 197 102 171  49   3
new.data=data.frame(OrderQty=printsam)
pred=predict(lm2,newdata = new.data)
pred2=predict(lm2,newdata = new.data,interval = "confidence")
data.frame(orderqty=printsam,marginpred=pred)
##    orderqty marginpred
## 1       170   177.0645
## 2       156   188.3818
## 3       111   224.7589
## 4        24   295.0879
## 5        72   256.2857
## 6       197   155.2383
## 7       102   232.0343
## 8       171   176.2562
## 9        49   274.8785
## 10        3   312.0639
data.frame(orderqty=printsam,marginpred=pred2)
##    orderqty marginpred.fit marginpred.lwr marginpred.upr
## 1       170       177.0645       169.7194       184.4097
## 2       156       188.3818       181.4365       195.3272
## 3       111       224.7589       218.4170       231.1008
## 4        24       295.0879       286.7528       303.4231
## 5        72       256.2857       249.5006       263.0708
## 6       197       155.2383       146.9145       163.5621
## 7       102       232.0343       225.6696       238.3991
## 8       171       176.2562       168.8793       183.6330
## 9        49       274.8785       267.4605       282.2965
## 10        3       312.0639       302.8241       321.3037

1.8 시각화

par(mfrow=c(2,2))
plot(lm2)

1.9 분석 한계점

summary(data2)
##  MarketPriceAmt       SapID              OrderQty        newmargin     
##  Min.   :   50.0   Length:6568        Min.   :   1.0   Min.   :  23.5  
##  1st Qu.:  192.3   Class :character   1st Qu.:  55.0   1st Qu.: 109.3  
##  Median :  241.0   Mode  :character   Median :  91.0   Median : 149.2  
##  Mean   :  373.9                      Mean   : 110.6   Mean   : 225.1  
##  3rd Qu.:  328.6                      3rd Qu.: 135.0   3rd Qu.: 208.9  
##  Max.   :11100.0                      Max.   :1515.0   Max.   :5217.0
boxplot(data2$MarketPriceAmt)

——————-아래부터는 수정 필요 ————————-

margina=margin/orderqty[2]
marketa=market[2]/orderqty[2]
data4=cbind(margina=margina,marketa)
head(data4)
##       margin MarketPriceAmt
## 1  2.1909706       294.1176
## 2  0.9092069       193.4483
## 3  1.4953265       204.0816
## 4 16.0200000      2500.0000
## 5  1.4716600       200.0000
## 6  2.5007500       333.4333
lm3=lm(data2$OrderQty~data4$margin)
summary(lm3)
## 
## Call:
## lm(formula = data2$OrderQty ~ data4$margin)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -131.92  -48.09  -24.08   16.55 1398.50 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  135.5187     1.5466   87.63   <2e-16 ***
## data4$margin -11.0548     0.4355  -25.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.96 on 6566 degrees of freedom
## Multiple R-squared:  0.08936,    Adjusted R-squared:  0.08923 
## F-statistic: 644.4 on 1 and 6566 DF,  p-value: < 2.2e-16
plot(data4$margin,data2$OrderQty)
abline(coef(lm(data4$margin ~ data2$OrderQty)))